/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.math.linearalgebra.VMath;
import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.DoubleDynamicHistogram;
import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.DoubleHistogram;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.TooManyRetriesException;
import de.lmu.ifi.dbs.elki.utilities.io.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import net.jafama.FastMath;

@Reference(authors="R. Haralick, R. Harpaz", title="Linear manifold clustering in high dimensional spaces by stochastic search", booktitle="Pattern Recognition volume 40, Issue 10", url="https://doi.org/10.1016/j.patcog.2007.01.020", bibkey="DBLP:journals/pr/HaralickH07")
public class LMCLUS
extends AbstractAlgorithm<Clustering<Model>>
implements ClusteringAlgorithm<Clustering<Model>> {
    private static final Logging LOG = Logging.getLogger(LMCLUS.class);
    private static final double LOG_NOT_FROM_ONE_CLUSTER_PROBABILITY = Math.log(0.2);
    private static final int BINS = 50;
    private final double sensitivityThreshold;
    private final int maxLMDim;
    private final int minsize;
    private final int samplingLevel;
    private final RandomFactory rnd;

    public LMCLUS(int maxdim, int minsize, int samplingLevel, double sensitivityThreshold, RandomFactory rnd) {
        this.maxLMDim = maxdim;
        this.minsize = minsize;
        this.samplingLevel = samplingLevel;
        this.sensitivityThreshold = sensitivityThreshold;
        this.rnd = rnd;
    }

    public Clustering<Model> run(Database database, Relation<NumberVector> relation) {
        Clustering<Model> ret = new Clustering<Model>("LMCLUS Clustering", "lmclus-clustering");
        FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Clustered objects", relation.size(), LOG) : null;
        IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Clusters found", LOG) : null;
        HashSetModifiableDBIDs unclustered = DBIDUtil.newHashSet(relation.getDBIDs());
        Random r = this.rnd.getSingleThreadedRandom();
        int maxdim = Math.min(this.maxLMDim, RelationUtil.dimensionality(relation));
        int cnum = 0;
        while (unclustered.size() > this.minsize) {
            ModifiableDBIDs current = unclustered;
            int lmDim = 1;
            block1: for (int k = 1; k <= maxdim; ++k) {
                while (true) {
                    Separation separation = this.findSeparation(relation, current, k, r);
                    if (separation.goodness <= this.sensitivityThreshold) continue block1;
                    ArrayModifiableDBIDs subset = DBIDUtil.newArray(current.size());
                    DBIDIter iter = current.iter();
                    while (iter.valid()) {
                        if (this.deviation(VMath.minusEquals(relation.get(iter).toArray(), separation.originV), separation.basis) < separation.threshold) {
                            subset.add(iter);
                        }
                        iter.advance();
                    }
                    if (subset.size() < this.minsize) continue block1;
                    current = subset;
                    lmDim = k;
                }
            }
            if (current.size() < this.minsize || current == unclustered) break;
            Cluster cluster = new Cluster(current);
            cluster.setName("Cluster_" + lmDim + "d_" + cnum);
            ++cnum;
            ret.addToplevelCluster(cluster);
            unclustered.removeDBIDs(current);
            if (progress != null) {
                progress.setProcessed(relation.size() - unclustered.size(), LOG);
            }
            if (cprogress == null) continue;
            cprogress.setProcessed(cnum, LOG);
        }
        if (unclustered.size() > 0) {
            ret.addToplevelCluster(new Cluster((DBIDs)unclustered, true));
        }
        if (progress != null) {
            progress.setProcessed(relation.size(), LOG);
            progress.ensureCompleted(LOG);
        }
        LOG.setCompleted(cprogress);
        return ret;
    }

    private double deviation(double[] delta, double[][] beta) {
        double b;
        double a = VMath.squareSum(delta);
        return a > (b = VMath.squareSum(VMath.transposeTimes(beta, delta))) ? FastMath.sqrt(a - b) : 0.0;
    }

    private Separation findSeparation(Relation<NumberVector> relation, DBIDs currentids, int dimension, Random r) {
        Separation separation = new Separation();
        int samples = (int)Math.min(LOG_NOT_FROM_ONE_CLUSTER_PROBABILITY / FastMath.log1p(-FastMath.powFast(this.samplingLevel, -dimension)), (double)currentids.size());
        int remaining_retries = 100;
        for (int i = 1; i <= samples; ++i) {
            ModifiableDBIDs sample = DBIDUtil.randomSample(currentids, dimension + 1, r);
            DBIDIter iter = sample.iter();
            double[] originV = relation.get(iter).toArray();
            iter.advance();
            ArrayList<double[]> vectors = new ArrayList<double[]>(sample.size() - 1);
            while (iter.valid()) {
                double[] vec = relation.get(iter).toArray();
                vectors.add(VMath.minusEquals(vec, originV));
                iter.advance();
            }
            double[][] basis = this.generateOrthonormalBasis(vectors);
            if (basis == null) {
                --i;
                if (--remaining_retries >= 0) continue;
                throw new TooManyRetriesException("Too many retries in sampling, and always a linear dependant data set.");
            }
            DoubleDynamicHistogram histogram = new DoubleDynamicHistogram(50);
            double w = 1.0 / (double)currentids.size();
            DBIDIter iter2 = currentids.iter();
            while (iter2.valid()) {
                if (!sample.contains(iter2)) {
                    double[] vec = VMath.minusEquals(relation.get(iter2).toArray(), originV);
                    double distance = this.deviation(vec, basis);
                    histogram.increment(distance, w);
                }
                iter2.advance();
            }
            double[] th = this.findAndEvaluateThreshold(histogram);
            if (!(th[1] > separation.goodness)) continue;
            separation.goodness = th[1];
            separation.threshold = th[0];
            separation.originV = originV;
            separation.basis = basis;
        }
        return separation;
    }

    private double[][] generateOrthonormalBasis(List<double[]> vectors) {
        double[] first = VMath.normalizeEquals(vectors.get(0));
        double[][] ret = new double[first.length][vectors.size()];
        VMath.setCol(ret, 0, first);
        for (int i = 1; i < vectors.size(); ++i) {
            double[] v_i = vectors.get(i);
            double[] u_i = (double[])v_i.clone();
            for (int j = 0; j < i; ++j) {
                double[] v_j = VMath.getCol(ret, j);
                double f = VMath.transposeTimes(v_i, v_j);
                if (Double.isNaN(f)) {
                    if (LOG.isDebuggingFine()) {
                        LOG.debugFine("Zero vector encountered? " + FormatUtil.format(v_j));
                    }
                    return null;
                }
                VMath.minusTimesEquals(u_i, v_j, f);
            }
            double len_u_i = VMath.euclideanLength(u_i);
            if (len_u_i < Double.MIN_NORMAL) {
                if (LOG.isDebuggingFine()) {
                    LOG.debugFine("Points not independent - no orthonormalization.");
                }
                return null;
            }
            VMath.timesEquals(u_i, 1.0 / len_u_i);
            VMath.setCol(ret, i, u_i);
        }
        return ret;
    }

    private double[] findAndEvaluateThreshold(DoubleDynamicHistogram histogram) {
        int n = histogram.getNumBins();
        double[] p1 = new double[n];
        double[] p2 = new double[n];
        double[] mu1 = new double[n];
        double[] mu2 = new double[n];
        double[] sigma1 = new double[n];
        double[] sigma2 = new double[n];
        double[] jt = new double[n];
        MeanVariance mv = new MeanVariance();
        DoubleHistogram.Iter forward = histogram.iter();
        int i = 0;
        while (forward.valid()) {
            p1[i] = forward.getValue() + (i > 0 ? p1[i - 1] : 0.0);
            mv.put(i, forward.getValue());
            mu1[i] = mv.getMean();
            sigma1[i] = mv.getNaiveStddev();
            ++i;
            forward.advance();
        }
        mv = new MeanVariance();
        DoubleHistogram.Iter backwards = histogram.iter();
        backwards.seek(histogram.getNumBins() - 1);
        int j = n - 1;
        while (backwards.valid()) {
            p2[j] = backwards.getValue() + (j + 1 < n ? p2[j + 1] : 0.0);
            mv.put(j, backwards.getValue());
            mu2[j] = mv.getMean();
            sigma2[j] = mv.getNaiveStddev();
            --j;
            backwards.retract();
        }
        for (int i2 = 0; i2 < n; ++i2) {
            jt[i2] = 1.0 + 2.0 * (p1[i2] * (FastMath.log(sigma1[i2]) - FastMath.log(p1[i2])) + p2[i2] * (FastMath.log(sigma2[i2]) - FastMath.log(p2[i2])));
        }
        int bestpos = -1;
        double bestgoodness = Double.NEGATIVE_INFINITY;
        double devPrev = jt[1] - jt[0];
        for (int i3 = 1; i3 < jt.length - 1; ++i3) {
            double devCur = jt[i3 + 1] - jt[i3];
            if (devCur >= 0.0 && devPrev <= 0.0) {
                double goodness;
                int j2;
                double lowestMaxima = Double.POSITIVE_INFINITY;
                for (j2 = i3 - 1; j2 > 0; --j2) {
                    if (!(jt[j2 - 1] < jt[j2])) continue;
                    lowestMaxima = Math.min(lowestMaxima, jt[j2]);
                    break;
                }
                for (j2 = i3 + 1; j2 < n - 2; ++j2) {
                    if (!(jt[j2 + 1] < jt[j2])) continue;
                    lowestMaxima = Math.min(lowestMaxima, jt[j2]);
                    break;
                }
                double localDepth = lowestMaxima - jt[i3];
                double mud = mu1[i3] - mu2[i3];
                double discriminability = mud * mud / (sigma1[i3] * sigma1[i3] + sigma2[i3] * sigma2[i3]);
                if (Double.isNaN(discriminability)) {
                    discriminability = -1.0;
                }
                if ((goodness = localDepth * discriminability) > bestgoodness) {
                    bestgoodness = goodness;
                    bestpos = i3;
                }
            }
            devPrev = devCur;
        }
        DoubleHistogram.Iter iter = histogram.iter();
        iter.seek(bestpos);
        return new double[]{iter.getRight(), bestgoodness};
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
    }

    public static class Parameterizer
    extends AbstractParameterizer {
        public static final OptionID MAXDIM_ID = new OptionID("lmclus.maxdim", "Maximum linear manifold dimension to search.");
        public static final OptionID MINSIZE_ID = new OptionID("lmclus.minsize", "Minimum cluster size to allow.");
        public static final OptionID SAMPLINGL_ID = new OptionID("lmclus.sampling-level", "A number used to determine how many samples are taken in each search.");
        public static final OptionID THRESHOLD_ID = new OptionID("lmclus.threshold", "Threshold to determine if a cluster was found.");
        public static final OptionID RANDOM_ID = new OptionID("lmclus.seed", "Random generator seed.");
        private int maxdim = Integer.MAX_VALUE;
        private int minsize;
        private int samplingLevel;
        private double threshold;
        private RandomFactory rnd;

        @Override
        protected void makeOptions(Parameterization config) {
            RandomParameter rndP;
            DoubleParameter sensivityThresholdP;
            IntParameter samplingLevelP;
            IntParameter minsizeP;
            super.makeOptions(config);
            IntParameter maxLMDimP = (IntParameter)((IntParameter)new IntParameter(MAXDIM_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT)).setOptional(true);
            if (config.grab(maxLMDimP)) {
                this.maxdim = (Integer)maxLMDimP.getValue();
            }
            if (config.grab(minsizeP = (IntParameter)new IntParameter(MINSIZE_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT))) {
                this.minsize = (Integer)minsizeP.getValue();
            }
            if (config.grab(samplingLevelP = new IntParameter(SAMPLINGL_ID, 100))) {
                this.samplingLevel = (Integer)samplingLevelP.getValue();
            }
            if (config.grab(sensivityThresholdP = new DoubleParameter(THRESHOLD_ID))) {
                this.threshold = (Double)sensivityThresholdP.getValue();
            }
            if (config.grab(rndP = new RandomParameter(RANDOM_ID))) {
                this.rnd = (RandomFactory)rndP.getValue();
            }
        }

        @Override
        protected LMCLUS makeInstance() {
            return new LMCLUS(this.maxdim, this.minsize, this.samplingLevel, this.threshold, this.rnd);
        }
    }

    private static class Separation {
        double goodness = Double.NEGATIVE_INFINITY;
        double threshold = Double.NEGATIVE_INFINITY;
        double[][] basis = null;
        double[] originV = null;

        private Separation() {
        }
    }
}

