/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.statistics;

import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.result.CollectionResult;
import de.lmu.ifi.dbs.elki.result.HistogramResult;
import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.AbstractObjDynamicHistogram;
import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.ObjHistogram;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.EmptyDataException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.TreeSet;
import net.jafama.FastMath;

@Title(value="Distance Histogram")
@Description(value="Computes a histogram over the distances occurring in the data set.")
public class DistanceStatisticsWithClasses<O>
extends AbstractDistanceBasedAlgorithm<O, CollectionResult<double[]>> {
    private static final Logging LOG = Logging.getLogger(DistanceStatisticsWithClasses.class);
    protected int numbin;
    protected boolean sampling = false;
    protected boolean exact = false;

    public DistanceStatisticsWithClasses(DistanceFunction<? super O> distanceFunction, int numbins, boolean exact, boolean sampling) {
        super(distanceFunction);
        this.numbin = numbins;
        this.exact = exact;
        this.sampling = sampling;
    }

    @Override
    public HistogramResult run(Database database) {
        Relation relation = database.getRelation(this.getInputTypeRestriction()[0], new Object[0]);
        DistanceQuery distFunc = database.getDistanceQuery(relation, this.getDistanceFunction(), new Object[0]);
        StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Distance statistics", 2) : null;
        DoubleMinMax gminmax = new DoubleMinMax();
        List split = ((Clustering)new ByLabelOrAllInOneClustering().run(database)).getAllClusters();
        DoubleMinMax giminmax = new DoubleMinMax();
        DoubleMinMax gominmax = new DoubleMinMax();
        MeanVariance mimin = new MeanVariance();
        MeanVariance mimax = new MeanVariance();
        MeanVariance midif = new MeanVariance();
        MeanVariance momin = new MeanVariance();
        MeanVariance momax = new MeanVariance();
        MeanVariance modif = new MeanVariance();
        LOG.beginStep(stepprog, 1, "Prepare histogram.");
        if (this.exact) {
            gminmax = this.exactMinMax(relation, distFunc);
        } else if (this.sampling) {
            gminmax = this.sampleMinMax(relation, distFunc);
        }
        ObjHistogram histogram = gminmax.isValid() ? new ObjHistogram<long[]>(this.numbin, gminmax.getMin(), gminmax.getMax(), () -> new long[2]) : new AbstractObjDynamicHistogram<long[]>(this.numbin){

            @Override
            protected long[] downsample(Object[] data, int start, int end, int size) {
                long[] ret = new long[2];
                for (int i = start; i < end; ++i) {
                    long[] existing = (long[])data[i];
                    if (existing == null) continue;
                    for (int c = 0; c < 2; ++c) {
                        int n = c;
                        ret[n] = ret[n] + existing[c];
                    }
                }
                return ret;
            }

            @Override
            protected long[] aggregate(long[] first, long[] second) {
                for (int c = 0; c < 2; ++c) {
                    int n = c;
                    first[n] = first[n] + second[c];
                }
                return first;
            }

            @Override
            protected long[] cloneForCache(long[] data) {
                return (long[])data.clone();
            }

            @Override
            protected long[] makeObject() {
                return new long[2];
            }
        };
        LOG.beginStep(stepprog, 2, "Build histogram.");
        FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Distance computations", relation.size(), LOG) : null;
        for (Cluster cluster : split) {
            DBIDIter id1 = cluster.getIDs().iter();
            while (id1.valid()) {
                DoubleMinMax iminmax = new DoubleMinMax();
                DBIDIter iter2 = cluster.getIDs().iter();
                while (iter2.valid()) {
                    if (!DBIDUtil.equal(id1, iter2)) {
                        double d = distFunc.distance((DBIDRef)id1, (DBIDRef)iter2);
                        long[] lArray = (long[])histogram.get(d);
                        lArray[0] = lArray[0] + 1L;
                        iminmax.put(d);
                    }
                    iter2.advance();
                }
                mimin.put(iminmax.getMin());
                mimax.put(iminmax.getMax());
                midif.put(iminmax.getDiff());
                giminmax.put(iminmax.getMin());
                giminmax.put(iminmax.getMax());
                DoubleMinMax ominmax = new DoubleMinMax();
                for (Cluster cluster2 : split) {
                    if (cluster2 == cluster) continue;
                    DBIDIter iter22 = cluster2.getIDs().iter();
                    while (iter22.valid()) {
                        if (!DBIDUtil.equal(id1, iter22)) {
                            double d = distFunc.distance((DBIDRef)id1, (DBIDRef)iter22);
                            long[] lArray = (long[])histogram.get(d);
                            lArray[1] = lArray[1] + 1L;
                            ominmax.put(d);
                        }
                        iter22.advance();
                    }
                }
                momin.put(ominmax.getMin());
                momax.put(ominmax.getMax());
                modif.put(ominmax.getDiff());
                gominmax.put(ominmax.getMin());
                gominmax.put(ominmax.getMax());
                LOG.incrementProcessed(progress);
                id1.advance();
            }
        }
        LOG.ensureCompleted(progress);
        gminmax.put(gominmax);
        LOG.setCompleted(stepprog);
        long inum = 0L;
        long onum = 0L;
        ObjHistogram.Iter iter = histogram.iter();
        while (iter.valid()) {
            inum += ((long[])iter.getValue())[0];
            onum += ((long[])iter.getValue())[1];
            iter.advance();
        }
        long bnum = inum + onum;
        ArrayList<double[]> arrayList = new ArrayList<double[]>(this.numbin);
        ObjHistogram.Iter iter2 = histogram.iter();
        while (iter2.valid()) {
            long[] value = (long[])iter2.getValue();
            double icof = inum == 0L ? 0.0 : (double)value[0] / (double)inum / histogram.getBinsize();
            double icaf = (double)value[0] / (double)bnum / histogram.getBinsize();
            double ocof = onum == 0L ? 0.0 : (double)value[1] / (double)onum / histogram.getBinsize();
            double ocaf = (double)value[1] / (double)bnum / histogram.getBinsize();
            arrayList.add(new double[]{iter2.getCenter(), icof, icaf, ocof, ocaf});
            iter2.advance();
        }
        HistogramResult result = new HistogramResult("Distance Histogram", "distance-histogram", (Collection<double[]>)arrayList);
        result.addHeader("Absolute minimum distance (abs): " + gminmax.getMin());
        result.addHeader("Absolute maximum distance (abs): " + gminmax.getMax());
        result.addHeader("In-Cluster minimum distance (abs, avg, stddev): " + giminmax.getMin() + " " + mimin.getMean() + " " + mimin.getSampleStddev());
        result.addHeader("In-Cluster maximum distance (abs, avg, stddev): " + giminmax.getMax() + " " + mimax.getMean() + " " + mimax.getSampleStddev());
        result.addHeader("Other-Cluster minimum distance (abs, avg, stddev): " + gominmax.getMin() + " " + momin.getMean() + " " + momin.getSampleStddev());
        result.addHeader("Other-Cluster maximum distance (abs, avg, stddev): " + gominmax.getMax() + " " + momax.getMean() + " " + momax.getSampleStddev());
        result.addHeader("Column description: bin center, in-cluster only frequency, in-cluster all frequency, other-cluster only frequency, other cluster all frequency");
        result.addHeader("In-cluster value count: " + inum + " other cluster value count: " + onum);
        return result;
    }

    private DoubleMinMax sampleMinMax(Relation<O> relation, DistanceQuery<O> distFunc) {
        int size = relation.size();
        Random rnd = new Random();
        int k = (int)Math.max(25.0, FastMath.pow(relation.size(), 0.2));
        TreeSet<DoubleDBIDPair> minhotset = new TreeSet<DoubleDBIDPair>();
        TreeSet<DoubleDBIDPair> maxhotset = new TreeSet<DoubleDBIDPair>(Collections.reverseOrder());
        int randomsize = (int)Math.max(25.0, FastMath.pow(relation.size(), 0.2));
        double rprob = (double)randomsize / (double)size;
        ArrayModifiableDBIDs randomset = DBIDUtil.newArray(randomsize);
        DBIDIter iter = relation.iterDBIDs();
        if (!iter.valid()) {
            throw new EmptyDataException();
        }
        DBID firstid = DBIDUtil.deref(iter);
        iter.advance();
        minhotset.add(DBIDUtil.newPair(Double.MAX_VALUE, (DBIDRef)firstid));
        maxhotset.add(DBIDUtil.newPair(Double.MIN_VALUE, (DBIDRef)firstid));
        while (iter.valid()) {
            ArrayList<DoubleDBIDPair> np = new ArrayList<DoubleDBIDPair>(k * 2 + randomsize * 2);
            for (DoubleDBIDPair pair : minhotset) {
                if (DBIDUtil.equal(iter, pair)) continue;
                double d = distFunc.distance((DBIDRef)iter, (DBIDRef)pair);
                np.add(DBIDUtil.newPair(d, (DBIDRef)iter));
                np.add(DBIDUtil.newPair(d, (DBIDRef)pair));
            }
            DBIDArrayMIter iter2 = randomset.iter();
            while (iter2.valid()) {
                double d = distFunc.distance((DBIDRef)iter, (DBIDRef)iter2);
                np.add(DBIDUtil.newPair(d, (DBIDRef)iter));
                np.add(DBIDUtil.newPair(d, (DBIDRef)iter2));
                iter2.advance();
            }
            minhotset.addAll(np);
            DistanceStatisticsWithClasses.shrinkHeap(minhotset, k);
            ArrayList<DoubleDBIDPair> np2 = new ArrayList<DoubleDBIDPair>(k * 2 + randomsize * 2);
            for (DoubleDBIDPair pair : minhotset) {
                if (DBIDUtil.equal(iter, pair)) continue;
                double d = distFunc.distance((DBIDRef)iter, (DBIDRef)pair);
                np2.add(DBIDUtil.newPair(d, (DBIDRef)iter));
                np2.add(DBIDUtil.newPair(d, (DBIDRef)pair));
            }
            DBIDArrayMIter iter22 = randomset.iter();
            while (iter22.valid()) {
                double d = distFunc.distance((DBIDRef)iter, (DBIDRef)iter22);
                np.add(DBIDUtil.newPair(d, (DBIDRef)iter));
                np.add(DBIDUtil.newPair(d, (DBIDRef)iter22));
                iter22.advance();
            }
            maxhotset.addAll(np2);
            DistanceStatisticsWithClasses.shrinkHeap(maxhotset, k);
            if (randomset.size() < randomsize) {
                randomset.add(iter);
            } else if (rnd.nextDouble() < rprob) {
                randomset.set((int)Math.floor(rnd.nextDouble() * (double)randomsize), iter);
            }
            iter.advance();
        }
        return new DoubleMinMax(((DoubleDBIDPair)minhotset.first()).doubleValue(), ((DoubleDBIDPair)maxhotset.first()).doubleValue());
    }

    private DoubleMinMax exactMinMax(Relation<O> relation, DistanceQuery<O> distFunc) {
        FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Exact fitting distance computations", relation.size(), LOG) : null;
        DoubleMinMax minmax = new DoubleMinMax();
        DBIDIter iditer = relation.iterDBIDs();
        while (iditer.valid()) {
            DBIDIter iditer2 = relation.iterDBIDs();
            while (iditer2.valid()) {
                if (!DBIDUtil.equal(iditer, iditer2)) {
                    double d = distFunc.distance((DBIDRef)iditer, (DBIDRef)iditer2);
                    minmax.put(d);
                }
                iditer2.advance();
            }
            LOG.incrementProcessed(progress);
            iditer.advance();
        }
        LOG.ensureCompleted(progress);
        return minmax;
    }

    private static void shrinkHeap(TreeSet<DoubleDBIDPair> hotset, int k) {
        HashSetModifiableDBIDs seenids = DBIDUtil.newHashSet(2 * k);
        int cnt = 0;
        Iterator<DoubleDBIDPair> i = hotset.iterator();
        while (i.hasNext()) {
            DoubleDBIDPair p = i.next();
            if (cnt > k || seenids.contains(p)) {
                i.remove();
                continue;
            }
            seenids.add(p);
            ++cnt;
        }
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(this.getDistanceFunction().getInputTypeRestriction());
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer<O>
    extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
        public static final OptionID EXACT_ID = new OptionID("diststat.exact", "In a first pass, compute the exact minimum and maximum, at the cost of O(2*n*n) instead of O(n*n). The number of resulting bins is guaranteed to be as requested.");
        public static final OptionID SAMPLING_ID = new OptionID("diststat.sampling", "Enable sampling of O(n) size to determine the minimum and maximum distances approximately. The resulting number of bins can be larger than the given n.");
        public static final OptionID HISTOGRAM_BINS_ID = new OptionID("diststat.bins", "Number of bins to use in the histogram. By default, it is only guaranteed to be within 1*n and 2*n of the given number.");
        protected int numbin = 20;
        protected boolean sampling = false;
        protected boolean exact = false;

        @Override
        protected void makeOptions(Parameterization config) {
            Flag samplingF;
            Flag exactF;
            super.makeOptions(config);
            IntParameter numbinP = (IntParameter)new IntParameter(HISTOGRAM_BINS_ID, 20).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ONE_INT);
            if (config.grab(numbinP)) {
                this.numbin = (Integer)numbinP.getValue();
            }
            if (config.grab(exactF = new Flag(EXACT_ID))) {
                this.exact = (Boolean)exactF.getValue();
            }
            if (!this.exact && config.grab(samplingF = new Flag(SAMPLING_ID))) {
                this.sampling = (Boolean)samplingF.getValue();
            }
        }

        @Override
        protected DistanceStatisticsWithClasses<O> makeInstance() {
            return new DistanceStatisticsWithClasses(this.distanceFunction, this.numbin, this.exact, this.sampling);
        }
    }
}

