/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.uncertain;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMedoidsPAM;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.data.uncertain.UncertainObject;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.cluster.ClusteringAdjustedRandIndexSimilarityFunction;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.cluster.ClusteringDistanceSimilarityFunction;
import de.lmu.ifi.dbs.elki.index.distancematrix.PrecomputedDistanceMatrix;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
import de.lmu.ifi.dbs.elki.result.BasicResult;
import de.lmu.ifi.dbs.elki.result.EvaluationResult;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultHierarchy;
import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.It;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import net.jafama.FastMath;

@Reference(authors="Andreas Z\u00fcfle, Tobias Emrich, Klaus Arthur Schmid, Nikos Mamoulis, Arthur Zimek, Mathias Renz", title="Representative clustering of uncertain data", booktitle="Proc. 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining", url="https://doi.org/10.1145/2623330.2623725", bibkey="DBLP:conf/kdd/ZufleESMZR14")
public class RepresentativeUncertainClustering
extends AbstractAlgorithm<Clustering<Model>>
implements ClusteringAlgorithm<Clustering<Model>> {
    private static final Logging LOG = Logging.getLogger(RepresentativeUncertainClustering.class);
    protected ClusteringDistanceSimilarityFunction distance;
    protected ClusteringAlgorithm<?> metaAlgorithm;
    protected ClusteringAlgorithm<?> samplesAlgorithm;
    protected int numsamples;
    protected RandomFactory random;
    protected double alpha;
    protected boolean keep;

    public RepresentativeUncertainClustering(ClusteringDistanceSimilarityFunction distance, ClusteringAlgorithm<?> metaAlgorithm, ClusteringAlgorithm<?> samplesAlgorithm, int numsamples, RandomFactory random, double alpha, boolean keep) {
        this.samplesAlgorithm = samplesAlgorithm;
        this.numsamples = numsamples;
        this.metaAlgorithm = metaAlgorithm;
        this.distance = distance;
        this.random = random;
        this.alpha = alpha;
        this.keep = keep;
    }

    public Clustering<?> run(Database database, Relation<? extends UncertainObject> relation) {
        ResultHierarchy hierarchy = database.getHierarchy();
        ArrayList clusterings = new ArrayList();
        int dim = RelationUtil.dimensionality(relation);
        DBIDs ids = relation.getDBIDs();
        BasicResult samples = new BasicResult("Samples", "samples");
        Random rand = this.random.getSingleThreadedRandom();
        FiniteProgress sampleP = LOG.isVerbose() ? new FiniteProgress("Clustering samples", this.numsamples, LOG) : null;
        for (int i = 0; i < this.numsamples; ++i) {
            WritableDataStore<DoubleVector> store = DataStoreUtil.makeStorage(ids, 30, DoubleVector.class);
            DBIDIter iter = ids.iter();
            while (iter.valid()) {
                store.put(iter, relation.get(iter).drawSample(rand));
                iter.advance();
            }
            clusterings.add(this.runClusteringAlgorithm(hierarchy, samples, ids, store, dim, "Sample " + i));
            LOG.incrementProcessed(sampleP);
        }
        LOG.ensureCompleted(sampleP);
        DBIDRange rids = DBIDFactory.FACTORY.generateStaticDBIDRange(clusterings.size());
        WritableDataStore<Clustering> datastore = DataStoreUtil.makeStorage(rids, 30, Clustering.class);
        Iterator it2 = clusterings.iterator();
        DBIDArrayIter iter = rids.iter();
        while (iter.valid()) {
            datastore.put(iter, (Clustering)it2.next());
            iter.advance();
        }
        assert (rids.size() == clusterings.size());
        MaterializedRelation<Clustering> crel = new MaterializedRelation<Clustering>(Clustering.TYPE, rids, "Clusterings", datastore);
        PrecomputedDistanceMatrix<Clustering> mat = new PrecomputedDistanceMatrix<Clustering>(crel, rids, this.distance);
        mat.initialize();
        ProxyDatabase d = new ProxyDatabase((DBIDs)rids, crel);
        d.getHierarchy().add(crel, mat);
        Result c = this.metaAlgorithm.run(d);
        d.getHierarchy().remove(d, c);
        BasicResult reps = new BasicResult("Representants", "representative");
        hierarchy.add(relation, reps);
        DistanceQuery<Clustering> dq = mat.getDistanceQuery(this.distance, new Object[0]);
        List cl = ((Clustering)c).getAllClusters();
        ArrayList<DoubleObjPair<Clustering>> evaluated = new ArrayList<DoubleObjPair<Clustering>>(cl.size());
        for (Cluster cluster : cl) {
            double besttau = Double.POSITIVE_INFINITY;
            Clustering bestc = null;
            DBIDIter it1 = cluster.getIDs().iter();
            while (it1.valid()) {
                double tau = 0.0;
                Clustering curc = (Clustering)crel.get(it1);
                DBIDIter it22 = cluster.getIDs().iter();
                while (it22.valid()) {
                    if (!DBIDUtil.equal(it1, it22)) {
                        double di = dq.distance(curc, (Clustering)((Object)it22));
                        tau = di > tau ? di : tau;
                    }
                    it22.advance();
                }
                if (tau < besttau) {
                    besttau = tau;
                    bestc = curc;
                }
                it1.advance();
            }
            if (bestc == null) continue;
            double gtau = 0.0;
            DBIDIter it23 = crel.iterDBIDs();
            while (it23.valid()) {
                double di = dq.distance(bestc, (Clustering)((Object)it23));
                gtau = di > gtau ? di : gtau;
                it23.advance();
            }
            double cprob = this.computeConfidence(cluster.size(), crel.size());
            hierarchy.add(bestc, new RepresentativenessEvaluation(gtau, besttau, cprob));
            evaluated.add(new DoubleObjPair<Clustering>(cprob, bestc));
        }
        Collections.sort(evaluated, Collections.reverseOrder());
        for (DoubleObjPair doubleObjPair : evaluated) {
            It<Relation> it = hierarchy.iterParents(doubleObjPair.second).filter(Relation.class);
            while (it.valid()) {
                hierarchy.add(reps, it.get());
                it.advance();
            }
        }
        if (this.keep) {
            hierarchy.add(relation, samples);
        } else {
            hierarchy.removeSubtree(samples);
        }
        return c;
    }

    private double computeConfidence(int support, int samples) {
        double z = NormalDistribution.standardNormalQuantile(this.alpha);
        double eprob = (double)support / (double)samples;
        return Math.max(0.0, eprob - z * FastMath.sqrt(eprob * (1.0 - eprob) / (double)samples));
    }

    protected Clustering<?> runClusteringAlgorithm(ResultHierarchy hierarchy, Result parent, DBIDs ids, DataStore<DoubleVector> store, int dim, String title) {
        VectorFieldTypeInformation<DoubleVector> t = new VectorFieldTypeInformation<DoubleVector>(DoubleVector.FACTORY, dim);
        MaterializedRelation<DoubleVector> sample = new MaterializedRelation<DoubleVector>(t, ids, title, store);
        ProxyDatabase d = new ProxyDatabase(ids, sample);
        Result clusterResult = this.samplesAlgorithm.run(d);
        d.getHierarchy().remove(sample);
        d.getHierarchy().remove(clusterResult);
        hierarchy.add(parent, sample);
        hierarchy.add(sample, clusterResult);
        return clusterResult;
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(UncertainObject.UNCERTAIN_OBJECT_FIELD);
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer
    extends AbstractParameterizer {
        public static final int DEFAULT_ENSEMBLE_DEPTH = 10;
        public static final OptionID CLUSTERDISTANCE_ID = new OptionID("pwc.distance", "Distance measure of clusterings.");
        public static final OptionID META_ALGORITHM_ID = new OptionID("pwc.metaclustering", "Algorithm used to aggregate clustering results. Must be a distance-based clustering algorithm.");
        public static final OptionID ALGORITHM_ID = new OptionID("pwc.clustering", "Clustering algorithm used on the samples.");
        public static final OptionID SAMPLES_ID = new OptionID("pwc.samples", "Number of clusterings to produce on samples.");
        public static final OptionID KEEP_SAMPLES_ID = new OptionID("pwc.samples.keep", "Retain all sampled relations, not only the representative results.");
        public static final OptionID RANDOM_ID = new OptionID("pwc.random", "Random generator used for sampling.");
        public static final OptionID ALPHA_ID = new OptionID("pwc.alpha", "Alpha threshold for estimating the confidence probability.");
        protected ClusteringDistanceSimilarityFunction distance;
        protected ClusteringAlgorithm<?> metaAlgorithm;
        protected ClusteringAlgorithm<?> samplesAlgorithm;
        protected int numsamples;
        protected RandomFactory random;
        protected double alpha;
        protected boolean keep;

        @Override
        protected void makeOptions(Parameterization config) {
            DoubleParameter palpha;
            RandomParameter randomP;
            Flag keepF;
            IntParameter pdepth;
            ObjectParameter palgorithm;
            super.makeOptions(config);
            this.distance = ClusteringAdjustedRandIndexSimilarityFunction.STATIC;
            ObjectParameter simP = new ObjectParameter(CLUSTERDISTANCE_ID, (Class<?>)ClusteringDistanceSimilarityFunction.class, ClusteringAdjustedRandIndexSimilarityFunction.class);
            if (config.grab(simP)) {
                this.distance = (ClusteringDistanceSimilarityFunction)simP.instantiateClass(config);
            }
            ListParameterization predef = new ListParameterization();
            predef.addParameter(DistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, (Object)this.distance);
            ChainedParameterization chain = new ChainedParameterization(predef, config);
            chain.errorsTo(config);
            ObjectParameter malgorithm = new ObjectParameter(META_ALGORITHM_ID, (Class<?>)ClusteringAlgorithm.class, KMedoidsPAM.class);
            if (chain.grab(malgorithm)) {
                this.metaAlgorithm = (ClusteringAlgorithm)malgorithm.instantiateClass(chain);
                if (this.metaAlgorithm != null && this.metaAlgorithm.getInputTypeRestriction().length > 0 && !this.metaAlgorithm.getInputTypeRestriction()[0].isAssignableFromType(Clustering.TYPE)) {
                    config.reportError(new WrongParameterValueException(malgorithm, malgorithm.getValueAsString(), "The meta clustering algorithm (as configured) does not accept clustering results."));
                }
            }
            if (config.grab(palgorithm = new ObjectParameter(ALGORITHM_ID, ClusteringAlgorithm.class))) {
                this.samplesAlgorithm = (ClusteringAlgorithm)palgorithm.instantiateClass(config);
                if (this.samplesAlgorithm != null && this.samplesAlgorithm.getInputTypeRestriction().length > 0 && !this.samplesAlgorithm.getInputTypeRestriction()[0].isAssignableFromType(TypeUtil.NUMBER_VECTOR_FIELD)) {
                    config.reportError(new WrongParameterValueException(palgorithm, palgorithm.getValueAsString(), "The inner clustering algorithm (as configured) does not accept numerical vectors: " + this.samplesAlgorithm.getInputTypeRestriction()[0]));
                }
            }
            if (config.grab(pdepth = new IntParameter(SAMPLES_ID, 10))) {
                this.numsamples = (Integer)pdepth.getValue();
            }
            if (config.grab(keepF = new Flag(KEEP_SAMPLES_ID))) {
                this.keep = keepF.isTrue();
            }
            if (config.grab(randomP = new RandomParameter(RANDOM_ID))) {
                this.random = (RandomFactory)randomP.getValue();
            }
            if (config.grab(palpha = (DoubleParameter)((DoubleParameter)new DoubleParameter(ALPHA_ID, 0.95).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ONE_DOUBLE)).addConstraint((ParameterConstraint)CommonConstraints.LESS_THAN_ONE_DOUBLE))) {
                this.alpha = palpha.doubleValue();
            }
        }

        @Override
        protected RepresentativeUncertainClustering makeInstance() {
            return new RepresentativeUncertainClustering(this.distance, this.metaAlgorithm, this.samplesAlgorithm, this.numsamples, this.random, this.alpha, this.keep);
        }
    }

    public static class RepresentativenessEvaluation
    extends EvaluationResult {
        public RepresentativenessEvaluation(double gtau, double besttau, double cprob) {
            super("Possible-Worlds Evaluation", "representativeness");
            EvaluationResult.MeasurementGroup g = this.newGroup("Representativeness");
            g.addMeasure("Confidence", cprob, 0.0, 1.0, false);
            g.addMeasure("Global Tau", gtau, 0.0, 1.0, true);
            g.addMeasure("Cluster Tau", besttau, 0.0, 1.0, true);
        }

        @Override
        public boolean visualizeSingleton() {
            return true;
        }
    }
}

