/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.extraction;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.HierarchicalClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.PointerDensityHierarchyRepresentationResult;
import de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.PointerHierarchyRepresentationResult;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DBIDDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.Priority;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import java.util.ArrayList;

@Reference(authors="Erich Schubert, Michael Gertz", title="Semantic Word Clouds with Background Corpus Normalization and t-distributed Stochastic Neighbor Embedding", booktitle="ArXiV preprint, 1708.03569", url="http://arxiv.org/abs/1708.03569", bibkey="DBLP:journals/corr/abs-1708-03569")
@Priority(value=206)
public class ClustersWithNoiseExtraction
implements ClusteringAlgorithm<Clustering<Model>> {
    private static final Logging LOG = Logging.getLogger(ClustersWithNoiseExtraction.class);
    private int numCl = 1;
    private int minClSize = 1;
    private HierarchicalClusteringAlgorithm algorithm;

    public ClustersWithNoiseExtraction(HierarchicalClusteringAlgorithm algorithm, int numCl, int minClSize) {
        this.algorithm = algorithm;
        this.numCl = numCl;
        this.minClSize = minClSize;
    }

    @Override
    public Clustering<Model> run(Database database) {
        return this.run(this.algorithm.run(database));
    }

    public Clustering<Model> run(PointerHierarchyRepresentationResult pointerresult) {
        Clustering<Model> result = new Instance(pointerresult).run();
        result.addChildResult(pointerresult);
        return result;
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return this.algorithm.getInputTypeRestriction();
    }

    public static class Parameterizer
    extends AbstractParameterizer {
        public static final OptionID K_ID = new OptionID("extract.k", "The number of clusters to extract.");
        public static final OptionID MINCLUSTERSIZE_ID = new OptionID("extract.minclsize", "The minimum cluster size.");
        int numCl = 1;
        int minClSize = 1;
        HierarchicalClusteringAlgorithm algorithm;

        @Override
        protected void makeOptions(Parameterization config) {
            IntParameter minclustersP;
            IntParameter numClP;
            super.makeOptions(config);
            ObjectParameter algorithmP = new ObjectParameter(AbstractAlgorithm.ALGORITHM_ID, HierarchicalClusteringAlgorithm.class);
            if (config.grab(algorithmP)) {
                this.algorithm = (HierarchicalClusteringAlgorithm)algorithmP.instantiateClass(config);
            }
            if (config.grab(numClP = (IntParameter)new IntParameter(K_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT))) {
                this.numCl = numClP.intValue();
            }
            if (config.grab(minclustersP = (IntParameter)new IntParameter(MINCLUSTERSIZE_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT))) {
                this.minClSize = minclustersP.intValue();
            }
        }

        @Override
        protected ClustersWithNoiseExtraction makeInstance() {
            return new ClustersWithNoiseExtraction(this.algorithm, this.numCl, this.minClSize);
        }
    }

    protected class Instance {
        protected ArrayDBIDs ids;
        protected DBIDDataStore pi;
        protected DoubleDataStore lambda;
        protected DoubleDataStore coredist = null;
        protected PointerHierarchyRepresentationResult pointerresult;

        public Instance(PointerHierarchyRepresentationResult pointerresult) {
            this.ids = pointerresult.topologicalSort();
            this.pi = pointerresult.getParentStore();
            this.lambda = pointerresult.getParentDistanceStore();
            this.pointerresult = pointerresult;
            if (pointerresult instanceof PointerDensityHierarchyRepresentationResult) {
                this.coredist = ((PointerDensityHierarchyRepresentationResult)pointerresult).getCoreDistanceStore();
            }
        }

        public Clustering<Model> run() {
            ArrayDBIDs order = this.pointerresult.topologicalSort();
            DBIDVar succ = DBIDUtil.newVar();
            WritableIntegerDataStore clustersizes = DataStoreUtil.makeIntegerStorage(this.ids, 1, 1);
            int curGood = 0;
            int bestCl = this.ids.size();
            int bestOff = this.ids.size() - bestCl - 1;
            FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Finding best threshold", this.ids.size(), LOG) : null;
            DBIDArrayIter it = order.iter();
            while (it.valid()) {
                if ((curGood += this.mergeClusterSizes(clustersizes, it, this.pi.assignVar(it, succ))) == ClustersWithNoiseExtraction.this.numCl || Math.abs(curGood - ClustersWithNoiseExtraction.this.numCl) < Math.abs(bestCl - ClustersWithNoiseExtraction.this.numCl)) {
                    bestCl = curGood;
                    bestOff = it.getOffset() + 1;
                }
                LOG.incrementProcessed(progress);
                it.advance();
            }
            if (progress != null) {
                progress.setProcessed(this.ids.size(), LOG);
            }
            LOG.ensureCompleted(progress);
            if (bestCl != ClustersWithNoiseExtraction.this.numCl) {
                LOG.warning("Could not find a result with exactly " + ClustersWithNoiseExtraction.this.numCl + " clusters (+ noise), generating " + bestCl + " clusters instead.");
            }
            WritableDataStore<ArrayModifiableDBIDs> clusters = DataStoreUtil.makeStorage(this.ids, 1, ArrayModifiableDBIDs.class);
            progress = LOG.isVerbose() ? new FiniteProgress("Performing cluster merges", bestOff, LOG) : null;
            it.seek(0);
            while (it.getOffset() < bestOff) {
                this.mergeClusters(clusters, it, this.pi.assignVar(it, succ));
                LOG.incrementProcessed(progress);
                it.advance();
            }
            LOG.ensureCompleted(progress);
            ArrayModifiableDBIDs noise = DBIDUtil.newArray();
            Cluster nclus = new Cluster("Noise", (DBIDs)noise, true);
            ArrayList toplevel = new ArrayList(bestCl + 1);
            toplevel.add(nclus);
            it.seek(0);
            while (it.valid()) {
                ArrayModifiableDBIDs c = (ArrayModifiableDBIDs)clusters.get(it);
                if (c == null) {
                    if (it.getOffset() >= bestOff) {
                        noise.add(it);
                    }
                } else if (c.size() < ClustersWithNoiseExtraction.this.minClSize) {
                    noise.addDBIDs(c);
                } else {
                    toplevel.add(new Cluster(c));
                }
                it.advance();
            }
            if (noise.isEmpty()) {
                toplevel.remove(0);
            }
            Clustering<Model> dendrogram = new Clustering<Model>("Hierarchical Clustering", "hierarchical-clustering", toplevel);
            return dendrogram;
        }

        private int mergeClusterSizes(WritableIntegerDataStore clustersizes, DBIDRef it, DBIDRef succ) {
            int c1 = clustersizes.intValue(it);
            int c2 = clustersizes.intValue(succ);
            int c12 = c1 + c2;
            clustersizes.put(succ, c12);
            return (c12 >= ClustersWithNoiseExtraction.this.minClSize ? 1 : 0) - (c1 >= ClustersWithNoiseExtraction.this.minClSize ? 1 : 0) - (c2 >= ClustersWithNoiseExtraction.this.minClSize ? 1 : 0);
        }

        private void mergeClusters(WritableDataStore<ArrayModifiableDBIDs> clusters, DBIDRef it, DBIDRef succ) {
            ArrayModifiableDBIDs c1 = (ArrayModifiableDBIDs)clusters.get(it);
            ArrayModifiableDBIDs c2 = (ArrayModifiableDBIDs)clusters.get(succ);
            if (c1 == null) {
                if (c2 == null) {
                    c2 = DBIDUtil.newArray();
                    clusters.put(succ, c2);
                    c2.add(succ);
                }
                c2.add(it);
            } else {
                if (c2 == null) {
                    c1.add(succ);
                    c2 = c1;
                    clusters.put(succ, c2);
                } else {
                    c2.addDBIDs(c1);
                }
                clusters.put(it, null);
            }
        }
    }
}

