/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering;

import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.PrototypeModel;
import de.lmu.ifi.dbs.elki.data.model.SimplePrototypeModel;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import java.util.ArrayList;

@Reference(authors="A. McCallum, K. Nigam, L. H. Ungar", title="Efficient Clustering of High Dimensional Data Sets with Application to Reference Matching", booktitle="Proc. 6th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining", url="https://doi.org/10.1145/347090.347123", bibkey="DBLP:conf/kdd/McCallumNU00")
public class CanopyPreClustering<O>
extends AbstractDistanceBasedAlgorithm<O, Clustering<PrototypeModel<O>>>
implements ClusteringAlgorithm<Clustering<PrototypeModel<O>>> {
    private static final Logging LOG = Logging.getLogger(CanopyPreClustering.class);
    private double t1;
    private double t2;

    public CanopyPreClustering(DistanceFunction<? super O> distanceFunction, double t1, double t2) {
        super(distanceFunction);
        this.t1 = t1;
        this.t2 = t2;
    }

    public Clustering<PrototypeModel<O>> run(Database database, Relation<O> relation) {
        if (!(this.t1 >= this.t2)) {
            throw new AbortException("T1 must be at least as large as T2.");
        }
        DistanceQuery<O> dq = database.getDistanceQuery(relation, this.getDistanceFunction(), new Object[0]);
        HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(relation.getDBIDs());
        ArrayList clusters = new ArrayList();
        int size = relation.size();
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Canopy clustering", size, LOG) : null;
        DBIDVar first = DBIDUtil.newVar();
        while (!ids.isEmpty()) {
            ids.pop(first);
            ArrayModifiableDBIDs cids = DBIDUtil.newArray();
            cids.add(first);
            DBIDMIter iter = ids.iter();
            while (iter.valid()) {
                double dist = dq.distance((DBIDRef)first, (DBIDRef)iter);
                if (!(dist > this.t1)) {
                    cids.add(iter);
                    if (dist <= this.t2) {
                        iter.remove();
                    }
                }
                iter.advance();
            }
            clusters.add(new Cluster<SimplePrototypeModel<O>>((DBIDs)cids, new SimplePrototypeModel<O>(relation.get(first))));
            if (prog == null) continue;
            prog.setProcessed(size - ids.size(), LOG);
        }
        LOG.ensureCompleted(prog);
        return new Clustering<PrototypeModel<O>>("Canopy clustering", "canopy-clustering", clusters);
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(this.getDistanceFunction().getInputTypeRestriction());
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer<O>
    extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
        public static final OptionID T1_ID = new OptionID("canopy.t1", "Inclusion threshold for canopy clustering. t1 >= t2!");
        public static final OptionID T2_ID = new OptionID("canopy.t2", "Removal threshold for canopy clustering. t1 >= t2!");
        private double t1;
        private double t2;

        @Override
        protected void makeOptions(Parameterization config) {
            DoubleParameter t2P;
            super.makeOptions(config);
            DoubleParameter t1P = new DoubleParameter(T1_ID);
            if (config.grab(t1P)) {
                this.t1 = t1P.doubleValue();
            }
            if (config.grab(t2P = new DoubleParameter(T2_ID))) {
                this.t2 = t2P.doubleValue();
            }
            if (this.t1 < this.t2) {
                config.reportError(new WrongParameterValueException(t1P, "must be larger than", t2P, ""));
            }
        }

        @Override
        protected CanopyPreClustering<O> makeInstance() {
            return new CanopyPreClustering(this.distanceFunction, this.t1, this.t2);
        }
    }
}

