/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;

import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithmUtil;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.CLARA;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMedoidsFastPAM;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMedoidsInitialization;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.MedoidModel;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory;
import java.util.Random;

/**
 * CLARA-style k-medoids clustering that runs the FastPAM optimization of
 * {@link KMedoidsFastPAM} on several random samples of the data and keeps the
 * sample whose medoids give the lowest total cost on the full relation.
 *
 * @param <V> type of the objects being clustered
 */
@Reference(authors="Erich Schubert, Peter J. Rousseeuw", title="Faster k-Medoids Clustering: Improving the PAM, CLARA, and CLARANS Algorithms", booktitle="preprint, to appear", url="https://arxiv.org/abs/1810.05691", bibkey="DBLP:journals/corr/abs-1810-05691")
public class FastCLARA<V>
extends KMedoidsFastPAM<V> {
    /** Class logger. */
    private static final Logging LOG = Logging.getLogger(FastCLARA.class);

    /**
     * Sample size setting: values {@code <= 1.0} are a fraction of the data
     * set size, larger values are an absolute number of objects.
     */
    double sampling;

    /** Number of random samples to draw and optimize. */
    int numsamples;

    /** Whether the best medoids found so far are handed to the sampler for the next sample. */
    boolean keepmed;

    /** Source of the random generator used for sampling. */
    RandomFactory random;

    /**
     * Constructor.
     *
     * @param distanceFunction distance function, forwarded to the superclass
     * @param k number of clusters, forwarded to the superclass
     * @param maxiter iteration limit, forwarded to the superclass
     * @param initializer medoid initialization, forwarded to the superclass
     * @param fasttol FastPAM tolerance, forwarded to the superclass
     * @param numsamples number of samples to draw; must be at least 1 for {@link #run}
     * @param sampling sample size (fraction if {@code <= 1.0}, absolute count otherwise)
     * @param keepmed whether to pass the previous best medoids to the sampler
     * @param random random generator factory
     */
    public FastCLARA(DistanceFunction<? super V> distanceFunction, int k, int maxiter, KMedoidsInitialization<V> initializer, double fasttol, int numsamples, double sampling, boolean keepmed, RandomFactory random) {
        super(distanceFunction, k, maxiter, initializer, fasttol);
        this.numsamples = numsamples;
        this.sampling = sampling;
        this.random = random;
        this.keepmed = keepmed;
    }

    /**
     * Run the algorithm: draw {@link #numsamples} samples, optimize medoids on
     * each sample, assign all remaining objects to their nearest medoid, and
     * return the clustering with the lowest total cost.
     *
     * @param database database used to obtain the distance query
     * @param relation relation to cluster
     * @return clustering with one top-level cluster per medoid; an empty
     *         clustering if the relation is empty
     * @throws IllegalStateException if {@link #numsamples} is smaller than 1,
     *         or if no sample yielded a cost below positive infinity
     */
    @Override
    public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
        if (relation.size() <= 0) {
            return new Clustering<MedoidModel>("CLARA Clustering", "clara-clustering");
        }
        // Fail fast: without at least one sample there is nothing to select from.
        if (this.numsamples < 1) {
            throw new IllegalStateException("numsamples must be larger than 0.");
        }
        DBIDs ids = relation.getDBIDs();
        DistanceQuery<V> distQ = database.getDistanceQuery(relation, this.getDistanceFunction(), new Object[0]);
        int samplesize = FastCLARA.sampleSize(this.sampling, ids.size());
        if (samplesize < 3 * this.k) {
            LOG.warning("The sampling size is set to a very small value, it should be much larger than k.");
        }
        // Cache sized for the number of unordered pairs within one sample.
        // NOTE(review): this product is computed in int arithmetic and overflows
        // once samplesize exceeds 46341. Left unchanged because how
        // CLARA.CachedDistanceQuery treats its size argument is not visible here.
        CLARA.CachedDistanceQuery<V> cachedQ = new CLARA.CachedDistanceQuery<V>(distQ, samplesize * (samplesize - 1) >> 1);

        double best = Double.POSITIVE_INFINITY;
        ArrayModifiableDBIDs bestmedoids = null;
        WritableIntegerDataStore bestclusters = null;
        Random rnd = this.random.getSingleThreadedRandom();
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Processing random samples", this.numsamples, LOG) : null;
        for (int j = 0; j < this.numsamples; ++j) {
            // With keepmed set, the sampler also receives the best medoids so far
            // (null on the first round, as nothing has been selected yet).
            DBIDs rids = CLARA.randomSample(ids, samplesize, rnd, this.keepmed ? bestmedoids : null);
            // Distances cached for the previous sample are discarded.
            cachedQ.clear();
            ArrayModifiableDBIDs medoids = DBIDUtil.newArray(this.initializer.chooseInitialMedoids(this.k, rids, cachedQ));
            // Fresh assignment store over ALL ids, initialized to -1.
            // Hint flags 3 are a decompiled constant; presumably
            // DataStoreFactory.HINT_HOT | HINT_TEMP -- TODO confirm.
            WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(ids, 3, -1);
            // Total cost = cost of optimizing on the sample (cached distances)
            // + cost of assigning the non-sampled objects (uncached query).
            double score = new KMedoidsFastPAM.Instance(cachedQ, rids, assignment, this.fasttol).run(medoids, this.maxiter)
                + CLARA.assignRemainingToNearestCluster(medoids, ids, rids, assignment, distQ);
            if (LOG.isStatistics()) {
                LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".sample-" + j + ".cost", score));
            }
            if (score < best) {
                best = score;
                bestmedoids = medoids;
                bestclusters = assignment;
            }
            if (cachedQ.hasUncachedQueries()) {
                LOG.warning("Some distance queries were not cached; maybe the initialization is not optimized for k-medoids.");
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        if (LOG.isStatistics()) {
            LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".cost", best));
        }
        // At least one sample was processed here, so a missing result means no
        // score ever compared below +Infinity (i.e. all scores were NaN or infinite).
        if (bestmedoids == null) {
            throw new IllegalStateException("No sample produced a cost below positive infinity (all costs were NaN or infinite).");
        }

        // Wrap the best assignment into the result, one cluster per medoid;
        // the medoid's array offset is its cluster label.
        ArrayModifiableDBIDs[] clusters = ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, bestclusters, this.k);
        Clustering<MedoidModel> result = new Clustering<MedoidModel>("CLARA Clustering", "clara-clustering");
        for (DBIDArrayMIter it = bestmedoids.iter(); it.valid(); it.advance()) {
            MedoidModel model = new MedoidModel(DBIDUtil.deref(it));
            result.addToplevelCluster(new Cluster<MedoidModel>((DBIDs)clusters[it.getOffset()], model));
        }
        return result;
    }

    /**
     * Resolve the sampling setting to a concrete number of objects.
     *
     * @param sampling fraction of the data set if {@code <= 1.0}, otherwise an absolute count
     * @param total data set size
     * @return sample size, never larger than {@code total}
     */
    private static int sampleSize(double sampling, int total) {
        double requested = sampling <= 1.0 ? sampling * (double)total : sampling;
        return Math.min(total, (int)requested);
    }

    /**
     * Parameterization class: reads the sampling options in addition to those
     * of {@link KMedoidsFastPAM.Parameterizer}.
     *
     * @param <V> type of the objects being clustered
     */
    public static class Parameterizer<V>
    extends KMedoidsFastPAM.Parameterizer<V> {
        /** Option for the number of samples; shared with {@link CLARA}. */
        public static final OptionID NUMSAMPLES_ID = CLARA.Parameterizer.NUMSAMPLES_ID;

        /** Option for the sample size; shared with {@link CLARA}. */
        public static final OptionID SAMPLESIZE_ID = CLARA.Parameterizer.SAMPLESIZE_ID;

        /** Flag to disable keeping the previous medoids; shared with {@link CLARA}. */
        public static final OptionID NOKEEPMED_ID = CLARA.Parameterizer.NOKEEPMED_ID;

        /** Option for the random generator; shared with {@link CLARA}. */
        public static final OptionID RANDOM_ID = CLARA.Parameterizer.RANDOM_ID;

        /** Sample size setting (fraction if {@code <= 1.0}, absolute count otherwise). */
        double sampling;

        /** Number of samples to draw. */
        int numsamples;

        /** Whether to pass the previous best medoids to the sampler. */
        boolean keepmed;

        /** Random generator factory. */
        RandomFactory random;

        @Override
        protected void makeOptions(Parameterization config) {
            // The superclass options are read first; the sample size default
            // below depends on k.
            super.makeOptions(config);

            // Number of samples: default 5, must be >= 1.
            IntParameter numsamplesP = (IntParameter)new IntParameter(NUMSAMPLES_ID, 5).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT);
            if (config.grab(numsamplesP)) {
                this.numsamples = numsamplesP.intValue();
            }

            // Sample size: default 80 + 4k, must be > 0.
            DoubleParameter samplingP = (DoubleParameter)new DoubleParameter(SAMPLESIZE_ID, 80.0 + 4.0 * (double)this.k).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
            if (config.grab(samplingP)) {
                this.sampling = samplingP.doubleValue();
            }

            // With exactly one sample there is no previous result to keep, so
            // the flag is not even requested and keepmed stays false.
            Flag nokeepmedF = new Flag(NOKEEPMED_ID);
            if (this.numsamples != 1 && config.grab(nokeepmedF)) {
                this.keepmed = nokeepmedF.isFalse();
            }

            RandomParameter randomP = new RandomParameter(RANDOM_ID);
            if (config.grab(randomP)) {
                this.random = (RandomFactory)randomP.getValue();
            }
        }

        @Override
        protected FastCLARA<V> makeInstance() {
            // Explicit type argument instead of the raw type, so no unchecked conversion.
            return new FastCLARA<V>(this.distanceFunction, this.k, this.maxiter, this.initializer, this.fasttol, this.numsamples, this.sampling, this.keepmed, this.random);
        }
    }
}

