/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;

import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithmUtil;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMedoidsPAM;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMedoidsInitialization;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.MedoidModel;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.References;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory;
import it.unimi.dsi.fastutil.longs.Long2DoubleOpenHashMap;
import java.util.Random;

@References(value={@Reference(authors="L. Kaufman, P. J. Rousseeuw", title="Clustering Large Data Sets", booktitle="Pattern Recognition in Practice", url="https://doi.org/10.1016/B978-0-444-87877-9.50039-X", bibkey="doi:10.1016/B978-0-444-87877-9.50039-X"), @Reference(authors="L. Kaufman, P. J. Rousseeuw", title="Clustering Large Applications (Program CLARA)", booktitle="Finding Groups in Data: An Introduction to Cluster Analysis", url="https://doi.org/10.1002/9780470316801.ch3", bibkey="doi:10.1002/9780470316801.ch3")})
public class CLARA<V>
extends KMedoidsPAM<V> {
    private static final Logging LOG = Logging.getLogger(CLARA.class);
    double sampling;
    int numsamples;
    boolean keepmed;
    RandomFactory random;

    public CLARA(DistanceFunction<? super V> distanceFunction, int k, int maxiter, KMedoidsInitialization<V> initializer, int numsamples, double sampling, boolean keepmed, RandomFactory random) {
        super(distanceFunction, k, maxiter, initializer);
        this.numsamples = numsamples;
        this.sampling = sampling;
        this.random = random;
        this.keepmed = keepmed;
    }

    @Override
    public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
        if (relation.size() <= 0) {
            return new Clustering<MedoidModel>("CLARA Clustering", "clara-clustering");
        }
        DBIDs ids = relation.getDBIDs();
        DistanceQuery<V> distQ = database.getDistanceQuery(relation, this.getDistanceFunction(), new Object[0]);
        int samplesize = Math.min(ids.size(), (int)(this.sampling <= 1.0 ? this.sampling * (double)ids.size() : this.sampling));
        if (samplesize < 3 * this.k) {
            LOG.warning("The sampling size is set to a very small value, it should be much larger than k.");
        }
        CachedDistanceQuery<V> cachedQ = new CachedDistanceQuery<V>(distQ, samplesize * (samplesize - 1) >> 1);
        double best = Double.POSITIVE_INFINITY;
        ArrayModifiableDBIDs bestmedoids = null;
        WritableIntegerDataStore bestclusters = null;
        Random rnd = this.random.getSingleThreadedRandom();
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Processing random samples", this.numsamples, LOG) : null;
        for (int j = 0; j < this.numsamples; ++j) {
            DBIDs rids = CLARA.randomSample(ids, samplesize, rnd, this.keepmed ? bestmedoids : null);
            cachedQ.clear();
            ArrayModifiableDBIDs medoids = DBIDUtil.newArray(this.initializer.chooseInitialMedoids(this.k, rids, cachedQ));
            WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(ids, 3, -1);
            double score = new KMedoidsPAM.Instance(cachedQ, rids, assignment).run(medoids, this.maxiter) + CLARA.assignRemainingToNearestCluster(medoids, ids, rids, assignment, distQ);
            if (LOG.isStatistics()) {
                LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".sample-" + j + ".cost", score));
            }
            if (score < best) {
                best = score;
                bestmedoids = medoids;
                bestclusters = assignment;
            }
            if (cachedQ.hasUncachedQueries()) {
                LOG.warning("Some distance queries were not cached; maybe the initialization is not optimized for k-medoids.");
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        if (LOG.isStatistics()) {
            LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".cost", best));
        }
        if (bestmedoids == null) {
            throw new IllegalStateException("numsamples must be larger than 0.");
        }
        ArrayModifiableDBIDs[] clusters = ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, bestclusters, this.k);
        Clustering<MedoidModel> result = new Clustering<MedoidModel>("CLARA Clustering", "clara-clustering");
        DBIDArrayMIter it = bestmedoids.iter();
        while (it.valid()) {
            MedoidModel model = new MedoidModel(DBIDUtil.deref(it));
            result.addToplevelCluster(new Cluster<MedoidModel>((DBIDs)clusters[it.getOffset()], model));
            it.advance();
        }
        return result;
    }

    static DBIDs randomSample(DBIDs ids, int samplesize, Random rnd, DBIDs previous) {
        if (previous == null) {
            return DBIDUtil.randomSample(ids, samplesize, rnd);
        }
        HashSetModifiableDBIDs sample = DBIDUtil.newHashSet(samplesize);
        sample.addDBIDs(previous);
        sample.addDBIDs(DBIDUtil.randomSample(ids, samplesize - previous.size(), rnd));
        if (sample.size() < samplesize) {
            DBIDMIter it = DBIDUtil.randomSample(ids, samplesize, rnd).iter();
            while (sample.size() < samplesize && it.valid()) {
                sample.add(it);
                it.advance();
            }
        }
        return sample;
    }

    static double assignRemainingToNearestCluster(ArrayDBIDs means, DBIDs ids, DBIDs rids, WritableIntegerDataStore assignment, DistanceQuery<?> distQ) {
        rids = DBIDUtil.ensureSet(rids);
        double distsum = 0.0;
        DBIDArrayIter miter = means.iter();
        DBIDIter iditer = distQ.getRelation().iterDBIDs();
        while (iditer.valid()) {
            if (!rids.contains(iditer)) {
                double mindist = Double.POSITIVE_INFINITY;
                int minIndex = 0;
                miter.seek(0);
                int i = 0;
                while (miter.valid()) {
                    double dist = distQ.distance((DBIDRef)iditer, (DBIDRef)miter);
                    if (dist < mindist) {
                        minIndex = i;
                        mindist = dist;
                    }
                    miter.advance();
                    ++i;
                }
                distsum += mindist;
                assignment.put((DBIDRef)iditer, minIndex);
            }
            iditer.advance();
        }
        return distsum;
    }

    public static class Parameterizer<V>
    extends KMedoidsPAM.Parameterizer<V> {
        public static final OptionID NUMSAMPLES_ID = new OptionID("clara.samples", "Number of samples (iterations) to run.");
        public static final OptionID SAMPLESIZE_ID = new OptionID("clara.samplesize", "The size of the sample.");
        public static final OptionID NOKEEPMED_ID = new OptionID("clara.independent", "Draw independent samples (default is to keep the previous best medoids in the sample).");
        public static final OptionID RANDOM_ID = new OptionID("clara.random", "Random generator seed.");
        double sampling;
        int numsamples;
        boolean keepmed;
        RandomFactory random;

        @Override
        protected void makeOptions(Parameterization config) {
            RandomParameter randomP;
            DoubleParameter samplingP;
            super.makeOptions(config);
            IntParameter numsamplesP = (IntParameter)new IntParameter(NUMSAMPLES_ID, 5).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT);
            if (config.grab(numsamplesP)) {
                this.numsamples = numsamplesP.intValue();
            }
            if (config.grab(samplingP = (DoubleParameter)new DoubleParameter(SAMPLESIZE_ID, 40 + 2 * this.k).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE))) {
                this.sampling = samplingP.doubleValue();
            }
            Flag nokeepmedF = new Flag(NOKEEPMED_ID);
            if (this.numsamples != 1 && config.grab(nokeepmedF)) {
                this.keepmed = nokeepmedF.isFalse();
            }
            if (config.grab(randomP = new RandomParameter(RANDOM_ID))) {
                this.random = (RandomFactory)randomP.getValue();
            }
        }

        @Override
        protected CLARA<V> makeInstance() {
            return new CLARA(this.distanceFunction, this.k, this.maxiter, this.initializer, this.numsamples, this.sampling, this.keepmed, this.random);
        }
    }

    static class CachedDistanceQuery<V>
    implements DistanceQuery<V> {
        DistanceQuery<V> inner;
        Long2DoubleOpenHashMap cache;
        int bad;

        public CachedDistanceQuery(DistanceQuery<V> inner, int size) {
            this.inner = inner;
            this.cache = new Long2DoubleOpenHashMap(size);
            this.cache.defaultReturnValue(Double.NaN);
        }

        public boolean hasUncachedQueries() {
            return this.bad > 0;
        }

        public void clear() {
            this.cache.clear();
            this.bad = 0;
        }

        @Override
        public double distance(DBIDRef id1, DBIDRef id2) {
            int j;
            if (DBIDUtil.equal(id1, id2)) {
                return 0.0;
            }
            if (DBIDUtil.compare(id1, id2) > 0) {
                return this.distance(id2, id1);
            }
            int i = id1.internalGetIndex();
            long idx = (long)i << 32 | (long)(j = id2.internalGetIndex());
            double v = this.cache.get(idx);
            if (Double.isNaN(v)) {
                v = this.inner.distance(id1, id2);
                this.cache.put(idx, v);
            }
            return v;
        }

        @Override
        public double distance(V o1, DBIDRef id2) {
            ++this.bad;
            return this.inner.distance(o1, (V)id2);
        }

        @Override
        public double distance(DBIDRef id1, V o2) {
            ++this.bad;
            return this.inner.distance((V)id1, o2);
        }

        @Override
        public double distance(V o1, V o2) {
            ++this.bad;
            return this.inner.distance(o1, o2);
        }

        @Override
        public DistanceFunction<? super V> getDistanceFunction() {
            return this.inner.getDistanceFunction();
        }

        @Override
        public Relation<? extends V> getRelation() {
            return this.inner.getRelation();
        }
    }
}

