/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization;

import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeans;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.AbstractKMeansInitialization;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.ModelUtil;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.ProxyView;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory;

/**
 * K-means initialization that runs an inner k-means variant on a random
 * sample of the data and uses the resulting cluster prototypes as the
 * initial means.
 *
 * <p>The inner algorithm instance is reconfigured on every call to
 * {@link #chooseInitialMeans}: its {@code k} and distance function are
 * overwritten with the values requested by the caller.
 *
 * @param <V> vector type the inner k-means variant operates on
 */
@Alias(value={"de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.SampleKMeansInitialization"})
@Reference(authors="P. S. Bradley, U. M. Fayyad", title="Refining Initial Points for K-Means Clustering", booktitle="Proc. 15th Int. Conf. on Machine Learning (ICML 1998)", bibkey="DBLP:conf/icml/BradleyF98")
public class SampleKMeansInitialization<V extends NumberVector>
extends AbstractKMeansInitialization {
    /** K-means variant that is run on the sample. */
    private final KMeans<V, ?> innerkMeans;

    /** Sampling rate (or sample size) handed to {@link DBIDUtil#randomSample}. */
    private final double rate;

    /**
     * Constructor.
     *
     * @param rnd random generator factory, used for drawing the sample
     * @param innerkMeans k-means variant to run on the sample
     * @param rate sampling rate / sample size passed to the sampler
     */
    public SampleKMeansInitialization(RandomFactory rnd, KMeans<V, ?> innerkMeans, double rate) {
        super(rnd);
        this.innerkMeans = innerkMeans;
        this.rate = rate;
    }

    /**
     * Chooses {@code k} initial means by clustering a random sample of
     * {@code relation} with the inner k-means variant.
     *
     * @param database database context (not used by this implementation; the
     *        inner algorithm runs on a proxy database built from the sample)
     * @param relation data relation to sample from
     * @param k number of means requested
     * @param distanceFunction distance function to configure the inner k-means with
     * @return one prototype per cluster produced by the inner k-means run
     * @throws IllegalArgumentException if the relation, or the drawn sample,
     *         contains fewer than {@code k} objects
     */
    @Override
    public double[][] chooseInitialMeans(Database database, Relation<? extends NumberVector> relation, int k, NumberVectorDistanceFunction<?> distanceFunction) {
        if (relation.size() < k) {
            throw new IllegalArgumentException("Cannot choose k=" + k + " means from N=" + relation.size() + " < k objects.");
        }
        final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), this.rate, this.rnd);
        if (sample.size() < k) {
            throw new IllegalArgumentException("Sampling rate=" + this.rate + " from N=" + relation.size() + " yields only " + sample.size() + " < k objects.");
        }

        // The inner algorithm is typed on V, but callers only promise NumberVector.
        // This cast cannot be verified at runtime; a mismatch would surface inside
        // the inner k-means run.
        @SuppressWarnings("unchecked")
        final Relation<V> rel = (Relation<V>) relation;

        // Only a warning: the type restriction is not enforced here.
        if (!distanceFunction.getInputTypeRestriction().isAssignableFromType(TypeUtil.NUMBER_VECTOR_FIELD)) {
            LoggingUtil.warning("Initializing k-means with k-means using specialized distance functions MAY fail, if the initialization method does require a distance defined on arbitrary number vectors.");
        }
        @SuppressWarnings("unchecked")
        final NumberVectorDistanceFunction<? super V> pdf = (NumberVectorDistanceFunction<? super V>) distanceFunction;

        // Restrict both the relation and the database to the sampled ids.
        final ProxyView<V> proxyv = new ProxyView<V>(sample, rel);
        final ProxyDatabase proxydb = new ProxyDatabase(sample, proxyv);

        // Override whatever k / distance the inner instance was constructed with.
        this.innerkMeans.setK(k);
        this.innerkMeans.setDistanceFunction(pdf);
        final Clustering<?> clusters = this.innerkMeans.run(proxydb, proxyv);

        final double[][] means = new double[clusters.getAllClusters().size()][];
        int i = 0;
        for (Cluster<?> cluster : clusters.getAllClusters()) {
            means[i++] = ModelUtil.getPrototype(cluster.getModel(), relation).toArray();
        }
        return means;
    }

    /**
     * Parameterization class.
     *
     * @param <V> vector type the inner k-means variant operates on
     */
    public static class Parameterizer<V extends NumberVector>
    extends AbstractKMeansInitialization.Parameterizer {
        /** Option for choosing the inner k-means variant. */
        public static final OptionID KMEANS_ID = new OptionID("kmeans.algorithm", "KMeans variant to run multiple times.");

        /** Option for the sample size / sampling rate. */
        public static final OptionID SAMPLE_ID = new OptionID("kmeans.samplesize", "Sample set size (if > 1) or sampling rate (if < 1).");

        /** Inner k-means variant; stays {@code null} if the option could not be read. */
        protected KMeans<V, ?> innerkMeans;

        /** Sampling rate / sample size; stays {@code 0.0} if the option could not be read. */
        protected double rate;

        /**
         * Reads the inner k-means variant and the sampling rate from the
         * configuration, after the options of the parent parameterizer.
         *
         * @param config parameterization to read from
         */
        @Override
        protected void makeOptions(Parameterization config) {
            super.makeOptions(config);
            ObjectParameter<KMeans<V, ?>> kMeansVariantP = new ObjectParameter<KMeans<V, ?>>(KMEANS_ID, KMeans.class);
            if (config.grab(kMeansVariantP)) {
                // Pre-supply k and the distance function so the inner algorithm can be
                // instantiated; both are placeholders that chooseInitialMeans
                // overwrites via setK / setDistanceFunction before each run.
                ListParameterization kMeansVariantParameters = new ListParameterization();
                kMeansVariantParameters.addParameter(KMeans.K_ID, 13);
                kMeansVariantParameters.addParameter(KMeans.DISTANCE_FUNCTION_ID, SquaredEuclideanDistanceFunction.class);
                ChainedParameterization combinedConfig = new ChainedParameterization(kMeansVariantParameters, config);
                combinedConfig.errorsTo(config);
                this.innerkMeans = kMeansVariantP.instantiateClass(combinedConfig);
            }
            DoubleParameter sampleP = new DoubleParameter(SAMPLE_ID);
            if (config.grab(sampleP)) {
                this.rate = sampleP.doubleValue();
            }
        }

        /**
         * Builds the initialization from the values read in {@link #makeOptions}.
         *
         * @return new {@link SampleKMeansInitialization} instance
         */
        @Override
        protected SampleKMeansInitialization<V> makeInstance() {
            return new SampleKMeansInitialization<V>(this.rnd, this.innerkMeans, this.rate);
        }
    }
}

