/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;

import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.AbstractKMeans;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeans;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMeansInitialization;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.PredefinedInitialMeans;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality.KMeansQualityMeasure;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.MeanModel;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.MutableProgress;
import de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.VMath;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

@Reference(authors="D. Pelleg, A. Moore", title="X-means: Extending K-means with Efficient Estimation on the Number of Clusters", booktitle="Proc. 17th Int. Conf. on Machine Learning (ICML 2000)", url="http://www.pelleg.org/shared/hp/download/xmeans.ps", bibkey="DBLP:conf/icml/PellegM00")
public class XMeans<V extends NumberVector, M extends MeanModel>
extends AbstractKMeans<V, M> {
    private static final Logging LOG = Logging.getLogger(XMeans.class);
    private static final String KEY = XMeans.class.getName();
    private KMeans<V, M> innerKMeans;
    private int k;
    private int k_min;
    private int k_max;
    PredefinedInitialMeans splitInitializer;
    KMeansQualityMeasure<V> informationCriterion;
    RandomFactory rnd;

    public XMeans(NumberVectorDistanceFunction<? super V> distanceFunction, int k_min, int k_max, int maxiter, KMeans<V, M> innerKMeans, KMeansInitialization initializer, KMeansQualityMeasure<V> informationCriterion, RandomFactory random) {
        super(distanceFunction, k_min, maxiter, initializer);
        this.k_min = k_min;
        this.k_max = k_max;
        this.k = k_min;
        this.innerKMeans = innerKMeans;
        this.splitInitializer = new PredefinedInitialMeans((double[][])null);
        this.innerKMeans.setInitializer(this.splitInitializer);
        this.innerKMeans.setDistanceFunction(distanceFunction);
        this.informationCriterion = informationCriterion;
        this.rnd = random;
    }

    @Override
    public Clustering<M> run(Database database, Relation<V> relation) {
        MutableProgress prog = LOG.isVerbose() ? new MutableProgress("X-means number of clusters", this.k_max, LOG) : null;
        this.innerKMeans.setK(this.k_min);
        LOG.statistics(new StringStatistic(KEY + ".initialization", this.initializer.toString()));
        this.splitInitializer.setInitialMeans(this.initializer.chooseInitialMeans(database, (Relation<? extends NumberVector>)relation, this.k_min, (NumberVectorDistanceFunction<?>)this.getDistanceFunction()));
        Clustering<M> clustering = this.innerKMeans.run(database, relation);
        if (prog != null) {
            prog.setProcessed(this.k_min, LOG);
        }
        ArrayList<Cluster<M>> clusters = new ArrayList<Cluster<M>>(clustering.getAllClusters());
        while (clusters.size() <= this.k_max) {
            ArrayList<Cluster<M>> nextClusters = new ArrayList<Cluster<M>>();
            for (Cluster<M> cluster : clusters) {
                List<Cluster<M>> childClusterList = this.splitCluster(cluster, database, relation);
                nextClusters.addAll(childClusterList);
                if (childClusterList.size() <= 1) continue;
                this.k += childClusterList.size() - 1;
                if (prog == null) continue;
                if (this.k >= this.k_max) {
                    prog.setTotal(this.k + 1);
                }
                prog.setProcessed(this.k, LOG);
            }
            if (clusters.size() == nextClusters.size()) break;
            this.splitInitializer.setInitialClusters(nextClusters);
            this.innerKMeans.setK(nextClusters.size());
            clustering = this.innerKMeans.run(database, relation);
            clusters.clear();
            clusters.addAll(clustering.getAllClusters());
        }
        if (prog != null) {
            prog.setTotal(this.k);
            prog.setProcessed(this.k, LOG);
        }
        return new Clustering<M>("X-Means Result", "X-Means", clusters);
    }

    protected List<Cluster<M>> splitCluster(Cluster<M> parentCluster, Database database, Relation<V> relation) {
        ArrayList<Cluster<M>> parentClusterList = new ArrayList<Cluster<M>>(1);
        parentClusterList.add(parentCluster);
        if (parentCluster.size() <= 1) {
            return parentClusterList;
        }
        Clustering<M> parentClustering = new Clustering<M>(parentCluster.getName(), parentCluster.getName(), parentClusterList);
        ProxyDatabase proxyDB = new ProxyDatabase(parentCluster.getIDs(), database);
        this.splitInitializer.setInitialMeans(this.splitCentroid(parentCluster, relation));
        this.innerKMeans.setK(2);
        Result childClustering = this.innerKMeans.run(proxyDB);
        double parentEvaluation = this.informationCriterion.quality((Clustering<MeanModel>)parentClustering, this.getDistanceFunction(), relation);
        double childrenEvaluation = this.informationCriterion.quality((Clustering<MeanModel>)childClustering, this.getDistanceFunction(), relation);
        if (LOG.isDebugging()) {
            LOG.debug("parentEvaluation: " + parentEvaluation);
            LOG.debug("childrenEvaluation: " + childrenEvaluation);
        }
        return this.informationCriterion.isBetter(parentEvaluation, childrenEvaluation) ? parentClusterList : ((Clustering)childClustering).getAllClusters();
    }

    protected double[][] splitCentroid(Cluster<? extends MeanModel> parentCluster, Relation<V> relation) {
        double[] parentCentroid = parentCluster.getModel().getMean();
        double radius = 0.0;
        DBIDIter it = parentCluster.getIDs().iter();
        while (it.valid()) {
            double d = this.getDistanceFunction().distance((NumberVector)relation.get(it), DoubleVector.wrap(parentCentroid));
            radius = d > radius ? d : radius;
            it.advance();
        }
        Random random = this.rnd.getSingleThreadedRandom();
        int dim = RelationUtil.dimensionality(relation);
        double[] randomVector = VMath.normalize(MathUtil.randomDoubleArray(dim, random));
        VMath.timesEquals(randomVector, (0.4 + random.nextDouble() * 0.5) * radius);
        for (int d = 0; d < dim; ++d) {
            double a = parentCentroid[d];
            double b = randomVector[d];
            parentCentroid[d] = a - b;
            randomVector[d] = a + b;
        }
        return new double[][]{parentCentroid, randomVector};
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return this.innerKMeans.getInputTypeRestriction();
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer<V extends NumberVector, M extends MeanModel>
    extends AbstractKMeans.Parameterizer<V> {
        public static final OptionID INNER_KMEANS_ID = new OptionID("xmeans.kmeans", "kMeans algorithm to use.");
        public static final OptionID K_MIN_ID = new OptionID("xmeans.k_min", "The minimum number of clusters to find.");
        public static final OptionID SEED_ID = new OptionID("xmeans.seed", "Random seed for splitting clusters.");
        public static final OptionID INFORMATION_CRITERION_ID = new OptionID("xmeans.quality", "The quality measure to evaluate splits (e.g. AIC, BIC)");
        protected KMeans<V, M> innerKMeans;
        protected KMeansQualityMeasure<V> informationCriterion;
        protected int k_min;
        protected int k_max;
        private RandomFactory random;

        @Override
        protected void makeOptions(Parameterization config) {
            ObjectParameter informationCriterionP;
            ObjectParameter innerKMeansP;
            IntParameter kMaxP;
            IntParameter kMinP = (IntParameter)new IntParameter(K_MIN_ID, 2).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT);
            if (config.grab(kMinP)) {
                this.k_min = kMinP.intValue();
            }
            if (config.grab(kMaxP = (IntParameter)new IntParameter(KMeans.K_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT))) {
                this.k_max = kMaxP.intValue();
            }
            if (this.k_min > this.k_max) {
                config.reportError(new WrongParameterValueException(kMinP, "must be at most", kMaxP, ""));
            }
            this.getParameterInitialization(config);
            this.getParameterMaxIter(config);
            this.getParameterDistanceFunction(config);
            RandomParameter rndP = new RandomParameter(SEED_ID);
            if (config.grab(rndP)) {
                this.random = (RandomFactory)rndP.getValue();
            }
            if (config.grab(innerKMeansP = new ObjectParameter(INNER_KMEANS_ID, (Class<?>)KMeans.class, KMeansLloyd.class))) {
                ChainedParameterization combinedConfig = new ChainedParameterization(new ListParameterization().addParameter(KMeans.K_ID, (Object)this.k_min).addParameter(KMeans.INIT_ID, (Object)new PredefinedInitialMeans((double[][])null)).addParameter(KMeans.MAXITER_ID, (Object)this.maxiter).addParameter(KMeans.DISTANCE_FUNCTION_ID, (Object)(this.distanceFunction != null ? this.distanceFunction : SquaredEuclideanDistanceFunction.STATIC)), config);
                combinedConfig.errorsTo(config);
                this.innerKMeans = (KMeans)innerKMeansP.instantiateClass(combinedConfig);
            }
            if (config.grab(informationCriterionP = new ObjectParameter(INFORMATION_CRITERION_ID, KMeansQualityMeasure.class))) {
                this.informationCriterion = (KMeansQualityMeasure)informationCriterionP.instantiateClass(config);
            }
        }

        @Override
        protected XMeans<V, M> makeInstance() {
            return new XMeans<V, M>(this.distanceFunction, this.k_min, this.k_max, this.maxiter, this.innerKMeans, this.initializer, this.informationCriterion, this.random);
        }
    }
}

