/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.uncertain;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeans;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.KMeansModel;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.uncertain.DiscreteUncertainObject;
import de.lmu.ifi.dbs.elki.data.uncertain.UncertainObject;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic;
import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic;
import de.lmu.ifi.dbs.elki.math.linearalgebra.VMath;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

@Reference(authors="M. Chau, R. Cheng, B. Kao, J. Ng", title="Uncertain data mining: An example in clustering location data", booktitle="Proc. 10th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD 2006)", url="https://doi.org/10.1007/11731139_24", bibkey="DBLP:conf/pakdd/ChauCKN06")
public class UKMeans
extends AbstractAlgorithm<Clustering<KMeansModel>>
implements ClusteringAlgorithm<Clustering<KMeansModel>> {
    protected static final Logging LOG = Logging.getLogger(UKMeans.class);
    protected static final String KEY = UKMeans.class.getName();
    protected int k;
    protected int maxiter;
    protected RandomFactory rnd;

    public UKMeans(int k, int maxiter, RandomFactory rnd) {
        this.k = k;
        this.maxiter = maxiter;
        this.rnd = rnd;
    }

    public Clustering<?> run(Database database, Relation<DiscreteUncertainObject> relation) {
        int iteration;
        if (relation.size() <= 0) {
            return new Clustering("Uk-Means Clustering", "ukmeans-clustering");
        }
        ModifiableDBIDs sampleids = DBIDUtil.randomSample(relation.getDBIDs(), this.k, this.rnd);
        List<double[]> means = new ArrayList<double[]>(this.k);
        DBIDIter iter = sampleids.iter();
        while (iter.valid()) {
            means.add(ArrayLikeUtil.toPrimitiveDoubleArray(relation.get(iter).getCenterOfMass()));
            iter.advance();
        }
        ArrayList<HashSetModifiableDBIDs> clusters = new ArrayList<HashSetModifiableDBIDs>();
        for (int i = 0; i < this.k; ++i) {
            clusters.add(DBIDUtil.newHashSet((int)((double)relation.size() * 2.0 / (double)this.k)));
        }
        WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), 3, -1);
        double[] varsum = new double[this.k];
        IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("UK-Means iteration", LOG) : null;
        DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;
        for (iteration = 0; this.maxiter <= 0 || iteration < this.maxiter; ++iteration) {
            LOG.incrementProcessed(prog);
            boolean changed = this.assignToNearestCluster(relation, means, clusters, assignment, varsum);
            this.logVarstat(varstat, varsum);
            if (!changed) break;
            means = this.means(clusters, means, relation);
        }
        LOG.setCompleted(prog);
        if (LOG.isStatistics()) {
            LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
        }
        Clustering<KMeansModel> result = new Clustering<KMeansModel>("Uk-Means Clustering", "ukmeans-clustering");
        for (int i = 0; i < clusters.size(); ++i) {
            DBIDs ids = (DBIDs)clusters.get(i);
            if (ids.isEmpty()) continue;
            result.addToplevelCluster(new Cluster<KMeansModel>(ids, new KMeansModel(means.get(i), varsum[i])));
        }
        return result;
    }

    protected boolean assignToNearestCluster(Relation<DiscreteUncertainObject> relation, List<double[]> means, List<? extends ModifiableDBIDs> clusters, WritableIntegerDataStore assignment, double[] varsum) {
        assert (this.k == means.size());
        boolean changed = false;
        Arrays.fill(varsum, 0.0);
        DBIDIter iditer = relation.iterDBIDs();
        while (iditer.valid()) {
            double mindist = Double.POSITIVE_INFINITY;
            DiscreteUncertainObject fv = relation.get(iditer);
            int minIndex = 0;
            for (int i = 0; i < this.k; ++i) {
                double dist = this.getExpectedRepDistance(DoubleVector.wrap(means.get(i)), fv);
                if (!(dist < mindist)) continue;
                minIndex = i;
                mindist = dist;
            }
            int n = minIndex;
            varsum[n] = varsum[n] + mindist;
            changed |= this.updateAssignment(iditer, clusters, assignment, minIndex);
            iditer.advance();
        }
        return changed;
    }

    protected boolean updateAssignment(DBIDIter iditer, List<? extends ModifiableDBIDs> clusters, WritableIntegerDataStore assignment, int newA) {
        int oldA = assignment.intValue(iditer);
        if (oldA == newA) {
            return false;
        }
        clusters.get(newA).add(iditer);
        assignment.putInt(iditer, newA);
        if (oldA >= 0) {
            clusters.get(oldA).remove(iditer);
        }
        return true;
    }

    protected double getExpectedRepDistance(NumberVector rep, DiscreteUncertainObject uo) {
        SquaredEuclideanDistanceFunction euclidean = SquaredEuclideanDistanceFunction.STATIC;
        int counter = 0;
        double sum = 0.0;
        for (int i = 0; i < uo.getNumberSamples(); ++i) {
            sum += euclidean.distance(rep, uo.getSample(i));
            ++counter;
        }
        return sum / (double)counter;
    }

    protected List<double[]> means(List<? extends ModifiableDBIDs> clusters, List<double[]> means, Relation<DiscreteUncertainObject> database) {
        ArrayList<double[]> newMeans = new ArrayList<double[]>(this.k);
        for (int i = 0; i < this.k; ++i) {
            ModifiableDBIDs list = clusters.get(i);
            double[] mean = null;
            if (list.size() > 0) {
                DBIDMIter iter = list.iter();
                mean = ArrayLikeUtil.toPrimitiveDoubleArray(database.get(iter).getCenterOfMass());
                iter.advance();
                while (iter.valid()) {
                    DoubleVector vec = database.get(iter).getCenterOfMass();
                    for (int j = 0; j < mean.length; ++j) {
                        int n = j;
                        mean[n] = mean[n] + vec.doubleValue(j);
                    }
                    iter.advance();
                }
                VMath.timesEquals(mean, 1.0 / (double)list.size());
            } else {
                mean = means.get(i);
            }
            newMeans.add(mean);
        }
        return newMeans;
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(UncertainObject.UNCERTAIN_OBJECT_FIELD);
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    protected void logVarstat(DoubleStatistic varstat, double[] varsum) {
        if (varstat != null) {
            double s = VMath.sum(varsum);
            this.getLogger().statistics(varstat.setDouble(s));
        }
    }

    public static class Parameterizer
    extends AbstractParameterizer {
        protected int k;
        protected int maxiter;
        protected RandomFactory rnd;

        @Override
        public void makeOptions(Parameterization config) {
            RandomParameter rndP;
            IntParameter maxiterP;
            super.makeOptions(config);
            IntParameter kP = (IntParameter)new IntParameter(KMeans.K_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT);
            if (config.grab(kP)) {
                this.k = (Integer)kP.getValue();
            }
            if (config.grab(maxiterP = (IntParameter)new IntParameter(KMeans.MAXITER_ID, 0).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ZERO_INT))) {
                this.maxiter = (Integer)maxiterP.getValue();
            }
            if (config.grab(rndP = new RandomParameter(KMeans.SEED_ID))) {
                this.rnd = (RandomFactory)rndP.getValue();
            }
        }

        @Override
        protected UKMeans makeInstance() {
            return new UKMeans(this.k, this.maxiter, this.rnd);
        }
    }
}

