/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;

import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.AbstractKMeans;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMeansInitialization;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.KMeansModel;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListMIter;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.linearalgebra.VMath;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMinHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

@Title(value="K-Means--")
@Reference(authors="S. Chawla, A. Gionis", title="k-means--: A Unified Approach to Clustering and Outlier Detection", booktitle="Proc. 13th SIAM Int. Conf. on Data Mining (SDM 2013)", url="https://doi.org/10.1137/1.9781611972832.21", bibkey="DBLP:conf/sdm/ChawlaG13")
public class KMeansMinusMinus<V extends NumberVector>
extends AbstractKMeans<V, KMeansModel> {
    private static final Logging LOG = Logging.getLogger(KMeansMinusMinus.class);
    public double rate;
    public boolean noiseFlag;

    public KMeansMinusMinus(NumberVectorDistanceFunction<? super V> distanceFunction, int k, int maxiter, KMeansInitialization initializer, double rate, boolean noiseFlag) {
        super(distanceFunction, k, maxiter, initializer);
        this.rate = rate;
        this.noiseFlag = noiseFlag;
    }

    @Override
    public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
        Instance instance = new Instance((Relation<? extends NumberVector>)relation, (NumberVectorDistanceFunction<?>)this.getDistanceFunction(), this.initialMeans(database, relation));
        instance.run(this.maxiter);
        return instance.buildResultWithNoise();
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer<V extends NumberVector>
    extends AbstractKMeans.Parameterizer<V> {
        public static final OptionID RATE_ID = new OptionID("kmeansmm.l", "Number of outliers to ignore, or (if less than 1) a relative rate.");
        public static final OptionID NOISE_FLAG_ID = new OptionID("kmeansmm.noisecluster", "Create a noise cluster, instead of assigning the noise objects.");
        private double rate;
        private boolean noiseFlag;

        @Override
        protected void makeOptions(Parameterization config) {
            Flag createNoiseCluster;
            super.makeOptions(config);
            DoubleParameter rateP = (DoubleParameter)new DoubleParameter(RATE_ID, 0.05).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
            if (config.grab(rateP)) {
                this.rate = rateP.doubleValue();
            }
            if (config.grab(createNoiseCluster = new Flag(NOISE_FLAG_ID))) {
                this.noiseFlag = (Boolean)createNoiseCluster.getValue();
            }
        }

        @Override
        protected KMeansMinusMinus<V> makeInstance() {
            return new KMeansMinusMinus(this.distanceFunction, this.k, this.maxiter, this.initializer, this.rate, this.noiseFlag);
        }
    }

    protected class Instance
    extends AbstractKMeans.Instance {
        DoubleMinHeap minHeap;
        int heapsize;
        double prevvartotal;
        List<ModifiableDoubleDBIDList> clusters;

        public Instance(Relation<? extends NumberVector> relation, NumberVectorDistanceFunction<?> df, double[][] means) {
            super(relation, df, means);
            this.prevvartotal = Double.POSITIVE_INFINITY;
            this.heapsize = (int)(KMeansMinusMinus.this.rate < 1.0 ? Math.ceil((double)relation.size() * KMeansMinusMinus.this.rate) : KMeansMinusMinus.this.rate);
            this.minHeap = new DoubleMinHeap(this.heapsize);
            this.clusters = new ArrayList<ModifiableDoubleDBIDList>();
            for (int i = 0; i < this.k; ++i) {
                this.clusters.add(DBIDUtil.newDistanceDBIDList((int)((double)relation.size() * 2.0 / (double)this.k)));
            }
            ((AbstractKMeans.Instance)this).clusters = null;
        }

        @Override
        protected int iterate(int iteration) {
            if (iteration > 1) {
                this.means = this.meansWithTreshhold(this.heapsize == 0 || this.minHeap.size() < this.heapsize ? Double.POSITIVE_INFINITY : this.minHeap.peek());
            }
            this.minHeap.clear();
            for (int i = 0; i < this.k; ++i) {
                this.clusters.get(i).clear();
            }
            int changed = this.assignToNearestCluster();
            double vartotal = VMath.sum(this.varsum);
            if (vartotal > this.prevvartotal) {
                return -changed;
            }
            this.prevvartotal = vartotal;
            return changed;
        }

        protected Clustering<KMeansModel> buildResultWithNoise() {
            ModifiableDoubleDBIDList noiseids = null;
            if (KMeansMinusMinus.this.noiseFlag && this.heapsize > 0) {
                noiseids = DBIDUtil.newDistanceDBIDList(this.minHeap.size());
                this.clusters.add(noiseids);
                double tresh = this.minHeap.peek();
                for (int i = 0; i < this.k; ++i) {
                    DoubleDBIDListMIter it = this.clusters.get(i).iter();
                    while (it.valid()) {
                        double dist = it.doubleValue();
                        if (dist >= tresh) {
                            noiseids.add(dist, it);
                            this.assignment.putInt(it, this.k);
                            it.remove();
                        }
                        it.advance();
                    }
                }
            }
            Clustering<KMeansModel> result = new Clustering<KMeansModel>("k-Means-- Clustering", "kmeans-minus-minus-clustering");
            for (int i = 0; i < this.k; ++i) {
                DBIDs ids = this.clusters.get(i);
                if (ids.isEmpty()) continue;
                result.addToplevelCluster(new Cluster<KMeansModel>(ids, new KMeansModel(this.means[i], this.varsum[i])));
            }
            if (KMeansMinusMinus.this.noiseFlag && noiseids != null && !noiseids.isEmpty()) {
                result.addToplevelCluster(new Cluster<KMeansModel>(noiseids, true, new KMeansModel(null, 0.0)));
            }
            return result;
        }

        @Override
        protected int assignToNearestCluster() {
            assert (this.k == this.means.length);
            int changed = 0;
            Arrays.fill(this.varsum, 0.0);
            DBIDIter iditer = this.relation.iterDBIDs();
            while (iditer.valid()) {
                NumberVector fv = (NumberVector)this.relation.get(iditer);
                double mindist = Double.POSITIVE_INFINITY;
                int minIndex = 0;
                for (int i = 0; i < this.k; ++i) {
                    double dist = this.distance(fv, DoubleVector.wrap(this.means[i]));
                    if (!(dist < mindist)) continue;
                    minIndex = i;
                    mindist = dist;
                }
                if (this.heapsize > 0) {
                    this.minHeap.add(mindist, this.heapsize);
                }
                int n = minIndex;
                this.varsum[n] = this.varsum[n] + mindist;
                this.clusters.get(minIndex).add(mindist, iditer);
                if (this.assignment.putInt(iditer, minIndex) != minIndex) {
                    ++changed;
                }
                iditer.advance();
            }
            return changed;
        }

        protected double[][] meansWithTreshhold(double tresh) {
            double[][] newMeans = new double[this.k][];
            for (int i = 0; i < this.k; ++i) {
                DoubleDBIDList list = this.clusters.get(i);
                double[] raw = null;
                int count = 0;
                DoubleDBIDListIter iter = list.iter();
                while (iter.valid()) {
                    if (!(iter.doubleValue() >= tresh)) {
                        NumberVector vec = (NumberVector)this.relation.get(iter);
                        if (raw == null) {
                            raw = vec.toArray();
                        } else {
                            AbstractKMeans.plusEquals(raw, vec);
                        }
                        ++count;
                    }
                    iter.advance();
                }
                newMeans[i] = raw != null ? VMath.timesEquals(raw, 1.0 / (double)count) : this.means[i];
            }
            return newMeans;
        }

        @Override
        protected Logging getLogger() {
            return LOG;
        }
    }
}

