/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;

import de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedClustering;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.ClusterModel;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.math.linearalgebra.VMath;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import net.jafama.FastMath;

@Title(value="ORCLUS: Arbitrarily ORiented projected CLUSter generation")
@Description(value="Algorithm to find correlation clusters in high dimensional spaces.")
@Reference(authors="C. C. Aggarwal, P. S. Yu", title="Finding Generalized Projected Clusters in High Dimensional Spaces", booktitle="Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00)", url="https://doi.org/10.1145/342009.335383", bibkey="DBLP:conf/sigmod/AggarwalY00")
public class ORCLUS<V extends NumberVector>
extends AbstractProjectedClustering<Clustering<Model>, V> {
    private static final Logging LOG = Logging.getLogger(ORCLUS.class);
    private double alpha;
    private RandomFactory rnd;
    private PCARunner pca;

    public ORCLUS(int k, int k_i, int l, double alpha, RandomFactory rnd, PCARunner pca) {
        super(k, k_i, l);
        this.alpha = alpha;
        this.rnd = rnd;
        this.pca = pca;
    }

    public Clustering<Model> run(Database database, Relation<V> relation) {
        IndefiniteProgress cprogress;
        int dim_c = RelationUtil.dimensionality(relation);
        if (dim_c < this.l) {
            throw new IllegalStateException("Dimensionality of data < parameter l! (" + dim_c + " < " + this.l + ")");
        }
        int k_c = Math.min(relation.size(), this.k_i * this.k);
        List<ORCLUSCluster> clusters = this.initialSeeds(relation, k_c);
        double beta = FastMath.exp(-FastMath.log((double)dim_c / (double)this.l) * FastMath.log(1.0 / this.alpha) / FastMath.log((double)k_c / (double)this.k));
        IndefiniteProgress indefiniteProgress = cprogress = LOG.isVerbose() ? new IndefiniteProgress("Current number of clusters:", LOG) : null;
        while (k_c > this.k) {
            this.assign(relation, clusters);
            for (ORCLUSCluster cluster : clusters) {
                if (cluster.objectIDs.size() <= 0) continue;
                cluster.basis = this.findBasis(relation, cluster, dim_c);
            }
            k_c = (int)Math.max((double)this.k, (double)k_c * this.alpha);
            dim_c = (int)Math.max((double)this.l, (double)dim_c * beta);
            this.merge(relation, clusters, k_c, dim_c, cprogress);
            if (cprogress == null) continue;
            cprogress.setProcessed(clusters.size(), LOG);
        }
        this.assign(relation, clusters);
        LOG.setCompleted(cprogress);
        Clustering<Model> r = new Clustering<Model>("ORCLUS clustering", "orclus-clustering");
        for (ORCLUSCluster c : clusters) {
            r.addToplevelCluster(new Cluster<ClusterModel>((DBIDs)c.objectIDs, ClusterModel.CLUSTER));
        }
        return r;
    }

    private List<ORCLUSCluster> initialSeeds(Relation<V> database, int k) {
        ModifiableDBIDs randomSample = DBIDUtil.randomSample(database.getDBIDs(), k, this.rnd);
        ArrayList<ORCLUSCluster> seeds = new ArrayList<ORCLUSCluster>(k);
        DBIDIter iter = randomSample.iter();
        while (iter.valid()) {
            seeds.add(new ORCLUSCluster(((NumberVector)database.get(iter)).toArray(), iter));
            iter.advance();
        }
        return seeds;
    }

    private void assign(Relation<V> database, List<ORCLUSCluster> clusters) {
        SquaredEuclideanDistanceFunction distFunc = SquaredEuclideanDistanceFunction.STATIC;
        for (ORCLUSCluster oRCLUSCluster : clusters) {
            oRCLUSCluster.objectIDs.clear();
        }
        ArrayList<DoubleVector> projectedCentroids = new ArrayList<DoubleVector>(clusters.size());
        for (ORCLUSCluster c : clusters) {
            projectedCentroids.add(DoubleVector.wrap(this.project(c, c.centroid)));
        }
        DBIDIter dBIDIter = database.iterDBIDs();
        while (dBIDIter.valid()) {
            double[] o = ((NumberVector)database.get(dBIDIter)).toArray();
            double minDist = Double.POSITIVE_INFINITY;
            ORCLUSCluster minCluster = null;
            for (int i = 0; i < clusters.size(); ++i) {
                ORCLUSCluster c = clusters.get(i);
                DoubleVector o_proj = DoubleVector.wrap(this.project(c, o));
                double dist = distFunc.distance(o_proj, (NumberVector)projectedCentroids.get(i));
                if (!(dist < minDist)) continue;
                minDist = dist;
                minCluster = c;
            }
            minCluster.objectIDs.add(dBIDIter);
            dBIDIter.advance();
        }
        for (ORCLUSCluster cluster : clusters) {
            if (cluster.objectIDs.size() <= 0) continue;
            cluster.centroid = Centroid.make(database, cluster.objectIDs).toArray();
        }
    }

    private double[][] findBasis(Relation<V> database, ORCLUSCluster cluster, int dim) {
        PCAResult pcares = this.pca.processIds(cluster.objectIDs, database);
        double[][] evs = pcares.getEigenvectors();
        return (double[][])Arrays.copyOfRange(evs, evs.length - dim, evs.length);
    }

    private void merge(Relation<V> relation, List<ORCLUSCluster> clusters, int k_new, int d_new, IndefiniteProgress cprogress) {
        ArrayList<ProjectedEnergy> projectedEnergies = new ArrayList<ProjectedEnergy>(clusters.size() * (clusters.size() - 1) >>> 1);
        for (int i = 0; i < clusters.size(); ++i) {
            for (int j = i + 1; j < clusters.size(); ++j) {
                ORCLUSCluster c_i = clusters.get(i);
                ORCLUSCluster c_j = clusters.get(j);
                projectedEnergies.add(this.projectedEnergy(relation, c_i, c_j, i, j, d_new));
            }
        }
        while (clusters.size() > k_new) {
            if (cprogress != null) {
                cprogress.setProcessed(clusters.size(), LOG);
            }
            ProjectedEnergy minPE = (ProjectedEnergy)Collections.min(projectedEnergies);
            for (int c = 0; c < clusters.size(); ++c) {
                if (c == minPE.i) {
                    clusters.remove(c);
                    clusters.add(c, minPE.cluster);
                }
                if (c != minPE.j) continue;
                clusters.remove(c);
            }
            int i = minPE.i;
            int j = minPE.j;
            Iterator it = projectedEnergies.iterator();
            while (it.hasNext()) {
                ProjectedEnergy pe = (ProjectedEnergy)it.next();
                if (pe.i == i || pe.i == j || pe.j == i || pe.j == j) {
                    it.remove();
                    continue;
                }
                if (pe.i > j) {
                    --pe.i;
                }
                if (pe.j <= j) continue;
                --pe.j;
            }
            ORCLUSCluster c_ij = minPE.cluster;
            for (int c = 0; c < clusters.size(); ++c) {
                if (c < i) {
                    projectedEnergies.add(this.projectedEnergy(relation, clusters.get(c), c_ij, c, i, d_new));
                    continue;
                }
                if (c <= i) continue;
                projectedEnergies.add(this.projectedEnergy(relation, clusters.get(c), c_ij, i, c, d_new));
            }
        }
    }

    private ProjectedEnergy projectedEnergy(Relation<V> relation, ORCLUSCluster c_i, ORCLUSCluster c_j, int i, int j, int dim) {
        SquaredEuclideanDistanceFunction distFunc = SquaredEuclideanDistanceFunction.STATIC;
        ORCLUSCluster c_ij = this.union(relation, c_i, c_j, dim);
        double sum = 0.0;
        DoubleVector c_proj = DoubleVector.wrap(this.project(c_ij, c_ij.centroid));
        DBIDMIter iter = c_ij.objectIDs.iter();
        while (iter.valid()) {
            DoubleVector o_proj = DoubleVector.wrap(this.project(c_ij, ((NumberVector)relation.get(iter)).toArray()));
            sum += distFunc.distance(o_proj, c_proj);
            iter.advance();
        }
        return new ProjectedEnergy(i, j, c_ij, sum /= (double)c_ij.objectIDs.size());
    }

    private ORCLUSCluster union(Relation<V> relation, ORCLUSCluster c1, ORCLUSCluster c2, int dim) {
        ORCLUSCluster c = new ORCLUSCluster();
        c.objectIDs = DBIDUtil.newHashSet(c1.objectIDs);
        c.objectIDs.addDBIDs(c2.objectIDs);
        c.objectIDs = DBIDUtil.newArray(c.objectIDs);
        if (c.objectIDs.size() > 0) {
            c.centroid = Centroid.make(relation, c.objectIDs).getArrayRef();
            c.basis = this.findBasis(relation, c, dim);
        } else {
            c.centroid = VMath.timesEquals(VMath.plusEquals(c1.centroid, c2.centroid), 0.5);
            c.basis = VMath.identity(dim, c.centroid.length);
        }
        return c;
    }

    private double[] project(ORCLUSCluster c, double[] o) {
        return VMath.times(c.basis, o);
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer<V extends NumberVector>
    extends AbstractProjectedClustering.Parameterizer {
        public static final OptionID ALPHA_ID = new OptionID("orclus.alpha", "The factor for reducing the number of current clusters in each iteration.");
        public static final OptionID SEED_ID = new OptionID("orclus.seed", "The random number generator seed.");
        protected double alpha;
        protected RandomFactory rnd;
        protected PCARunner pca;

        @Override
        protected void makeOptions(Parameterization config) {
            ObjectParameter pcaP;
            RandomParameter rndP;
            DoubleParameter alphaP;
            IntParameter lP;
            IntParameter k_iP;
            super.makeOptions(config);
            IntParameter kP = (IntParameter)new IntParameter(K_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT);
            if (config.grab(kP)) {
                this.k = (Integer)kP.getValue();
            }
            if (config.grab(k_iP = (IntParameter)new IntParameter(K_I_ID, 30).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT))) {
                this.k_i = (Integer)k_iP.getValue();
            }
            if (config.grab(lP = (IntParameter)new IntParameter(L_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT))) {
                this.l = (Integer)lP.getValue();
            }
            if (config.grab(alphaP = (DoubleParameter)((DoubleParameter)new DoubleParameter(ALPHA_ID, 0.5).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE)).addConstraint((ParameterConstraint)CommonConstraints.LESS_EQUAL_ONE_DOUBLE))) {
                this.alpha = alphaP.doubleValue();
            }
            if (config.grab(rndP = new RandomParameter(SEED_ID))) {
                this.rnd = (RandomFactory)rndP.getValue();
            }
            if (config.grab(pcaP = new ObjectParameter(PCARunner.Parameterizer.PCARUNNER_ID, (Class<?>)PCARunner.class, PCARunner.class))) {
                this.pca = (PCARunner)pcaP.instantiateClass(config);
            }
        }

        @Override
        protected ORCLUS<V> makeInstance() {
            return new ORCLUS(this.k, this.k_i, this.l, this.alpha, this.rnd, this.pca);
        }
    }

    private final class ProjectedEnergy
    implements Comparable<ProjectedEnergy> {
        int i;
        int j;
        ORCLUSCluster cluster;
        double projectedEnergy;

        ProjectedEnergy(int i, int j, ORCLUSCluster cluster, double projectedEnergy) {
            this.i = i;
            this.j = j;
            this.cluster = cluster;
            this.projectedEnergy = projectedEnergy;
        }

        @Override
        public int compareTo(ProjectedEnergy o) {
            return Double.compare(this.projectedEnergy, o.projectedEnergy);
        }
    }

    private final class ORCLUSCluster {
        ModifiableDBIDs objectIDs = DBIDUtil.newArray();
        double[][] basis;
        double[] centroid;

        ORCLUSCluster() {
        }

        ORCLUSCluster(double[] o, DBIDRef id) {
            this.centroid = o;
            this.basis = VMath.unitMatrix(o.length);
            this.objectIDs.add(id);
        }
    }
}

