/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.SubspaceClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.Subspace;
import de.lmu.ifi.dbs.elki.data.model.SubspaceModel;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceMaximumDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.utilities.datastructures.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.AbstractParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory;
import java.util.Random;
import net.jafama.FastMath;

@Title(value="DOC: Density-based Optimal projective Clustering")
@Reference(authors="C. M. Procopiuc, M. Jones, P. K. Agarwal, T. M. Murali", title="A Monte Carlo algorithm for fast projective clustering", booktitle="Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '02)", url="https://doi.org/10.1145/564691.564739", bibkey="DBLP:conf/sigmod/ProcopiucJAM02")
public class DOC<V extends NumberVector>
extends AbstractAlgorithm<Clustering<SubspaceModel>>
implements SubspaceClusteringAlgorithm<SubspaceModel> {
    private static final Logging LOG = Logging.getLogger(DOC.class);
    protected double alpha;
    protected double beta;
    protected double w;
    protected RandomFactory rnd;

    public DOC(double alpha, double beta, double w, RandomFactory random) {
        this.alpha = alpha;
        this.beta = beta;
        this.w = w;
        this.rnd = random;
    }

    public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
        Cluster<SubspaceModel> C;
        IndefiniteProgress cprogress;
        int d = RelationUtil.dimensionality(relation);
        ArrayModifiableDBIDs S = DBIDUtil.newArray(relation.getDBIDs());
        double r = Math.abs(FastMath.log(d + d) / FastMath.log(this.beta * 0.5));
        int n = (int)(2.0 / this.alpha);
        int m = (int)(FastMath.pow(2.0 / this.alpha, r) * FastMath.log(4.0));
        m = Math.min(m, Math.min(1000000, d * d));
        int minClusterSize = (int)(this.alpha * (double)S.size());
        Clustering<SubspaceModel> result = new Clustering<SubspaceModel>("DOC Clusters", "DOC");
        IndefiniteProgress indefiniteProgress = cprogress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
        while (S.size() > minClusterSize && (C = this.runDOC(database, relation, S, d, n, m, (int)r, minClusterSize)) != null) {
            result.addToplevelCluster(C);
            S.removeDBIDs(C.getIDs());
            if (cprogress == null) continue;
            cprogress.setProcessed(result.getAllClusters().size(), LOG);
        }
        if (S.size() > 0) {
            long[] alldims = BitsUtil.ones(d);
            result.addToplevelCluster(new Cluster<SubspaceModel>(S, true, new SubspaceModel(new Subspace(alldims), Centroid.make(relation, S).getArrayRef())));
        }
        LOG.setCompleted(cprogress);
        return result;
    }

    protected Cluster<SubspaceModel> runDOC(Database database, Relation<V> relation, ArrayModifiableDBIDs S, int d, int n, int m, int r, int minClusterSize) {
        DBIDs C = null;
        long[] D = null;
        double quality = Double.NEGATIVE_INFINITY;
        FiniteProgress iprogress = LOG.isVerbose() ? new FiniteProgress("Iteration progress for current cluster", m * n, LOG) : null;
        Random random = this.rnd.getSingleThreadedRandom();
        DBIDArrayMIter iter = S.iter();
        for (int i = 0; i < n; ++i) {
            iter.seek(random.nextInt(S.size()));
            for (int j = 0; j < m; ++j) {
                ModifiableDBIDs randomSet = DBIDUtil.randomSample((DBIDs)S, r, random);
                long[] nD = BitsUtil.zero(d);
                for (int k = 0; k < d; ++k) {
                    if (!this.dimensionIsRelevant(k, relation, randomSet)) continue;
                    BitsUtil.setI(nD, k);
                }
                if (BitsUtil.cardinality(nD) > 0) {
                    DBIDs nC = this.findNeighbors(iter, nD, S, relation);
                    if (LOG.isDebuggingFiner()) {
                        LOG.finer("Testing a cluster candidate, |C| = " + nC.size() + ", |D| = " + BitsUtil.cardinality(nD));
                    }
                    if (nC.size() < minClusterSize) {
                        if (!LOG.isDebuggingFiner()) continue;
                        LOG.finer("... but it's too small.");
                        continue;
                    }
                    double nQuality = this.computeClusterQuality(nC.size(), BitsUtil.cardinality(nD));
                    if (nQuality > quality) {
                        if (LOG.isDebuggingFiner()) {
                            LOG.finer("... and it's the best so far: " + nQuality + " vs. " + quality);
                        }
                        C = nC;
                        D = nD;
                        quality = nQuality;
                    } else if (LOG.isDebuggingFiner()) {
                        LOG.finer("... but we already have a better one.");
                    }
                }
                LOG.incrementProcessed(iprogress);
            }
        }
        LOG.ensureCompleted(iprogress);
        return C != null ? this.makeCluster(relation, C, D) : null;
    }

    protected DBIDs findNeighbors(DBIDRef q, long[] nD, ArrayModifiableDBIDs S, Relation<V> relation) {
        DistanceQuery<NumberVector> dq = relation.getDistanceQuery(new SubspaceMaximumDistanceFunction(nD), new Object[0]);
        ArrayModifiableDBIDs nC = DBIDUtil.newArray();
        DBIDArrayMIter it = S.iter();
        while (it.valid()) {
            if (dq.distance(q, (DBIDRef)it) <= this.w) {
                nC.add(it);
            }
            it.advance();
        }
        return nC;
    }

    protected boolean dimensionIsRelevant(int dimension, Relation<V> relation, DBIDs points) {
        double min = Double.POSITIVE_INFINITY;
        double max = Double.NEGATIVE_INFINITY;
        DBIDIter iter = points.iter();
        while (iter.valid()) {
            double xV = ((NumberVector)relation.get(iter)).doubleValue(dimension);
            min = xV < min ? xV : min;
            double d = max = xV > max ? xV : max;
            if (max - min > this.w) {
                return false;
            }
            iter.advance();
        }
        return true;
    }

    protected Cluster<SubspaceModel> makeCluster(Relation<V> relation, DBIDs C, long[] D) {
        HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(C);
        Cluster<SubspaceModel> cluster = new Cluster<SubspaceModel>(ids);
        cluster.setModel(new SubspaceModel(new Subspace(D), Centroid.make(relation, ids).getArrayRef()));
        return cluster;
    }

    protected double computeClusterQuality(int clusterSize, int numRelevantDimensions) {
        return (double)clusterSize * FastMath.pow(1.0 / this.beta, numRelevantDimensions);
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer<V extends NumberVector>
    extends AbstractParameterizer {
        public static final OptionID ALPHA_ID = new OptionID("doc.alpha", "Minimum relative density for a set of points to be considered a cluster (|C|>=doc.alpha*|S|).");
        public static final OptionID BETA_ID = new OptionID("doc.beta", "Preference of cluster size versus number of relevant dimensions (higher value means higher priority on larger clusters).");
        public static final OptionID W_ID = new OptionID("doc.w", "Maximum extent of scattering of points along a single attribute for the attribute to be considered relevant.");
        public static final OptionID RANDOM_ID = new OptionID("doc.random-seed", "Random seed, for reproducible experiments.");
        protected double alpha;
        protected double beta;
        protected double w;
        protected RandomFactory random = RandomFactory.DEFAULT;

        @Override
        protected void makeOptions(Parameterization config) {
            super.makeOptions(config);
            AbstractParameter param = (DoubleParameter)((DoubleParameter)new DoubleParameter(ALPHA_ID, 0.2).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE)).addConstraint((ParameterConstraint)CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
            if (config.grab(param)) {
                this.alpha = (Double)param.getValue();
            }
            if (config.grab(param = (DoubleParameter)((DoubleParameter)new DoubleParameter(BETA_ID, 0.8).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE)).addConstraint((ParameterConstraint)CommonConstraints.LESS_THAN_ONE_DOUBLE))) {
                this.beta = (Double)param.getValue();
            }
            if (config.grab(param = (DoubleParameter)new DoubleParameter(W_ID, 0.05).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE))) {
                this.w = (Double)param.getValue();
            }
            if (config.grab(param = new RandomParameter(RANDOM_ID))) {
                this.random = (RandomFactory)param.getValue();
            }
        }

        @Override
        protected DOC<V> makeInstance() {
            return new DOC(this.alpha, this.beta, this.w, this.random);
        }
    }
}

