/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN;
import de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.SubspaceClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.Subspace;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.model.SubspaceModel;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.ProxyView;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingSubspaceDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.utilities.datastructures.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.TreeMap;

@Title(value="SUBCLU: Density connected Subspace Clustering")
@Description(value="Algorithm to detect arbitrarily shaped and positioned clusters in subspaces. SUBCLU delivers for each subspace the same clusters DBSCAN would have found, when applied to this subspace seperately.")
@Reference(authors="Karin Kailing, Hans-Peter Kriegel, Peer Kr\u00f6ger", title="Density Connected Subspace Clustering for High Dimensional Data", booktitle="Proc. SIAM Int. Conf. on Data Mining (SDM'04)", url="https://doi.org/10.1137/1.9781611972740.23", bibkey="DBLP:conf/sdm/KroegerKK04")
public class SUBCLU<V extends NumberVector>
extends AbstractAlgorithm<Clustering<SubspaceModel>>
implements SubspaceClusteringAlgorithm<SubspaceModel> {
    private static final Logging LOG = Logging.getLogger(SUBCLU.class);
    protected DimensionSelectingSubspaceDistanceFunction<V> distanceFunction;
    protected double epsilon;
    protected int minpts;
    protected int mindim;

    public SUBCLU(DimensionSelectingSubspaceDistanceFunction<V> distanceFunction, double epsilon, int minpts, int mindim) {
        this.distanceFunction = distanceFunction;
        this.epsilon = epsilon;
        this.minpts = minpts;
        this.mindim = mindim;
    }

    public Clustering<SubspaceModel> run(Relation<V> relation) {
        int d;
        int dimensionality = RelationUtil.dimensionality(relation);
        if (dimensionality <= 1) {
            throw new IllegalStateException("SUBCLU needs multivariate data.");
        }
        StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;
        TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<Subspace, List<Cluster<Model>>>(Subspace.DIMENSION_COMPARATOR);
        LOG.beginStep(stepprog, 1, "Generate all 1-dimensional clusters.");
        ArrayList<Subspace> subspaces = new ArrayList<Subspace>();
        for (d = 0; d < dimensionality; ++d) {
            Subspace currentSubspace = new Subspace(d);
            List<Cluster<Model>> clusters = this.runDBSCAN(relation, null, currentSubspace);
            if (LOG.isDebuggingFiner()) {
                StringBuilder msg = new StringBuilder(1000).append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensionsToString()).append(':');
                for (Cluster cluster : clusters) {
                    msg.append("\n      ").append(cluster.getIDs());
                }
                LOG.debugFiner(msg.toString());
            }
            if (clusters.isEmpty()) continue;
            subspaces.add(currentSubspace);
            clusterMap.put(currentSubspace, clusters);
        }
        for (d = 2; d <= dimensionality; ++d) {
            if (stepprog != null) {
                stepprog.beginStep(d, "Generate " + d + "-dimensional clusters from " + (d - 1) + "-dimensional clusters.", LOG);
            }
            List<Subspace> candidates = this.generateSubspaceCandidates(subspaces);
            ArrayList<Subspace> s_d = new ArrayList<Subspace>();
            FiniteProgress substepprog = LOG.isVerbose() ? new FiniteProgress("Candidates of dimensionality " + d, candidates.size(), LOG) : null;
            for (Subspace subspace : candidates) {
                Object candidateClusters;
                Subspace bestSubspace = this.bestSubspace(subspaces, subspace, clusterMap);
                if (LOG.isDebuggingFine()) {
                    LOG.debugFine("best subspace of " + subspace.dimensionsToString() + ": " + bestSubspace.dimensionsToString());
                }
                ArrayList<Cluster<Model>> clusters = new ArrayList<Cluster<Model>>();
                for (Cluster<Model> cluster : clusterMap.get(bestSubspace)) {
                    if (cluster.size() < this.minpts || (candidateClusters = this.runDBSCAN(relation, cluster.getIDs(), subspace)).isEmpty()) continue;
                    clusters.addAll((Collection<Cluster<Model>>)candidateClusters);
                }
                if (LOG.isDebuggingFine() && !clusters.isEmpty()) {
                    StringBuilder msg = new StringBuilder(1000).append(clusters.size()).append(" cluster(s) in subspace ").append(subspace).append(':');
                    candidateClusters = clusters.iterator();
                    while (candidateClusters.hasNext()) {
                        Cluster c = (Cluster)candidateClusters.next();
                        msg.append("\n      ").append(c.getIDs());
                    }
                    LOG.debugFine(msg.toString());
                }
                if (!clusters.isEmpty()) {
                    s_d.add(subspace);
                    clusterMap.put(subspace, clusters);
                }
                LOG.incrementProcessed(substepprog);
            }
            LOG.ensureCompleted(substepprog);
            if (s_d.isEmpty()) {
                if (stepprog == null) break;
                for (int dim = d + 1; dim <= dimensionality; ++dim) {
                    stepprog.beginStep(dim, "Generation of " + dim + "-dimensional clusters not applicable, because no " + d + "-dimensional subspaces were found.", LOG);
                }
                break;
            }
            subspaces = s_d;
        }
        LOG.setCompleted(stepprog);
        int numClusters = 0;
        HashSetModifiableDBIDs noise = DBIDUtil.newHashSet(relation.getDBIDs());
        TreeMap<Subspace, HashSetModifiableDBIDs> filtered = new TreeMap<Subspace, HashSetModifiableDBIDs>(Subspace.DIMENSION_COMPARATOR);
        Clustering<SubspaceModel> result = new Clustering<SubspaceModel>("SUBCLU clustering", "subclu-clustering");
        for (Subspace subspace : clusterMap.descendingKeySet()) {
            if (subspace.dimensionality() < this.mindim) continue;
            DBIDs blacklisted = (DBIDs)filtered.get(subspace);
            blacklisted = blacklisted != null ? blacklisted : DBIDUtil.EMPTYDBIDS;
            HashSetModifiableDBIDs blacklist = DBIDUtil.newHashSet(blacklisted);
            List<Cluster<Model>> clusters = clusterMap.get(subspace);
            for (Cluster<Model> cluster : clusters) {
                DBIDs ids = cluster.getIDs();
                ModifiableDBIDs newids = DBIDUtil.difference(ids, blacklisted);
                Cluster<SubspaceModel> newCluster = new Cluster<SubspaceModel>(newids);
                newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, ids).getArrayRef()));
                newCluster.setName("cluster_" + numClusters++);
                result.addToplevelCluster(newCluster);
                blacklist.addDBIDs(newids);
                noise.removeDBIDs(newids);
            }
            if (subspace.dimensionality() <= this.mindim) continue;
            long[] tmp = BitsUtil.copy(subspace.getDimensions());
            int pos = BitsUtil.nextSetBit(tmp, 0);
            while (pos >= 0) {
                BitsUtil.clearI(tmp, pos);
                Subspace sub = new Subspace(BitsUtil.copy(tmp));
                BitsUtil.setI(tmp, pos);
                ModifiableDBIDs bl = (ModifiableDBIDs)filtered.get(sub);
                if (bl != null) {
                    bl.addDBIDs(blacklist);
                } else {
                    filtered.put(sub, DBIDUtil.newHashSet(blacklist));
                }
                pos = BitsUtil.nextSetBit(tmp, pos + 1);
            }
        }
        if (!noise.isEmpty()) {
            Cluster<SubspaceModel> newCluster = new Cluster<SubspaceModel>(noise);
            newCluster.setModel(new SubspaceModel(new Subspace(BitsUtil.zero(dimensionality)), Centroid.make(relation, noise).getArrayRef()));
            newCluster.setName("noise");
            newCluster.setNoise(true);
            result.addToplevelCluster(newCluster);
        }
        return result;
    }

    private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace subspace) {
        this.distanceFunction.setSelectedDimensions(subspace.getDimensions());
        if (LOG.isVerbose()) {
            LOG.verbose("Run DBSCAN on subspace " + subspace.dimensionsToString() + (ids == null ? "" : " on cluster of " + ids.size() + " objects."));
        }
        relation = ids == null ? relation : new ProxyView(ids, relation);
        DBSCAN<V> dbscan = new DBSCAN<V>(this.distanceFunction, this.epsilon, this.minpts);
        Clustering<Model> dbsres = dbscan.run(relation);
        ArrayList<Cluster<Model>> clusters = new ArrayList<Cluster<Model>>();
        for (Cluster<Model> c : dbsres.getAllClusters()) {
            if (c.isNoise()) continue;
            clusters.add(c);
        }
        return clusters;
    }

    private List<Subspace> generateSubspaceCandidates(List<Subspace> subspaces) {
        StringBuilder msgFinest;
        if (subspaces.isEmpty()) {
            return Collections.emptyList();
        }
        StringBuilder stringBuilder = msgFinest = LOG.isDebuggingFinest() ? new StringBuilder(1000) : null;
        if (msgFinest != null) {
            msgFinest.append("subspaces ").append(subspaces).append('\n');
        }
        ArrayList<Subspace> candidates = new ArrayList<Subspace>();
        int d = subspaces.get(0).dimensionality() + 1;
        for (int i = 0; i < subspaces.size(); ++i) {
            Subspace s1 = subspaces.get(i);
            for (int j = i + 1; j < subspaces.size(); ++j) {
                Subspace candidate = s1.join(subspaces.get(j));
                if (candidate == null || d != 2 && !this.checkLower(candidate, subspaces)) continue;
                if (msgFinest != null) {
                    msgFinest.append("candidate: ").append(candidate.dimensionsToString()).append('\n');
                }
                candidates.add(candidate);
            }
        }
        if (msgFinest != null) {
            LOG.debugFinest(msgFinest.toString());
        }
        if (LOG.isDebugging()) {
            StringBuilder msg = new StringBuilder(1000).append(d).append("-dimensional candidate subspaces: ");
            for (Subspace candidate : candidates) {
                msg.append(candidate.dimensionsToString()).append(' ');
            }
            LOG.debug(msg.toString());
        }
        return candidates;
    }

    private boolean checkLower(Subspace candidate, List<Subspace> subspaces) {
        long[] dimensions = BitsUtil.copy(candidate.getDimensions());
        int dim = BitsUtil.nextSetBit(dimensions, 0);
        while (dim >= 0) {
            BitsUtil.clearI(dimensions, dim);
            if (!subspaces.contains(new Subspace(dimensions))) {
                return false;
            }
            BitsUtil.setI(dimensions, dim);
            dim = BitsUtil.nextSetBit(dimensions, dim + 1);
        }
        return true;
    }

    private Subspace bestSubspace(List<Subspace> subspaces, Subspace candidate, TreeMap<Subspace, List<Cluster<Model>>> clusterMap) {
        Subspace bestSubspace = null;
        int min = Integer.MAX_VALUE;
        for (Subspace subspace : subspaces) {
            if (!subspace.isSubspace(candidate)) continue;
            int sum = 0;
            for (Cluster<Model> cluster : clusterMap.get(subspace)) {
                sum += cluster.size();
            }
            if (sum >= min) continue;
            min = sum;
            bestSubspace = subspace;
        }
        return bestSubspace;
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer<V extends NumberVector>
    extends AbstractParameterizer {
        public static final OptionID DISTANCE_FUNCTION_ID = new OptionID("subclu.distancefunction", "Distance function to determine the distance between database objects.");
        public static final OptionID EPSILON_ID = new OptionID("subclu.epsilon", "The maximum radius of the neighborhood to be considered.");
        public static final OptionID MINPTS_ID = new OptionID("subclu.minpts", "Threshold for minimum number of points in the epsilon-neighborhood of a point.");
        public static final OptionID MINDIM_ID = new OptionID("subclu.mindim", "Minimum dimensionality to generate clusters for.");
        protected DimensionSelectingSubspaceDistanceFunction<V> distance;
        protected double epsilon;
        protected int minpts;
        protected int mindim = 1;

        @Override
        protected void makeOptions(Parameterization config) {
            IntParameter mindimP;
            IntParameter minptsP;
            DoubleParameter epsilonP;
            super.makeOptions(config);
            ObjectParameter param = new ObjectParameter(DISTANCE_FUNCTION_ID, (Class<?>)DimensionSelectingSubspaceDistanceFunction.class, SubspaceEuclideanDistanceFunction.class);
            if (config.grab(param)) {
                this.distance = (DimensionSelectingSubspaceDistanceFunction)param.instantiateClass(config);
            }
            if (config.grab(epsilonP = new DoubleParameter(EPSILON_ID))) {
                this.epsilon = (Double)epsilonP.getValue();
            }
            if (config.grab(minptsP = (IntParameter)new IntParameter(MINPTS_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT))) {
                this.minpts = (Integer)minptsP.getValue();
            }
            if (config.grab(mindimP = (IntParameter)((IntParameter)new IntParameter(MINDIM_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT)).setOptional(true))) {
                this.mindim = (Integer)mindimP.getValue();
            }
        }

        @Override
        protected SUBCLU<V> makeInstance() {
            return new SUBCLU<V>(this.distance, this.epsilon, this.minpts, this.mindim);
        }
    }
}

