/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.outlier.clustering;

import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansSort;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.MeanModel;
import de.lmu.ifi.dbs.elki.data.model.ModelUtil;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

@Title(value="Discovering cluster-based local outliers")
@Reference(authors="Z. He, X. Xu, S. Deng", title="Discovering cluster-based local outliers", booktitle="Pattern Recognition Letters 24(9-10)", url="https://doi.org/10.1016/S0167-8655(03)00003-5", bibkey="DBLP:journals/prl/HeXD03")
public class CBLOF<O extends NumberVector>
extends AbstractDistanceBasedAlgorithm<O, OutlierResult>
implements OutlierAlgorithm {
    private static final Logging LOG = Logging.getLogger(CBLOF.class);
    protected ClusteringAlgorithm<Clustering<MeanModel>> clusteringAlgorithm;
    protected double alpha;
    protected double beta;
    protected NumberVectorDistanceFunction<? super O> distance;

    public CBLOF(NumberVectorDistanceFunction<? super O> distanceFunction, ClusteringAlgorithm<Clustering<MeanModel>> clusteringAlgorithm, double alpha, double beta) {
        super(distanceFunction);
        this.clusteringAlgorithm = clusteringAlgorithm;
        this.alpha = alpha;
        this.beta = beta;
        this.distance = distanceFunction;
    }

    public OutlierResult run(Database database, Relation<O> relation) {
        StepProgress stepprog = LOG.isVerbose() ? new StepProgress("CBLOF", 3) : null;
        DBIDs ids = relation.getDBIDs();
        LOG.beginStep(stepprog, 1, "Computing clustering.");
        Result clustering = this.clusteringAlgorithm.run(database);
        LOG.beginStep(stepprog, 2, "Computing boundary between large and small clusters.");
        List clusters = ((Clustering)clustering).getAllClusters();
        Collections.sort(clusters, new Comparator<Cluster<MeanModel>>(){

            @Override
            public int compare(Cluster<MeanModel> o1, Cluster<MeanModel> o2) {
                return Integer.compare(o2.size(), o1.size());
            }
        });
        int clusterBoundary = this.getClusterBoundary(relation, clusters);
        List largeClusters = clusters.subList(0, clusterBoundary + 1);
        List smallClusters = clusters.subList(clusterBoundary + 1, clusters.size());
        LOG.beginStep(stepprog, 3, "Computing Cluster-Based Local Outlier Factors (CBLOF).");
        WritableDoubleDataStore cblofs = DataStoreUtil.makeDoubleStorage(ids, 30);
        DoubleMinMax cblofMinMax = new DoubleMinMax();
        this.computeCBLOFs(relation, this.distance, cblofs, cblofMinMax, largeClusters, smallClusters);
        LOG.setCompleted(stepprog);
        MaterializedDoubleRelation scoreResult = new MaterializedDoubleRelation("Cluster-Based Local Outlier Factor", "cblof-outlier", cblofs, ids);
        QuotientOutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(cblofMinMax.getMin(), cblofMinMax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
        return new OutlierResult(scoreMeta, scoreResult);
    }

    private int getClusterBoundary(Relation<O> relation, List<? extends Cluster<MeanModel>> clusters) {
        int totalSize = relation.size();
        int clusterBoundary = clusters.size() - 1;
        int cumulativeSize = 0;
        for (int i = 0; i < clusters.size() - 1; ++i) {
            if ((double)(cumulativeSize += clusters.get(i).size()) >= (double)totalSize * this.alpha) {
                clusterBoundary = i;
                break;
            }
            if (!((double)clusters.get(i).size() / (double)clusters.get(i + 1).size() >= this.beta)) continue;
            clusterBoundary = i;
            break;
        }
        return clusterBoundary;
    }

    private void computeCBLOFs(Relation<O> relation, NumberVectorDistanceFunction<? super O> distance, WritableDoubleDataStore cblofs, DoubleMinMax cblofMinMax, List<? extends Cluster<MeanModel>> largeClusters, List<? extends Cluster<MeanModel>> smallClusters) {
        ArrayList<NumberVector> largeClusterMeans = new ArrayList<NumberVector>(largeClusters.size());
        for (Cluster<MeanModel> cluster : largeClusters) {
            NumberVector mean = ModelUtil.getPrototypeOrCentroid(cluster.getModel(), relation, cluster.getIDs());
            largeClusterMeans.add(mean);
            DBIDIter iter = cluster.getIDs().iter();
            while (iter.valid()) {
                double cblof = this.computeLargeClusterCBLOF((NumberVector)relation.get(iter), distance, mean, cluster);
                this.storeCBLOFScore(cblofs, cblofMinMax, cblof, iter);
                iter.advance();
            }
        }
        for (Cluster<MeanModel> cluster : smallClusters) {
            DBIDIter iter = cluster.getIDs().iter();
            while (iter.valid()) {
                double cblof = this.computeSmallClusterCBLOF((NumberVector)relation.get(iter), distance, largeClusterMeans, cluster);
                this.storeCBLOFScore(cblofs, cblofMinMax, cblof, iter);
                iter.advance();
            }
        }
    }

    private void storeCBLOFScore(WritableDoubleDataStore cblofs, DoubleMinMax cblofMinMax, double cblof, DBIDIter iter) {
        cblofs.putDouble(iter, cblof);
        cblofMinMax.put(cblof);
    }

    private double computeSmallClusterCBLOF(O obj, NumberVectorDistanceFunction<? super O> distance, List<NumberVector> largeClusterMeans, Cluster<MeanModel> cluster) {
        double nearestLargeClusterDistance = Double.MAX_VALUE;
        for (NumberVector clusterMean : largeClusterMeans) {
            double clusterDistance = distance.distance((NumberVector)obj, clusterMean);
            if (!(clusterDistance < nearestLargeClusterDistance)) continue;
            nearestLargeClusterDistance = clusterDistance;
        }
        return (double)cluster.size() * nearestLargeClusterDistance;
    }

    private double computeLargeClusterCBLOF(O obj, NumberVectorDistanceFunction<? super O> distanceQuery, NumberVector clusterMean, Cluster<MeanModel> cluster) {
        return (double)cluster.size() * distanceQuery.distance((NumberVector)obj, clusterMean);
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(this.getDistanceFunction().getInputTypeRestriction());
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer<O extends NumberVector>
    extends AbstractParameterizer {
        public static final OptionID CLUSTERING_ID = new OptionID("cblof.algorithm", "Clustering algorithm to use for detecting outliers.");
        public static final OptionID ALPHPA_ID = new OptionID("cblof.alpha", "The ratio of the data that should be included in the large clusters");
        public static final OptionID BETA_ID = new OptionID("cblof.beta", "The ratio of the data that should be included in the large clusters");
        protected ClusteringAlgorithm<Clustering<MeanModel>> clusteringAlgorithm;
        protected double alpha;
        protected double beta;
        protected NumberVectorDistanceFunction<? super O> distance;

        @Override
        protected void makeOptions(Parameterization config) {
            ObjectParameter clusterP;
            DoubleParameter pB;
            DoubleParameter pA;
            super.makeOptions(config);
            ObjectParameter distanceP = new ObjectParameter(DistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, (Class<?>)NumberVectorDistanceFunction.class, EuclideanDistanceFunction.class);
            if (config.grab(distanceP)) {
                this.distance = (NumberVectorDistanceFunction)distanceP.instantiateClass(config);
            }
            if (config.grab(pA = (DoubleParameter)((DoubleParameter)new DoubleParameter(ALPHPA_ID).addConstraint((ParameterConstraint)CommonConstraints.LESS_THAN_ONE_DOUBLE)).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE))) {
                this.alpha = pA.doubleValue();
            }
            if (config.grab(pB = (DoubleParameter)new DoubleParameter(BETA_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ONE_DOUBLE))) {
                this.beta = pB.doubleValue();
            }
            if (config.grab(clusterP = new ObjectParameter(CLUSTERING_ID, (Class<?>)ClusteringAlgorithm.class, KMeansSort.class))) {
                this.clusteringAlgorithm = (ClusteringAlgorithm)clusterP.instantiateClass(config);
            }
        }

        @Override
        protected CBLOF<O> makeInstance() {
            return new CBLOF<O>(this.distance, this.clusteringAlgorithm, this.alpha, this.beta);
        }
    }
}

