/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.outlier;

import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.linearalgebra.VMath;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GammaChoiWetteEstimator;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArrayAdapter;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import java.util.Arrays;

@Title(value="COP: Correlation Outlier Probability")
@Reference(authors="Hans-Peter Kriegel, Peer Kr\u00f6ger, Erich Schubert, Arthur Zimek", title="Outlier Detection in Arbitrarily Oriented Subspaces", booktitle="Proc. IEEE Int. Conf. on Data Mining (ICDM 2012)", url="https://doi.org/10.1109/ICDM.2012.21", bibkey="DBLP:conf/icdm/KriegelKSZ12")
public class COP<V extends NumberVector>
extends AbstractDistanceBasedAlgorithm<V, OutlierResult>
implements OutlierAlgorithm {
    private static final Logging LOG = Logging.getLogger(COP.class);
    public static final String COP_SCORES = "cop-outlier";
    public static final String COP_DIM = "cop-dim";
    public static final String COP_ERRORVEC = "cop-errorvec";
    private static final DoubleArrayAdapter SHORTENED_ARRAY = new DoubleArrayAdapter(){

        @Override
        public int size(double[] array) {
            return (int)(0.85 * (double)array.length);
        }
    };
    int k;
    private PCARunner pca;
    double expect = 1.0E-4;
    DistanceDist dist = DistanceDist.CHISQUARED;
    boolean models;

    public COP(DistanceFunction<? super V> distanceFunction, int k, PCARunner pca, double expect, DistanceDist dist, boolean models) {
        super(distanceFunction);
        this.k = k;
        this.pca = pca;
        this.expect = expect;
        this.dist = dist;
        this.models = models;
    }

    public OutlierResult run(Relation<V> relation) {
        DBIDs ids = relation.getDBIDs();
        KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relation, this.getDistanceFunction(), this.k + 1);
        int dim = RelationUtil.dimensionality(relation);
        if (this.k <= dim + 1) {
            LOG.warning("PCA is underspecified with a too low k! k should be at much larger than " + dim);
        }
        WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, 6);
        WritableDataStore<double[]> cop_err_v = this.models ? DataStoreUtil.makeStorage(ids, 6, double[].class) : null;
        WritableIntegerDataStore cop_dim = this.models ? DataStoreUtil.makeIntegerStorage(ids, 6, -1) : null;
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null;
        double[] centroid = new double[dim];
        double[] scores = new double[dim];
        HashSetModifiableDBIDs nids = DBIDUtil.newHashSet(this.k + 10);
        DBIDIter id = ids.iter();
        while (id.valid()) {
            nids.clear();
            nids.addDBIDs(knnQuery.getKNNForDBID(id, this.k + 1));
            nids.remove(id);
            COP.computeCentroid(centroid, relation, nids);
            PCAResult pcares = this.pca.processIds(nids, relation);
            double[][] tevecs = pcares.getEigenvectors();
            double[] evs = pcares.getEigenvalues();
            double[] projected = VMath.times(tevecs, VMath.minusEquals(((NumberVector)relation.get(id)).toArray(), centroid));
            if (this.dist == DistanceDist.CHISQUARED) {
                double sqdevs = 0.0;
                for (int d = 0; d < dim; ++d) {
                    double dev = projected[d];
                    scores[d] = 1.0 - ChiSquaredDistribution.cdf(sqdevs += dev * dev / evs[d], d + 1);
                }
            } else {
                assert (this.dist == DistanceDist.GAMMA);
                double[][] dists = new double[dim][nids.size()];
                double[] srel = new double[dim];
                DBIDMIter s = nids.iter();
                for (int j = 0; s.valid() && j < nids.size(); ++j) {
                    NumberVector vec = (NumberVector)relation.get(s);
                    for (int d = 0; d < dim; ++d) {
                        srel[d] = vec.doubleValue(d) - centroid[d];
                    }
                    double sqdist = 0.0;
                    for (int d = 0; d < dim; ++d) {
                        double serrd = VMath.transposeTimes(tevecs[d], srel);
                        dists[d][j] = sqdist += serrd * serrd / evs[d];
                    }
                    s.advance();
                }
                double sqdevs = 0.0;
                for (int d = 0; d < dim; ++d) {
                    double dev = projected[d];
                    Arrays.sort(dists[d]);
                    scores[d] = 1.0 - ((GammaDistribution)GammaChoiWetteEstimator.STATIC.estimate(dists[d], (NumberArrayAdapter)SHORTENED_ARRAY)).cdf(sqdevs += dev * dev / evs[d]);
                }
            }
            double min = Double.POSITIVE_INFINITY;
            int vdim = dim - 1;
            for (int d = 0; d < dim; ++d) {
                double v = scores[d];
                if (!(v < min)) continue;
                min = v;
                vdim = d;
            }
            double prob = this.expect * (1.0 - min) / (this.expect + min);
            cop_score.putDouble(id, prob);
            if (this.models) {
                Arrays.fill(projected, vdim + 1, dim, 0.0);
                cop_err_v.put(id, VMath.timesEquals(VMath.transposeTimes(tevecs, projected), -prob));
                cop_dim.putInt(id, dim - vdim);
            }
            LOG.incrementProcessed(prog);
            id.advance();
        }
        LOG.ensureCompleted(prog);
        MaterializedDoubleRelation scoreResult = new MaterializedDoubleRelation("Correlation Outlier Probabilities", COP_SCORES, cop_score, ids);
        ProbabilisticOutlierScore scoreMeta = new ProbabilisticOutlierScore();
        OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
        if (this.models) {
            result.addChildResult(new MaterializedRelation<Integer>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
            result.addChildResult(new MaterializedRelation<double[]>("Error vectors", COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, cop_err_v, ids));
        }
        return result;
    }

    private static void computeCentroid(double[] centroid, Relation<? extends NumberVector> relation, DBIDs ids) {
        Arrays.fill(centroid, 0.0);
        int dim = centroid.length;
        DBIDIter it = ids.iter();
        while (it.valid()) {
            NumberVector v = relation.get(it);
            for (int i = 0; i < dim; ++i) {
                int n = i;
                centroid[n] = centroid[n] + v.doubleValue(i);
            }
            it.advance();
        }
        VMath.timesEquals(centroid, 1.0 / (double)ids.size());
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer<V extends NumberVector>
    extends AbstractDistanceBasedAlgorithm.Parameterizer<V> {
        public static final OptionID K_ID = new OptionID("cop.k", "The number of nearest neighbors of an object to be considered for computing its COP_SCORE.");
        public static final OptionID DIST_ID = new OptionID("cop.dist", "The assumed distribution of squared distances. ChiSquared is faster, Gamma expected to be more accurate but could also overfit.");
        public static final OptionID PCARUNNER_ID = new OptionID("cop.pcarunner", "The class to compute (filtered) PCA.");
        public static final OptionID EXPECT_ID = new OptionID("cop.expect", "Expected share of outliers. Only affect score normalization.");
        public static final OptionID MODELS_ID = new OptionID("cop.models", "Include COP models (error vectors) in output. This needs more memory.");
        int k;
        PCARunner pca;
        DistanceDist dist;
        double expect;
        boolean models = false;

        @Override
        protected void makeOptions(Parameterization config) {
            Flag modelsF;
            ObjectParameter pcaP;
            DoubleParameter expectP;
            EnumParameter<DistanceDist> distP;
            super.makeOptions(config);
            IntParameter kP = (IntParameter)new IntParameter(K_ID).addConstraint((ParameterConstraint)new GreaterConstraint(5));
            if (config.grab(kP)) {
                this.k = kP.intValue();
            }
            if (config.grab(distP = new EnumParameter<DistanceDist>(DIST_ID, DistanceDist.class, DistanceDist.GAMMA))) {
                this.dist = (DistanceDist)((Object)distP.getValue());
            }
            if (config.grab(expectP = (DoubleParameter)((DoubleParameter)new DoubleParameter(EXPECT_ID, 0.001).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE)).addConstraint((ParameterConstraint)CommonConstraints.LESS_THAN_ONE_DOUBLE))) {
                this.expect = expectP.doubleValue();
            }
            if (config.grab(pcaP = new ObjectParameter(PCARUNNER_ID, (Class<?>)PCARunner.class, PCARunner.class))) {
                this.pca = (PCARunner)pcaP.instantiateClass(config);
            }
            if (config.grab(modelsF = new Flag(MODELS_ID))) {
                this.models = modelsF.isTrue();
            }
        }

        @Override
        protected COP<V> makeInstance() {
            return new COP(this.distanceFunction, this.k, this.pca, this.expect, this.dist, this.models);
        }
    }

    public static enum DistanceDist {
        CHISQUARED,
        GAMMA;

    }
}

