/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;

import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.DatabaseUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction;
import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;

@Title(value="KDEOS: Kernel Density Estimator Outlier Score")
@Reference(authors="Erich Schubert, Arthur Zimek, Hans-Peter Kriegel", title="Generalized Outlier Detection with Flexible Kernel Density Estimates", booktitle="Proc. 14th SIAM International Conference on Data Mining (SDM 2014)", url="https://doi.org/10.1137/1.9781611973440.63", bibkey="DBLP:conf/sdm/SchubertZK14")
public class KDEOS<O>
extends AbstractDistanceBasedAlgorithm<O, OutlierResult>
implements OutlierAlgorithm {
    private static final Logging LOG = Logging.getLogger(KDEOS.class);
    KernelDensityFunction kernel;
    int kmin;
    int kmax;
    double scale;
    double minBandwidth = 1.0E-6;
    int idim = -1;
    static final double CUTOFF = 1.0E-20;

    public KDEOS(DistanceFunction<? super O> distanceFunction, int kmin, int kmax, KernelDensityFunction kernel, double minBandwidth, double scale, int idim) {
        super(distanceFunction);
        this.kmin = kmin;
        this.kmax = kmax;
        this.kernel = kernel;
        this.minBandwidth = minBandwidth;
        this.scale = scale;
        this.idim = idim;
    }

    public OutlierResult run(Database database, Relation<O> rel) {
        DBIDs ids = rel.getDBIDs();
        LOG.verbose("Running kNN preprocessor.");
        KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, rel, this.getDistanceFunction(), this.kmax + 1);
        WritableDataStore<double[]> densities = DataStoreUtil.makeStorage(ids, 3, double[].class);
        this.estimateDensities(rel, knnq, ids, densities);
        WritableDoubleDataStore kofs = DataStoreUtil.makeDoubleStorage(ids, 30);
        DoubleMinMax minmax = new DoubleMinMax();
        this.computeOutlierScores(knnq, ids, densities, kofs, minmax);
        MaterializedDoubleRelation scoreres = new MaterializedDoubleRelation("Kernel Density Estimation Outlier Scores", "kdeos-outlier", kofs, ids);
        ProbabilisticOutlierScore meta = new ProbabilisticOutlierScore(minmax.getMin(), minmax.getMax());
        return new OutlierResult(meta, scoreres);
    }

    protected void estimateDensities(Relation<O> rel, KNNQuery<O> knnq, DBIDs ids, WritableDataStore<double[]> densities) {
        int dim = this.dimensionality(rel);
        int knum = this.kmax + 1 - this.kmin;
        DBIDIter iter = ids.iter();
        while (iter.valid()) {
            densities.put(iter, new double[knum]);
            iter.advance();
        }
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing densities", ids.size(), LOG) : null;
        double iminbw = this.minBandwidth > 0.0 ? 1.0 / (this.minBandwidth * this.scale) : Double.POSITIVE_INFINITY;
        DBIDIter iter2 = ids.iter();
        while (iter2.valid()) {
            KNNList neighbors = knnq.getKNNForDBID(iter2, this.kmax + 1);
            int idx = 0;
            double sum = 0.0;
            DoubleDBIDListIter kneighbor = neighbors.iter();
            for (int k = 1; k <= this.kmax && kneighbor.valid(); ++k) {
                sum += kneighbor.doubleValue();
                if (k >= this.kmin) {
                    double ibw = Math.min((double)k / (sum * this.scale), iminbw);
                    double sca = MathUtil.powi(ibw, dim);
                    DoubleDBIDListIter neighbor = neighbors.iter();
                    while (neighbor.valid()) {
                        double dens = sca < Double.POSITIVE_INFINITY ? sca * this.kernel.density(neighbor.doubleValue() * ibw) : (neighbor.doubleValue() == 0.0 ? 1.0 : 0.0);
                        double[] dArray = (double[])densities.get(neighbor);
                        int n = idx;
                        dArray[n] = dArray[n] + dens;
                        if (dens < 1.0E-20) break;
                        neighbor.advance();
                    }
                    ++idx;
                }
                kneighbor.advance();
            }
            LOG.incrementProcessed(prog);
            iter2.advance();
        }
        LOG.ensureCompleted(prog);
    }

    private int dimensionality(Relation<O> rel) {
        if (this.idim >= 0) {
            return this.idim;
        }
        Relation<O> frel = rel;
        int dim = RelationUtil.dimensionality(frel);
        if (dim < 1) {
            throw new AbortException("When using KDEOS with non-vectorspace data, the intrinsic dimensionality parameter must be set!");
        }
        return dim;
    }

    protected void computeOutlierScores(KNNQuery<O> knnq, DBIDs ids, WritableDataStore<double[]> densities, WritableDoubleDataStore kdeos, DoubleMinMax minmax) {
        int knum = this.kmax + 1 - this.kmin;
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing KDEOS scores", ids.size(), LOG) : null;
        double[][] scratch = new double[knum][this.kmax + 5];
        MeanVariance mv = new MeanVariance();
        DBIDIter iter = ids.iter();
        while (iter.valid()) {
            double[] dens = (double[])densities.get(iter);
            KNNList neighbors = knnq.getKNNForDBID(iter, this.kmax + 1);
            if (scratch[0].length < neighbors.size()) {
                scratch = new double[knum][neighbors.size() + 5];
            }
            int i = 0;
            DoubleDBIDListIter neighbor = neighbors.iter();
            while (neighbor.valid()) {
                double[] ndens = (double[])densities.get(neighbor);
                for (int k = 0; k < knum; ++k) {
                    scratch[k][i] = ndens[k];
                }
                neighbor.advance();
                ++i;
            }
            assert (i == neighbors.size());
            double score = 0.0;
            for (int i2 = 0; i2 < knum; ++i2) {
                mv.reset();
                for (int j = 0; j < neighbors.size(); ++j) {
                    mv.put(scratch[i2][j]);
                }
                double mean = mv.getMean();
                double stddev = mv.getSampleStddev();
                if (!(stddev > 0.0)) continue;
                score += (mean - dens[i2]) / stddev;
            }
            score /= (double)knum;
            score = NormalDistribution.standardNormalCDF(score);
            minmax.put(score);
            kdeos.put((DBIDRef)iter, score);
            LOG.incrementProcessed(prog);
            iter.advance();
        }
        LOG.ensureCompleted(prog);
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        TypeInformation res = this.getDistanceFunction().getInputTypeRestriction();
        if (this.idim < 0) {
            res = new CombinedTypeInformation(TypeUtil.NUMBER_VECTOR_FIELD, res);
        }
        return TypeUtil.array(res);
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer<O>
    extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
        public static final OptionID KERNEL_ID = new OptionID("kdeos.kernel", "Kernel density function to use.");
        public static final OptionID KERNEL_MIN_ID = new OptionID("kdeos.kernel.minbw", "Minimum bandwidth for kernel density estimation.");
        public static final OptionID KERNEL_SCALE_ID = new OptionID("kdeos.kernel.scale", "Scaling factor for the kernel function.");
        public static final OptionID KMIN_ID = new OptionID("kdeos.k.min", "Minimum value of k to analyze.");
        public static final OptionID KMAX_ID = new OptionID("kdeos.k.max", "Maximum value of k to analyze.");
        public static final OptionID IDIM_ID = new OptionID("kdeos.idim", "Intrinsic dimensionality of this data set. Use -1 for using the true data dimensionality, but values such as 0-2 often offer better performance.");
        KernelDensityFunction kernel;
        int kmin;
        int kmax;
        double scale;
        double minBandwidth = 0.0;
        int idim = -1;

        @Override
        protected void makeOptions(Parameterization config) {
            IntParameter idimP;
            DoubleParameter minbwP;
            DoubleParameter scaleP;
            IntParameter kmaxP;
            IntParameter kminP;
            super.makeOptions(config);
            ObjectParameter kernelP = new ObjectParameter(KERNEL_ID, (Class<?>)KernelDensityFunction.class, GaussianKernelDensityFunction.class);
            if (config.grab(kernelP)) {
                this.kernel = (KernelDensityFunction)kernelP.instantiateClass(config);
            }
            if (config.grab(kminP = (IntParameter)new IntParameter(KMIN_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT))) {
                this.kmin = kminP.intValue();
            }
            if (config.grab(kmaxP = (IntParameter)new IntParameter(KMAX_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT))) {
                this.kmax = kmaxP.intValue();
            }
            if (this.kmin > this.kmax) {
                config.reportError(new WrongParameterValueException(kminP, "must be at most", kmaxP, ""));
            }
            if (config.grab(scaleP = (DoubleParameter)((DoubleParameter)new DoubleParameter(KERNEL_SCALE_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE)).setDefaultValue((Object)0.25))) {
                this.scale = scaleP.doubleValue() * (this.kernel != null ? this.kernel.canonicalBandwidth() : 1.0);
            }
            if (config.grab(minbwP = (DoubleParameter)((DoubleParameter)new DoubleParameter(KERNEL_MIN_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE)).setOptional(true))) {
                this.minBandwidth = minbwP.doubleValue();
            }
            if (config.grab(idimP = new IntParameter(IDIM_ID, 1))) {
                this.idim = idimP.intValue();
            }
        }

        @Override
        protected KDEOS<O> makeInstance() {
            return new KDEOS(this.distanceFunction, this.kmin, this.kmax, this.kernel, this.minBandwidth, this.scale, this.idim);
        }
    }
}

