/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.SharedNearestNeighborSimilarityFunction;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.Mean;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.math.linearalgebra.VMath;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.datastructures.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;

@Title(value="SOD: Subspace outlier degree")
@Description(value="Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data")
@Reference(authors="Hans-Peter Kriegel, Peer Kr\u00f6ger, Erich Schubert, Arthur Zimek", title="Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data", booktitle="Proc. Pacific-Asia Conf. on Knowledge Discovery and Data Mining (PAKDD 2009)", url="https://doi.org/10.1007/978-3-642-01307-2_86", bibkey="DBLP:conf/pakdd/KriegelKSZ09")
@Alias(value={"de.lmu.ifi.dbs.elki.algorithm.outlier.SOD"})
public class SOD<V extends NumberVector>
extends AbstractAlgorithm<OutlierResult>
implements OutlierAlgorithm {
    private static final Logging LOG = Logging.getLogger(SOD.class);
    private int knn;
    private double alpha;
    private SimilarityFunction<V> similarityFunction;
    private boolean models;

    public SOD(int knn, double alpha, SimilarityFunction<V> similarityFunction, boolean models) {
        this.knn = knn;
        this.alpha = alpha;
        this.similarityFunction = similarityFunction;
        this.models = models;
    }

    public OutlierResult run(Relation<V> relation) {
        SimilarityQuery<V> snnInstance = this.similarityFunction.instantiate(relation);
        FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), LOG) : null;
        WritableDoubleDataStore sod_scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), 4);
        WritableDataStore<SODModel> sod_models = this.models ? DataStoreUtil.makeStorage(relation.getDBIDs(), 4, SODModel.class) : null;
        DoubleMinMax minmax = new DoubleMinMax();
        DBIDIter iter = relation.iterDBIDs();
        while (iter.valid()) {
            double[] center;
            DBIDs neighborhood = this.getNearestNeighbors(relation, snnInstance, iter);
            long[] weightVector = null;
            double sod = 0.0;
            if (neighborhood.size() > 0) {
                center = Centroid.make(relation, neighborhood).getArrayRef();
                double[] variances = SOD.computePerDimensionVariances(relation, center, neighborhood);
                double expectationOfVariance = Mean.of(variances);
                weightVector = BitsUtil.zero(variances.length);
                for (int d = 0; d < variances.length; ++d) {
                    if (!(variances[d] < this.alpha * expectationOfVariance)) continue;
                    BitsUtil.setI(weightVector, d);
                }
                sod = this.subspaceOutlierDegree((NumberVector)relation.get(iter), center, weightVector);
            } else {
                center = ((NumberVector)relation.get(iter)).toArray();
            }
            if (sod_models != null) {
                sod_models.put(iter, new SODModel(center, weightVector));
            }
            sod_scores.putDouble(iter, sod);
            minmax.put(sod);
            LOG.incrementProcessed(progress);
            iter.advance();
        }
        LOG.ensureCompleted(progress);
        BasicOutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
        OutlierResult sodResult = new OutlierResult(meta, new MaterializedDoubleRelation("Subspace Outlier Degree", "sod-outlier", sod_scores, relation.getDBIDs()));
        if (sod_models != null) {
            sodResult.addChildResult(new MaterializedRelation<SODModel>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<SODModel>(SODModel.class), sod_models, relation.getDBIDs()));
        }
        return sodResult;
    }

    private DBIDs getNearestNeighbors(Relation<V> relation, SimilarityQuery<V> simQ, DBIDRef queryObject) {
        TiedTopBoundedHeap<DoubleDBIDPair> nearestNeighbors = new TiedTopBoundedHeap<DoubleDBIDPair>(this.knn);
        DBIDIter iter = relation.iterDBIDs();
        while (iter.valid()) {
            double sim;
            if (!DBIDUtil.equal(iter, queryObject) && (sim = simQ.similarity(queryObject, (DBIDRef)iter)) > 0.0) {
                ((Heap)nearestNeighbors).add(DBIDUtil.newPair(sim, (DBIDRef)iter));
            }
            iter.advance();
        }
        ArrayModifiableDBIDs dbids = DBIDUtil.newArray(((Heap)nearestNeighbors).size());
        while (((Heap)nearestNeighbors).size() > 0) {
            dbids.add((DBIDRef)((Heap)nearestNeighbors).poll());
        }
        return dbids;
    }

    private static double[] computePerDimensionVariances(Relation<? extends NumberVector> relation, double[] center, DBIDs neighborhood) {
        int dim = center.length;
        double[] variances = new double[dim];
        DBIDIter iter = neighborhood.iter();
        while (iter.valid()) {
            NumberVector databaseObject = relation.get(iter);
            int d = 0;
            while (d < dim) {
                double deviation = databaseObject.doubleValue(d) - center[d];
                int n = d++;
                variances[n] = variances[n] + deviation * deviation;
            }
            iter.advance();
        }
        return VMath.timesEquals(variances, 1.0 / (double)neighborhood.size());
    }

    private double subspaceOutlierDegree(V queryObject, double[] center, long[] weightVector) {
        int card = BitsUtil.cardinality(weightVector);
        if (card == 0) {
            return 0.0;
        }
        SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(weightVector);
        return df.distance((NumberVector)queryObject, DoubleVector.wrap(center)) / (double)card;
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer<V extends NumberVector>
    extends AbstractParameterizer {
        public static final OptionID KNN_ID = new OptionID("sod.knn", "The number of most snn-similar objects to use as reference set for learning the subspace properties.");
        public static final OptionID ALPHA_ID = new OptionID("sod.alpha", "The multiplier for the discriminance value for discerning small from large variances.");
        public static final OptionID SIM_ID = new OptionID("sod.similarity", "The similarity function used for the neighborhood set.");
        public static final OptionID MODELS_ID = new OptionID("sod.models", "Report the models computed by SOD (default: report only scores).");
        private int knn = 1;
        private double alpha = 1.1;
        private SimilarityFunction<V> similarityFunction;
        private boolean models = false;

        @Override
        protected void makeOptions(Parameterization config) {
            Flag modelsF;
            DoubleParameter alphaP;
            IntParameter knnP;
            super.makeOptions(config);
            ObjectParameter simP = new ObjectParameter(SIM_ID, (Class<?>)SimilarityFunction.class, SharedNearestNeighborSimilarityFunction.class);
            if (config.grab(simP)) {
                this.similarityFunction = (SimilarityFunction)simP.instantiateClass(config);
            }
            if (config.grab(knnP = (IntParameter)new IntParameter(KNN_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT))) {
                this.knn = (Integer)knnP.getValue();
            }
            if (config.grab(alphaP = (DoubleParameter)new DoubleParameter(ALPHA_ID, 1.1).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE))) {
                this.alpha = alphaP.doubleValue();
            }
            if (config.grab(modelsF = new Flag(MODELS_ID))) {
                this.models = modelsF.isTrue();
            }
        }

        @Override
        protected SOD<V> makeInstance() {
            return new SOD<V>(this.knn, this.alpha, this.similarityFunction, this.models);
        }
    }

    public static class SODModel
    implements TextWriteable {
        private double[] center;
        private long[] weightVector;

        public SODModel(double[] center, long[] weightVector) {
            this.center = center;
            this.weightVector = weightVector;
        }

        @Override
        public void writeToText(TextWriterStream out, String label) {
            out.commentPrintLn(this.getClass().getSimpleName() + ":");
            out.commentPrintLn("relevant attributes (starting with 0): " + BitsUtil.toString(this.weightVector, ", ", 0));
            out.commentPrintLn("center of neighborhood: " + this.center.toString());
            out.commentPrintSeparator();
        }
    }
}

