/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.VectorUtil;
import de.lmu.ifi.dbs.elki.data.projection.NumericalFeatureSelection;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.ProjectedView;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.statistics.tests.GoodnessOfFitTest;
import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.Comparator;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import net.jafama.FastMath;

@Title(value="HiCS: High Contrast Subspaces for Density-Based Outlier Ranking")
@Description(value="Algorithm to compute High Contrast Subspaces in a database as a pre-processing step for for density-based outlier ranking methods.")
@Reference(authors="F. Keller, E. M\u00fcller, K. B\u00f6hm", title="HiCS: High Contrast Subspaces for Density-Based Outlier Ranking", booktitle="Proc. IEEE 28th Int. Conf. on Data Engineering (ICDE 2012)", url="https://doi.org/10.1109/ICDE.2012.88", bibkey="DBLP:conf/icde/KellerMB12")
public class HiCS<V extends NumberVector>
extends AbstractAlgorithm<OutlierResult>
implements OutlierAlgorithm {
    private static final Logging LOG = Logging.getLogger(HiCS.class);
    private static final int MAX_RETRIES = 100;
    private int m;
    private double alpha;
    private OutlierAlgorithm outlierAlgorithm;
    private GoodnessOfFitTest statTest;
    private int cutoff;
    private RandomFactory rnd;

    public HiCS(int m, double alpha, OutlierAlgorithm outlierAlgorithm, GoodnessOfFitTest statTest, int cutoff, RandomFactory rnd) {
        this.m = m;
        this.alpha = alpha;
        this.outlierAlgorithm = outlierAlgorithm;
        this.statTest = statTest;
        this.cutoff = cutoff;
        this.rnd = rnd;
    }

    public OutlierResult run(Relation<V> relation) {
        DBIDs ids = relation.getDBIDs();
        ArrayList<ArrayDBIDs> subspaceIndex = this.buildOneDimIndexes(relation);
        Set<HiCSSubspace> subspaces = this.calculateSubspaces(relation, subspaceIndex, this.rnd.getSingleThreadedRandom());
        if (LOG.isVerbose()) {
            LOG.verbose("Number of high-contrast subspaces: " + subspaces.size());
        }
        ArrayList<DoubleRelation> results = new ArrayList<DoubleRelation>();
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), LOG) : null;
        for (HiCSSubspace dimset : subspaces) {
            if (LOG.isVerbose()) {
                LOG.verbose("Performing outlier detection in subspace " + dimset);
            }
            ProxyDatabase pdb = new ProxyDatabase(ids);
            pdb.addRelation(new ProjectedView(relation, new NumericalFeatureSelection(dimset)));
            OutlierResult result = this.outlierAlgorithm.run(pdb);
            results.add(result.getScores());
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), 4);
        DoubleMinMax minmax = new DoubleMinMax();
        DBIDIter iditer = relation.iterDBIDs();
        while (iditer.valid()) {
            double sum = 0.0;
            for (DoubleRelation r : results) {
                double s = r.doubleValue(iditer);
                if (Double.isNaN(s)) continue;
                sum += s;
            }
            scores.putDouble(iditer, sum);
            minmax.put(sum);
            iditer.advance();
        }
        BasicOutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
        MaterializedDoubleRelation scoreres = new MaterializedDoubleRelation("HiCS", "HiCS-outlier", scores, relation.getDBIDs());
        return new OutlierResult(meta, scoreres);
    }

    private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector> relation) {
        int dim = RelationUtil.dimensionality(relation);
        ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<ArrayDBIDs>(dim + 1);
        VectorUtil.SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
        for (int i = 0; i < dim; ++i) {
            ArrayModifiableDBIDs amDBIDs = DBIDUtil.newArray(relation.getDBIDs());
            comp.setDimension(i);
            amDBIDs.sort(comp);
            subspaceIndex.add(amDBIDs);
        }
        return subspaceIndex;
    }

    private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
        FiniteProgress dprog;
        int dbdim = RelationUtil.dimensionality(relation);
        FiniteProgress finiteProgress = dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
        if (dprog != null) {
            dprog.setProcessed(2, LOG);
        }
        TreeSet<HiCSSubspace> subspaceList = new TreeSet<HiCSSubspace>(HiCSSubspace.SORT_BY_SUBSPACE);
        TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<HiCSSubspace>(this.cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", dbdim * (dbdim - 1) >> 1, LOG) : null;
        for (int i = 0; i < dbdim; ++i) {
            for (int j = i + 1; j < dbdim; ++j) {
                HiCSSubspace ts = new HiCSSubspace();
                ts.set(i);
                ts.set(j);
                this.calculateContrast(relation, ts, subspaceIndex, random);
                dDimensionalList.add(ts);
                LOG.incrementProcessed(prog);
            }
        }
        LOG.ensureCompleted(prog);
        IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
        int d = 3;
        while (!dDimensionalList.isEmpty()) {
            if (dprog != null) {
                dprog.setProcessed(d, LOG);
            }
            ArrayList candidateList = new ArrayList(dDimensionalList.size());
            Heap.UnorderedIter it = dDimensionalList.unorderedIter();
            while (it.valid()) {
                subspaceList.add((HiCSSubspace)it.get());
                candidateList.add(it.get());
                it.advance();
            }
            dDimensionalList.clear();
            Collections.sort(candidateList, HiCSSubspace.SORT_BY_SUBSPACE);
            for (int i = 0; i < candidateList.size() - 1; ++i) {
                for (int j = i + 1; j < candidateList.size(); ++j) {
                    HiCSSubspace set1 = (HiCSSubspace)candidateList.get(i);
                    HiCSSubspace set2 = (HiCSSubspace)candidateList.get(j);
                    HiCSSubspace joinedSet = new HiCSSubspace();
                    joinedSet.or(set1);
                    joinedSet.or(set2);
                    if (joinedSet.cardinality() != d) continue;
                    this.calculateContrast(relation, joinedSet, subspaceIndex, random);
                    dDimensionalList.add(joinedSet);
                    LOG.incrementProcessed(qprog);
                }
            }
            block6: for (HiCSSubspace cand : candidateList) {
                Heap.UnorderedIter it2 = dDimensionalList.unorderedIter();
                while (it2.valid()) {
                    if (((HiCSSubspace)it2.get()).contrast > cand.contrast) {
                        subspaceList.remove(cand);
                        continue block6;
                    }
                    it2.advance();
                }
            }
            ++d;
        }
        LOG.setCompleted(qprog);
        if (dprog != null) {
            dprog.setProcessed(dbdim, LOG);
            dprog.ensureCompleted(LOG);
        }
        return subspaceList;
    }

    private void calculateContrast(Relation<? extends NumberVector> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
        int card = subspace.cardinality();
        double alpha1 = FastMath.pow(this.alpha, 1.0 / (double)card);
        int windowsize = (int)((double)relation.size() * alpha1);
        FiniteProgress prog = LOG.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", this.m, LOG) : null;
        int retries = 0;
        double deviationSum = 0.0;
        for (int i = 0; i < this.m; ++i) {
            DBIDArrayIter iter;
            int chosen = -1;
            for (int tmp = random.nextInt(card); tmp >= 0; --tmp) {
                chosen = subspace.nextSetBit(chosen + 1);
            }
            DBIDs conditionalSample = relation.getDBIDs();
            int j = subspace.nextSetBit(0);
            while (j >= 0) {
                if (j != chosen) {
                    ArrayDBIDs sortedIndices = subspaceIndex.get(j);
                    ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray(windowsize);
                    iter = sortedIndices.iter();
                    iter.seek(random.nextInt(relation.size() - windowsize));
                    for (int k = 0; k < windowsize; ++k) {
                        indexBlock.add(iter);
                        iter.advance();
                    }
                    conditionalSample = DBIDUtil.intersection(conditionalSample, indexBlock);
                }
                j = subspace.nextSetBit(j + 1);
            }
            if (conditionalSample.size() < 10) {
                ++retries;
                if (LOG.isDebugging()) {
                    LOG.debug("Sample size very small. Retry no. " + retries);
                }
                if (retries >= 100) {
                    LOG.warning("Too many retries, for small samples: " + retries);
                } else {
                    --i;
                    continue;
                }
            }
            double[] sampleValues = new double[conditionalSample.size()];
            int l = 0;
            DBIDIter iter2 = conditionalSample.iter();
            while (iter2.valid()) {
                sampleValues[l++] = relation.get(iter2).doubleValue(chosen);
                iter2.advance();
            }
            double[] fullValues = new double[relation.size()];
            int l2 = 0;
            iter = subspaceIndex.get(chosen).iter();
            while (iter.valid()) {
                fullValues[l2++] = relation.get(iter).doubleValue(chosen);
                iter.advance();
            }
            double contrast = this.statTest.deviation(fullValues, sampleValues);
            if (Double.isNaN(contrast)) {
                --i;
                LOG.warning("Contrast was NaN");
                continue;
            }
            deviationSum += contrast;
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        subspace.contrast = deviationSum / (double)this.m;
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer<V extends NumberVector>
    extends AbstractParameterizer {
        public static final OptionID M_ID = new OptionID("hics.m", "The number of iterations in the Monte-Carlo processing.");
        public static final OptionID ALPHA_ID = new OptionID("hics.alpha", "The discriminance value that determines the size of the test statistic .");
        public static final OptionID ALGO_ID = new OptionID("hics.algo", "The Algorithm that performs the actual outlier detection on the resulting set of subspace");
        public static final OptionID TEST_ID = new OptionID("hics.test", "The statistical test that is used to calculate the deviation of two data samples");
        public static final OptionID LIMIT_ID = new OptionID("hics.limit", "The threshold that determines how many d-dimensional subspace candidates to retain in each step of the generation");
        public static final OptionID SEED_ID = new OptionID("hics.seed", "The random seed.");
        private int m = 50;
        private double alpha = 0.1;
        private OutlierAlgorithm outlierAlgorithm;
        private GoodnessOfFitTest statTest;
        private int cutoff = 400;
        private RandomFactory rnd;

        @Override
        protected void makeOptions(Parameterization config) {
            RandomParameter rndP;
            IntParameter cutoffP;
            ObjectParameter testP;
            ObjectParameter algoP;
            DoubleParameter alphaP;
            super.makeOptions(config);
            IntParameter mP = (IntParameter)new IntParameter(M_ID, 50).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ONE_INT);
            if (config.grab(mP)) {
                this.m = mP.intValue();
            }
            if (config.grab(alphaP = (DoubleParameter)new DoubleParameter(ALPHA_ID, 0.1).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE))) {
                this.alpha = alphaP.doubleValue();
            }
            if (config.grab(algoP = new ObjectParameter(ALGO_ID, (Class<?>)OutlierAlgorithm.class, LOF.class))) {
                this.outlierAlgorithm = (OutlierAlgorithm)algoP.instantiateClass(config);
            }
            if (config.grab(testP = new ObjectParameter(TEST_ID, (Class<?>)GoodnessOfFitTest.class, KolmogorovSmirnovTest.class))) {
                this.statTest = (GoodnessOfFitTest)testP.instantiateClass(config);
            }
            if (config.grab(cutoffP = (IntParameter)new IntParameter(LIMIT_ID, 100).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ONE_INT))) {
                this.cutoff = cutoffP.intValue();
            }
            if (config.grab(rndP = new RandomParameter(SEED_ID))) {
                this.rnd = (RandomFactory)rndP.getValue();
            }
        }

        @Override
        protected HiCS<V> makeInstance() {
            return new HiCS(this.m, this.alpha, this.outlierAlgorithm, this.statTest, this.cutoff, this.rnd);
        }
    }

    public static class HiCSSubspace
    extends BitSet {
        private static final long serialVersionUID = 1L;
        protected double contrast;
        public static final Comparator<HiCSSubspace> SORT_BY_CONTRAST_ASC = new Comparator<HiCSSubspace>(){

            @Override
            public int compare(HiCSSubspace o1, HiCSSubspace o2) {
                return o1.contrast == o2.contrast ? 0 : (o1.contrast > o2.contrast ? 1 : -1);
            }
        };
        public static final Comparator<HiCSSubspace> SORT_BY_CONTRAST_DESC = new Comparator<HiCSSubspace>(){

            @Override
            public int compare(HiCSSubspace o1, HiCSSubspace o2) {
                return o1.contrast == o2.contrast ? 0 : (o1.contrast < o2.contrast ? 1 : -1);
            }
        };
        public static final Comparator<HiCSSubspace> SORT_BY_SUBSPACE = new Comparator<HiCSSubspace>(){

            @Override
            public int compare(HiCSSubspace o1, HiCSSubspace o2) {
                int dim1 = o1.nextSetBit(0);
                int dim2 = o2.nextSetBit(0);
                while (dim1 >= 0 && dim2 >= 0) {
                    if (dim1 < dim2) {
                        return -1;
                    }
                    if (dim1 > dim2) {
                        return 1;
                    }
                    dim1 = o1.nextSetBit(dim1 + 1);
                    dim2 = o2.nextSetBit(dim2 + 1);
                }
                return 0;
            }
        };

        @Override
        public String toString() {
            StringBuilder buf = new StringBuilder(1000);
            buf.append("[contrast=").append(this.contrast);
            int i = this.nextSetBit(0);
            while (i >= 0) {
                buf.append(' ').append(i + 1);
                i = this.nextSetBit(i + 1);
            }
            buf.append(']');
            return buf.toString();
        }
    }
}

