/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.statistics;

import de.lmu.ifi.dbs.elki.algorithm.AbstractNumberVectorDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic;
import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.BetaDistribution;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleListParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory;
import java.util.Arrays;
import java.util.Random;

@Reference(authors="B. Hopkins, J. G. Skellam", title="A new method for determining the type of distribution of plant individuals", booktitle="Annals of Botany, 18(2), 213-227", url="https://doi.org/10.1093/oxfordjournals.aob.a083391", bibkey="doi:10.1093/oxfordjournals.aob.a083391")
/**
 * Computes the Hopkins statistic for a relation of number vectors.
 *
 * <p>For every repetition the algorithm sums the {@code dim}-th power of the
 * k-nearest-neighbor distance for {@code sampleSize} objects sampled from the
 * relation ({@code w}) and for {@code sampleSize} points drawn uniformly from
 * a bounding box ({@code u}), and derives {@code h = u / (u + w)}. Nothing is
 * returned; all values are reported through the statistics logger.
 *
 * <p>The bounding box is taken from the configured minima / maxima. Each of
 * the two bounds that is not configured (null or empty) is derived from the
 * data independently of the other one.
 */
public class HopkinsStatisticClusteringTendency
extends AbstractNumberVectorDistanceBasedAlgorithm<NumberVector, Result> {
    /** Class logger; every result of this algorithm is emitted through it. */
    private static final Logging LOG = Logging.getLogger(HopkinsStatisticClusteringTendency.class);

    /** Number of real objects and of uniform points evaluated per repetition. */
    protected int sampleSize;

    /** Number of repetitions of the experiment. */
    protected int rep;

    /** Which nearest neighbor to take the distance from. */
    protected int k;

    /** Source of randomness for sampling objects and generating uniform points. */
    protected RandomFactory random;

    /** Configured upper bounds of the sampling box; null or empty means "derive from data". */
    private final double[] maxima;

    /** Configured lower bounds of the sampling box; null or empty means "derive from data". */
    private final double[] minima;

    /**
     * Constructor.
     *
     * @param distanceFunction distance function handed to the superclass
     * @param samplesize number of samples per repetition
     * @param random random generator factory
     * @param rep number of repetitions
     * @param k nearest neighbor to use
     * @param minima lower bounds (one value per dimension, a single value for
     *        all dimensions, or null / empty to use the data minimum)
     * @param maxima upper bounds (one value per dimension, a single value for
     *        all dimensions, or null / empty to use the data maximum)
     */
    public HopkinsStatisticClusteringTendency(NumberVectorDistanceFunction<? super NumberVector> distanceFunction, int samplesize, RandomFactory random, int rep, int k, double[] minima, double[] maxima) {
        super(distanceFunction);
        this.sampleSize = samplesize;
        this.random = random;
        this.rep = rep;
        this.k = k;
        this.minima = minima;
        this.maxima = maxima;
    }

    /**
     * Runs the experiment {@code rep} times and logs the mean (and, for more
     * than one repetition, the sample standard deviation) of h, u and w,
     * followed by a value p derived from the regularized incomplete beta
     * function evaluated at the mean of h.
     *
     * @param database database used to obtain the distance and kNN queries
     * @param relation relation to analyze
     * @return always {@code null}; the results are only logged
     */
    public Result run(Database database, Relation<NumberVector> relation) {
        final int dim = RelationUtil.dimensionality(relation);
        final DistanceQuery<NumberVector> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
        // Requested with k + 1, the largest k that is queried below.
        final KNNQuery<NumberVector> knnQuery = database.getKNNQuery(distanceQuery, k + 1);

        final double[] min = new double[dim];
        final double[] extend = new double[dim];
        initializeDataExtends(relation, dim, min, extend);

        if (!LOG.isStatistics()) {
            LOG.warning("This algorithm must be used with at least logging level " + Logging.Level.STATISTICS);
        }

        final MeanVariance hmean = new MeanVariance();
        final MeanVariance umean = new MeanVariance();
        final MeanVariance wmean = new MeanVariance();
        for (int run = 0; run < rep; run++) {
            final double w = computeNNForRealData(knnQuery, relation, dim);
            final double u = computeNNForUniformData(knnQuery, min, extend);
            hmean.put(u / (u + w));
            umean.put(u);
            wmean.put(w);
        }

        final String prefix = getClass().getName();
        LOG.statistics(new LongStatistic(prefix + ".samplesize", sampleSize));
        LOG.statistics(new LongStatistic(prefix + ".dim", dim));
        LOG.statistics(new LongStatistic(prefix + ".hopkins.nearest-neighbor", k));
        LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.mean", hmean.getMean()));
        LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.mean", umean.getMean()));
        LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.mean", wmean.getMean()));
        // A sample standard deviation needs more than one observation.
        if (rep > 1) {
            LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.std", hmean.getSampleStddev()));
            LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.std", umean.getSampleStddev()));
            LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.std", wmean.getSampleStddev()));
        }

        final double x = hmean.getMean();
        final double ix = BetaDistribution.regularizedIncBeta(x, sampleSize, sampleSize);
        // Report 1 - ix when the mean lies above 0.5, ix otherwise.
        final double p = x > 0.5 ? 1.0 - ix : ix;
        LOG.statistics(new DoubleStatistic(prefix + ".hopkins.p", p));
        return null;
    }

    /**
     * Samples {@code sampleSize} objects from the relation and sums the
     * {@code dim}-th power of their kNN distance.
     *
     * @param knnQuery kNN query on the relation
     * @param relation relation to sample from
     * @param dim exponent applied to each distance
     * @return sum of the powered distances
     */
    protected double computeNNForRealData(KNNQuery<NumberVector> knnQuery, Relation<NumberVector> relation, int dim) {
        final ModifiableDBIDs dataSampleIds = DBIDUtil.randomSample(relation.getDBIDs(), sampleSize, random);
        double w = 0.0;
        for (DBIDMIter iter = dataSampleIds.iter(); iter.valid(); iter.advance()) {
            // k + 1 here versus k for the uniform points - presumably because a
            // sampled object is itself part of the relation; confirm against
            // the kNN query contract.
            final double kdist = knnQuery.getKNNForDBID(iter, k + 1).getKNNDistance();
            w += MathUtil.powi(kdist, dim);
        }
        return w;
    }

    /**
     * Draws {@code sampleSize} points uniformly from the box described by
     * {@code min} and {@code extend} and sums the {@code dim}-th power of
     * their kNN distance to the relation, where {@code dim} is
     * {@code min.length}.
     *
     * @param knnQuery kNN query on the relation
     * @param min lower corner of the box
     * @param extend edge length of the box in each dimension
     * @return sum of the powered distances
     */
    protected double computeNNForUniformData(KNNQuery<NumberVector> knnQuery, double[] min, double[] extend) {
        final Random rand = random.getSingleThreadedRandom();
        final int dim = min.length;
        // One buffer is reused for all points; it is wrapped, not copied.
        final double[] buf = new double[dim];
        double u = 0.0;
        for (int i = 0; i < sampleSize; i++) {
            for (int d = 0; d < buf.length; d++) {
                buf[d] = min[d] + rand.nextDouble() * extend[d];
            }
            final double kdist = knnQuery.getKNNForObject(DoubleVector.wrap(buf), k).getKNNDistance();
            u += MathUtil.powi(kdist, dim);
        }
        return u;
    }

    /**
     * Fills {@code min} and {@code extend} with the lower corner and the edge
     * lengths of the box used for uniform sampling.
     *
     * <p>Each bound is resolved on its own: a configured bound may hold one
     * value per dimension or a single value applied to every dimension; a
     * bound that is null or empty is taken from the data. A bound supplied
     * alone is therefore honored instead of being discarded together with the
     * missing one.
     *
     * @param relation relation used when a bound has to be derived from data
     * @param dim dimensionality of the data
     * @param min output: lower bound per dimension
     * @param extend output: upper bound minus lower bound per dimension
     * @throws AbortException if a configured bound has neither {@code dim}
     *         values nor exactly one value
     */
    protected void initializeDataExtends(Relation<NumberVector> relation, int dim, double[] min, double[] extend) {
        assert (min.length == dim && extend.length == dim);
        final boolean haveMin = minima != null && minima.length > 0;
        final boolean haveMax = maxima != null && maxima.length > 0;

        // Reject malformed bounds before paying for a scan of the data.
        if (haveMin && minima.length != dim && minima.length != 1) {
            throw new AbortException("Invalid minima specified: expected " + dim + " got minima dimensionality: " + minima.length);
        }
        if (haveMax && maxima.length != dim && maxima.length != 1) {
            throw new AbortException("Invalid maxima specified: expected " + dim + " got maxima dimensionality: " + maxima.length);
        }

        // Only scan the data when at least one bound must be derived from it.
        final double[][] minmax = (haveMin && haveMax) ? null : RelationUtil.computeMinMax(relation);

        if (!haveMin) {
            System.arraycopy(minmax[0], 0, min, 0, dim);
        } else if (minima.length == dim) {
            System.arraycopy(minima, 0, min, 0, dim);
        } else {
            Arrays.fill(min, minima[0]);
        }

        for (int d = 0; d < dim; d++) {
            final double upper;
            if (!haveMax) {
                upper = minmax[1][d];
            } else if (maxima.length == dim) {
                upper = maxima[d];
            } else {
                upper = maxima[0];
            }
            extend[d] = upper - min[d];
        }
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
    }

    /**
     * Parameterization class: reads the options and builds the algorithm.
     */
    public static class Parameterizer
    extends AbstractNumberVectorDistanceBasedAlgorithm.Parameterizer<NumberVector> {
        /** Option for the sample size. */
        public static final OptionID SAMPLESIZE_ID = new OptionID("hopkins.samplesize", "Number of object / random samples to analyze.");

        /** Option for the number of repetitions. */
        public static final OptionID REP_ID = new OptionID("hopkins.rep", "The number of times to repeat the experiment (default: 1)");

        /** Option for the random generator. */
        public static final OptionID SEED_ID = new OptionID("hopkins.seed", "The random number generator.");

        /** Option for the lower bounds of the sampling box. */
        public static final OptionID MINIMA_ID = new OptionID("hopkins.min", "Minimum values in each dimension. If no value is specified, the minimum value in each dimension will be used. If only one value is specified, this value will be used for all dimensions.");

        /** Option for the upper bounds of the sampling box. */
        public static final OptionID MAXIMA_ID = new OptionID("hopkins.max", "Maximum values in each dimension. If no value is specified, the maximum value in each dimension will be used. If only one value is specified, this value will be used for all dimensions.");

        /** Option for the nearest neighbor to use. */
        public static final OptionID K_ID = new OptionID("hopkins.k", "Nearest neighbor to use for the statistic");

        /** Sample size; this option has no default value. */
        protected int sampleSize = 0;

        /** Number of repetitions. */
        protected int rep = 1;

        /** Nearest neighbor to use. */
        protected int k = 1;

        /** Random generator factory. */
        protected RandomFactory random;

        /** Upper bounds, or null when not configured. */
        private double[] maxima = null;

        /** Lower bounds, or null when not configured. */
        private double[] minima = null;

        /**
         * Reads all options. Repetitions, k and sample size must each be at
         * least one. The maxima option is mandatory once minima is defined,
         * and if both are given they must have the same number of values.
         *
         * @param config parameterization to read from
         */
        @Override
        protected void makeOptions(Parameterization config) {
            ObjectParameter distanceFunctionP = new ObjectParameter(DistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, (Class<?>)NumberVectorDistanceFunction.class, EuclideanDistanceFunction.class);
            if (config.grab(distanceFunctionP)) {
                this.distanceFunction = (NumberVectorDistanceFunction)distanceFunctionP.instantiateClass(config);
            }

            IntParameter repP = (IntParameter)new IntParameter(REP_ID, 1).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT);
            if (config.grab(repP)) {
                this.rep = (Integer)repP.getValue();
            }

            IntParameter kP = (IntParameter)new IntParameter(K_ID, 1).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT);
            if (config.grab(kP)) {
                this.k = (Integer)kP.getValue();
            }

            IntParameter sampleP = (IntParameter)new IntParameter(SAMPLESIZE_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT);
            if (config.grab(sampleP)) {
                this.sampleSize = (Integer)sampleP.getValue();
            }

            RandomParameter randomP = new RandomParameter(SEED_ID);
            if (config.grab(randomP)) {
                this.random = (RandomFactory)randomP.getValue();
            }

            DoubleListParameter minimaP = (DoubleListParameter)new DoubleListParameter(MINIMA_ID).setOptional(true);
            if (config.grab(minimaP)) {
                // Copy so the parameter's own array is never shared.
                this.minima = (double[])((double[])minimaP.getValue()).clone();
            }

            DoubleListParameter maximaP = (DoubleListParameter)new DoubleListParameter(MAXIMA_ID).setOptional(!minimaP.isDefined());
            if (config.grab(maximaP)) {
                this.maxima = (double[])((double[])maximaP.getValue()).clone();
            }

            // Cross-parameter check that a single-parameter constraint cannot express.
            if (this.minima != null && this.maxima != null && this.minima.length != this.maxima.length) {
                config.reportError(new WrongParameterValueException(minimaP, "and", maximaP, "must have the same number of values."));
            }
        }

        @Override
        protected HopkinsStatisticClusteringTendency makeInstance() {
            return new HopkinsStatisticClusteringTendency(this.distanceFunction, this.sampleSize, this.random, this.rep, this.k, this.minima, this.maxima);
        }
    }
}

