/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.datasource.filter.normalization.columnwise;

import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.datasource.filter.FilterUtil;
import de.lmu.ifi.dbs.elki.datasource.filter.normalization.Normalization;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.DistributionEstimator;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta.BestFitEstimator;
import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest;
import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectListParameter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Column-wise normalization that replaces each attribute value by the value of
 * the cumulative distribution function (CDF) of a distribution fitted to that
 * attribute.
 *
 * <p>For every number-vector column of a bundle, one distribution per dimension
 * is estimated using the configured {@link DistributionEstimator}s. If more than
 * one estimator is configured, the candidate with the smallest
 * {@link KolmogorovSmirnovTest#simpleTest(double[])} score on the transformed
 * data is chosen.
 *
 * @param <V> vector type handled by this filter
 */
@Alias(value={"de.lmu.ifi.dbs.elki.datasource.filter.normalization.AttributeWiseCDFNormalization"})
public class AttributeWiseCDFNormalization<V extends NumberVector>
implements Normalization<V> {
    /** Class logger. */
    private static final Logging LOG = Logging.getLogger(AttributeWiseCDFNormalization.class);

    /** Estimators that are tried for every dimension. */
    protected List<DistributionEstimator<?>> estimators;

    /** Distributions chosen for the most recently processed column, one per dimension. */
    protected List<Distribution> dists;

    /** Factory used to create the normalized vectors of the most recently processed column. */
    protected NumberVector.Factory<V> factory;

    /**
     * Constructor.
     *
     * @param estimators distribution estimators to try for each dimension
     */
    public AttributeWiseCDFNormalization(List<DistributionEstimator<?>> estimators) {
        this.estimators = estimators;
    }

    /**
     * Normalizes all number-vector columns of the bundle in place.
     *
     * @param objects bundle to normalize; its vector columns are overwritten
     * @return the same bundle instance that was passed in
     * @throws IllegalStateException if no distribution could be fitted for a dimension
     */
    @Override
    public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
        if (objects.dataLength() == 0) {
            return objects;
        }
        for (int r = 0; r < objects.metaLength(); ++r) {
            SimpleTypeInformation<?> type = objects.meta(r);
            List<?> column = objects.getColumn(r);
            if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(type)) {
                continue;
            }
            // The type check above shows this is a number-vector field; that its
            // elements are of type V is an assumption about how the filter is used.
            @SuppressWarnings("unchecked")
            List<V> castColumn = (List<V>) column;
            @SuppressWarnings("unchecked")
            VectorFieldTypeInformation<V> castType = (VectorFieldTypeInformation<V>) type;
            this.factory = FilterUtil.guessFactory(castType);

            int dim = castType.getDimensionality();
            this.dists = new ArrayList<Distribution>(dim);
            // Scratch space for scoring candidates; only needed when several estimators compete.
            double[] test = this.estimators.size() > 1 ? new double[castColumn.size()] : null;

            // Fitting pass: the adapter exposes one dimension of the column at a time.
            Adapter adapter = new Adapter();
            for (int d = 0; d < dim; ++d) {
                adapter.dim = d;
                Distribution dist = this.findBestFit(castColumn, adapter, d, test);
                if (dist == null) {
                    // Fail with an explanation instead of a NullPointerException
                    // in the normalization pass below.
                    throw new IllegalStateException("No distribution could be fitted for dimension " + d + " using estimators " + this.estimators);
                }
                if (dist instanceof UniformDistribution && this.constantZero(castColumn, adapter)) {
                    // An all-zero dimension is mapped using the unit uniform distribution.
                    dist = new UniformDistribution(0.0, 1.0);
                }
                this.dists.add(dist);
            }

            // Normalization pass: replace each value by its CDF value.
            double[] buf = new double[dim];
            for (int i = 0; i < objects.dataLength(); ++i) {
                V obj = castColumn.get(i);
                for (int d = 0; d < dim; ++d) {
                    buf[d] = this.dists.get(d).cdf(obj.doubleValue(d));
                }
                castColumn.set(i, this.factory.newNumberVector(buf));
            }
        }
        return objects;
    }

    /**
     * Fits the configured estimators to one dimension and returns the best result.
     *
     * <p>With exactly one estimator, its estimate is returned without scoring.
     * Otherwise every candidate is scored with
     * {@link KolmogorovSmirnovTest#simpleTest(double[])} on the sorted CDF values;
     * the candidate with the smallest score is kept. Candidates that produce NaN
     * or infinite CDF values, or that throw an {@link ArithmeticException}, are
     * skipped.
     *
     * @param col column of vectors
     * @param adapter adapter that reads dimension {@code d} of the column
     * @param d dimension being fitted
     * @param test scratch array with one entry per vector; only used when more
     *        than one estimator is configured
     * @return the best distribution, or {@code null} if no candidate succeeded
     */
    protected Distribution findBestFit(List<V> col, Adapter adapter, int d, double[] test) {
        if (this.estimators.size() == 1) {
            return this.estimators.get(0).estimate(col, adapter);
        }
        Distribution best = null;
        double bestq = Double.POSITIVE_INFINITY;
        trials: for (DistributionEstimator<?> est : this.estimators) {
            try {
                Distribution dist = est.estimate(col, adapter);
                for (int i = 0; i < test.length; ++i) {
                    test[i] = dist.cdf(col.get(i).doubleValue(d));
                    if (Double.isNaN(test[i])) {
                        LOG.warning("Got NaN after fitting " + est + ": " + dist);
                        continue trials;
                    }
                    if (Double.isInfinite(test[i])) {
                        LOG.warning("Got infinite value after fitting " + est + ": " + dist);
                        continue trials;
                    }
                }
                Arrays.sort(test);
                double q = KolmogorovSmirnovTest.simpleTest(test);
                if (LOG.isVeryVerbose()) {
                    LOG.veryverbose("Estimator " + est + " (" + dist + ") has maximum deviation " + q + " for dimension " + d);
                }
                if (best == null || q < bestq) {
                    best = dist;
                    bestq = q;
                }
            }
            catch (ArithmeticException e) {
                // A failed fit only disqualifies this candidate; keep trying the others.
                if (LOG.isVeryVerbose()) {
                    LOG.veryverbose("Fitting distribution " + est + " failed: " + e.getMessage());
                }
            }
        }
        if (LOG.isVerbose()) {
            LOG.verbose("Best fit for dimension " + d + ": " + best);
        }
        return best;
    }

    /**
     * Tests whether the dimension selected by the adapter is zero for every vector.
     *
     * @param column column of vectors
     * @param adapter adapter that reads the dimension to test
     * @return {@code true} if every value equals {@code 0.0}
     */
    protected boolean constantZero(List<V> column, Adapter adapter) {
        for (int i = 0, s = adapter.size(column); i < s; ++i) {
            // Primitive accessor: avoids boxing a Double per element.
            if (adapter.getDouble(column, i) != 0.0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Describes this normalization by its class name and the simple class names
     * of the configured estimators.
     *
     * @return textual description
     */
    @Override
    public String toString() {
        StringBuilder result = new StringBuilder(1000).append("normalization class: ").append(this.getClass().getName()).append('\n').append("normalization distributions: ");
        for (DistributionEstimator<?> est : this.estimators) {
            result.append(est.getClass().getSimpleName()).append(',');
        }
        if (!this.estimators.isEmpty()) {
            // Drop the trailing separator.
            result.setLength(result.length() - 1);
        }
        return result.toString();
    }

    /**
     * Parameterization class.
     *
     * @param <V> vector type of the filter to create
     */
    public static class Parameterizer<V extends NumberVector>
    extends AbstractParameterizer {
        /** Option for the list of distribution estimators to try. */
        public static final OptionID DISTRIBUTIONS_ID = new OptionID("normalize.distributions", "A list of the distribution estimators to try.");

        /** Estimators instantiated from the configuration. */
        private List<DistributionEstimator<?>> estimators;

        @Override
        protected void makeOptions(Parameterization config) {
            super.makeOptions(config);
            ObjectListParameter<DistributionEstimator<?>> estP = new ObjectListParameter<>(DISTRIBUTIONS_ID, DistributionEstimator.class);
            // Default: a single BestFitEstimator.
            estP.setDefaultValue(Arrays.asList(BestFitEstimator.class));
            if (config.grab(estP)) {
                this.estimators = estP.instantiateClasses(config);
            }
        }

        @Override
        protected AttributeWiseCDFNormalization<V> makeInstance() {
            return new AttributeWiseCDFNormalization<>(this.estimators);
        }
    }

    /**
     * Array adapter that presents a single dimension of a list of number vectors
     * as a numeric array.
     */
    protected static class Adapter
    implements NumberArrayAdapter<Double, List<? extends NumberVector>> {
        /** Dimension of the vectors that is currently exposed. */
        int dim;

        /** Constructor. */
        protected Adapter() {
        }

        @Override
        public int size(List<? extends NumberVector> array) {
            return array.size();
        }

        @Override
        public Double get(List<? extends NumberVector> array, int off) throws IndexOutOfBoundsException {
            return this.getDouble(array, off);
        }

        @Override
        public double getDouble(List<? extends NumberVector> array, int off) throws IndexOutOfBoundsException {
            return array.get(off).doubleValue(this.dim);
        }

        @Override
        public long getLong(List<? extends NumberVector> array, int off) throws IndexOutOfBoundsException {
            return array.get(off).longValue(this.dim);
        }
    }
}

