/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.ClusterModel;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.NoSupportedDataTypeException;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.Priority;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

@Title(value="Clustering by label")
@Description(value="Cluster points by a (pre-assigned!) label. For comparing results with a reference clustering.")
@Alias(value={"de.lmu.ifi.dbs.elki.algorithm.clustering.ByLabelClustering"})
@Priority(value=-100)
public class ByLabelClustering
extends AbstractAlgorithm<Clustering<Model>>
implements ClusteringAlgorithm<Clustering<Model>> {
    private static final Logging LOG = Logging.getLogger(ByLabelClustering.class);
    private boolean multiple;
    private Pattern noisepattern = null;

    public ByLabelClustering(boolean multiple, Pattern noisepattern) {
        this.multiple = multiple;
        this.noisepattern = noisepattern;
    }

    public ByLabelClustering() {
        this(false, null);
    }

    @Override
    public Clustering<Model> run(Database database) {
        try {
            Relation relation = database.getRelation(TypeUtil.CLASSLABEL, new Object[0]);
            return this.run(relation);
        }
        catch (NoSupportedDataTypeException e) {
            return this.run(database.getRelation(this.getInputTypeRestriction()[0], new Object[0]));
        }
    }

    public Clustering<Model> run(Relation<?> relation) {
        HashMap<String, DBIDs> labelMap = this.multiple ? this.multipleAssignment(relation) : this.singleAssignment(relation);
        ArrayModifiableDBIDs noiseids = DBIDUtil.newArray();
        Clustering<Model> result = new Clustering<Model>("By Label Clustering", "bylabel-clustering");
        for (Map.Entry<String, DBIDs> entry : labelMap.entrySet()) {
            DBIDs ids = entry.getValue();
            if (ids.size() <= 1) {
                noiseids.addDBIDs(ids);
                continue;
            }
            Cluster<ClusterModel> c = new Cluster<ClusterModel>(entry.getKey(), ids, ClusterModel.CLUSTER);
            if (this.noisepattern != null && this.noisepattern.matcher(entry.getKey()).find()) {
                c.setNoise(true);
            }
            result.addToplevelCluster(c);
        }
        if (noiseids.size() > 0) {
            Cluster<ClusterModel> c = new Cluster<ClusterModel>("Noise", (DBIDs)noiseids, ClusterModel.CLUSTER);
            c.setNoise(true);
            result.addToplevelCluster(c);
        }
        return result;
    }

    private HashMap<String, DBIDs> singleAssignment(Relation<?> data) {
        HashMap<String, DBIDs> labelMap = new HashMap<String, DBIDs>();
        DBIDIter iditer = data.iterDBIDs();
        while (iditer.valid()) {
            Object val = data.get(iditer);
            String label = val != null ? val.toString() : null;
            this.assign(labelMap, label, iditer);
            iditer.advance();
        }
        return labelMap;
    }

    private HashMap<String, DBIDs> multipleAssignment(Relation<?> data) {
        HashMap<String, DBIDs> labelMap = new HashMap<String, DBIDs>();
        DBIDIter iditer = data.iterDBIDs();
        while (iditer.valid()) {
            String[] labels;
            for (String label : labels = data.get(iditer).toString().split(" ")) {
                this.assign(labelMap, label, iditer);
            }
            iditer.advance();
        }
        return labelMap;
    }

    private void assign(HashMap<String, DBIDs> labelMap, String label, DBIDRef id) {
        if (labelMap.containsKey(label)) {
            DBIDs exist = labelMap.get(label);
            if (exist instanceof DBID) {
                HashSetModifiableDBIDs n = DBIDUtil.newHashSet();
                n.add((DBID)exist);
                n.add(id);
                labelMap.put(label, n);
            } else {
                assert (exist instanceof HashSetModifiableDBIDs);
                assert (exist.size() > 1);
                ((ModifiableDBIDs)exist).add(id);
            }
        } else {
            labelMap.put(label, DBIDUtil.deref(id));
        }
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.GUESSED_LABEL);
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Parameterizer
    extends AbstractParameterizer {
        public static final OptionID MULTIPLE_ID = new OptionID("bylabelclustering.multiple", "Flag to indicate that only subspaces with large coverage (i.e. the fraction of the database that is covered by the dense units) are selected, the rest will be pruned.");
        public static final OptionID NOISE_ID = new OptionID("bylabelclustering.noise", "Pattern to recognize noise classes by their label.");
        protected boolean multiple;
        protected Pattern noisepat;

        @Override
        protected void makeOptions(Parameterization config) {
            PatternParameter noisepatP;
            super.makeOptions(config);
            Flag multipleF = new Flag(MULTIPLE_ID);
            if (config.grab(multipleF)) {
                this.multiple = (Boolean)multipleF.getValue();
            }
            if (config.grab(noisepatP = (PatternParameter)new PatternParameter(NOISE_ID).setOptional(true))) {
                this.noisepat = (Pattern)noisepatP.getValue();
            }
        }

        @Override
        protected ByLabelClustering makeInstance() {
            return new ByLabelClustering(this.multiple, this.noisepat);
        }
    }
}

