/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.datasource.parser;

import de.lmu.ifi.dbs.elki.data.ClassLabel;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.ExternalID;
import de.lmu.ifi.dbs.elki.data.LabelList;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.SimpleClassLabel;
import de.lmu.ifi.dbs.elki.data.SparseDoubleVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.datasource.parser.Parser;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.io.ParseUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
import it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectIterator;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.StreamTokenizer;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

@Title(value="ARFF File Format Parser")
public class ArffParser
implements Parser {
    /** Class logger. */
    private static final Logging LOG = Logging.getLogger(ArffParser.class);
    /**
     * Matches the {@code @relation <name>} header line; flag 2 is {@link Pattern#CASE_INSENSITIVE}.
     * NOTE(review): this and the other static constants below are shared, mutable {@link Matcher}
     * instances that get {@code reset(...)} per input line. Matcher is not safe for concurrent use,
     * so this presumably assumes parsers never run in parallel - confirm.
     */
    public static final Matcher ARFF_HEADER_RELATION = Pattern.compile("@relation\\s+(.*)", 2).matcher("");
    /**
     * Matches an {@code @attribute} line (case-insensitive). Group 1 is the attribute name (possibly
     * still wrapped in quotes), group 2 the declared type.
     * NOTE(review): the {@code date(\s.*)} alternative requires whitespace plus a format after
     * {@code date}, so a bare {@code date} type does not match - confirm this is intended.
     */
    public static final Matcher ARFF_HEADER_ATTRIBUTE = Pattern.compile("@attribute\\s+([^ ]+|['\"].*?['\"])\\s+(numeric|real|integer|string|double|date(\\s.*)|\\{.*\\})\\s*", 2).matcher("");
    /** Matches the {@code @data} line that ends the header (case-insensitive). */
    public static final Matcher ARFF_HEADER_DATA = Pattern.compile("@data\\s*", 2).matcher("");
    /** Matches a comment line: optional leading whitespace followed by {@code %}. */
    public static final Matcher ARFF_COMMENT = Pattern.compile("^\\s*%.*").matcher("");
    /** Default regular expression for attribute names treated as external IDs. */
    public static final String DEFAULT_ARFF_MAGIC_EID = "(External-?ID)";
    /** Default regular expression for attribute names treated as class labels. */
    public static final String DEFAULT_ARFF_MAGIC_CLASS = "(Class|Class-?Label)";
    /** Matches the attribute type names that are handled as numeric columns (case-insensitive). */
    public static final Matcher ARFF_NUMERIC = Pattern.compile("(numeric|real|integer|double)", 2).matcher("");
    /** Matches a line that is empty or contains only whitespace. */
    public static final Matcher EMPTY = Pattern.compile("^\\s*$").matcher("");
    /** Matcher applied to attribute names to detect external ID columns; null-checked before use. */
    Matcher magic_eid;
    /** Matcher applied to attribute names to detect class label columns; null-checked before use. */
    Matcher magic_class;
    /**
     * Scratch list reused by the instance loaders: cleared and refilled for every label-list column
     * before being handed to {@code LabelList.make}.
     */
    ArrayList<String> labels = new ArrayList();
    /** Factory used to build the dense vectors produced by {@code loadDenseInstance}. */
    NumberVector.Factory<?> denseFactory = DoubleVector.FACTORY;

    /**
     * Creates a parser from precompiled attribute-name patterns.
     *
     * <p>Either pattern may be {@code null}, which disables the corresponding detection; the
     * rest of the class already null-checks {@code magic_eid} and {@code magic_class} before
     * use, and the parameterizer can hand over unset (null) patterns.
     *
     * @param magic_eid pattern recognizing external ID attribute names, or {@code null}
     * @param magic_class pattern recognizing class label attribute names, or {@code null}
     */
    public ArffParser(Pattern magic_eid, Pattern magic_class) {
        // Matchers are created once and later re-targeted with reset(name) for each attribute.
        this.magic_eid = magic_eid != null ? magic_eid.matcher("") : null;
        this.magic_class = magic_class != null ? magic_class.matcher("") : null;
    }

    /**
     * Creates a parser from regular expression strings; both expressions are compiled
     * case-insensitively and handed to the pattern-based constructor.
     *
     * @param magic_eid regular expression recognizing external ID attribute names
     * @param magic_class regular expression recognizing class label attribute names
     */
    public ArffParser(String magic_eid, String magic_class) {
        this(
            Pattern.compile(magic_eid, Pattern.CASE_INSENSITIVE),
            Pattern.compile(magic_class, Pattern.CASE_INSENSITIVE));
    }

    /**
     * Entry point of the parser: takes the input stream and is declared to return the parsed
     * data as a {@link MultipleObjectsBundle}.
     *
     * <p><b>Warning:</b> the body below is NOT the real implementation. The decompiler (CFR)
     * failed on this method and emitted a stub, so every call currently throws
     * {@link IllegalStateException}.
     *
     * <p>NOTE(review): the private helpers of this class (readHeader, parseAttributeStatements,
     * processColumnTypes, makeArffTokenizer, setupBundleHeaders, loadDenseInstance,
     * loadSparseInstance, nextToken) have no other caller in this file and were presumably
     * driven from here - the original logic must be recovered from the upstream sources.
     *
     * @param instream stream containing the ARFF data
     * @return never returns normally in this decompiled form
     * @throws IllegalStateException always
     */
    @Override
    public MultipleObjectsBundle parse(InputStream instream) {
        /*
         * This method has failed to decompile.  When submitting a bug report, please provide this stack trace, and (if you hold appropriate legal rights) the relevant class file.
         * 
         * org.benf.cfr.reader.util.ConfusedCFRException: Started 3 blocks at once
         *     at org.benf.cfr.reader.bytecode.analysis.opgraph.Op04StructuredStatement.getStartingBlocks(Op04StructuredStatement.java:412)
         *     at org.benf.cfr.reader.bytecode.analysis.opgraph.Op04StructuredStatement.buildNestedBlocks(Op04StructuredStatement.java:487)
         *     at org.benf.cfr.reader.bytecode.analysis.opgraph.Op03SimpleStatement.createInitialStructuredBlock(Op03SimpleStatement.java:736)
         *     at org.benf.cfr.reader.bytecode.CodeAnalyser.getAnalysisInner(CodeAnalyser.java:850)
         *     at org.benf.cfr.reader.bytecode.CodeAnalyser.getAnalysisOrWrapFail(CodeAnalyser.java:278)
         *     at org.benf.cfr.reader.bytecode.CodeAnalyser.getAnalysis(CodeAnalyser.java:201)
         *     at org.benf.cfr.reader.entities.attributes.AttributeCode.analyse(AttributeCode.java:94)
         *     at org.benf.cfr.reader.entities.Method.analyse(Method.java:531)
         *     at org.benf.cfr.reader.entities.ClassFile.analyseMid(ClassFile.java:1055)
         *     at org.benf.cfr.reader.entities.ClassFile.analyseTop(ClassFile.java:942)
         *     at org.benf.cfr.reader.Driver.doJarVersionTypes(Driver.java:257)
         *     at org.benf.cfr.reader.Driver.doJar(Driver.java:139)
         *     at org.benf.cfr.reader.CfrDriverImpl.analyse(CfrDriverImpl.java:76)
         *     at org.benf.cfr.reader.Main.main(Main.java:54)
         */
        throw new IllegalStateException("Decompilation failed");
    }

    /**
     * Reads one sparse instance ({@code {index value, index value, ...}}) and converts it into
     * one object per output column.
     *
     * <p>On entry the tokenizer's current token is expected to be the opening brace (this method
     * immediately advances past it); on return the current token is the one following the
     * closing brace.
     *
     * @param tokenizer tokenizer positioned on the opening brace of the instance
     * @param targ for each input attribute, the index of the output column it is mapped to
     * @param dimsize for each output column, the number of input attributes merged into it
     * @param elkitypes for each output column, its type
     * @param metaLength number of output columns
     * @return array of length {@code metaLength} holding the column objects of this instance
     * @throws IOException if the underlying reader fails
     * @throws AbortException on unexpected tokens, invalid or duplicate attribute indexes,
     *         missing external ID / class label values, or unsupported column types
     */
    private Object[] loadSparseInstance(StreamTokenizer tokenizer, int[] targ, int[] dimsize, TypeInformation[] elkitypes, int metaLength) throws IOException {
        // Phase 1: collect all "index value" pairs up to the closing brace.
        Int2ObjectOpenHashMap<Object> map = new Int2ObjectOpenHashMap<Object>();
        while (true) {
            this.nextToken(tokenizer);
            assert (tokenizer.ttype != StreamTokenizer.TT_EOF && tokenizer.ttype != StreamTokenizer.TT_EOL);
            if (tokenizer.ttype == '}') {
                this.nextToken(tokenizer);
                assert (tokenizer.ttype == StreamTokenizer.TT_EOF || tokenizer.ttype == StreamTokenizer.TT_EOL);
                break;
            }
            if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                throw new AbortException("Unexpected token type encountered: " + tokenizer.toString() + " type: " + tokenizer.ttype);
            }
            int dim = ParseUtil.parseIntBase10(tokenizer.sval);
            // The index comes straight from the input file: reject it before it is used to index targ.
            if (dim < 0 || dim >= targ.length) {
                throw new AbortException("Invalid column index in sparse vector: " + tokenizer.toString());
            }
            if (map.containsKey(dim)) {
                throw new AbortException("Duplicate key in sparse vector: " + tokenizer.toString());
            }
            this.nextToken(tokenizer);
            if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                throw new AbortException("Unexpected token type encountered: " + tokenizer.toString());
            }
            // Values of numeric columns are stored as Double, everything else as the raw string.
            Object value;
            if (TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[targ[dim]])) {
                value = Double.valueOf(ParseUtil.parseDouble(tokenizer.sval));
            } else {
                value = tokenizer.sval;
            }
            map.put(dim, value);
        }

        // Phase 2: distribute the collected values onto the output columns.
        Object[] data = new Object[metaLength];
        for (int out = 0; out < metaLength; ++out) {
            // First input attribute that feeds this output column.
            int s = -1;
            for (int i = 0; i < targ.length; ++i) {
                if (targ[i] == out) {
                    s = i;
                    break;
                }
            }
            assert (s >= 0);
            if (TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[out])) {
                Int2DoubleOpenHashMap f = new Int2DoubleOpenHashMap(dimsize[out]);
                ObjectIterator<Int2ObjectMap.Entry<Object>> iter = map.int2ObjectEntrySet().fastIterator();
                while (iter.hasNext()) {
                    Int2ObjectMap.Entry<Object> entry = iter.next();
                    int i = entry.getIntKey();
                    if (i < s || i >= s + dimsize[out]) {
                        continue;
                    }
                    double v = (Double) entry.getValue();
                    // Re-base the key so that the vector starts at dimension 0.
                    f.put(i - s, v);
                }
                data[out] = new SparseDoubleVector(f, dimsize[out]);
            } else if (TypeUtil.LABELLIST.equals(elkitypes[out])) {
                this.labels.clear();
                // Walk the column range in index order. The hash map has no defined iteration
                // order, so iterating its entries (and stopping at the first key past the range)
                // could skip labels and would emit them in arbitrary order.
                for (int k = 0; k < dimsize[out]; ++k) {
                    Object val = map.get(s + k);
                    if (val == null) {
                        continue;
                    }
                    if (this.labels.size() < k) {
                        LOG.warning("Sparse consecutive labels are currently not correctly supported.");
                    }
                    this.labels.add((String) val);
                }
                data[out] = LabelList.make(this.labels);
            } else if (TypeUtil.EXTERNALID.equals(elkitypes[out])) {
                Object val = map.get(s);
                if (val == null) {
                    throw new AbortException("External ID column not set in sparse instance." + tokenizer.toString());
                }
                data[out] = new ExternalID((String) val);
            } else if (TypeUtil.CLASSLABEL.equals(elkitypes[out])) {
                Object val = map.get(s);
                if (val == null) {
                    throw new AbortException("Class label column not set in sparse instance." + tokenizer.toString());
                }
                data[out] = new SimpleClassLabel(String.valueOf(val));
            } else {
                throw new AbortException("Unsupported type for column ->" + out + ": " + (elkitypes[out] != null ? elkitypes[out].toString() : "null"));
            }
        }
        return data;
    }

    /**
     * Reads one dense instance and converts it into one object per output column.
     *
     * <p>The tokenizer's current token must already be the first value of the instance; after
     * each consumed value the tokenizer is advanced, so on return the current token is the one
     * following the last value.
     *
     * @param tokenizer tokenizer positioned on the first value
     * @param dimsize for each output column, the number of values it consumes
     * @param etyp for each output column, its type
     * @param outdim number of output columns
     * @return array of length {@code outdim} holding the column objects of this instance
     * @throws IOException if the underlying reader fails
     * @throws AbortException on unexpected tokens, unparsable numbers or unsupported column types
     */
    private Object[] loadDenseInstance(StreamTokenizer tokenizer, int[] dimsize, TypeInformation[] etyp, int outdim) throws IOException {
        final Object[] row = new Object[outdim];
        for (int col = 0; col < outdim; ++col) {
            final TypeInformation kind = etyp[col];
            if (TypeUtil.NUMBER_VECTOR_FIELD.equals(kind)) {
                final int width = dimsize[col];
                final double[] values = new double[width];
                for (int d = 0; d < width; ++d) {
                    switch (tokenizer.ttype) {
                    case '?':
                        // Missing value marker.
                        values[d] = Double.NaN;
                        break;
                    case StreamTokenizer.TT_WORD:
                        try {
                            values[d] = ParseUtil.parseDouble(tokenizer.sval);
                        }
                        catch (NumberFormatException e) {
                            throw new AbortException("Expected number value, got: " + tokenizer.sval);
                        }
                        break;
                    default:
                        throw new AbortException("Expected word token, got: " + tokenizer.toString());
                    }
                    this.nextToken(tokenizer);
                }
                row[col] = this.denseFactory.newNumberVector(values);
            } else if (TypeUtil.LABELLIST.equals(kind)) {
                this.labels.clear();
                final int width = dimsize[col];
                for (int d = 0; d < width; ++d) {
                    if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                        throw new AbortException("Expected word token, got: " + tokenizer.toString());
                    }
                    this.labels.add(tokenizer.sval);
                    this.nextToken(tokenizer);
                }
                row[col] = LabelList.make(this.labels);
            } else if (TypeUtil.EXTERNALID.equals(kind)) {
                if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                    throw new AbortException("Expected word token, got: " + tokenizer.toString());
                }
                row[col] = new ExternalID(tokenizer.sval);
                this.nextToken(tokenizer);
            } else if (TypeUtil.CLASSLABEL.equals(kind)) {
                if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
                    throw new AbortException("Expected word token, got: " + tokenizer.toString());
                }
                row[col] = new SimpleClassLabel(tokenizer.sval);
                this.nextToken(tokenizer);
            } else {
                throw new AbortException("Unsupported type for column ->" + col + ": " + String.valueOf(kind));
            }
        }
        return row;
    }

    /**
     * Builds the tokenizer used for the data section.
     *
     * <p>The syntax table is configured call by call, and later calls override earlier ones for
     * overlapping characters, so the order below must be kept.
     *
     * @param br reader to tokenize
     * @return the configured tokenizer
     */
    private StreamTokenizer makeArffTokenizer(BufferedReader br) {
        final StreamTokenizer tok = new StreamTokenizer(br);
        tok.resetSyntax();
        // Control characters and blank separate tokens.
        tok.whitespaceChars(0, ' ');
        tok.ordinaryChars('0', '9');
        tok.ordinaryChar('-');
        tok.ordinaryChar('.');
        // Everything printable (including the digits, '-' and '.' set above) is part of a word.
        tok.wordChars('!', 255);
        // Comma is a value separator.
        tok.whitespaceChars(',', ',');
        tok.commentChar('%');
        tok.quoteChar('"');
        tok.quoteChar('\'');
        // Braces delimit sparse instances and are reported as their own tokens.
        tok.ordinaryChar('{');
        tok.ordinaryChar('}');
        tok.eolIsSignificant(true);
        return tok;
    }

    /**
     * Appends one (empty) column per output column to the bundle, labelled with the names of the
     * input attributes that were merged into it.
     *
     * <p>Attribute names are indexed by <em>input</em> attribute position ({@code in}), whereas
     * types and sizes are indexed by <em>output</em> column ({@code out}); the two diverge as
     * soon as a column merges more than one attribute.
     *
     * @param names attribute names, one per input attribute
     * @param targ for each input attribute, the index of the output column it is mapped to
     * @param etyp for each output column, its type
     * @param dimsize for each output column, the number of input attributes merged into it
     * @param bundle bundle to append the columns to
     * @param sparse whether numeric columns hold sparse (true) or dense (false) vectors
     * @throws AbortException if an output column has an unsupported type
     */
    private void setupBundleHeaders(ArrayList<String> names, int[] targ, TypeInformation[] etyp, int[] dimsize, MultipleObjectsBundle bundle, boolean sparse) {
        for (int in = 0, out = 0; in < targ.length; ++out) {
            // Find the first input attribute that belongs to the next output column.
            int nin = in + 1;
            while (nin < targ.length && targ[nin] == targ[in]) {
                ++nin;
            }
            if (TypeUtil.NUMBER_VECTOR_FIELD.equals(etyp[out])) {
                String[] labels = new String[dimsize[out]];
                for (int i = 0; i < dimsize[out]; ++i) {
                    labels[i] = names.get(in + i);
                }
                if (!sparse) {
                    VectorFieldTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<DoubleVector>(DoubleVector.FACTORY, dimsize[out], labels);
                    bundle.appendColumn(type, new ArrayList<DoubleVector>());
                } else {
                    VectorFieldTypeInformation<SparseDoubleVector> type = new VectorFieldTypeInformation<SparseDoubleVector>(SparseDoubleVector.FACTORY, dimsize[out], labels);
                    bundle.appendColumn(type, new ArrayList<SparseDoubleVector>());
                }
            } else if (TypeUtil.LABELLIST.equals(etyp[out])) {
                // Column label is the space-separated list of the merged attribute names.
                StringBuilder label = new StringBuilder(names.get(in));
                for (int i = 1; i < dimsize[out]; ++i) {
                    label.append(' ').append(names.get(in + i));
                }
                bundle.appendColumn(new SimpleTypeInformation<LabelList>(LabelList.class, label.toString()), new ArrayList<LabelList>());
            } else if (TypeUtil.EXTERNALID.equals(etyp[out])) {
                bundle.appendColumn(new SimpleTypeInformation<ExternalID>(ExternalID.class, names.get(in)), new ArrayList<ExternalID>());
            } else if (TypeUtil.CLASSLABEL.equals(etyp[out])) {
                bundle.appendColumn(new SimpleTypeInformation<ClassLabel>(ClassLabel.class, names.get(in)), new ArrayList<ClassLabel>());
            } else {
                throw new AbortException("Unsupported type for column " + in + "->" + out + ": " + (etyp[out] != null ? etyp[out].toString() : "null"));
            }
            assert (out == bundle.metaLength() - 1);
            in = nin;
        }
    }

    /**
     * Skips leading comment and blank lines and verifies that the first remaining line is the
     * {@code @relation} declaration.
     *
     * @param br reader positioned at the start of the file
     * @throws IOException if reading fails
     * @throws AbortException if the input ends before a relation declaration is found, or the
     *         first non-comment, non-blank line is not a relation declaration
     */
    private void readHeader(BufferedReader br) throws IOException {
        String line;
        do {
            line = br.readLine();
            if (line == null) {
                // Concatenating the Matcher itself would print its debug dump
                // ("java.util.regex.Matcher[pattern=... region=... lastmatch=]"), so name the
                // missing declaration explicitly instead.
                throw new AbortException("@relation declaration not found in file.");
            }
        } while (ARFF_COMMENT.reset(line).matches() || EMPTY.reset(line).matches());
        if (!ARFF_HEADER_RELATION.reset(line).matches()) {
            throw new AbortException("Expected relation declaration: " + line);
        }
    }

    /**
     * Reads the {@code @attribute} declarations up to (and including) the {@code @data} line,
     * collecting attribute names and declared types in parallel lists.
     *
     * <p>Comment and blank lines are skipped. A name enclosed in a matching pair of single or
     * double quotes is stored without the quotes.
     *
     * @param br reader positioned after the relation declaration
     * @param names output list receiving the attribute names
     * @param types output list receiving the declared attribute types
     * @throws IOException if reading fails
     * @throws AbortException if the input ends before the data section starts, or a line is
     *         neither a comment, blank, an attribute declaration nor the data marker
     */
    private void parseAttributeStatements(BufferedReader br, ArrayList<String> names, ArrayList<String> types) throws IOException {
        while (true) {
            String line = br.readLine();
            if (line == null) {
                // Concatenating the Matcher itself would print its debug dump rather than
                // something readable, so name the missing marker explicitly.
                throw new AbortException("@data section not found in file.");
            }
            if (ARFF_COMMENT.reset(line).matches() || EMPTY.reset(line).matches()) {
                continue;
            }
            if (ARFF_HEADER_DATA.reset(line).matches()) {
                break;
            }
            Matcher matcher = ARFF_HEADER_ATTRIBUTE.reset(line);
            if (!matcher.matches()) {
                throw new AbortException("Unrecognized line: " + line);
            }
            String name = matcher.group(1);
            int last = name.length() - 1;
            // Require at least two characters: a name consisting of a single quote character is
            // both its own first and last character, and stripping it would call substring(1, 0).
            if (last >= 1) {
                char first = name.charAt(0);
                if ((first == '\'' || first == '"') && name.charAt(last) == first) {
                    name = name.substring(1, last);
                }
            }
            names.add(name);
            types.add(matcher.group(2));
        }
        assert (names.size() == types.size());
    }

    /**
     * Assigns every input attribute to an output column and records the type and width of each
     * output column.
     *
     * <p>Attributes whose name matches the external ID or class label matcher always get a
     * column of their own. Any other attribute is numeric (declared type matches
     * {@code ARFF_NUMERIC}) or a label; it is merged into the immediately preceding output
     * column when that column has the same kind, and otherwise opens a new column.
     *
     * @param names attribute names, one per input attribute
     * @param types declared attribute types, one per input attribute
     * @param targ output: for each input attribute, the index of its output column
     * @param etyp output: type of each output column
     * @param dims output: number of input attributes merged into each output column
     */
    private void processColumnTypes(ArrayList<String> names, ArrayList<String> types, int[] targ, TypeInformation[] etyp, int[] dims) {
        int used = 0;
        for (int col = 0; col < targ.length; ++col) {
            final TypeInformation kind;
            final boolean mergeable;
            if (this.magic_eid != null && this.magic_eid.reset(names.get(col)).matches()) {
                kind = TypeUtil.EXTERNALID;
                mergeable = false;
            } else if (this.magic_class != null && this.magic_class.reset(names.get(col)).matches()) {
                kind = TypeUtil.CLASSLABEL;
                mergeable = false;
            } else if (ARFF_NUMERIC.reset(types.get(col)).matches()) {
                kind = TypeUtil.NUMBER_VECTOR_FIELD;
                mergeable = true;
            } else {
                kind = TypeUtil.LABELLIST;
                mergeable = true;
            }

            if (mergeable && used > 0 && kind.equals(etyp[used - 1])) {
                // Extend the previous output column by one more attribute.
                final int prev = used - 1;
                targ[col] = prev;
                dims[prev] = dims[prev] + 1;
            } else {
                // Open a new output column.
                targ[col] = used;
                etyp[used] = kind;
                dims[used] = 1;
                ++used;
            }
        }
    }

    /**
     * Advances the tokenizer by one token and normalizes the token type: quoted strings are
     * reported as words, and a word consisting solely of {@code ?} is reported as the
     * character {@code '?'}. When debug logging is enabled the resulting token is logged.
     *
     * @param tokenizer tokenizer to advance
     * @throws IOException if the underlying reader fails
     */
    private void nextToken(StreamTokenizer tokenizer) throws IOException {
        tokenizer.nextToken();
        final int raw = tokenizer.ttype;
        if (raw == '\'' || raw == '"') {
            tokenizer.ttype = StreamTokenizer.TT_WORD;
        } else if (raw == StreamTokenizer.TT_WORD && tokenizer.sval.equals("?")) {
            tokenizer.ttype = '?';
        }
        if (!LOG.isDebugging()) {
            return;
        }
        switch (tokenizer.ttype) {
        case StreamTokenizer.TT_NUMBER:
            LOG.debug("token: " + tokenizer.nval);
            break;
        case StreamTokenizer.TT_WORD:
            LOG.debug("token: " + tokenizer.sval);
            break;
        case StreamTokenizer.TT_EOF:
            LOG.debug("token: EOF");
            break;
        case StreamTokenizer.TT_EOL:
            LOG.debug("token: EOL");
            break;
        default:
            LOG.debug("token type: " + tokenizer.ttype);
            break;
        }
    }

    /**
     * Resets both name matchers to the empty string (skipping any that is null), so they no
     * longer reference the last attribute name they were applied to.
     */
    @Override
    public void cleanup() {
        final Matcher[] matchers = { this.magic_eid, this.magic_class };
        for (Matcher m : matchers) {
            if (m != null) {
                m.reset("");
            }
        }
    }

    /**
     * Parameterization class: reads the two attribute-name patterns from the configuration and
     * builds the parser from them.
     */
    public static class Parameterizer
    extends AbstractParameterizer {
        /** Option for the pattern that recognizes external ID attributes. */
        public static final OptionID MAGIC_EID_ID = new OptionID("arff.externalid", "Pattern to recognize external ID attributes.");
        /** Option for the pattern that recognizes class label attributes. */
        public static final OptionID MAGIC_CLASS_ID = new OptionID("arff.classlabel", "Pattern to recognize class label attributes.");
        /** External ID pattern; stays unset if the parameter could not be grabbed. */
        Pattern magic_eid;
        /** Class label pattern; stays unset if the parameter could not be grabbed. */
        Pattern magic_class;

        @Override
        protected void makeOptions(Parameterization config) {
            super.makeOptions(config);

            final PatternParameter externalIdParam = new PatternParameter(MAGIC_EID_ID, ArffParser.DEFAULT_ARFF_MAGIC_EID);
            if (config.grab(externalIdParam)) {
                this.magic_eid = (Pattern) externalIdParam.getValue();
            }

            final PatternParameter classLabelParam = new PatternParameter(MAGIC_CLASS_ID, ArffParser.DEFAULT_ARFF_MAGIC_CLASS);
            if (config.grab(classLabelParam)) {
                this.magic_class = (Pattern) classLabelParam.getValue();
            }
        }

        @Override
        protected ArffParser makeInstance() {
            return new ArffParser(this.magic_eid, this.magic_class);
        }
    }
}

