/*
 * Decompiled with CFR 0.152.
 */
package zephyr.kenkyusha.encol;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.Locale;
import java.util.regex.Pattern;
import zephyr.kenkyusha.encol.Hinshi;
import zephyr.kenkyusha.encol.MecabUtil;
import zephyr.kenkyusha.encol.OpenNLPUtil;
import zephyr.kenkyusha.encol.POSType;
import zephyr.kenkyusha.encol.Util;
import zephyr.kenkyusha.encol.Youso;
import zephyr.util.MultiHashMap;

/**
 * Builds an HTML definition-list style corpus keyed by Japanese words.
 *
 * <p>Each input sentence is run through {@link MecabUtil#tag}; runs of morphemes are
 * glued into words and every word is mapped to the sentences it occurs in. On output,
 * each word is printed with its sentences; words with many sentences are additionally
 * grouped by English lemmas that occur in a large share of those sentences.
 *
 * <p>All corpus output goes to {@code System.out}; progress goes to {@code System.err}.
 */
public class MakeJpCorpus {
    /** Words with at most this many sentences are printed as a single flat entry. */
    private static final int MAX_UNGROUPED_SENTENCES = 20;

    /** An English lemma gets its own entry only if it occurs in at least this share of a word's sentences. */
    private static final double MIN_GROUP_SHARE = 0.3;

    /** Token separator for the English text; compiled once instead of on every sentence. */
    private static final Pattern NON_LETTERS = Pattern.compile("[^a-zA-Z]+");

    /** Lower-case English tokens that are never used as grouping keys. */
    private static final HashSet<String> ignoreWords = new HashSet<String>(Arrays.asList(
            "an", "the",
            "am", "are", "is", "was", "were",
            "my", "me", "you", "your",
            "he", "his", "him", "she", "her", "it", "its",
            "we", "our", "us", "they", "their", "them",
            "there", "of", "in", "to", "and", "or"));

    private final OpenNLPUtil nlp;
    private final MecabUtil mecab;

    /** Collected Japanese word -> every sentence it was found in. */
    private final MultiHashMap<String, String> collectedWords = new MultiHashMap<String, String>();

    MakeJpCorpus(OpenNLPUtil openNLPUtil, MecabUtil mecabUtil) {
        this.nlp = openNLPUtil;
        this.mecab = mecabUtil;
    }

    /**
     * Splits the Japanese part of {@code sentence} into words and records each word
     * against the original (unmodified) sentence in {@link #collectedWords}.
     *
     * <p>A word starts at any non-punctuation morpheme and is extended while
     * {@link #continuesWord} holds for the next morpheme.
     *
     * @param sentence the full sentence; its Japanese part is taken via {@code Util.getJapanese}
     */
    private void parse(String sentence) {
        String japanese = Util.getJapanese(sentence);
        ArrayList<Youso> morphemes = this.mecab.tag(japanese);
        int count = morphemes.size();
        int index = 0;
        while (index < count) {
            Youso head = morphemes.get(index++);
            if (head.hinshi == Hinshi.PUNC) {
                continue;
            }
            String word = head.txt;
            Hinshi previous = head.hinshi;
            // Absorb following morphemes until one starts a new word; index is left on that morpheme.
            while (index < count) {
                Youso next = morphemes.get(index);
                if (!continuesWord(previous, next.hinshi)) {
                    break;
                }
                word = word + next.txt;
                previous = next.hinshi;
                ++index;
            }
            if (index < count) {
                // Word ended at a boundary inside the sentence.
                this.collectedWords.put(word, sentence);
            } else if (word.length() > 0) {
                // Word ran to the end of the sentence.
                // NOTE(review): this println goes to the same stream as the HTML output and is only
                // emitted for sentence-final words; it looks like leftover debug output. Kept for
                // backward compatibility — confirm whether it can be removed.
                System.out.println("\t" + word);
                this.collectedWords.put(word, sentence);
            }
        }
    }

    /**
     * Tells whether a morpheme of class {@code next} is appended to the word whose last
     * morpheme has class {@code previous}.
     *
     * <p>NOTE(review): JOSHI / JYOV / SETTOU presumably denote particles, auxiliary verbs and
     * prefixes — confirm against {@code Hinshi}.
     */
    private static boolean continuesWord(Hinshi previous, Hinshi next) {
        return next == previous
                || next == Hinshi.JOSHI
                || next == Hinshi.JYOV
                || previous == Hinshi.SETTOU;
    }

    /**
     * Returns the lemma of {@code word} for the POS classes NOUN, VERB and MD, and the
     * word itself for every other class.
     *
     * @param word the (lower-cased) English token
     * @param tag  the POS tag produced by {@code OpenNLPUtil.tag} for that token
     */
    private String word2lemma(String word, String tag) {
        POSType pos = POSType.tag2pos(tag);
        switch (pos) {
            case NOUN:
            case VERB:
            case MD:
                return this.nlp.word2lemma(word, tag);
            default:
                return word;
        }
    }

    /**
     * Prints the entry (or entries) for one Japanese word.
     *
     * <p>With at most {@value #MAX_UNGROUPED_SENTENCES} sentences a single entry is printed.
     * Otherwise the sentences are grouped by English lemma; every lemma occurring in at least
     * {@code MIN_GROUP_SHARE} of the sentences gets its own entry (most frequent first), and
     * sentences not covered by any printed group are listed in a final {@code *} entry.
     *
     * @param word      the Japanese word used as the entry title
     * @param sentences all sentences collected for that word
     */
    private void outputWord(String word, LinkedList<String> sentences) {
        int total = sentences.size();
        if (total <= MAX_UNGROUPED_SENTENCES) {
            System.out.printf("<dt title=\"%s, %d\">%s</dt><dd>, %d\r\n", word, total, word, total);
            for (String sentence : sentences) {
                printSentence(sentence);
            }
            System.out.println("</dd>");
            return;
        }

        WordSentence[] groups = groupByEnglishLemma(sentences).values().toArray(new WordSentence[0]);
        Arrays.sort(groups);

        // Sentences not yet printed under any lemma group, in original order.
        LinkedHashSet<String> remaining = new LinkedHashSet<String>(sentences);
        double threshold = MIN_GROUP_SHARE * (double) total;
        for (WordSentence group : groups) {
            int size = group.list.size();
            if ((double) size < threshold) {
                break; // groups are sorted by descending size, so no later group qualifies
            }
            double percent = 100.0 * (double) size / (double) total;
            System.out.printf("<dt title=\"%s, %s, %d, %.1f%%\">%s</dt><dd>, %s, %d, %.1f%%\r\n",
                    word, group.word, size, percent, word, group.word, size, percent);
            for (String sentence : group.list) {
                printSentence(sentence);
                remaining.remove(sentence);
            }
            System.out.println("</dd>");
        }

        if (remaining.size() > 0) {
            System.out.printf("<dt title=\"%s, *, %d\">%s</dt><dd>, *, %d\r\n",
                    word, remaining.size(), word, remaining.size());
            for (String sentence : remaining) {
                printSentence(sentence);
            }
            System.out.println("</dd>");
        }
    }

    /**
     * Maps each English lemma to the sentences that contain it.
     *
     * <p>A sentence is added to a lemma's group at most once per pass over that sentence, even
     * if the lemma occurs several times in it; otherwise group sizes (and the percentages
     * derived from them) would count tokens instead of sentences.
     */
    private HashMap<String, WordSentence> groupByEnglishLemma(LinkedList<String> sentences) {
        HashMap<String, WordSentence> groups = new HashMap<String, WordSentence>();
        for (String sentence : sentences) {
            String english = Util.getEnglish(sentence);
            // Locale.ROOT keeps ASCII letters ASCII regardless of the default locale
            // (e.g. Turkish would turn "I" into a dotless i that the pattern treats as a separator).
            String[] tokens = NON_LETTERS.split(english.toLowerCase(Locale.ROOT));
            // The tagger receives the full token array so that tags[j] lines up with tokens[j].
            String[] tags = this.nlp.tag(tokens);
            HashSet<String> seenLemmas = new HashSet<String>();
            for (int j = 0; j < tokens.length; ++j) {
                String token = tokens[j];
                if (token.length() <= 1 || ignoreWords.contains(token)) {
                    continue;
                }
                String lemma = this.word2lemma(token, tags[j]);
                if (!seenLemmas.add(lemma)) {
                    continue; // already counted for this sentence
                }
                WordSentence group = groups.get(lemma);
                if (group == null) {
                    groups.put(lemma, new WordSentence(lemma, sentence));
                } else {
                    group.list.add(sentence);
                }
            }
        }
        return groups;
    }

    /** Prints one sentence as an HTML paragraph prefixed with {@code Util.SENTENCE_HEAD}. */
    private static void printSentence(String sentence) {
        System.out.println("<p>" + Util.SENTENCE_HEAD + sentence + "</p>");
    }

    /**
     * Parses every sentence, then prints the entries of all collected words in sorted key order.
     *
     * <p>Progress is reported on {@code System.err}: while parsing, a {@code .} every 10000
     * sentences and the running count divided by 100000 every 100000; while printing, a
     * {@code .} every 1000 words and the running count divided by 10000 every 10000.
     *
     * @param linkedList the sentences to analyse
     */
    public void output(LinkedList<String> linkedList) {
        // "Japanese example analysis"
        System.err.println("\u65e5\u672c\u8a9e\u7528\u4f8b\u89e3\u6790");
        int parsed = 0;
        for (String sentence : linkedList) {
            this.parse(sentence);
            ++parsed;
            if (parsed % 10000 == 0) {
                if (parsed % 100000 == 0) {
                    System.err.print(parsed / 100000);
                } else {
                    System.err.print(".");
                }
            }
        }
        System.err.println();

        // "Japanese analysis result output"
        System.err.println("\u65e5\u672c\u8a9e\u89e3\u6790\u7d50\u679c\u51fa\u529b");
        String[] words = this.collectedWords.keySet().toArray(new String[0]);
        Arrays.sort(words);
        int printed = 0;
        for (String word : words) {
            LinkedList<String> sentences = this.collectedWords.getList(word);
            this.outputWord(word, sentences);
            ++printed;
            if (printed % 1000 == 0) {
                if (printed % 10000 == 0) {
                    System.err.print(printed / 10000);
                } else {
                    System.err.print(".");
                }
            }
        }
        System.err.println();
    }

    /**
     * An English lemma together with the sentences it occurs in.
     *
     * <p>Natural order: larger sentence lists first, ties broken by the lemma's string order.
     */
    private static class WordSentence
    implements Comparable<WordSentence> {
        final String word;
        final LinkedList<String> list = new LinkedList<String>();

        WordSentence(String word, String firstSentence) {
            this.word = word;
            this.list.add(firstSentence);
        }

        @Override
        public int compareTo(WordSentence other) {
            // Both sizes are non-negative, so the subtraction cannot overflow.
            int bySize = other.list.size() - this.list.size();
            if (bySize != 0) {
                return bySize;
            }
            return this.word.compareTo(other.word);
        }
    }
}

