/*
 * Decompiled with CFR 0.152.
 */
package com.cybozu.labs.langdetect;

import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.ErrorCode;
import com.cybozu.labs.langdetect.LangDetectException;
import com.cybozu.labs.langdetect.Language;
import com.cybozu.labs.langdetect.util.NGram;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.HashMap;
import java.util.Random;
import java.util.regex.Pattern;

/**
 * Detects the language of a piece of text.
 *
 * <p>A detector is created from a {@link DetectorFactory}, fed text through
 * {@link #append(String)} or {@link #append(Reader)}, and then queried with
 * {@link #detect()} or {@link #getProbabilities()}. The result is computed on
 * the first query and cached, so text appended after that point does not
 * influence the answer.
 *
 * <p>Instances hold unsynchronized mutable state and are not safe for
 * concurrent use.
 */
public class Detector {
    /** Initial value of {@link #alpha}. */
    private static final double ALPHA_DEFAULT = 0.5;
    /** Scale applied to the Gaussian noise added to {@link #alpha} in each trial. */
    private static final double ALPHA_WIDTH = 0.05;
    /** Upper bound on the number of n-gram updates performed in one trial. */
    private static final int ITERATION_LIMIT = 1000;
    /** Languages whose probability does not exceed this value are dropped from results. */
    private static final double PROB_THRESHOLD = 0.1;
    /** A trial stops early once the largest normalized probability exceeds this value. */
    private static final double CONV_THRESHOLD = 0.99999;
    /** Divisor turning {@code alpha} into the additive weight used in {@link #updateLangProb}. */
    private static final int BASE_FREQ = 10000;
    /** Value returned by {@link #detect()} when no language passes the threshold. */
    private static final String UNKNOWN_LANG = "unknown";
    private static final Pattern URL_REGEX = Pattern.compile("https?://[-_.?&~;+=/#0-9A-Za-z]{1,2076}");
    private static final Pattern MAIL_REGEX = Pattern.compile("[-_.0-9A-Za-z]{1,64}@[-_0-9A-Za-z]{1,255}[-_.0-9A-Za-z]{1,255}");

    /** Maps an n-gram to one probability per language, indexed like {@link #langlist}. */
    private final HashMap<String, double[]> wordLangProbMap;
    /** Language names; position {@code i} corresponds to index {@code i} of every probability array. */
    private final ArrayList<String> langlist;
    /** Text accumulated by the {@code append} methods. */
    private StringBuilder text;
    /** Cached detection result, or {@code null} until the first detection has run. */
    private double[] langprob = null;
    private double alpha = ALPHA_DEFAULT;
    /** Number of randomized trials whose results are averaged. */
    private int n_trial = 7;
    private int max_text_length = 10000;
    /** Normalized prior per language, or {@code null} for a uniform prior. */
    private double[] priorMap = null;
    private boolean verbose = false;
    /** Seed for the random generator, or {@code null} to leave it unseeded. */
    private Long seed = null;

    /**
     * Creates a detector that uses the profile data of the given factory.
     *
     * <p>The factory's n-gram map and language list are referenced, not copied.
     *
     * @param factory source of the n-gram probabilities, language list and seed
     */
    public Detector(DetectorFactory factory) {
        this.wordLangProbMap = factory.wordLangProbMap;
        this.langlist = factory.langlist;
        this.text = new StringBuilder();
        this.seed = factory.seed;
    }

    /** Enables tracing of intermediate probabilities to {@code System.out}. */
    public void setVerbose() {
        this.verbose = true;
    }

    /**
     * Sets the base smoothing parameter; each trial perturbs it with Gaussian noise.
     *
     * @param alpha new base value
     */
    public void setAlpha(double alpha) {
        this.alpha = alpha;
    }

    /**
     * Sets prior probabilities per language.
     *
     * <p>Languages that are missing from the map (or mapped to {@code null}) get
     * a prior of zero. The values are normalized so that they sum to one. If
     * validation fails, the previously configured prior is left untouched.
     *
     * @param priorMap language name to non-negative weight
     * @throws LangDetectException if a weight is negative or all weights are zero
     */
    public void setPriorMap(HashMap<String, Double> priorMap) throws LangDetectException {
        // Build into a local array so a validation failure cannot leave a
        // half-filled, un-normalized prior installed on this detector.
        double[] normalized = new double[this.langlist.size()];
        double sump = 0.0;
        for (int i = 0; i < normalized.length; ++i) {
            Double boxed = priorMap.get(this.langlist.get(i));
            if (boxed == null) {
                continue;
            }
            double p = boxed.doubleValue();
            if (p < 0.0) {
                throw new LangDetectException(ErrorCode.InitParamError, "Prior probability must be non-negative.");
            }
            normalized[i] = p;
            sump += p;
        }
        if (sump <= 0.0) {
            throw new LangDetectException(ErrorCode.InitParamError, "More one of prior probability must be non-zero.");
        }
        for (int i = 0; i < normalized.length; ++i) {
            normalized[i] /= sump;
        }
        this.priorMap = normalized;
    }

    /**
     * Sets the text length limit used by the {@code append} methods.
     *
     * @param max_text_length new limit in characters
     */
    public void setMaxTextLength(int max_text_length) {
        this.max_text_length = max_text_length;
    }

    /**
     * Reads text from {@code reader} and appends it until the accumulated text
     * reaches the length limit, the reader reports that it is not ready, or the
     * end of the stream is reached.
     *
     * @param reader source of the text; it is not closed by this method
     * @throws IOException if reading fails
     */
    public void append(Reader reader) throws IOException {
        // At least one slot: with an empty buffer read() would return 0 forever
        // and the loop below could never make progress.
        char[] buf = new char[Math.max(1, this.max_text_length / 2)];
        while (this.text.length() < this.max_text_length && reader.ready()) {
            int length = reader.read(buf);
            if (length < 0) {
                // End of stream; ready() alone does not rule this out.
                break;
            }
            this.append(new String(buf, 0, length));
        }
    }

    /**
     * Appends text to the detection target.
     *
     * <p>URL-like and mail-address-like substrings are replaced by a single
     * space, the result is passed through {@code NGram.normalize_vi}, and runs
     * of spaces are collapsed to one. Only the first {@code max_text_length}
     * characters of the preprocessed argument are considered; the limit is
     * applied per call, not to the accumulated total.
     *
     * @param text text to append
     */
    public void append(String text) {
        text = URL_REGEX.matcher(text).replaceAll(" ");
        text = MAIL_REGEX.matcher(text).replaceAll(" ");
        text = NGram.normalize_vi(text);
        char pre = 0;
        for (int i = 0; i < text.length() && i < this.max_text_length; ++i) {
            char c = text.charAt(i);
            if (c != ' ' || pre != ' ') {
                this.text.append(c);
            }
            pre = c;
        }
    }

    /**
     * Removes the characters in the range {@code 'A'..'z'} from the text when
     * they are clearly outnumbered by non-Latin characters.
     *
     * <p>Note that the range {@code 'A'..'z'} also contains the six ASCII
     * punctuation characters that sit between {@code 'Z'} and {@code 'a'}.
     */
    private void cleaningText() {
        int latinCount = 0;
        int nonLatinCount = 0;
        for (int i = 0; i < this.text.length(); ++i) {
            char c = this.text.charAt(i);
            if (c <= 'z' && c >= 'A') {
                ++latinCount;
            } else if (c >= '\u0300' && Character.UnicodeBlock.of(c) != Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
                ++nonLatinCount;
            }
        }
        if (latinCount * 2 < nonLatinCount) {
            StringBuilder textWithoutLatin = new StringBuilder();
            for (int i = 0; i < this.text.length(); ++i) {
                char c = this.text.charAt(i);
                if (c > 'z' || c < 'A') {
                    textWithoutLatin.append(c);
                }
            }
            this.text = textWithoutLatin;
        }
    }

    /**
     * Returns the name of the most probable language.
     *
     * @return the top language, or {@code "unknown"} if no language exceeds the
     *         probability threshold
     * @throws LangDetectException if the text contains no known n-gram
     */
    public String detect() throws LangDetectException {
        ArrayList<Language> probabilities = this.getProbabilities();
        if (!probabilities.isEmpty()) {
            return probabilities.get(0).lang;
        }
        return UNKNOWN_LANG;
    }

    /**
     * Returns the candidate languages ordered by descending probability.
     *
     * <p>Detection runs on the first call only; later calls reuse the cached
     * probabilities.
     *
     * @return languages whose probability exceeds the threshold, best first
     * @throws LangDetectException if the text contains no known n-gram
     */
    public ArrayList<Language> getProbabilities() throws LangDetectException {
        if (this.langprob == null) {
            this.detectBlock();
        }
        return this.sortProbability(this.langprob);
    }

    /**
     * Runs {@code n_trial} randomized trials and stores their averaged
     * probabilities in {@link #langprob}.
     *
     * @throws LangDetectException if no n-gram of the text is present in the profile map
     */
    private void detectBlock() throws LangDetectException {
        this.cleaningText();
        ArrayList<String> ngrams = this.extractNGrams();
        if (ngrams.isEmpty()) {
            throw new LangDetectException(ErrorCode.CantDetectError, "no features in text");
        }
        this.langprob = new double[this.langlist.size()];
        Random rand = new Random();
        if (this.seed != null) {
            rand.setSeed(this.seed);
        }
        for (int t = 0; t < this.n_trial; ++t) {
            double[] prob = this.initProbability();
            double alpha = this.alpha + rand.nextGaussian() * ALPHA_WIDTH;
            for (int i = 0;; ++i) {
                int r = rand.nextInt(ngrams.size());
                this.updateLangProb(prob, ngrams.get(r), alpha);
                // Normalize and test for convergence only on every fifth update.
                if (i % 5 == 0) {
                    if (Detector.normalizeProb(prob) > CONV_THRESHOLD || i >= ITERATION_LIMIT) {
                        break;
                    }
                    if (this.verbose) {
                        System.out.println("> " + this.sortProbability(prob));
                    }
                }
            }
            for (int j = 0; j < this.langprob.length; ++j) {
                this.langprob[j] += prob[j] / this.n_trial;
            }
            if (this.verbose) {
                System.out.println("==> " + this.sortProbability(prob));
            }
        }
    }

    /**
     * Creates the starting distribution for a trial: a copy of the configured
     * prior, or a uniform distribution when no prior is set.
     *
     * @return a new array with one entry per language
     */
    private double[] initProbability() {
        double[] prob = new double[this.langlist.size()];
        if (this.priorMap != null) {
            System.arraycopy(this.priorMap, 0, prob, 0, prob.length);
        } else {
            double uniform = 1.0 / prob.length;
            for (int i = 0; i < prob.length; ++i) {
                prob[i] = uniform;
            }
        }
        return prob;
    }

    /**
     * Collects every n-gram of length 1 to 3 of the text that has an entry in
     * the profile map.
     *
     * @return the known n-grams in text order, duplicates included
     */
    private ArrayList<String> extractNGrams() {
        ArrayList<String> list = new ArrayList<String>();
        NGram ngram = new NGram();
        for (int i = 0; i < this.text.length(); ++i) {
            ngram.addChar(this.text.charAt(i));
            for (int n = 1; n <= 3; ++n) {
                String w = ngram.get(n);
                if (w != null && this.wordLangProbMap.containsKey(w)) {
                    list.add(w);
                }
            }
        }
        return list;
    }

    /**
     * Multiplies each language probability by the smoothed probability of
     * {@code word} for that language.
     *
     * @param prob  probabilities to update in place
     * @param word  n-gram to apply
     * @param alpha smoothing parameter for this trial
     * @return {@code false} if the word is {@code null} or has no profile entry,
     *         {@code true} otherwise
     */
    private boolean updateLangProb(double[] prob, String word, double alpha) {
        if (word == null) {
            return false;
        }
        double[] langProbMap = this.wordLangProbMap.get(word);
        if (langProbMap == null) {
            return false;
        }
        if (this.verbose) {
            System.out.println(word + "(" + Detector.unicodeEncode(word) + "):" + this.wordProbToString(langProbMap));
        }
        double weight = alpha / BASE_FREQ;
        for (int i = 0; i < prob.length; ++i) {
            prob[i] *= weight + langProbMap[i];
        }
        return true;
    }

    /**
     * Formats the entries of {@code prob} that are at least {@code 0.00001} as
     * {@code " lang:probability"} pairs for verbose output.
     *
     * @param prob one probability per language
     * @return the formatted pairs, concatenated
     */
    private String wordProbToString(double[] prob) {
        Formatter formatter = new Formatter();
        for (int j = 0; j < prob.length; ++j) {
            double p = prob[j];
            if (p >= 1.0E-5) {
                formatter.format(" %s:%.5f", this.langlist.get(j), p);
            }
        }
        return formatter.toString();
    }

    /**
     * Divides every entry of {@code prob} by the sum of all entries, in place.
     *
     * @param prob values to normalize
     * @return the largest normalized value
     */
    private static double normalizeProb(double[] prob) {
        double sump = 0.0;
        for (double p : prob) {
            sump += p;
        }
        double maxp = 0.0;
        for (int i = 0; i < prob.length; ++i) {
            double p = prob[i] / sump;
            if (maxp < p) {
                maxp = p;
            }
            prob[i] = p;
        }
        return maxp;
    }

    /**
     * Builds the list of languages whose probability exceeds the threshold,
     * ordered by descending probability.
     *
     * @param prob one probability per language
     * @return a new, possibly empty list
     */
    private ArrayList<Language> sortProbability(double[] prob) {
        ArrayList<Language> list = new ArrayList<Language>();
        for (int j = 0; j < prob.length; ++j) {
            double p = prob[j];
            if (p <= PROB_THRESHOLD || Double.isNaN(p)) {
                continue;
            }
            // Insert before the first entry with a strictly smaller probability.
            int pos = 0;
            while (pos < list.size() && !(list.get(pos).prob < p)) {
                ++pos;
            }
            list.add(pos, new Language(this.langlist.get(j), p));
        }
        return list;
    }

    /**
     * Replaces every character at or above U+0080 with its {@code \\uXXXX}
     * escape (lower-case hex); other characters are copied unchanged.
     *
     * @param word text to encode
     * @return the escaped text
     */
    private static String unicodeEncode(String word) {
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < word.length(); ++i) {
            char ch = word.charAt(i);
            if (ch >= '\u0080') {
                // 0x10000 + ch always has five hex digits; dropping the leading
                // '1' leaves the zero-padded four-digit code.
                buf.append("\\u").append(Integer.toHexString(0x10000 + ch).substring(1));
            } else {
                buf.append(ch);
            }
        }
        return buf.toString();
    }
}

