/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.language.identifier.detector;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.languagetool.language.identifier.LanguageIdentifierService;

public class NGramDetector {
    private static final double EPSILON = 1.0E-4;
    private static final Pattern DIGITS = Pattern.compile("\\d+");
    private static final Pattern KOREAN = Pattern.compile("[\\uac00-\\ud7a3]");
    private static final Pattern JAPANESE = Pattern.compile("[\\u3040-\\u30ff]");
    private static final Pattern CHINESE = Pattern.compile("[\\u4e00-\\u9FFF]");
    private static final Pattern KHMER = Pattern.compile("[\\u1780-\\u17FF]");
    private static final Pattern TAGALOG = Pattern.compile("[\\u1700-\\u171F]");
    private static final Pattern ARMENIAN = Pattern.compile("[\\u0530-\\u058F]");
    private static final Pattern GREEK = Pattern.compile("[\\u0370-\\u03FF]");
    private static final Pattern TAMIL = Pattern.compile("[\\u0B80-\\u0BFF]");
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
    private final Map<String, Integer> vocab;
    private final List<String[]> codes;
    private final List<Map<String, Double>> knpBigramProbs;
    private final int thresholdsStart;
    private final List<double[]> thresholds;
    private final int maxLength;
    private final ZipFile zipFile;

    public NGramDetector(File sourceModelZip, int maxLength) throws IOException {
        this.maxLength = maxLength;
        this.zipFile = new ZipFile(sourceModelZip);
        this.codes = new ArrayList<String[]>();
        try (BufferedReader br = this.getReader("iso_codes.tsv");){
            String line;
            while ((line = br.readLine()) != null) {
                String[] values = line.split("\t");
                if (!values[3].equals("1")) continue;
                this.codes.add(values);
            }
        }
        this.vocab = new HashMap<String, Integer>();
        br = this.getReader("vocab.txt");
        var4_4 = null;
        try {
            int i = 0;
            while ((line = br.readLine()) != null) {
                this.vocab.put(line.split("\t")[0].trim(), i);
                ++i;
            }
        }
        catch (Throwable line) {
            var4_4 = line;
            throw line;
        }
        finally {
            if (br != null) {
                if (var4_4 != null) {
                    try {
                        br.close();
                    }
                    catch (Throwable line) {
                        var4_4.addSuppressed(line);
                    }
                } else {
                    br.close();
                }
            }
        }
        this.thresholds = new ArrayList<double[]>();
        br = this.getReader("thresholds.txt");
        var4_4 = null;
        try {
            this.thresholdsStart = Integer.parseInt(br.readLine());
            while ((line = br.readLine()) != null) {
                double[] vals = Arrays.stream(line.split(" ")).mapToDouble(Double::parseDouble).toArray();
                this.thresholds.add(vals);
            }
        }
        catch (Throwable throwable) {
            var4_4 = throwable;
            throw throwable;
        }
        finally {
            if (br != null) {
                if (var4_4 != null) {
                    try {
                        br.close();
                    }
                    catch (Throwable throwable) {
                        var4_4.addSuppressed(throwable);
                    }
                } else {
                    br.close();
                }
            }
        }
        this.knpBigramProbs = ((Stream)this.expectedFiles().stream().map(this::readLines).parallel()).map(NGramDetector::loadDict).collect(Collectors.toList());
    }

    public Map<String, Double> detectLanguages(String text2, List<String> additionalLanguageCodes) {
        List<Integer> enc = this.encode(text2);
        List<Object> finalProbs = new ArrayList<Double>();
        List<int[]> keys = this.keys(enc);
        for (int i = 0; i < this.codes.size(); ++i) {
            double val = 0.0;
            for (int[] key2 : keys) {
                double prob = this.knpBigramProbs.get(i).getOrDefault(key2[0] + "_" + key2[1], 1.0E-4);
                val += StrictMath.log(prob);
            }
            finalProbs.add(val);
        }
        HashMap<String, Double> result2 = new HashMap<String, Double>();
        if (text2.length() >= this.thresholdsStart) {
            int argMax = 0;
            for (int i = 1; i < finalProbs.size(); ++i) {
                if (!((Double)finalProbs.get(i) > (Double)finalProbs.get(argMax))) continue;
                argMax = i;
            }
            int thresholdIndex = StrictMath.min(text2.length(), this.maxLength) - this.thresholdsStart;
            if ((Double)finalProbs.get(argMax) < this.thresholds.get(thresholdIndex)[argMax]) {
                result2.put("zz", 100.0);
                return result2;
            }
        }
        finalProbs = finalProbs.stream().map(StrictMath::exp).collect(Collectors.toList());
        finalProbs = this.normalize(finalProbs);
        for (int i = 0; i < this.codes.size(); ++i) {
            String langCode;
            String string2 = langCode = this.codes.get(i)[1].equals("NULL") ? this.codes.get(i)[2] : this.codes.get(i)[1];
            if (!LanguageIdentifierService.INSTANCE.canLanguageBeDetected(langCode, additionalLanguageCodes)) continue;
            result2.put(langCode, (Double)finalProbs.get(i));
        }
        return result2;
    }

    private BufferedReader getReader(String fileName) throws IOException {
        InputStream is = this.zipFile.getInputStream(this.zipFile.getEntry(fileName));
        InputStreamReader isr = new InputStreamReader(is, StandardCharsets.UTF_8);
        return new BufferedReader(isr);
    }

    private List<String> readLines(String path) {
        ArrayList<String> result2 = new ArrayList<String>();
        try (BufferedReader br = this.getReader(path);){
            String line;
            while ((line = br.readLine()) != null) {
                result2.add(line);
            }
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
        return result2;
    }

    private static Map<String, Double> loadDict(List<String> lines) {
        HashMap<String, Double> tm = new HashMap<String, Double>();
        for (String line : lines) {
            String[] parts = line.trim().split(" ");
            String key2 = String.join((CharSequence)"_", Arrays.copyOfRange(parts, 0, parts.length - 1));
            tm.put(key2, Double.parseDouble(parts[parts.length - 1]));
        }
        return tm;
    }

    private List<String> expectedFiles() {
        ArrayList<String> result2 = new ArrayList<String>();
        for (int i = 0; i < this.codes.size(); ++i) {
            String name = String.format("%02d.txt", i);
            result2.add(name);
        }
        return result2;
    }

    private List<Integer> encode(String text2) {
        int ci;
        ArrayList<Integer> result2 = new ArrayList<Integer>();
        result2.add(1);
        if (text2.length() > this.maxLength) {
            text2 = text2.substring(0, this.maxLength);
        }
        text2 = Normalizer.normalize(text2, Normalizer.Form.NFKC).toLowerCase();
        text2 = DIGITS.matcher(text2).replaceAll("<NUM>");
        text2 = KOREAN.matcher(text2).replaceAll("<KO>");
        text2 = JAPANESE.matcher(text2).replaceAll("<JA>");
        text2 = CHINESE.matcher(text2).replaceAll("<ZH>");
        text2 = KHMER.matcher(text2).replaceAll("<KM>");
        text2 = TAGALOG.matcher(text2).replaceAll("<TL>");
        text2 = ARMENIAN.matcher(text2).replaceAll("<HY>");
        text2 = GREEK.matcher(text2).replaceAll("<EL>");
        text2 = TAMIL.matcher(text2).replaceAll("<TA>");
        if ((text2 = WHITESPACE.matcher(text2).replaceAll("\u2581")).length() == 0) {
            return result2;
        }
        text2 = "\u2581" + text2;
        for (int cur = 0; cur < text2.length(); cur += ci) {
            int tok = 0;
            ci = 1;
            for (int i = cur + 1; i <= text2.length(); ++i) {
                int maybeTok = this.vocab.getOrDefault(text2.substring(cur, i), -1);
                if (maybeTok <= -1) continue;
                tok = maybeTok;
                ci = i - cur;
            }
            result2.add(tok);
        }
        return result2;
    }

    private List<int[]> keys(List<Integer> enc) {
        ArrayList<int[]> result2 = new ArrayList<int[]>();
        for (int i = 1; i < enc.size(); ++i) {
            result2.add(new int[]{enc.get(i - 1), enc.get(i)});
        }
        return result2;
    }

    private List<Double> normalize(List<Double> vals) {
        double tot = vals.stream().mapToDouble(f -> f).sum();
        return vals.stream().map(n -> n / tot).collect(Collectors.toList());
    }
}

