/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.arabic.pipeline;

import edu.stanford.nlp.international.arabic.Buckwalter;
import edu.stanford.nlp.international.process.Mapper;
import edu.stanford.nlp.trees.international.arabic.ATBTreeUtils;
import java.io.File;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Normalizes Arabic tokens written either in Unicode Arabic script or in a Latin
 * (Buckwalter-style) transliteration.
 *
 * <p>{@link #map(String, String)} picks the encoding by checking whether the token contains any
 * character in the range U+0600 to U+06FF. Both paths strip diacritics, strip tatweel (only when
 * more than one character remains), collapse alef variants to a bare alef, drop Quranic marks and
 * drop the pro-drop placeholder. Segmentation markers (leading/trailing hyphens) are handled
 * according to the options given to {@link #setup(File, String...)}.
 *
 * <p>NOTE: the two option flags are {@code static}. Enabling an option through any instance
 * enables it for every instance in the JVM, and nothing in this class ever switches it off again.
 */
public class DefaultLexicalMapper implements Mapper, Serializable {
    private static final long serialVersionUID = -197782849766133026L;

    /** Any single character in U+0600 to U+06FF; used to detect Unicode Arabic input. */
    private static final Pattern utf8ArabicChart = Pattern.compile("[\u0600-\u06ff]");

    /** Replacement used when collapsing the Buckwalter alef variants. */
    private static final String bwAlefChar = "A";
    /** Buckwalter diacritic symbols: F, N, K, a, u, i, ~ and o. */
    private static final Pattern bwDiacritics = Pattern.compile("F|N|K|a|u|i|\\~|o");
    /** Buckwalter tatweel (underscore). */
    private static final Pattern bwTatweel = Pattern.compile("_");
    /** Buckwalter alef variants: {, |, > and <. */
    private static final Pattern bwAlef = Pattern.compile("\\{|\\||>|<");
    /** Backtick, removed from Buckwalter tokens. */
    private static final Pattern bwQuran = Pattern.compile("`");
    /** The literal placeholder "[nll]". */
    private static final Pattern bwProDrop = Pattern.compile("\\[nll\\]");

    /** One or more characters drawn from ASCII punctuation and several Latin/general punctuation ranges. */
    public static final Pattern latinPunc = Pattern.compile("([!-/:-@\\u005B\\]^-`{-~\u00a1-\u00bf\u2010-\u2027\u2030-\u205e\u20a0-\u20b5])+");
    /** One or more Arabic punctuation characters (plus the two guillemets). */
    public static final Pattern arabicPunc = Pattern.compile("([\u00ab\u00bb\u0609-\u060d\u061b-\u061f\u066a\u066c-\u066d\u06d4])+");
    /** One or more digits from U+06F0 to U+06F9 or U+0660 to U+0669. */
    public static final Pattern arabicDigit = Pattern.compile("([\u06f0-\u06f9\u0660-\u0669])+");

    /** Unicode counterparts of {@link #bwDiacritics}. */
    private static final Pattern utf8Diacritics = Pattern.compile("\u064e|\u064b|\u064f|\u064c|\u0650|\u064d|\u0651|\u0652");
    /** Unicode tatweel (U+0640). */
    private static final Pattern utf8Tatweel = Pattern.compile("\u0640");
    /** Unicode alef variants, all rewritten to U+0627. */
    private static final Pattern utf8Alef = Pattern.compile("\u0627|\u0625|\u0623|\u0622|\u0671");
    /** Marks in U+0615 to U+061A and U+06D6 to U+06E5, removed from Unicode tokens. */
    private static final Pattern utf8Quran = Pattern.compile("[\u0615-\u061a\u06d6-\u06e5]");
    /** The Unicode spelling of the bracketed pro-drop placeholder. */
    private static final Pattern utf8ProDrop = Pattern.compile("\\[\u0646\u0644\u0644\\]");

    /** A run of hyphens at the very start or the very end of a token. */
    public static final Pattern segmentationMarker = Pattern.compile("^-+|-+$");
    /** A literal plus sign. */
    private static final Pattern morphemeBoundary = Pattern.compile("\\+");
    /** One or more digits as matched by the regex class {@code \d} (ASCII 0-9 by default). */
    private static final Pattern hasDigit = Pattern.compile("\\d+");

    // JVM-wide switches; see the class comment. Only ever set to true, by setup().
    private static boolean useATBVocalizedSectionMapping = false;
    private static boolean stripMarkersInUTF8 = false;

    /** Whitespace-separated parent tags whose tokens are passed through untouched by map(). */
    private final String parentTagString = "PUNC LATIN -NONE-";
    private final Set<String> parentTagsToEscape = new HashSet<String>();

    /** Whitespace-separated clitics in Unicode Arabic script. */
    private static final String utf8CliticString = "\u0644 \u0641 \u0648 \u0645\u0627 \u0647 \u0647\u0627 \u0647\u0645 \u0647\u0646 \u0646\u0627 \u0643\u0645 \u062a\u0646 \u062a\u0645 \u0649 \u064a \u0647\u0645\u0627 \u0643 \u0628 \u0645";
    private static final Set<String> utf8Clitics = new HashSet<String>();
    /** The clitic list after being passed through {@code Buckwalter.apply}; filled in the static initializer. */
    private static final Set<String> bwClitics;

    /** Creates a mapper whose escape set is the tags listed in {@link #parentTagString}. */
    public DefaultLexicalMapper() {
        // Use the field rather than repeating the literal so the two cannot drift apart.
        this.parentTagsToEscape.addAll(Arrays.asList(this.parentTagString.split("\\s+")));
    }

    /**
     * Normalizes a token that contains Unicode Arabic characters.
     *
     * @param element trimmed token, non-null
     * @return the token unchanged if it consists solely of Latin or solely of Arabic punctuation,
     *         otherwise the normalized token
     */
    private static String mapUtf8(String element) {
        if (latinPunc.matcher(element).matches() || arabicPunc.matcher(element).matches()) {
            return element;
        }

        element = utf8Diacritics.matcher(element).replaceAll("");
        // A token that is a lone character after diacritic removal keeps its tatweel.
        if (element.length() > 1) {
            element = utf8Tatweel.matcher(element).replaceAll("");
        }
        element = utf8Alef.matcher(element).replaceAll("\u0627");
        element = utf8Quran.matcher(element).replaceAll("");
        element = utf8ProDrop.matcher(element).replaceAll("");

        if (stripMarkersInUTF8) {
            String strippedElem = segmentationMarker.matcher(element).replaceAll("");
            // Never reduce a token to the empty string (e.g. a token that is only hyphens).
            if (strippedElem.length() > 0) {
                element = strippedElem;
            }
        }
        return element;
    }

    /**
     * Normalizes a token that contains no Unicode Arabic characters, treating it as
     * Buckwalter transliteration.
     *
     * @param element trimmed token, non-null
     * @return the token unchanged if it consists solely of Latin punctuation, otherwise the
     *         normalized token
     */
    private static String mapBuckwalter(String element) {
        if (latinPunc.matcher(element).matches()) {
            return element;
        }

        element = bwDiacritics.matcher(element).replaceAll("");
        // A token that is a lone character after diacritic removal keeps its tatweel.
        if (element.length() > 1) {
            element = bwTatweel.matcher(element).replaceAll("");
        }
        element = bwAlef.matcher(element).replaceAll(bwAlefChar);
        element = bwQuran.matcher(element).replaceAll("");
        element = bwProDrop.matcher(element).replaceAll("");

        if (useATBVocalizedSectionMapping && element.length() > 1) {
            element = morphemeBoundary.matcher(element).replaceAll("");
            Matcher cliticMarker = segmentationMarker.matcher(element);
            // Leave tokens containing digits alone; for the rest, drop the edge hyphens unless
            // that would empty the token or the remainder is a known clitic.
            if (cliticMarker.find() && !hasDigit.matcher(element).find()) {
                String strippedElem = cliticMarker.replaceAll("");
                if (strippedElem.length() > 0 && !bwClitics.contains(strippedElem)) {
                    element = strippedElem;
                }
            }
        } else if (element.length() > 1 && !ATBTreeUtils.reservedWords.contains(element)) {
            // Default mode: strip edge hyphens from everything except reserved words.
            element = segmentationMarker.matcher(element).replaceAll("");
        }
        return element;
    }

    /**
     * Normalizes one token.
     *
     * @param parent  the token's parent tag; may be {@code null}
     * @param element the raw token; must not be {@code null}
     * @return the trimmed token if {@code parent} is one of the escaped tags, otherwise the
     *         trimmed token after Unicode or Buckwalter normalization
     */
    public String map(String parent, String element) {
        String elem = element.trim();
        if (parent != null && this.parentTagsToEscape.contains(parent)) {
            return elem;
        }
        return utf8ArabicChart.matcher(elem).find()
                ? DefaultLexicalMapper.mapUtf8(elem)
                : DefaultLexicalMapper.mapBuckwalter(elem);
    }

    /**
     * Enables optional behaviors. Recognised options are {@code "ATBVocalizedSection"} and
     * {@code "StripMarkersInUTF8"}; anything else, including {@code null} entries, is ignored.
     *
     * @param path    not read by this implementation
     * @param options option names; may be {@code null}
     */
    public void setup(File path, String ... options) {
        if (options == null) {
            return;
        }
        for (String opt : options) {
            // Constant-first equals so a null entry is skipped instead of throwing.
            if ("ATBVocalizedSection".equals(opt)) {
                useATBVocalizedSectionMapping = true;
            } else if ("StripMarkersInUTF8".equals(opt)) {
                stripMarkersInUTF8 = true;
            }
        }
    }

    /**
     * Reports whether a token under the given parent tag may be converted to another encoding.
     *
     * @param parent  the token's parent tag; {@code null} is treated as "no tag", matching the
     *                null tolerance of {@link #map(String, String)}
     * @param element the token; must not be {@code null}
     * @return {@code true} if the parent contains {@code NUMERIC_COMMA}, or contains {@code PUNC}
     *         and the token is exactly {@code "r"}; otherwise {@code true} only when the token
     *         has no digit and the parent is not an escaped tag
     */
    public boolean canChangeEncoding(String parent, String element) {
        parent = parent == null ? "" : parent.trim();
        element = element.trim();
        if (parent.contains("NUMERIC_COMMA") || (parent.contains("PUNC") && element.equals("r"))) {
            return true;
        }
        return !hasDigit.matcher(element).find() && !this.parentTagsToEscape.contains(parent);
    }

    /** Smoke test: maps one hard-coded token with no parent tag and prints the result. */
    public static void main(String[] args) {
        DefaultLexicalMapper m = new DefaultLexicalMapper();
        System.out.printf("< :-> %s\n", m.map(null, "FNKqq"));
    }

    static {
        utf8Clitics.addAll(Arrays.asList(utf8CliticString.split("\\s+")));
        // NOTE(review): presumably Buckwalter(true) converts Unicode to Buckwalter - confirm against that class.
        Buckwalter bw = new Buckwalter(true);
        String bwString = bw.apply(utf8CliticString);
        bwClitics = new HashSet<String>();
        bwClitics.addAll(Arrays.asList(bwString.split("\\s+")));
    }
}

