/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.rules.common;

import ai.grazie.nlp.langs.Language;
import ai.grazie.nlp.patterns.Pattern;
import ai.grazie.rules.common.WordSet;
import ai.grazie.rules.tree.Formatter;
import ai.grazie.rules.tree.Node;
import ai.grazie.rules.tree.TextRange;
import ai.grazie.rules.tree.TreeSupport;
import ai.grazie.rules.util.CharUtil;
import ai.grazie.rules.util.TransformingCharSequence;
import ai.grazie.rules.util.regex.Regex;
import com.hankcs.algorithm.AhoCorasickDoubleArrayTrie;
import java.io.Serializable;
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.WeakHashMap;
import java.util.stream.Stream;
import one.util.streamex.EntryStream;
import one.util.streamex.StreamEx;
import org.apache.commons.lang3.StringUtils;
import org.jspecify.annotations.NonNull;
import org.jspecify.annotations.Nullable;
import org.languagetool.tools.StringTools;

/**
 * Per-language collection of known phrases loaded from resource files, together with a lazily
 * built Aho-Corasick trie that locates those phrases inside arbitrary text.
 *
 * <p>Instances are obtained through {@link #forLanguage(Language)} and cached per language behind
 * weak references. Access to the instance cache and the lazy trie construction are both guarded
 * by {@code synchronized} blocks.
 */
public class KnownPhrases {
    public static final Set<Language> SUPPORTED_LANGUAGES = Set.of(Language.ENGLISH, Language.GERMAN, Language.RUSSIAN, Language.UKRAINIAN);

    /** Characters that, when directly preceding a phrase, allow its first letter to be capitalized. */
    private static final String OPENING_QUOTE_CHARS = "'\"\u201c\u201d\u201e\u00ab\u00bb`\u2018\u2019";

    // Compiled once: these are applied to every trie hit reported while scanning a sentence.
    // Fully qualified because this file imports ai.grazie.nlp.patterns.Pattern under the same simple name.
    private static final java.util.regex.Pattern SPACE_RUN = java.util.regex.Pattern.compile("[\\s\\p{Z}]+");
    private static final java.util.regex.Pattern OTHER_SYMBOL = java.util.regex.Pattern.compile("\\p{So}");
    private static final java.util.regex.Pattern MC_LE_UPPERCASE = java.util.regex.Pattern.compile("(Mc|Le)[A-Z]+");

    public final Language language;

    /** Phrases keyed by the resource path they were loaded from, in load order. */
    private final Map<String, List<Phrase>> knownPhrases = new LinkedHashMap<>();

    /** Built on first use by {@link #obtainTrie()}; only accessed while holding the {@code knownPhrases} monitor. */
    private @Nullable AhoCorasickDoubleArrayTrie<Serializable> trie;

    private static final Map<Language, WeakReference<KnownPhrases>> instances = new WeakHashMap<>();

    /**
     * Returns the cached instance for the given language, creating and caching a new one when
     * none exists yet or the previous one has been garbage-collected.
     *
     * @param language the language whose phrase resources should be loaded
     * @return the phrase collection for {@code language}
     */
    public static KnownPhrases forLanguage(@NonNull Language language) {
        synchronized (instances) {
            WeakReference<KnownPhrases> ref = instances.get(language);
            KnownPhrases result = ref == null ? null : ref.get();
            if (result == null) {
                result = new KnownPhrases(language);
                instances.put(language, new WeakReference<>(result));
            }
            return result;
        }
    }

    /**
     * Loads, in this order and skipping resources that do not exist: the multi-word spelling
     * files, the diacritics files, and the {@code accepted_nosuggest.txt} files.
     */
    private KnownPhrases(Language language) {
        assert SUPPORTED_LANGUAGES.contains(language) : "Language " + language + " is not supported by KnownPhrases";
        this.language = language;

        for (String file : multiWordFiles()) {
            if (WordSet.resourceExists(file)) {
                knownPhrases.put(file, readRegexFile(file));
            }
        }

        // Many diacritics lines share the same source text; keep a single String instance per distinct value.
        Map<String, String> sourceInterner = new HashMap<>();
        for (String file : List.of(diacriticsPath(), geoDiacriticsPath())) {
            if (!WordSet.resourceExists(file)) {
                continue;
            }
            List<Phrase> filePhrases = new ArrayList<>();
            knownPhrases.put(file, filePhrases);
            for (String line : WordSet.loadLines(file)) {
                checkApostrophes(line, line);
                // Each line is split at the first ';' into the phrase and its source.
                String[] parts = line.split(";", 2);
                assert parts.length == 2 : "Expected 'phrase;source' in " + file + " but got: " + line;
                filePhrases.add(new Phrase(parts[0], sourceInterner.computeIfAbsent(parts[1], source -> source)));
            }
        }

        List<String> acceptedFiles = Stream.of("international", language.getIso().toString())
                .map(dir -> dir + "/accepted_nosuggest.txt")
                .toList();
        for (String file : acceptedFiles) {
            if (WordSet.resourceExists(file)) {
                knownPhrases.put(file, readRegexFile(file));
            }
        }
    }

    /** Asserts that {@code phrase} contains no straight apostrophe ({@code '}); {@code line} is only used in the message. */
    private static void checkApostrophes(String phrase, String line) {
        assert !phrase.contains("'") : "Phrases should contain smart apostrophes: '" + phrase + "' in line " + line;
    }

    /**
     * Reads a resource whose non-blank lines not starting with {@code #} are regexes, and expands
     * each of them into all of its possible values.
     *
     * @param file resource path to read
     * @return one {@link Phrase} per possible value, with the originating line as its source;
     *         values of a single line are ordered case-insensitively
     */
    private static List<Phrase> readRegexFile(String file) {
        List<Phrase> filePhrases = new ArrayList<>();
        for (String line : WordSet.loadLines(file)) {
            if (line.startsWith("#") || line.isBlank()) {
                continue;
            }
            Set<String> phrases = Regex.parse(line).possibleValues();
            assert phrases != null : "The entries in " + file + " should be finite enumerable regexes with not too many possible values. Couldn't extract values from: " + line;
            for (String phrase : StreamEx.of(phrases).sorted(String.CASE_INSENSITIVE_ORDER)) {
                checkApostrophes(phrase, line);
                filePhrases.add(new Phrase(phrase, line));
            }
        }
        return filePhrases;
    }

    /**
     * @param file resource path, as used when the file was loaded
     * @return the phrases loaded from {@code file}, or an empty list if that file was not loaded
     */
    public List<Phrase> phrasesFromFile(String file) {
        return knownPhrases.getOrDefault(file, List.of());
    }

    /** @return the resource path of this language's {@code geo_diacritics.txt} */
    public String geoDiacriticsPath() {
        return language.getIso() + "/geo_diacritics.txt";
    }

    /** @return the resource path of this language's {@code diacritics.txt} */
    public String diacriticsPath() {
        return language.getIso() + "/diacritics.txt";
    }

    /** @return the resource paths of the international and language-specific {@code multi-word-spelling.txt} */
    List<String> multiWordFiles() {
        return Stream.of("international", language.getIso().toString())
                .map(dir -> dir + "/multi-word-spelling.txt")
                .toList();
    }

    /**
     * @param node the node to check
     * @return whether some valid phrase found in the text of the node's tree contains the node's text range
     */
    public boolean isPartOfValidPhrase(Node node) {
        return validPhrases(node.tree().text()).stream().anyMatch(tr -> tr.containsInclusive(node.textRange()));
    }

    /**
     * Finds all occurrences of known phrases in {@code sentence}.
     *
     * <p>The text is scanned in a normalized form (straight apostrophe replaced by U+2019, any
     * space character replaced by a plain space, everything lower-cased). A hit is kept only if
     * there are word boundaries at both of its ends and the actual, non-normalized text is an
     * acceptable casing of one of the phrase spellings stored for that hit.
     *
     * @param sentence the text to scan
     * @return the ranges of the accepted hits, in the order the trie reports them
     */
    public List<TextRange> validPhrases(CharSequence sentence) {
        AhoCorasickDoubleArrayTrie<Serializable> phraseTrie = obtainTrie();
        List<TextRange> result = new ArrayList<>();
        CharSequence normalized = new TransformingCharSequence(sentence,
                c -> c == '\'' ? '\u2019' : (CharUtil.isAnySpace(c) ? ' ' : Character.toLowerCase(c)));
        phraseTrie.parseText(normalized, (start, end, entry) -> {
            if (!Pattern.isWordBoundaryBefore(sentence, start) || !Pattern.isWordBoundaryBefore(sentence, end)) {
                return;
            }
            // A trie value is a single String, or a String[] when several spellings share one lower-cased key.
            Stream<String> candidates = entry instanceof String single ? Stream.of(single) : Stream.of((String[]) entry);
            if (isActualTextAcceptable(start, sentence.subSequence(start, end).toString(), candidates, sentence)) {
                result.add(new TextRange(start, end));
            }
        });
        return result;
    }

    /** Returns the trie, building it on first use while holding the {@code knownPhrases} monitor. */
    private AhoCorasickDoubleArrayTrie<Serializable> obtainTrie() {
        synchronized (knownPhrases) {
            AhoCorasickDoubleArrayTrie<Serializable> current = trie;
            if (current == null) {
                current = buildTrie();
                trie = current;
            }
            return current;
        }
    }

    /**
     * @param text  the text to scan
     * @param start start offset of the range in {@code text}
     * @param end   end offset of the range in {@code text}
     * @return whether some valid phrase found in {@code text} spans at least {@code [start, end]}
     */
    public boolean isRangeCoveredByValidPhrase(CharSequence text, int start, int end) {
        return validPhrases(text).stream().anyMatch(tr -> tr.start() <= start && end <= tr.end());
    }

    /** Builds a trie over all loaded phrases, keyed by their lower-cased form. */
    private AhoCorasickDoubleArrayTrie<Serializable> buildTrie() {
        Map<String, List<String>> spellingsByKey = new HashMap<>();
        for (List<Phrase> filePhrases : knownPhrases.values()) {
            for (Phrase p : filePhrases) {
                String key = TransformingCharSequence.lowerCase(p.phrase()).toString();
                spellingsByKey.computeIfAbsent(key, k -> new ArrayList<>()).add(p.phrase());
            }
        }
        AhoCorasickDoubleArrayTrie<Serializable> built = new AhoCorasickDoubleArrayTrie<>();
        built.build(EntryStream.of(spellingsByKey).mapValues(KnownPhrases::toTrieValue).toSortedMap());
        return built;
    }

    /** Stores a single spelling as a plain String and several spellings as a String[]. */
    private static Serializable toTrieValue(List<String> spellings) {
        return spellings.size() > 1 ? spellings.toArray(new String[0]) : spellings.get(0);
    }

    /**
     * Checks whether the text actually found is an acceptable way of writing one of the suggested spellings.
     *
     * <p>Before comparison, runs of whitespace/separator characters in {@code actual} are collapsed
     * to one space, U+2011 is replaced by {@code -}, characters of Unicode category {@code So}
     * are removed and straight apostrophes are replaced by U+2019.
     *
     * @param start       offset of {@code actual} within {@code sentence}
     * @param actual      the matched text as it appears in the sentence
     * @param suggestions the stored spellings to compare against; consumed by this call
     * @param sentence    the text in which the match was found
     * @return whether at least one suggestion allows {@code actual}
     */
    static boolean isActualTextAcceptable(int start, String actual, Stream<String> suggestions, CharSequence sentence) {
        String clean = SPACE_RUN.matcher(actual).replaceAll(" ").replace('\u2011', '-');
        clean = OTHER_SYMBOL.matcher(clean).replaceAll("").replace('\'', '\u2019');
        String normalizedActual = clean;
        return suggestions.anyMatch(sug -> isAllowed(normalizedActual, sug, start, sentence));
    }

    /**
     * Decides whether {@code actual} is an allowed casing variant of {@code expected}.
     *
     * <p>Allowed are: an exact match; a case-insensitive match without any lowercase character or
     * of the form {@code (Mc|Le)[A-Z]+}; and, only when the first word of {@code expected} has no
     * uppercase character, a first-letter-capitalized form in a sentence-start-like position or
     * an all-capitalized form.
     */
    private static boolean isAllowed(String actual, String expected, int start, CharSequence sentence) {
        if (expected.equals(actual)) {
            return true;
        }
        if (!actual.equalsIgnoreCase(expected)) {
            return false;
        }
        if (isUpperCaseOnly(actual) || MC_LE_UPPERCASE.matcher(actual).matches()) {
            return true;
        }
        if (firstWord(expected).chars().anyMatch(Character::isUpperCase)) {
            // The expected spelling prescribes its own capitalization; other casings are not accepted.
            return false;
        }
        boolean atSentenceStartLikePosition = TreeSupport.isCapitalizedSentenceStart(sentence, start)
                || Formatter.possiblyEndsWithSentenceBoundary(sentence.subSequence(0, start))
                || start > 0 && CharUtil.isAnyOf(OPENING_QUOTE_CHARS, sentence.charAt(start - 1));
        if (atSentenceStartLikePosition && actual.equals(StringTools.uppercaseFirstChar(expected))) {
            return true;
        }
        return isAllCapitalized(actual)
                && (!StringUtils.isMixedCase(expected) || looksLikeAllCapitalizedHeader(sentence));
    }

    /**
     * Returns the part of {@code phrase} before its first space, or the whole phrase if it has none.
     * Unlike {@code phrase.split(" ")[0]}, this does not throw for a phrase consisting only of spaces.
     */
    private static String firstWord(String phrase) {
        int space = phrase.indexOf(' ');
        return space < 0 ? phrase : phrase.substring(0, space);
    }

    /** @return whether the whole sentence is all-capitalized and does not possibly end with a sentence boundary */
    private static boolean looksLikeAllCapitalizedHeader(CharSequence sentence) {
        return isAllCapitalized(sentence) && !Formatter.possiblyEndsWithSentenceBoundary(sentence);
    }

    /** @return whether {@code phrase} contains no lowercase character */
    private static boolean isUpperCaseOnly(String phrase) {
        return phrase.chars().noneMatch(Character::isLowerCase);
    }

    /**
     * @return {@code false} if a lowercase character occurs at index 0 or directly after a
     *         non-word character (see {@link #isWordChar(char)}); {@code true} otherwise
     */
    private static boolean isAllCapitalized(CharSequence sentence) {
        for (int i = 0; i < sentence.length(); ++i) {
            boolean startsWord = i == 0 || !isWordChar(sentence.charAt(i - 1));
            if (startsWord && Character.isLowerCase(sentence.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** @return whether {@code c} is a letter, a digit, {@code -} or {@code _} */
    private static boolean isWordChar(char c) {
        return Character.isLetterOrDigit(c) || c == '-' || c == '_';
    }

    /**
     * A known phrase and where it came from.
     *
     * @param phrase the phrase text
     * @param source the resource line the phrase was expanded from (regex files), or the text
     *               after the first {@code ;} of its line (diacritics files)
     */
    public record Phrase(String phrase, String source) {
    }
}

