package org.asqatasun.rules.elementchecker.lang;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.validator.GenericValidator;
import org.asqatasun.entity.audit.TestSolution;
import org.asqatasun.processor.SSPHandler;
import org.asqatasun.ruleimplementation.TestSolutionHandler;
import org.asqatasun.rules.elementchecker.NomenclatureBasedElementChecker;
import org.asqatasun.rules.elementchecker.lang.detector.LanguageDetectionResult;
import org.asqatasun.rules.elementchecker.lang.detector.LanguageDetector;
import org.asqatasun.rules.keystore.AttributeStore;
import org.asqatasun.rules.keystore.EvidenceStore;
import org.asqatasun.rules.keystore.HtmlElementStore;
import org.asqatasun.rules.keystore.RemarkMessageStore;
import org.asqatasun.rules.textbuilder.CompleteTextElementBuilder;
import org.asqatasun.rules.textbuilder.TextElementBuilder;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:BOOT-INF/lib/asqatasun-rules-commons-5.0.0-rc.1.jar:org/asqatasun/rules/elementchecker/lang/LangChecker.class */
/**
 * Base class for element checkers dealing with language declarations.
 *
 * <p>It offers helpers to extract the language declared on an element
 * ({@code lang} / {@code xml:lang}), to validate that declaration against a
 * syntax pattern and the {@code ValidLanguageCode} nomenclature, to extract
 * the text of an element, and to compare a language code with the language
 * detected on that text by {@link LanguageDetector}.
 *
 * <p>Subclasses implement {@link #doCheckLanguage(Element, SSPHandler)}, which
 * is invoked once per selected element by
 * {@link #doCheck(SSPHandler, Elements, TestSolutionHandler)}.
 */
public abstract class LangChecker extends NomenclatureBasedElementChecker {

    private static final Logger LOGGER = LoggerFactory.getLogger(LangChecker.class);

    /** Matches texts made only of digits, '+' and non-word characters. */
    private static final String NON_ALPHANUMERIC_PATTERN_STR = "[\\d+\\W+]+?";
    /** Expected shape of a declaration: 2-3 word characters, optionally followed by '-' and a subtag. */
    private static final String LANG_DECLARATION_PATTERN_STR = "\\w{2,3}(\\-\\w{2,})?$";
    /** Code of the nomenclature listing the XHTML doctype declarations. */
    private static final String XHTML_DOCTYPE_NOM = "XhtmlDoctypeDeclarations";
    /** Code of the nomenclature listing the valid language codes. */
    private static final String LANG_NOM = "ValidLanguageCode";
    /** Maximum number of characters of extracted text attached to a remark. */
    private static final int DISPLAYABLE_TEXT_SIZE = 200;
    /** Separator between the primary language code and the rest of a declaration. */
    private static final char LANG_SUBTAG_SEPARATOR = '-';
    private static final long NANOS_PER_MILLI = 1_000_000L;

    /** Elements whose text is never extracted. */
    private static final String[] EXCLUDED_ELEMENTS = {HtmlElementStore.SCRIPT_ELEMENT, HtmlElementStore.CODE_ELEMENT, HtmlElementStore.KBD_ELEMENT, HtmlElementStore.SAMP_ELEMENT, HtmlElementStore.TT_ELEMENT, HtmlElementStore.VAR_ELEMENT, HtmlElementStore.NO_FRAMES_ELEMENT, "noscript"};
    private static final Collection<String> EXCLUDED_ELEMENTS_LIST = Arrays.asList(EXCLUDED_ELEMENTS);

    // Patterns are immutable and thread-safe: compile them once for all instances.
    private static final Pattern NON_ALPHANUMERIC_PATTERN = Pattern.compile(NON_ALPHANUMERIC_PATTERN_STR);
    private static final Pattern LANG_DECLARATION_PATTERN = Pattern.compile(LANG_DECLARATION_PATTERN_STR);
    private static final Pattern MULTIPLE_SPACES_PATTERN = Pattern.compile(" +");

    /** Lazily loaded content of the {@code XhtmlDoctypeDeclarations} nomenclature. */
    private Collection<String> xhtmlDoctypesSet;
    /** Lazily loaded content of the {@code ValidLanguageCode} nomenclature. */
    private Collection<String> validLanguagesSet;

    private String suspectedIdenticalLangMsg;
    private String suspectedDifferentLangMsg;
    private String differentLangMsg;
    private String identicalLangMsg;

    /** Builder used to extract the text of an element; defaults to a {@link CompleteTextElementBuilder}. */
    private TextElementBuilder testableTextElementBuilder;

    private int nbOfElementsTested = 0;

    /**
     * Default constructor. No remark message is set; a remark whose message is
     * blank is not created by {@link #checkLanguageRelevancy}.
     */
    public LangChecker() {
    }

    /**
     * Creates a checker with the messages of the remarks produced by
     * {@link #checkLanguageRelevancy}.
     *
     * @param identicalLangMsg          message used when the languages are
     *                                  identical and the detection is reliable
     * @param differentLangMsg          message used when the languages differ
     *                                  and the detection is reliable
     * @param suspectedDifferentLangMsg message used when the languages differ
     *                                  and the detection is not reliable
     * @param suspectedIdenticalLangMsg message used when the languages are
     *                                  identical and the detection is not reliable
     */
    public LangChecker(String identicalLangMsg, String differentLangMsg, String suspectedDifferentLangMsg, String suspectedIdenticalLangMsg) {
        this.identicalLangMsg = identicalLangMsg;
        this.differentLangMsg = differentLangMsg;
        this.suspectedIdenticalLangMsg = suspectedIdenticalLangMsg;
        this.suspectedDifferentLangMsg = suspectedDifferentLangMsg;
    }

    /**
     * @param suspectedIdenticalLangMsg message used when the languages are
     *                                  identical and the detection is not reliable
     */
    public void setSuspectedIdenticalLangMsg(String suspectedIdenticalLangMsg) {
        this.suspectedIdenticalLangMsg = suspectedIdenticalLangMsg;
    }

    /**
     * @param suspectedDifferentLangMsg message used when the languages differ
     *                                  and the detection is not reliable
     */
    public void setSuspectedDifferentLangMsg(String suspectedDifferentLangMsg) {
        this.suspectedDifferentLangMsg = suspectedDifferentLangMsg;
    }

    /**
     * @param differentLangMsg message used when the languages differ and the
     *                         detection is reliable
     */
    public void setDifferentLangMsg(String differentLangMsg) {
        this.differentLangMsg = differentLangMsg;
    }

    /**
     * @param identicalLangMsg message used when the languages are identical
     *                         and the detection is reliable
     */
    public void setIdenticalLangMsg(String identicalLangMsg) {
        this.identicalLangMsg = identicalLangMsg;
    }

    /**
     * @return the current value of the tested-elements counter
     */
    public int getNbOfElementsTested() {
        return this.nbOfElementsTested;
    }

    /**
     * Increments the tested-elements counter by one. This class never calls
     * it itself; it is left to subclasses/callers.
     */
    public void newElementTested() {
        this.nbOfElementsTested++;
    }

    /**
     * @param testableTextElementBuilder the builder used by
     *        {@link #extractTextFromElement(Element, boolean)}; when left
     *        {@code null}, a {@link CompleteTextElementBuilder} is created on
     *        first use
     */
    public void setTestableTextElementBuilder(TextElementBuilder testableTextElementBuilder) {
        this.testableTextElementBuilder = testableTextElementBuilder;
    }

    /**
     * Loads the nomenclatures, then checks each element with
     * {@link #doCheckLanguage(Element, SSPHandler)} and registers every
     * returned solution on the handler.
     */
    @Override // org.asqatasun.rules.elementchecker.ElementCheckerImpl
    protected void doCheck(SSPHandler sSPHandler, Elements elements, TestSolutionHandler testSolutionHandler) {
        loadXhtmlDoctypes();
        loadValidLanguages();
        for (Element element : elements) {
            testSolutionHandler.addTestSolution(doCheckLanguage(element, sSPHandler));
        }
    }

    /**
     * Performs the language check on a single element.
     *
     * @param element    the element to check
     * @param sSPHandler the handler of the page being tested
     * @return the solution of the check for this element
     */
    protected abstract TestSolution doCheckLanguage(Element element, SSPHandler sSPHandler);

    /**
     * Checks that a language declaration is syntactically well formed and
     * that its language code belongs to the valid language nomenclature.
     *
     * @param element                the element carrying the declaration
     * @param langDefinition         the declaration, tested against the
     *                               declaration syntax pattern
     * @param effectiveLang          the language code looked up
     *                               (case-insensitively) in the nomenclature
     * @param createSourceCodeRemark whether a remark is created on failure
     * @return {@link TestSolution#FAILED} when the declaration is malformed or
     *         the code is unknown, {@link TestSolution#PASSED} otherwise
     */
    public TestSolution checkLanguageDeclarationValidity(Element element, String langDefinition, String effectiveLang, boolean createSourceCodeRemark) {
        if (!isLangWellDeclared(langDefinition)) {
            if (createSourceCodeRemark) {
                addInvalidDeclarationSourceCodeRemark(element, langDefinition, TestSolution.FAILED, RemarkMessageStore.MALFORMED_LANGUAGE_DECLARATION_MSG);
            }
            return TestSolution.FAILED;
        }
        // The nomenclature is normally loaded by doCheck; make sure it is
        // available when this method is reached through another path.
        loadValidLanguages();
        // Locale.ROOT: the default locale must not influence the lookup
        // (e.g. "IT" is not lower-cased to "it" under a Turkish locale).
        if (!this.validLanguagesSet.contains(effectiveLang.toLowerCase(Locale.ROOT))) {
            if (createSourceCodeRemark) {
                addInvalidDeclarationSourceCodeRemark(element, effectiveLang, TestSolution.FAILED, RemarkMessageStore.WRONG_LANGUAGE_DECLARATION_MSG);
            }
            return TestSolution.FAILED;
        }
        return TestSolution.PASSED;
    }

    /**
     * Compares a language code with the language detected on a text and
     * creates the matching remark.
     *
     * <ul>
     * <li>no detection result: {@link TestSolution#NEED_MORE_INFO};</li>
     * <li>reliable detection, same language: {@code identicalLangSolution};</li>
     * <li>reliable detection, other language: {@code differentLangSolution};</li>
     * <li>unreliable detection: {@link TestSolution#NEED_MORE_INFO}.</li>
     * </ul>
     *
     * @param element               the element the remark is attached to
     * @param lang                  the language code compared
     *                              (case-insensitively) with the detected one
     * @param currentLang           an additional language code only used as
     *                              evidence of the remark; may be blank
     * @param testedText            the text the detection is run on
     * @param identicalLangSolution solution returned when the languages are
     *                              identical and the detection is reliable
     * @param differentLangSolution solution returned when the languages differ
     *                              and the detection is reliable
     * @return the solution as described above
     */
    public TestSolution checkLanguageRelevancy(Element element, String lang, String currentLang, String testedText, TestSolution identicalLangSolution, TestSolution differentLangSolution) {
        // Read the debug flag once so that the start time and the log
        // statement can never disagree if the log level changes meanwhile.
        final boolean debugEnabled = LOGGER.isDebugEnabled();
        final long startNanos = debugEnabled ? System.nanoTime() : 0L;
        LanguageDetectionResult detection = LanguageDetector.getInstance().detectLanguage(testedText);
        if (debugEnabled) {
            LOGGER.debug("Detection took {} ms on {} characters", (System.nanoTime() - startNanos) / NANOS_PER_MILLI, testedText.length());
        }
        if (detection == null) {
            addSourceCodeRemark(TestSolution.NEED_MORE_INFO, element, RemarkMessageStore.UNDETECTED_LANG_MSG, lang, currentLang, "", testedText);
            return TestSolution.NEED_MORE_INFO;
        }

        final String detectedLang = detection.getDetectedLanguage();
        final boolean sameLang = StringUtils.equalsIgnoreCase(lang, detectedLang);
        final TestSolution solution;
        final String message;
        if (detection.isReliable()) {
            solution = sameLang ? identicalLangSolution : differentLangSolution;
            message = sameLang ? this.identicalLangMsg : this.differentLangMsg;
        } else {
            // An unreliable detection can only raise a suspicion.
            solution = TestSolution.NEED_MORE_INFO;
            message = sameLang ? this.suspectedIdenticalLangMsg : this.suspectedDifferentLangMsg;
        }
        addSourceCodeRemark(solution, element, message, lang, currentLang, detectedLang, testedText);
        return solution;
    }

    /**
     * Extracts the language declared on an element from its {@code lang} and
     * {@code xml:lang} attributes (both trimmed).
     *
     * <p>When only one attribute is set, its value is returned. When both
     * differ (ignoring case) and the page has an XHTML doctype, the
     * {@code xml:lang} value wins; in every other case the {@code lang} value
     * is returned (possibly empty).
     *
     * @param element    the element to read the attributes from
     * @param sSPHandler the handler of the page, used to get its doctype
     * @return the declared language, never {@code null}
     */
    public String extractLangDefinitionFromElement(Element element, SSPHandler sSPHandler) {
        String lang = element.attr(AttributeStore.LANG_ATTR).trim();
        String xmlLang = element.attr("xml:lang").trim();
        if (xmlLang.isEmpty() && !lang.isEmpty()) {
            return lang;
        }
        if (!xmlLang.isEmpty() && lang.isEmpty()) {
            return xmlLang;
        }
        if (!xmlLang.equalsIgnoreCase(lang) && hasSSPXhtmlDoctype(sSPHandler)) {
            return xmlLang;
        }
        return lang;
    }

    /**
     * Returns the part of a language declaration located before its first
     * {@code '-'}.
     *
     * @param lang the language declaration
     * @return the text before the first {@code '-'}, or {@code lang} itself
     *         when it contains none
     */
    public String extractEffectiveLang(String lang) {
        int separatorIndex = StringUtils.indexOf(lang, LANG_SUBTAG_SEPARATOR);
        return separatorIndex != -1 ? StringUtils.substring(lang, 0, separatorIndex) : lang;
    }

    /**
     * Extracts the text of an element with the testable text builder.
     *
     * @param element            the element to extract the text from
     * @param extractRecursively when {@code true}, the text of each child that
     *                           neither defines its own language nor is an
     *                           excluded element is appended (recursively)
     * @return the extracted text with runs of spaces collapsed, or
     *         {@code null} when the element itself is an excluded element
     */
    public String extractTextFromElement(Element element, boolean extractRecursively) {
        if (EXCLUDED_ELEMENTS_LIST.contains(element.tagName())) {
            return null;
        }
        if (this.testableTextElementBuilder == null) {
            this.testableTextElementBuilder = new CompleteTextElementBuilder();
        }
        StringBuilder text = new StringBuilder();
        text.append(this.testableTextElementBuilder.buildTextFromElement(element));
        if (extractRecursively) {
            for (Element child : element.children()) {
                // Excluded children are filtered here, so the recursive call
                // below never returns null.
                if (isLangDefinedForElement(child) || EXCLUDED_ELEMENTS_LIST.contains(child.tagName())) {
                    continue;
                }
                text.append(' ');
                text.append(extractTextFromElement(child, true));
            }
        }
        return MULTIPLE_SPACES_PATTERN.matcher(text.toString()).replaceAll(StringUtils.SPACE);
    }

    /**
     * Tells whether a text is worth being submitted to language detection.
     *
     * @param text the text to test
     * @return {@code false} when the text is blank or when, once trimmed, it
     *         contains no alphabetic content according to the
     *         non-alphanumeric pattern, is an e-mail address or is a URL;
     *         {@code true} otherwise
     */
    public boolean isTextTestable(String text) {
        if (StringUtils.isBlank(text)) {
            return false;
        }
        String trimmedText = StringUtils.trim(text);
        return !(NON_ALPHANUMERIC_PATTERN.matcher(trimmedText).matches()
                || GenericValidator.isEmail(trimmedText)
                || GenericValidator.isUrl(trimmedText));
    }

    /**
     * @param element the element to test
     * @return {@code true} when the element has a {@code lang} or an
     *         {@code xml:lang} attribute
     */
    public boolean isLangDefinedForElement(Element element) {
        return element.hasAttr(AttributeStore.LANG_ATTR) || element.hasAttr("xml:lang");
    }

    /**
     * @param sSPHandler the handler of the page being tested
     * @return {@code true} when the doctype of the page belongs to the XHTML
     *         doctype nomenclature
     */
    protected boolean hasSSPXhtmlDoctype(SSPHandler sSPHandler) {
        // The nomenclature is normally loaded by doCheck; make sure it is
        // available when this method is reached through another path.
        loadXhtmlDoctypes();
        return this.xhtmlDoctypesSet.contains(sSPHandler.getSSP().getDoctype());
    }

    /**
     * Loads the XHTML doctype nomenclature on first call; later calls do nothing.
     */
    protected void loadXhtmlDoctypes() {
        if (this.xhtmlDoctypesSet == null) {
            this.xhtmlDoctypesSet = getNomenclatureLoaderService().loadByCode(XHTML_DOCTYPE_NOM).getValueList();
        }
    }

    /**
     * Loads the valid language code nomenclature on first call; later calls do nothing.
     */
    protected void loadValidLanguages() {
        if (this.validLanguagesSet == null) {
            this.validLanguagesSet = getNomenclatureLoaderService().loadByCode(LANG_NOM).getValueList();
        }
    }

    /**
     * @param lang the language declaration to test
     * @return {@code true} when the whole declaration matches the declaration
     *         syntax pattern; {@code false} otherwise, including for
     *         {@code null}
     */
    protected boolean isLangWellDeclared(String lang) {
        return lang != null && LANG_DECLARATION_PATTERN.matcher(lang).matches();
    }

    /**
     * Creates a remark carrying the language evidence elements. Nothing is
     * created when the solution is {@link TestSolution#PASSED} or the message
     * is blank.
     *
     * @param testSolution  the solution of the remark
     * @param element       the element the remark is attached to
     * @param msg           the message of the remark
     * @param defaultLang   reported as default language when
     *                      {@code currentLang} is not blank, as language otherwise
     * @param currentLang   reported as current language when not blank
     * @param detectedLang  the detected language
     * @param extractedText the tested text, truncated to
     *                      {@link #DISPLAYABLE_TEXT_SIZE} characters
     */
    private void addSourceCodeRemark(TestSolution testSolution, Element element, String msg, String defaultLang, String currentLang, String detectedLang, String extractedText) {
        if (testSolution.equals(TestSolution.PASSED) || StringUtils.isBlank(msg)) {
            return;
        }
        // NOTE(review): raw type kept on purpose - the evidence element type
        // is not imported by this file; parameterize once it is confirmed.
        ArrayList evidenceElements = new ArrayList();
        if (StringUtils.isNotBlank(currentLang)) {
            evidenceElements.add(getEvidenceElement(EvidenceStore.DEFAULT_LANGUAGE_EE, defaultLang));
            evidenceElements.add(getEvidenceElement(EvidenceStore.CURRENT_LANGUAGE_EE, currentLang));
        } else {
            evidenceElements.add(getEvidenceElement(EvidenceStore.LANGUAGE_EE, defaultLang));
        }
        evidenceElements.add(getEvidenceElement(EvidenceStore.DETECTED_LANGUAGE_EE, detectedLang));
        String displayableText = extractedText;
        if (displayableText.length() > DISPLAYABLE_TEXT_SIZE) {
            displayableText = displayableText.substring(0, DISPLAYABLE_TEXT_SIZE);
        }
        evidenceElements.add(getEvidenceElement(EvidenceStore.EXTRACTED_TEXT_EE, displayableText));
        addSourceCodeRemark(testSolution, element, msg, evidenceElements);
    }

    /**
     * Creates a remark about an invalid language declaration, with the
     * offending language as its only evidence element.
     *
     * @param element      the element the remark is attached to
     * @param lang         the offending language declaration or code
     * @param testSolution the solution of the remark
     * @param msg          the message of the remark
     */
    private void addInvalidDeclarationSourceCodeRemark(Element element, String lang, TestSolution testSolution, String msg) {
        // NOTE(review): raw type kept on purpose - see addSourceCodeRemark.
        ArrayList evidenceElements = new ArrayList();
        evidenceElements.add(getEvidenceElement(EvidenceStore.LANGUAGE_EE, lang));
        addSourceCodeRemark(testSolution, element, msg, evidenceElements);
    }
}
