package org.asqatasun.contentadapter.html;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.asqatasun.contentadapter.HTMLCleaner;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Entities;
import org.jsoup.nodes.Node;

/* loaded from: input_file:BOOT-INF/lib/asqatasun-contentadapter-5.0.0-rc.1.jar:org/asqatasun/contentadapter/html/HTMLJsoupCleanerImpl.class */
/**
 * {@link HTMLCleaner} implementation that normalises raw HTML with jsoup.
 *
 * <p>{@link #run()} strips broken namespace artefacts from the raw text, parses it with
 * jsoup, removes every comment node and every attribute whose name is wrapped in double
 * quotes, and stores the re-serialised document in {@code result}.
 *
 * <p>The {@code dirtyHTML} and {@code result} fields are not declared in this class; they
 * are presumably inherited from {@code AbstractHTMLCleaner}.
 */
public class HTMLJsoupCleanerImpl extends AbstractHTMLCleaner implements HTMLCleaner {
    /** Matches an {@code xmlns} declaration whose value is empty or whitespace-only. */
    private static final Pattern EMPTY_NS_DEFINITION_PATTERN = Pattern.compile("xmlns=\"(\\s)*\"");

    /** Matches the start of an opening tag carrying a generated {@code aN:} prefix, e.g. {@code <a0:}. */
    private static final Pattern NS_TAG_OPEN_PREFIX_DEFINITION_PATTERN = Pattern.compile("<a[0-9]+:");

    /** Matches the start of a closing tag carrying a generated {@code aN:} prefix, e.g. {@code </a0:}. */
    private static final Pattern NS_TAG_CLOSURE_PREFIX_DEFINITION_PATTERN = Pattern.compile("</a[0-9]+:");

    /** Name reported by {@link #getCorrectorName()}. */
    static final String CORRECTOR_NAME = "JsoupCleaner";

    /**
     * Cleans {@code dirtyHTML} and writes the outcome to {@code result}.
     *
     * <p>Note that {@code dirtyHTML} itself is overwritten with the namespace-stripped text
     * before parsing.
     */
    @Override
    public void run() {
        this.dirtyHTML = removeBadNamespaceDefinition(this.dirtyHTML);
        Document document = Jsoup.parse(this.dirtyHTML);
        // Serialise with XHTML entity escaping, one node per line, two-space indentation.
        document.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
        document.outputSettings().outline(true);
        document.outputSettings().indentAmount(2);
        removeComments(document);
        removeMalformedAttributes(document);
        this.result = document.outerHtml();
    }

    /**
     * Recursively deletes every comment node found below {@code node}.
     *
     * @param node root of the subtree to clean; the node itself is never removed
     */
    private void removeComments(Node node) {
        int index = 0;
        while (index < node.childNodes().size()) {
            Node child = node.childNode(index);
            if (child.nodeName().equals("#comment")) {
                // Removing shifts the following siblings down, so the index must not advance.
                child.remove();
            } else {
                removeComments(child);
                index++;
            }
        }
    }

    /**
     * Recursively removes, from every descendant of {@code node}, the attributes whose key
     * both starts and ends with a double quote.
     *
     * <p>The attributes of {@code node} itself are not examined, only those of its
     * descendants.
     *
     * @param node root of the subtree to clean
     */
    private void removeMalformedAttributes(Node node) {
        for (int i = 0; i < node.childNodes().size(); i++) {
            Node child = node.childNode(i);
            // Collect first, remove afterwards: removing through the node while its attribute
            // iterator is live can skip the following attribute or raise
            // ConcurrentModificationException, depending on the jsoup version in use.
            List<String> malformedKeys = new ArrayList<>();
            for (Attribute attribute : child.attributes()) {
                String key = attribute.getKey();
                if (key.startsWith("\"") && key.endsWith("\"")) {
                    malformedKeys.add(key);
                }
            }
            for (String key : malformedKeys) {
                child.removeAttr(key);
            }
            removeMalformedAttributes(child);
        }
    }

    /**
     * Strips namespace artefacts from raw markup: empty {@code xmlns=""} declarations are
     * deleted and generated {@code aN:} tag prefixes are dropped from opening and closing tags.
     *
     * @param str raw markup; must not be {@code null}
     * @return the markup with those artefacts removed
     */
    private String removeBadNamespaceDefinition(String str) {
        String cleaned = EMPTY_NS_DEFINITION_PATTERN.matcher(str).replaceAll("");
        cleaned = NS_TAG_OPEN_PREFIX_DEFINITION_PATTERN.matcher(cleaned).replaceAll("<");
        return NS_TAG_CLOSURE_PREFIX_DEFINITION_PATTERN.matcher(cleaned).replaceAll("</");
    }

    /**
     * Returns the name identifying this cleaner.
     *
     * @return {@link #CORRECTOR_NAME}
     */
    @Override
    public String getCorrectorName() {
        return CORRECTOR_NAME;
    }
}
