package coldfusion.tagext.search;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.nio.charset.StandardCharsets;
import org.apache.xerces.dom.CoreDocumentImpl;
import org.apache.xerces.xni.parser.XMLDocumentFilter;
import org.cyberneko.html.filters.ElementRemover;
import org.cyberneko.html.parsers.DOMFragmentParser;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/* loaded from: input_file:coldfusion/tagext/search/HTMLParser.class */
/**
 * Best-effort extractor of a title and plain text from an HTML document.
 *
 * <p>The markup is parsed with NekoHTML's {@link DOMFragmentParser} behind an
 * {@link ElementRemover} filter (see {@link #getRemover()}), and the values of
 * all resulting {@code #text} nodes are concatenated into {@link #getText()}.
 * When the input is a file, the document is additionally parsed with a plain
 * {@link DOMParser} to locate the {@code <title>} element.
 *
 * <p>Any failure while reading or parsing is swallowed on purpose: the instance
 * then simply reports an empty title and empty text.
 */
public class HTMLParser {
    /** Maximum number of characters returned by {@link #getSummary()}. */
    private static final int SUMMARY_MAX_LENGTH = 300;

    private String title = "";
    private String text = "";

    /**
     * Parses the HTML file at the given path.
     *
     * @param str path of the HTML file to read
     */
    public HTMLParser(String str) {
        try {
            File file = new File(str);
            DocumentFragment fragment;
            // The fragment is a fully built DOM once parse() returns, so the stream
            // can (and must) be closed before the tree is walked.
            try (FileInputStream in = new FileInputStream(file)) {
                fragment = parseFragment(new InputSource(in));
            }
            // Second, unfiltered pass over the same file just to find <title>.
            DOMParser documentParser = new DOMParser();
            documentParser.parse(file.toURI().toString());
            if (!getTitle(documentParser.getDocument())) {
                this.title = "";
            }
            StringBuilder collected = new StringBuilder();
            scan(fragment, collected);
            this.text = collected.toString();
        } catch (Exception ignored) {
            // Deliberate best-effort: an unreadable or unparsable file yields empty results.
            this.title = "";
            this.text = "";
        }
    }

    /**
     * Parses HTML held in memory. The content is encoded as UTF-8 before being
     * handed to the parser. No separate title lookup is performed for this input.
     *
     * @param sb the HTML markup
     */
    public HTMLParser(StringBuilder sb) {
        try {
            byte[] bytes = sb.toString().getBytes(StandardCharsets.UTF_8);
            DocumentFragment fragment = parseFragment(new InputSource(new ByteArrayInputStream(bytes)));
            StringBuilder collected = new StringBuilder();
            scan(fragment, collected);
            this.text = collected.toString();
        } catch (Exception ignored) {
            // Deliberate best-effort: bad input (including null) yields empty results.
            this.title = "";
            this.text = "";
        }
    }

    /**
     * Configures a fragment parser with this class's filter chain and parses
     * {@code source} into a fresh document fragment.
     *
     * @param source the HTML input
     * @return the parsed fragment
     * @throws Exception if the parser rejects a feature/property or parsing fails
     */
    private DocumentFragment parseFragment(InputSource source) throws Exception {
        DOMFragmentParser parser = new DOMFragmentParser();
        parser.setFeature("http://cyberneko.org/html/features/scanner/script/strip-comment-delims", true);
        parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
        parser.setProperty("http://cyberneko.org/html/properties/filters", new XMLDocumentFilter[]{getRemover()});
        DocumentFragment fragment = new CoreDocumentImpl().createDocumentFragment();
        parser.parse(source, fragment);
        return fragment;
    }

    /**
     * Depth-first search for the first node named {@code title} (case-insensitive)
     * that has a child; stores that child's value as the title.
     *
     * @param node root of the subtree to search
     * @return {@code true} if a title was found and stored
     */
    private boolean getTitle(Node node) {
        if (node.getNodeName().equalsIgnoreCase("title")) {
            Node firstChild = node.getFirstChild();
            if (firstChild != null) {
                String value = firstChild.getNodeValue();
                // A non-text first child has a null value; never expose null through getTitle().
                this.title = value != null ? value : "";
                return true;
            }
        }
        NodeList childNodes = node.getChildNodes();
        if (childNodes == null) {
            return false;
        }
        int length = childNodes.getLength();
        for (int i = 0; i < length; i++) {
            if (getTitle(childNodes.item(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds the element filter applied to the fragment parse: {@code a} and
     * {@code link} are accepted with their {@code href} attribute, {@code img}
     * with {@code src}, and {@code script} / {@code style} are registered for removal.
     *
     * @return the configured filter
     */
    private ElementRemover getRemover() {
        ElementRemover elementRemover = new ElementRemover();
        elementRemover.acceptElement("a", new String[]{"href"});
        elementRemover.acceptElement("link", new String[]{"href"});
        elementRemover.acceptElement("img", new String[]{"src"});
        elementRemover.removeElement("script");
        elementRemover.removeElement("style");
        return elementRemover;
    }

    /**
     * Walks the subtree rooted at {@code node} in document order, appending a
     * space followed by the trimmed value of every {@code #text} node to {@code out}.
     *
     * @param node root of the subtree to walk
     * @param out  accumulator for the extracted text
     */
    private void scan(Node node, StringBuilder out) {
        String nodeName = node.getNodeName();
        // NOTE(review): "#title" is not a standard DOM node name, so this branch looks
        // unreachable; kept as-is to preserve behaviour - confirm the intended name.
        if (nodeName.equalsIgnoreCase("#title")) {
            Node firstChild = node.getFirstChild();
            if (firstChild != null) {
                this.title = firstChild.getNodeValue();
            }
            // Children of a title node are not scanned as body text.
            return;
        }
        if (nodeName.equalsIgnoreCase("#text")) {
            String value = node.getNodeValue();
            if (value != null) {
                out.append(' ').append(value.trim());
            }
        }
        for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            scan(child, out);
        }
    }

    /**
     * @return the document title, or an empty string if none was found
     */
    public String getTitle() {
        return this.title;
    }

    /**
     * @return the concatenated text content; each text node contributes a leading
     *         space followed by its trimmed value
     */
    public String getText() {
        return this.text;
    }

    /**
     * @return at most the first 300 characters of {@link #getText()}
     */
    public String getSummary() {
        return this.text.substring(0, Math.min(this.text.length(), SUMMARY_MAX_LENGTH));
    }
}
