package org.apache.solr.handler.dataimport;

import java.io.File;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.io.IOUtils;
import org.apache.http.protocol.HTTP;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.EmptyParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.html.HtmlMapper;
import org.apache.tika.parser.html.IdentityHtmlMapper;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.apache.xalan.templates.Constants;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:Disk1/InstData/Resource1.zip:$IA_PROJECT_DIR$/hotfix/dist_zg_ia_sf.jar:cfusion/jetty/webapps/solr.war:WEB-INF/lib/solr-dataimporthandler-extras-7.2.1.jar:org/apache/solr/handler/dataimport/TikaEntityProcessor.class */
/**
 * Entity processor that feeds the stream obtained from the entity's data source (looked up via
 * the {@code url} attribute) through an Apache Tika parser and emits the result as a single row.
 *
 * <p>Entity attributes read by this class:
 * <ul>
 *   <li>{@code tikaConfig} - optional path to a Tika configuration file; a relative path is
 *       resolved against the core's config directory</li>
 *   <li>{@code extractEmbedded} - {@code "true"} to hand the main parser to embedded documents</li>
 *   <li>{@code format} - one of {@code text|html|xml|none} (default {@code text})</li>
 *   <li>{@code htmlMapper} - {@code default} or {@code identity} (default {@code default})</li>
 *   <li>{@code parser} - parser class name (default {@link #AUTO_PARSER})</li>
 *   <li>{@code spatialMetadataField} - optional row key that receives {@code "lat,lon"}</li>
 * </ul>
 */
public class TikaEntityProcessor extends EntityProcessorBase {
    static final String AUTO_PARSER = "org.apache.tika.parser.AutoDetectParser";

    private static final Parser EMPTY_PARSER = new EmptyParser();

    /** Namespace URI stripped from elements when producing HTML output. */
    private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";

    /** Value of the {@code htmlMapper} attribute that selects {@link IdentityHtmlMapper}. */
    private static final String IDENTITY_HTML_MAPPER = "identity";
    private static final String DEFAULT_HTML_MAPPER = "default";

    private static final String FORMAT_ERROR = "'format' can be one of text|html|xml|none";

    // Error codes passed to DataImportHandlerException.
    // NOTE(review): presumably these mirror the SEVERE / SKIP_ROW constants of that class - confirm.
    private static final int ERR_SEVERE = 500;
    private static final int ERR_SKIP_ROW = 301;

    private TikaConfig tikaConfig;
    private String format = "text";
    private boolean done = false;
    private boolean extractEmbedded = false;
    private String parser;
    private String htmlMapper;
    private String spatialMetadataField;

    /**
     * Delegates to the superclass and clears the {@code done} flag so that the next call to
     * {@link #nextRow()} produces a row again.
     */
    @Override
    public void init(Context context) {
        super.init(context);
        this.done = false;
    }

    /**
     * Loads the Tika configuration and reads and validates the entity attributes.
     *
     * @throws DataImportHandlerException if {@code format} or {@code htmlMapper} has an
     *         unsupported value; failures while loading the Tika configuration are passed to
     *         {@code DataImportHandlerException.wrapAndThrow}
     */
    @Override
    public void firstInit(Context context) {
        super.firstInit(context);
        try {
            String configPath = context.getResolvedEntityAttribute("tikaConfig");
            if (configPath == null) {
                this.tikaConfig = new TikaConfig(context.getSolrCore().getResourceLoader().getClassLoader());
            } else {
                File configFile = new File(configPath);
                if (!configFile.isAbsolute()) {
                    configFile = new File(context.getSolrCore().getResourceLoader().getConfigDir(), configPath);
                }
                this.tikaConfig = new TikaConfig(configFile);
            }
        } catch (Exception e) {
            DataImportHandlerException.wrapAndThrow(ERR_SEVERE, e, "Unable to load Tika Config");
        }

        if ("true".equals(context.getResolvedEntityAttribute("extractEmbedded"))) {
            this.extractEmbedded = true;
        }

        this.format = attributeOrDefault(context, "format", "text");
        boolean knownFormat = "html".equals(this.format)
                || "xml".equals(this.format)
                || "text".equals(this.format)
                || "none".equals(this.format);
        if (!knownFormat) {
            throw new DataImportHandlerException(ERR_SEVERE, FORMAT_ERROR);
        }

        this.htmlMapper = attributeOrDefault(context, "htmlMapper", DEFAULT_HTML_MAPPER);
        if (!DEFAULT_HTML_MAPPER.equals(this.htmlMapper) && !IDENTITY_HTML_MAPPER.equals(this.htmlMapper)) {
            throw new DataImportHandlerException(ERR_SEVERE, "'htmlMapper', if present, must be 'default' or 'identity'");
        }

        this.parser = attributeOrDefault(context, "parser", AUTO_PARSER);
        this.spatialMetadataField = context.getResolvedEntityAttribute("spatialMetadataField");
    }

    /**
     * Parses the entity's input stream and returns one row, then {@code null} on subsequent
     * calls until {@link #init(Context)} is invoked again.
     *
     * <p>The row contains every entity field flagged {@code meta="true"} for which the parsed
     * metadata has a value, the extracted content under {@code "text"} (unless the format is
     * {@code none}), and optionally a {@code "lat,lon"} value under the configured spatial field.
     *
     * <p>The input stream is always closed, including when handler creation, parser creation or
     * parsing fails.
     *
     * @return the extracted row, or {@code null} once the single row has been returned
     * @throws DataImportHandlerException with code 301 when parsing fails and {@code onError}
     *         is {@code "skip"}; other parse failures go through {@code wrapAndThrow}
     */
    @Override
    public Map<String, Object> nextRow() {
        if (this.done) {
            return null;
        }
        Map<String, Object> row = new HashMap<>();
        InputStream input = (InputStream) this.context.getDataSource()
                .getData(this.context.getResolvedEntityAttribute("url"));
        Metadata metadata = new Metadata();
        StringWriter extracted = new StringWriter();
        try {
            ContentHandler handler = createContentHandler(extracted);
            Parser tikaParser = createParser();
            parse(tikaParser, input, handler, metadata);
        } finally {
            // Close on every path; previously a failure above leaked the stream.
            IOUtils.closeQuietly(input);
        }

        for (Map<String, String> field : this.context.getAllEntityFields()) {
            if (!"true".equals(field.get("meta"))) {
                continue;
            }
            String column = field.get("column");
            String value = metadata.get(column);
            if (value != null) {
                row.put(column, value);
            }
        }
        if (!"none".equals(this.format)) {
            row.put("text", extracted.toString());
        }
        tryToAddLatLon(metadata, row);
        this.done = true;
        return row;
    }

    /** Returns the resolved entity attribute {@code name}, or {@code fallback} when it is absent. */
    private static String attributeOrDefault(Context context, String name, String fallback) {
        String value = context.getResolvedEntityAttribute(name);
        return value != null ? value : fallback;
    }

    /** Builds the SAX handler matching the configured output format, writing into {@code out}. */
    private ContentHandler createContentHandler(Writer out) {
        try {
            switch (this.format) {
                case "html":
                    return getHtmlHandler(out);
                case "xml":
                    return getXmlContentHandler(out);
                case "text":
                    return getTextContentHandler(out);
                case "none":
                    return new DefaultHandler();
                default:
                    // firstInit rejects any other value, so this is a defensive guard only.
                    throw new DataImportHandlerException(ERR_SEVERE, FORMAT_ERROR);
            }
        } catch (TransformerConfigurationException e) {
            DataImportHandlerException.wrapAndThrow(ERR_SEVERE, e, "Unable to create content handler");
            // wrapAndThrow is expected to throw; the return only satisfies the compiler.
            return null;
        }
    }

    /** Instantiates the configured parser: Tika auto-detection, or a class loaded by name. */
    private Parser createParser() {
        if (AUTO_PARSER.equals(this.parser)) {
            return new AutoDetectParser(this.tikaConfig);
        }
        return this.context.getSolrCore().getResourceLoader().newInstance(this.parser, Parser.class);
    }

    /** Runs {@code tikaParser} over {@code input}, translating failures per the {@code onError} policy. */
    private void parse(Parser tikaParser, InputStream input, ContentHandler handler, Metadata metadata) {
        try {
            ParseContext parseContext = new ParseContext();
            if (IDENTITY_HTML_MAPPER.equals(this.htmlMapper)) {
                parseContext.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
            }
            // The parser registered in the context is the one used for embedded documents.
            parseContext.set(Parser.class, this.extractEmbedded ? tikaParser : EMPTY_PARSER);
            tikaParser.parse(input, handler, metadata, parseContext);
        } catch (Exception e) {
            if ("skip".equals(this.onError)) {
                // Keep the original failure as the cause so it is not lost from logs.
                throw new DataImportHandlerException(ERR_SKIP_ROW, "Document skipped :" + e.getMessage(), e);
            }
            DataImportHandlerException.wrapAndThrow(ERR_SEVERE, e, "Unable to read content");
        }
    }

    /**
     * Adds {@code "latitude,longitude"} to {@code map} under the configured spatial field when
     * that field is configured and the metadata carries both coordinates; otherwise does nothing.
     */
    private void tryToAddLatLon(Metadata metadata, Map<String, Object> map) {
        if (this.spatialMetadataField == null) {
            return;
        }
        String latitude = metadata.get(Metadata.LATITUDE);
        String longitude = metadata.get(Metadata.LONGITUDE);
        if (latitude == null || longitude == null) {
            return;
        }
        map.put(this.spatialMetadataField, String.format(Locale.ROOT, "%s,%s", latitude, longitude));
    }

    /**
     * Creates a handler that serializes SAX events to {@code writer} as HTML, dropping the
     * {@code head} element tags, the XHTML namespace and all prefix mappings.
     */
    private static ContentHandler getHtmlHandler(Writer writer) throws TransformerConfigurationException {
        SAXTransformerFactory factory = (SAXTransformerFactory) TransformerFactory.newInstance();
        TransformerHandler serializer = factory.newTransformerHandler();
        serializer.getTransformer().setOutputProperty(javax.xml.transform.OutputKeys.METHOD, "html");
        serializer.setResult(new StreamResult(writer));
        return new ContentHandlerDecorator(serializer) {
            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                if ("head".equals(localName)) {
                    return;
                }
                super.startElement(XHTML_NAMESPACE.equals(uri) ? null : uri, localName, qName, attributes);
            }

            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                if ("head".equals(localName)) {
                    return;
                }
                super.endElement(XHTML_NAMESPACE.equals(uri) ? null : uri, localName, qName);
            }

            @Override
            public void startPrefixMapping(String prefix, String uri) {
                // Intentionally ignored: prefix mappings are not forwarded to the serializer.
            }

            @Override
            public void endPrefixMapping(String prefix) {
                // Intentionally ignored, matching startPrefixMapping.
            }
        };
    }

    /** Creates a handler that writes only the document body text to {@code writer}. */
    private static ContentHandler getTextContentHandler(Writer writer) {
        return new BodyContentHandler(writer);
    }

    /** Creates a handler that serializes SAX events to {@code writer} as XML. */
    private static ContentHandler getXmlContentHandler(Writer writer) throws TransformerConfigurationException {
        SAXTransformerFactory factory = (SAXTransformerFactory) TransformerFactory.newInstance();
        TransformerHandler serializer = factory.newTransformerHandler();
        serializer.getTransformer().setOutputProperty(javax.xml.transform.OutputKeys.METHOD, "xml");
        serializer.setResult(new StreamResult(writer));
        return serializer;
    }
}
