package com.adobe.internal.pdfm.wordlister;

import com.adobe.internal.pdfm.DocumentException;
import com.adobe.internal.pdfm.PDFMDocHandle;
import com.adobe.internal.pdfm.PDFMException;
import com.adobe.internal.pdfm.filters.FilterValue;
import com.adobe.internal.pdfm.util.FilenameEncodings;
import com.adobe.internal.pdfm.util.FontSetBuilder;
import com.adobe.internal.pdfm.util.PDFMPermissionsManager;
import com.adobe.internal.pdfm.util.TempFileManager;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFIOException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFInvalidDocumentException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFSecurityException;
import com.adobe.internal.pdftoolkit.core.permissionprovider.ObjectOperations;
import com.adobe.internal.pdftoolkit.core.types.ASQuad;
import com.adobe.internal.pdftoolkit.pdf.document.PDFDocument;
import com.adobe.internal.pdftoolkit.pdf.page.PDFPageTree;
import com.adobe.internal.pdftoolkit.services.readingorder.ReadingOrderTextExtractor;
import com.adobe.internal.pdftoolkit.services.textextraction.ParagraphIterator;
import com.adobe.internal.pdftoolkit.services.textextraction.TextExtractor;
import com.adobe.internal.pdftoolkit.services.textextraction.Word;
import com.adobe.internal.pdftoolkit.services.textextraction.WordsIterator;
import com.adobe.logging.AdobeLogger;
import com.adobe.logging.MsgUtil;
import com.adobe.logging.PDFMLogger;
import com.adobe.service.pdfm.client.PDFMMsgSet;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/* loaded from: input_file:com/adobe/internal/pdfm/wordlister/WordLister.class */
public class WordLister {
    // Logger for this class, obtained from PDFMLogger.
    private static final AdobeLogger LOGGER = PDFMLogger.getAdobeLogger((Class<?>) WordLister.class);
    // Namespace URI passed with every element and attribute this class writes.
    private static final String DOCTEXTNS = "http://ns.adobe.com/DDX/DocText/1.0/";
    // Root element of the generated XML.
    private static final String DOCTEXTELEM = "DocText";
    // Names of the element written directly under the root; extractDocText picks one of the
    // three. The String-based setters also compare their argument against
    // PARAGRAPHSPERPAGEELEM and WITHQUADSELEM to switch the matching flag on.
    private static final String PARAGRAPHSPERPAGEELEM = "ParagraphsPerPage";
    private static final String TEXTPERPAGEELEM = "TextPerPage";
    private static final String WITHQUADSELEM = "WithQuads";
    // Element names used for the text structure.
    private static final String WORDELEM = "Word";
    private static final String SENTENCEELEM = "Sentence";
    private static final String PARAGRAPHELEM = "Paragraph";
    private static final String PAGEELEM = "Page";
    // Element names used for a bounding quad and its four corner points.
    private static final String QUADELEM = "Quad";
    private static final String P1ELEM = "P1";
    private static final String P2ELEM = "P2";
    private static final String P3ELEM = "P3";
    private static final String P4ELEM = "P4";
    // Attribute names: 1-based page number on Page, coordinates on P1..P4.
    private static final String PAGENUMATTR = "pageNumber";
    private static final String XATTR = "X";
    private static final String YATTR = "Y";
    // Document being processed; set by extractDocText from the document handle.
    private PDFDocument pdfDoc = null;
    // When true, extractDocText writes each word as a Word element with its bounding quads.
    private boolean withQuads = false;
    // When true, extractDocText walks the pages paragraph by paragraph instead of word by word.
    private boolean withParagraphs = false;
    // Strategy objects installed by extractDocText before any text is written.
    private WordEmitter wordEmitter = null;
    private ParagraphEmitter paragraphEmitter = null;

    /* loaded from: input_file:com/adobe/internal/pdfm/wordlister/WordLister$EmitLinesOfParagraph.class */
    /**
     * Paragraph emitter that writes a paragraph word by word through the enclosing
     * instance's current {@link WordEmitter}, wrapped in a single {@code Sentence} element.
     */
    private class EmitLinesOfParagraph implements ParagraphEmitter {
        private EmitLinesOfParagraph() {
        }

        /**
         * Emits every non-blank word of every word list in {@code list}.
         *
         * <p>The words are enclosed in one {@code Sentence} element. The closing
         * {@code endElement} was already present; the matching {@code startElement} is
         * emitted here so that the SAX event stream stays balanced and the resulting XML
         * is well-formed.
         *
         * @param readingOrderTextExtractor unused by this implementation
         * @param list the paragraph as a list of word lists
         * @param transformerHandler SAX sink receiving the events
         * @throws PDFException declared by the interface; not thrown directly here
         * @throws SAXException if the handler or the word emitter fails
         */
        @Override // com.adobe.internal.pdfm.wordlister.WordLister.ParagraphEmitter
        public void emitParagraphs(ReadingOrderTextExtractor readingOrderTextExtractor, List<List<Word>> list, TransformerHandler transformerHandler) throws PDFException, SAXException {
            // Open the element that is closed after the loops; without this the end tag had no start tag.
            transformerHandler.startElement(WordLister.DOCTEXTNS, WordLister.SENTENCEELEM, WordLister.SENTENCEELEM, new AttributesImpl());
            for (List<Word> line : list) {
                // The "first word" flag restarts for every inner list and is cleared after the
                // first entry, whether or not that entry was blank and therefore skipped.
                boolean firstInLine = true;
                for (Word word : line) {
                    if (word.toString().trim().length() > 0) {
                        WordLister.this.getWordEmitter().emitWords(word, transformerHandler, firstInLine);
                    }
                    firstInLine = false;
                }
            }
            transformerHandler.endElement(WordLister.DOCTEXTNS, WordLister.SENTENCEELEM, WordLister.SENTENCEELEM);
        }
    }

    /* loaded from: input_file:com/adobe/internal/pdfm/wordlister/WordLister$EmitPlainWords.class */
    /**
     * Word emitter that writes words as plain character data, separated by single spaces.
     */
    private class EmitPlainWords implements WordEmitter {
        private EmitPlainWords() {
        }

        /**
         * Writes the word's text, preceded by one space unless {@code z} is true.
         *
         * @param word word whose {@code toString()} text is written
         * @param transformerHandler SAX sink receiving the character data
         * @param z true when no separating space should be written before the word
         * @throws SAXException if the handler rejects the character data
         */
        @Override
        public void emitWords(Word word, TransformerHandler transformerHandler, boolean z) throws SAXException {
            if (!z) {
                final char[] separator = {' '};
                transformerHandler.characters(separator, 0, separator.length);
            }
            final String text = word.toString();
            transformerHandler.characters(text.toCharArray(), 0, text.length());
        }
    }

    /* loaded from: input_file:com/adobe/internal/pdfm/wordlister/WordLister$EmitQuadWords.class */
    /**
     * Word emitter that wraps each word in a {@code Word} element containing the word's
     * text followed by its bounding quads.
     */
    private class EmitQuadWords implements WordEmitter {
        private EmitQuadWords() {
        }

        /**
         * Writes one {@code Word} element for {@code word}.
         *
         * @param word word to write
         * @param transformerHandler SAX sink receiving the events
         * @param z ignored by this implementation
         * @throws SAXException if the handler fails, or wrapping a PDF toolkit exception
         *         raised while the quads are written
         */
        @Override
        public void emitWords(Word word, TransformerHandler transformerHandler, boolean z) throws SAXException {
            transformerHandler.startElement(WordLister.DOCTEXTNS, WordLister.WORDELEM, WordLister.WORDELEM, new AttributesImpl());
            final String text = word.toString();
            transformerHandler.characters(text.toCharArray(), 0, text.length());
            try {
                WordLister.this.emitQuads(word, transformerHandler);
            } catch (PDFInvalidDocumentException invalidDocument) {
                // The interface only allows SAXException, so toolkit failures are tunnelled through it.
                throw new SAXException(invalidDocument);
            } catch (PDFSecurityException securityFailure) {
                throw new SAXException(securityFailure);
            } catch (PDFIOException ioFailure) {
                throw new SAXException(ioFailure);
            }
            // Reached only when the quads were written without error.
            transformerHandler.endElement(WordLister.DOCTEXTNS, WordLister.WORDELEM, WordLister.WORDELEM);
        }
    }

    /* loaded from: input_file:com/adobe/internal/pdfm/wordlister/WordLister$EmitSentencesOfParagraph.class */
    /**
     * Paragraph emitter that asks the reading-order extractor to build sentences and
     * writes each one as a {@code Sentence} element.
     */
    private class EmitSentencesOfParagraph implements ParagraphEmitter {
        private EmitSentencesOfParagraph() {
        }

        /**
         * Writes one {@code Sentence} element per sentence built from {@code list}.
         *
         * @param readingOrderTextExtractor extractor whose {@code buildSentences} is used
         * @param list the paragraph as a list of word lists
         * @param transformerHandler SAX sink receiving the events
         * @throws PDFException if the toolkit fails
         * @throws SAXException if the handler fails
         */
        @Override
        public void emitParagraphs(ReadingOrderTextExtractor readingOrderTextExtractor, List<List<Word>> list, TransformerHandler transformerHandler) throws PDFException, SAXException {
            final AttributesImpl noAttributes = new AttributesImpl();
            final List sentences = readingOrderTextExtractor.buildSentences(list);
            for (Object sentence : sentences) {
                final String text = (String) sentence;
                transformerHandler.startElement(WordLister.DOCTEXTNS, WordLister.SENTENCEELEM, WordLister.SENTENCEELEM, noAttributes);
                transformerHandler.characters(text.toCharArray(), 0, text.length());
                transformerHandler.endElement(WordLister.DOCTEXTNS, WordLister.SENTENCEELEM, WordLister.SENTENCEELEM);
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/adobe/internal/pdfm/wordlister/WordLister$ParagraphEmitter.class */
    /**
     * Strategy for writing the content of one paragraph to the SAX handler. The caller
     * in this file opens and closes the surrounding {@code Paragraph} element itself.
     */
    public interface ParagraphEmitter {
        /**
         * Writes the content of one paragraph.
         *
         * @param readingOrderTextExtractor extractor that produced the paragraph
         * @param list the paragraph as a list of word lists
         * @param transformerHandler SAX sink receiving the events
         * @throws PDFException if the PDF toolkit fails
         * @throws SAXException if the handler fails
         */
        void emitParagraphs(ReadingOrderTextExtractor readingOrderTextExtractor, List<List<Word>> list, TransformerHandler transformerHandler) throws PDFException, SAXException;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/adobe/internal/pdfm/wordlister/WordLister$WordEmitter.class */
    /**
     * Strategy for writing a single word to the SAX handler.
     */
    public interface WordEmitter {
        /**
         * Writes one word.
         *
         * @param word word to write
         * @param transformerHandler SAX sink receiving the events
         * @param z callers in this file pass true for the first word of a page or word
         *        list and false afterwards; implementations may use it to decide whether
         *        a separator is needed
         * @throws SAXException if the handler fails
         */
        void emitWords(Word word, TransformerHandler transformerHandler, boolean z) throws SAXException;
    }

    /**
     * Creates a lister with both the quads flag and the paragraphs flag off.
     */
    public WordLister() {
    }

    /**
     * Creates a lister whose mode is chosen by name.
     *
     * @param str mode name; "WithQuads" turns the quads flag on, "ParagraphsPerPage" turns
     *        the paragraphs flag on, anything else (including null) leaves both off
     */
    public WordLister(String str) {
        // Each setter compares the same string against its own mode name.
        setWithQuads(str);
        setWithParagraphs(str);
    }

    /**
     * Extracts the text of the PDF behind {@code pDFMDocHandle} and writes it as a
     * DocText XML document into a temporary file.
     *
     * <p>The element written under the root depends on the flags: {@code WithQuads} when
     * the quads flag is set, otherwise {@code ParagraphsPerPage} when the paragraphs flag
     * is set, otherwise {@code TextPerPage}. Pages are walked paragraph by paragraph when
     * the paragraphs flag is set and word by word otherwise.
     *
     * <p>Cleanup (closing the writer, releasing the document, exit trace) runs in nested
     * {@code finally} blocks so that a failure in one step no longer skips the others.
     *
     * @param pDFMDocHandle handle the document is acquired from and released to
     * @param tempFileManager supplier of the output file
     * @return the file containing the generated XML
     * @throws WordListerException wrapping SAX, transformer, PDF toolkit and PDFM failures
     * @throws DocumentException declared for the handle operations; not thrown directly here
     * @throws IOException if the output file cannot be created, written or closed
     */
    public File extractDocText(PDFMDocHandle pDFMDocHandle, TempFileManager tempFileManager) throws WordListerException, DocumentException, IOException {
        // The trace name is kept byte-for-byte so existing log output does not change.
        LOGGER.entering(getClass().getName(), "extractWords");
        // Forget any document left from an earlier call on this instance, so the cleanup
        // below releases only a document acquired by this invocation.
        setPdfDoc(null);
        OutputStreamWriter outputStreamWriter = null;
        try {
            setPdfDoc(pDFMDocHandle.acquirePDF());
            new PDFMPermissionsManager(pDFMDocHandle).assertPermitted(ObjectOperations.DOC_EXPORT);
            File tempFile = tempFileManager.getTempFile("words-");
            outputStreamWriter = new OutputStreamWriter(new FileOutputStream(tempFile, false), FilenameEncodings.UTF8);
            Result streamResult = new StreamResult(outputStreamWriter);
            TransformerHandler newTransformerHandler = ((SAXTransformerFactory) SAXTransformerFactory.newInstance()).newTransformerHandler();
            Transformer transformer = newTransformerHandler.getTransformer();
            transformer.setOutputProperty("encoding", FilenameEncodings.UTF8);
            transformer.setOutputProperty("indent", "yes");
            newTransformerHandler.setResult(streamResult);
            newTransformerHandler.startDocument();
            newTransformerHandler.startElement(DOCTEXTNS, DOCTEXTELEM, DOCTEXTELEM, new AttributesImpl());

            // Pick the emitters and the name of the element written under the root.
            String str;
            if (isWithQuads()) {
                setWordEmitter(new EmitQuadWords());
                setParagraphEmitter(new EmitLinesOfParagraph());
                str = WITHQUADSELEM;
            } else {
                setWordEmitter(new EmitPlainWords());
                setParagraphEmitter(new EmitSentencesOfParagraph());
                str = isWithParagraphs() ? PARAGRAPHSPERPAGEELEM : TEXTPERPAGEELEM;
            }

            if (isWithParagraphs()) {
                emitPageOfParagraphs(str, newTransformerHandler);
            } else {
                emitPage(str, newTransformerHandler);
            }
            newTransformerHandler.endElement(DOCTEXTNS, DOCTEXTELEM, DOCTEXTELEM);
            newTransformerHandler.endDocument();
            // NOTE(review): presumably un-registers the file from the manager so the caller
            // owns it from here on - confirm against TempFileManager.
            tempFileManager.remove(tempFile);
            return tempFile;
        } catch (SAXException e) {
            throw new WordListerException(MsgUtil.getMsg(PDFMMsgSet.PDFM_S20003_SAX_ERROR, pDFMDocHandle.getName()), e);
        } catch (PDFMException e2) {
            throw new WordListerException(MsgUtil.getMsg(PDFMMsgSet.PDFM_S20006_WORDLISTER_PERMISSIONS_FAILURE, pDFMDocHandle.getName()), e2);
        } catch (PDFException e3) {
            throw new WordListerException(MsgUtil.getMsg(PDFMMsgSet.PDFM_S20005_INTERNAL_ERROR, pDFMDocHandle.getName()), e3);
        } catch (TransformerException e4) {
            throw new WordListerException(MsgUtil.getMsg(PDFMMsgSet.PDFM_S20002_TRANSFORMER_ERROR, pDFMDocHandle.getName()), e4);
        } finally {
            try {
                if (outputStreamWriter != null) {
                    outputStreamWriter.close();
                }
            } finally {
                try {
                    // Non-null only when acquirePDF() succeeded during this call.
                    if (getPdfDoc() != null) {
                        pDFMDocHandle.releasePDF();
                    }
                } finally {
                    // Do not keep a reference to a document that is no longer acquired.
                    setPdfDoc(null);
                    LOGGER.exiting(getClass().getName(), "extractWords");
                }
            }
        }
    }

    /**
     * Writes the document text grouped by page and paragraph inside an element named
     * {@code str}. Pages for which the extractor yields no paragraph are omitted.
     *
     * @param str name of the element that encloses all pages
     * @param transformerHandler SAX sink receiving the events
     * @throws PDFException if the PDF toolkit fails
     * @throws SAXException if the handler or the paragraph emitter fails
     */
    private void emitPageOfParagraphs(String str, TransformerHandler transformerHandler) throws PDFException, SAXException {
        final PDFPageTree pages = getPdfDoc().requirePages();
        final ReadingOrderTextExtractor extractor = ReadingOrderTextExtractor.newInstance(getPdfDoc(), FontSetBuilder.getPdfFontSet());
        // One attribute list is reused; it is empty except while a Page start tag is written.
        final AttributesImpl attrs = new AttributesImpl();

        transformerHandler.startElement(DOCTEXTNS, str, str, attrs);
        for (int pageIndex = 0; pageIndex < pages.size(); pageIndex++) {
            final ParagraphIterator paragraphs = extractor.getParagraphIterator(pages.getPage(pageIndex), pageIndex);
            if (!paragraphs.hasNext()) {
                continue;
            }
            // The page number attribute is 1-based.
            attrs.addAttribute(DOCTEXTNS, PAGENUMATTR, PAGENUMATTR, "Integer", Integer.toString(pageIndex + 1));
            transformerHandler.startElement(DOCTEXTNS, PAGEELEM, PAGEELEM, attrs);
            attrs.clear();
            while (paragraphs.hasNext()) {
                transformerHandler.startElement(DOCTEXTNS, PARAGRAPHELEM, PARAGRAPHELEM, attrs);
                getParagraphEmitter().emitParagraphs(extractor, paragraphs.next(), transformerHandler);
                transformerHandler.endElement(DOCTEXTNS, PARAGRAPHELEM, PARAGRAPHELEM);
            }
            transformerHandler.endElement(DOCTEXTNS, PAGEELEM, PAGEELEM);
        }
        transformerHandler.endElement(DOCTEXTNS, str, str);
    }

    /**
     * Writes the document text page by page, word by word, inside an element named
     * {@code str}. Pages for which the extractor yields no word are omitted.
     *
     * @param str name of the element that encloses all pages
     * @param transformerHandler SAX sink receiving the events
     * @throws PDFException if the PDF toolkit fails
     * @throws SAXException if the handler or the word emitter fails
     */
    private void emitPage(String str, TransformerHandler transformerHandler) throws PDFException, SAXException {
        final PDFPageTree pages = getPdfDoc().requirePages();
        final TextExtractor extractor = TextExtractor.newInstance(getPdfDoc(), FontSetBuilder.getPdfFontSet());
        final AttributesImpl attrs = new AttributesImpl();

        transformerHandler.startElement(DOCTEXTNS, str, str, attrs);
        for (int pageIndex = 0; pageIndex < pages.size(); pageIndex++) {
            final WordsIterator words = extractor.getWordsIterator(pages.getPage(pageIndex), pageIndex);
            if (!words.hasNext()) {
                continue;
            }
            attrs.clear();
            // The page number attribute is 1-based.
            attrs.addAttribute(DOCTEXTNS, PAGENUMATTR, PAGENUMATTR, "Integer", Integer.toString(pageIndex + 1));
            transformerHandler.startElement(DOCTEXTNS, PAGEELEM, PAGEELEM, attrs);
            // Only the first word of a page is flagged as "first".
            for (boolean firstOnPage = true; words.hasNext(); firstOnPage = false) {
                getWordEmitter().emitWords(words.next(), transformerHandler, firstOnPage);
            }
            transformerHandler.endElement(DOCTEXTNS, PAGEELEM, PAGEELEM);
        }
        transformerHandler.endElement(DOCTEXTNS, str, str);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Writes one {@code Quad} element per bounding quad of {@code word}; each contains the
     * empty elements {@code P1}..{@code P4} carrying the corner coordinates as {@code X}
     * and {@code Y} attributes. Nothing is written when the word reports no quads.
     *
     * @param word word whose bounding quads are written
     * @param transformerHandler SAX sink receiving the events
     * @throws SAXException if the handler fails
     * @throws PDFInvalidDocumentException if raised by the PDF toolkit
     * @throws PDFIOException if raised by the PDF toolkit
     * @throws PDFSecurityException if raised by the PDF toolkit
     */
    public void emitQuads(Word word, TransformerHandler transformerHandler) throws SAXException, PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        final AttributesImpl attrs = new AttributesImpl();
        if (word.getBoundingQuads() == null) {
            return;
        }
        for (ASQuad quad : word.getBoundingQuads()) {
            // Drop the attributes left over from the previous quad's last corner.
            attrs.clear();
            transformerHandler.startElement(DOCTEXTNS, QUADELEM, QUADELEM, attrs);
            emitQuadCorner(transformerHandler, attrs, P1ELEM, quad.p1().x(), quad.p1().y());
            emitQuadCorner(transformerHandler, attrs, P2ELEM, quad.p2().x(), quad.p2().y());
            emitQuadCorner(transformerHandler, attrs, P3ELEM, quad.p3().x(), quad.p3().y());
            emitQuadCorner(transformerHandler, attrs, P4ELEM, quad.p4().x(), quad.p4().y());
            transformerHandler.endElement(DOCTEXTNS, QUADELEM, QUADELEM);
        }
    }

    /** Writes one empty corner element whose X and Y attributes hold the given coordinates. */
    private void emitQuadCorner(TransformerHandler handler, AttributesImpl attrs, String cornerElem, double x, double y) throws SAXException {
        attrs.clear();
        attrs.addAttribute(DOCTEXTNS, XATTR, XATTR, FilterValue.kReal, Double.toString(x));
        attrs.addAttribute(DOCTEXTNS, YATTR, YATTR, FilterValue.kReal, Double.toString(y));
        handler.startElement(DOCTEXTNS, cornerElem, cornerElem, attrs);
        handler.endElement(DOCTEXTNS, cornerElem, cornerElem);
    }

    /**
     * @return the word emitter currently installed, or null if none has been set
     */
    protected WordEmitter getWordEmitter() {
        return this.wordEmitter;
    }

    /**
     * @param wordEmitter strategy used to write individual words
     */
    protected void setWordEmitter(WordEmitter wordEmitter) {
        this.wordEmitter = wordEmitter;
    }

    /**
     * @return the paragraph emitter currently installed, or null if none has been set
     */
    protected ParagraphEmitter getParagraphEmitter() {
        return this.paragraphEmitter;
    }

    /**
     * @param paragraphEmitter strategy used to write the content of a paragraph
     */
    protected void setParagraphEmitter(ParagraphEmitter paragraphEmitter) {
        this.paragraphEmitter = paragraphEmitter;
    }

    /**
     * @return the document stored by the last call to setPdfDoc, or null
     */
    private PDFDocument getPdfDoc() {
        return this.pdfDoc;
    }

    /**
     * @param pDFDocument document that the emit methods read pages and text from
     */
    private void setPdfDoc(PDFDocument pDFDocument) {
        this.pdfDoc = pDFDocument;
    }

    /**
     * @return true if words are written as Word elements with their bounding quads
     */
    public boolean isWithQuads() {
        return this.withQuads;
    }

    /**
     * @param z true to write words as Word elements with their bounding quads
     */
    public void setWithQuads(boolean z) {
        this.withQuads = z;
    }

    /**
     * Sets the quads flag from a mode name.
     *
     * @param str mode name; the flag becomes true only for exactly "WithQuads" and false
     *        for any other value, including null
     */
    public void setWithQuads(String str) {
        this.withQuads = str != null && str.equals(WITHQUADSELEM);
    }

    /**
     * @return true if pages are walked paragraph by paragraph
     */
    public boolean isWithParagraphs() {
        return this.withParagraphs;
    }

    /**
     * @param z true to walk pages paragraph by paragraph
     */
    public void setWithParagraphs(boolean z) {
        this.withParagraphs = z;
    }

    /**
     * Sets the paragraphs flag from a mode name.
     *
     * @param str mode name; the flag becomes true only for exactly "ParagraphsPerPage" and
     *        false for any other value, including null
     */
    public void setWithParagraphs(String str) {
        this.withParagraphs = str != null && str.equals(PARAGRAPHSPERPAGEELEM);
    }
}
