/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.pdf;

import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot;
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
import org.apache.pdfbox.text.PDFMarkedContentExtractor;
import org.apache.pdfbox.text.TextPosition;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.pdf.PDF2XHTML;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

public class PDFMarkedContent2XHTML
extends PDF2XHTML {
    /**
     * Maximum nesting depth accepted while walking the structure tree. The depth guard in
     * {@code recurse} compares against the literal 1000, which mirrors this value.
     */
    private static final int MAX_RECURSION_DEPTH = 1000;
    /** Element name used for structure types that have no entry in {@link #COMMON_TAG_MAP}. */
    private static final String DIV = "div";
    /**
     * Lower-cased structure type names mapped to the HTML tag emitted for them; populated
     * once by the static initializer of this class.
     */
    private static final Map<String, HtmlTag> COMMON_TAG_MAP = new HashMap<String, HtmlTag>();
    /** Mutable per-instance bookkeeping: link state, pending anchor text, consumed MCIDs. */
    private State state = new State();

    /**
     * Creates an extractor by forwarding every argument unchanged to the {@link PDF2XHTML}
     * constructor. The constructor is private; within this file instances are created only
     * by {@code process}.
     *
     * @throws IOException if the superclass constructor fails
     */
    private PDFMarkedContent2XHTML(PDDocument document, ContentHandler handler, ParseContext context, Metadata metadata, PDFParserConfig config) throws IOException {
        super(document, handler, context, metadata, config);
    }

    /**
     * Builds a {@code PDFMarkedContent2XHTML} for the document and runs its text extraction.
     *
     * <p>The {@link Writer} handed to {@code writeText} discards everything written to it.
     * An {@link IOException} whose cause is a {@link SAXException} is unwrapped and the
     * {@code SAXException} is rethrown as-is; any other {@code IOException} is wrapped in a
     * {@link TikaException}. After a successful run, the first entry of the extractor's
     * {@code exceptions} list (if any) is reported as a {@code TikaException}.
     *
     * @param pdDocument the PDF document to extract from
     * @param handler    SAX handler passed to the extractor
     * @param context    parse context passed to the extractor
     * @param metadata   metadata passed to the extractor
     * @param config     PDF parser configuration passed to the extractor
     * @throws SAXException  if extraction failed with an {@code IOException} caused by a
     *                       {@code SAXException}
     * @throws TikaException if the extractor cannot be created, extraction fails for any
     *                       other reason, or exceptions were recorded during extraction
     */
    public static void process(PDDocument pdDocument, ContentHandler handler, ParseContext context, Metadata metadata, PDFParserConfig config) throws SAXException, TikaException {
        final PDFMarkedContent2XHTML extractor;
        try {
            extractor = new PDFMarkedContent2XHTML(pdDocument, handler, context, metadata, config);
        } catch (IOException e) {
            throw new TikaException("couldn't initialize PDFMarkedContent2XHTML", e);
        }

        // Sink that silently drops every character, flush and close request.
        Writer discardingWriter = new Writer() {
            @Override
            public void write(char[] cbuf, int off, int len) {
            }

            @Override
            public void flush() {
            }

            @Override
            public void close() {
            }
        };

        try {
            extractor.writeText(pdDocument, discardingWriter);
        } catch (IOException e) {
            Throwable cause = e.getCause();
            if (cause instanceof SAXException) {
                throw (SAXException) cause;
            }
            throw new TikaException("Unable to extract PDF content", e);
        }

        if (extractor.exceptions.isEmpty()) {
            return;
        }
        throw new TikaException("Unable to extract PDF content", (Throwable) extractor.exceptions.get(0));
    }

    /**
     * Converts a structure-tree role map into a lookup from role name to {@code HtmlTag}.
     *
     * <p>Only entries whose value is a {@link String} are considered. The value is
     * lower-cased with {@link Locale#US}; if a tag with that name (and empty class) is one
     * of the values of {@code COMMON_TAG_MAP}, the role maps to that tag, otherwise it maps
     * to a {@code div} carrying the lower-cased value as its class.
     *
     * <p>The returned map is always a fresh, mutable map, including for {@code null} input:
     * {@code getTag} inserts fallback tags into the map it is given, which would fail on an
     * immutable instance such as {@code Collections.EMPTY_MAP}.
     *
     * @param roleMap role map taken from the structure tree root; may be {@code null}
     * @return a mutable map from role name to tag, never {@code null}
     */
    private static Map<String, HtmlTag> loadRoleMap(Map<String, Object> roleMap) {
        Map<String, HtmlTag> tags = new HashMap<String, HtmlTag>();
        if (roleMap == null) {
            return tags;
        }
        for (Map.Entry<String, Object> entry : roleMap.entrySet()) {
            Object value = entry.getValue();
            if (!(value instanceof String)) {
                // Non-string role targets cannot be mapped to a tag name.
                continue;
            }
            String lc = ((String) value).toLowerCase(Locale.US);
            HtmlTag plain = new HtmlTag(lc);
            HtmlTag resolved = COMMON_TAG_MAP.containsValue(plain) ? plain : new HtmlTag(DIV, lc);
            tags.put(entry.getKey(), resolved);
        }
        return tags;
    }

    /**
     * Collects, in traversal order, the object references of all page dictionaries
     * reachable from a {@code /Kids} array.
     *
     * <p>Only indirect kids ({@link COSObject}) that resolve to a {@link COSDictionary} are
     * considered. A dictionary whose {@code /Type} is {@code /Page} contributes its object
     * number and generation; any other dictionary with a {@code /Kids} entry is descended
     * into.
     *
     * @param kidsObj  value of a {@code /Kids} entry; anything other than a
     *                 {@link COSArray} (including {@code null}) is ignored
     * @param pageRefs list that receives the page references
     */
    private static void findPages(COSBase kidsObj, List<ObjectRef> pageRefs) {
        PDFMarkedContent2XHTML.findPages(kidsObj, pageRefs, new HashSet<ObjectRef>());
    }

    /**
     * Recursive worker for {@link #findPages(COSBase, List)}.
     *
     * @param kidsObj      value of the {@code /Kids} entry being expanded
     * @param pageRefs     list that receives the page references
     * @param visitedNodes references of intermediate nodes already expanded; a node is
     *                     expanded at most once so that cyclic {@code /Kids} references in
     *                     a malformed file cannot cause unbounded recursion
     */
    private static void findPages(COSBase kidsObj, List<ObjectRef> pageRefs, Set<ObjectRef> visitedNodes) {
        if (!(kidsObj instanceof COSArray)) {
            return;
        }
        for (COSBase kid : (COSArray)kidsObj) {
            if (!(kid instanceof COSObject)) {
                continue;
            }
            COSObject kidRef = (COSObject)kid;
            COSBase kidBase = kidRef.getObject();
            if (!(kidBase instanceof COSDictionary)) {
                continue;
            }
            COSDictionary dict = (COSDictionary)kidBase;
            if (COSName.PAGE.equals(dict.getCOSName(COSName.TYPE))) {
                pageRefs.add(new ObjectRef(kidRef.getKey().getNumber(), kidRef.getKey().getGeneration()));
                continue;
            }
            if (!dict.containsKey(COSName.KIDS)) {
                continue;
            }
            // Skip intermediate nodes that were already expanded. A node without a key cannot
            // be tracked and is expanded as before.
            if (kidRef.getKey() != null
                    && !visitedNodes.add(new ObjectRef(kidRef.getKey().getNumber(), kidRef.getKey().getGeneration()))) {
                continue;
            }
            PDFMarkedContent2XHTML.findPages(dict.getDictionaryObject(COSName.KIDS), pageRefs, visitedNodes);
        }
    }

    /**
     * Emits the document's marked content as XHTML by walking the structure tree rather
     * than the pages.
     *
     * <p>Steps: collect the object references of all pages and verify their count against
     * the document's page count; load the role map and the text of every marked-content
     * sequence; walk the structure tree, writing elements and text; flush any anchor text
     * still pending in {@code state}; write every marked-content text that the walk did not
     * consume in its own {@code p} element; finally call {@code startPage}/{@code endPage}
     * for each page.
     *
     * @param pageTree the document's page tree
     * @throws IOException if the number of page references does not match the page count,
     *                     if the document has no structure tree root, or wrapping any
     *                     {@link SAXException} raised while writing
     */
    @Override
    protected void processPages(PDPageTree pageTree) throws IOException {
        List<ObjectRef> pageRefs = new ArrayList<ObjectRef>();
        PDFMarkedContent2XHTML.findPages(pageTree.getCOSObject().getDictionaryObject(COSName.KIDS), pageRefs);
        int numberOfPages = this.pdDocument.getNumberOfPages();
        if (pageRefs.size() != numberOfPages) {
            throw new IOException(new TikaException("Couldn't find the right number of page refs (" + pageRefs.size() + ") for pages (" + numberOfPages + ")"));
        }
        PDStructureTreeRoot structureTreeRoot = this.pdDocument.getDocumentCatalog().getStructureTreeRoot();
        if (structureTreeRoot == null) {
            // Fail with a descriptive, catchable exception instead of a NullPointerException.
            throw new IOException(new TikaException("Document has no structure tree root; cannot extract marked content"));
        }
        Map<String, HtmlTag> roleMap = PDFMarkedContent2XHTML.loadRoleMap(structureTreeRoot.getRoleMap());
        Map<MCID, String> paragraphs = this.loadTextByMCID(pageTree, pageRefs);
        try {
            this.recurse(structureTreeRoot.getK(), null, 0, paragraphs, roleMap);

            // Anchor text that was collected but never written by a closing link element.
            if (this.state.hrefAnchorBuilder.length() > 0) {
                this.xhtml.startElement("p");
                this.writeString(this.state.hrefAnchorBuilder.toString());
                this.xhtml.endElement("p");
            }
            // Marked-content text that the structure tree walk did not reference.
            for (Map.Entry<MCID, String> leftover : paragraphs.entrySet()) {
                if (this.state.processedMCIDs.contains(leftover.getKey())) {
                    continue;
                }
                this.xhtml.startElement("p");
                this.writeString(leftover.getValue());
                this.xhtml.endElement("p");
            }
        }
        catch (SAXException e) {
            throw new IOException(e);
        }
        for (PDPage page : pageTree) {
            this.startPage(page);
            this.endPage(page);
        }
    }

    /**
     * Walks one node of the structure tree and writes the corresponding XHTML.
     *
     * <p>The node is handled according to its runtime type:
     * <ul>
     *   <li>{@link COSArray}: every element is visited at the same {@code depth};</li>
     *   <li>{@link COSObject} resolving to a {@link COSDictionary}: treated as a structure
     *       element whose {@code /K} children are visited inside the element's tag;</li>
     *   <li>{@link COSInteger}: treated as a marked-content id on the current page, whose
     *       text is written or appended to the pending link text;</li>
     *   <li>direct {@link COSDictionary}: either supplies the link URI (via {@code /A}) or
     *       is descended into via {@code /K} or {@code /Obj}.</li>
     * </ul>
     * Any other node type is ignored.
     *
     * @param kids           the node to process
     * @param currentPageRef reference of the page that integer MCIDs are resolved against;
     *                       replaced for a subtree when an element has an indirect
     *                       {@code /Pg} entry
     * @param depth          current nesting depth
     * @param paragraphs     marked-content text keyed by page reference and MCID
     * @param roleMap        role-name to tag lookup passed to {@code getTag}
     * @throws IOException  if {@code depth} exceeds 1000, or from writing
     * @throws SAXException from the XHTML handler
     */
    private void recurse(COSBase kids, ObjectRef currentPageRef, int depth, Map<MCID, String> paragraphs, Map<String, HtmlTag> roleMap) throws IOException, SAXException {
        if (depth > 1000) {
            throw new IOException(new TikaException("Exceeded max recursion depth 1000"));
        }
        if (kids instanceof COSArray) {
            // Siblings: visited in array order without increasing the depth.
            for (COSBase k : (COSArray)kids) {
                this.recurse(k, currentPageRef, depth, paragraphs, roleMap);
            }
        } else if (kids instanceof COSObject && ((COSObject)kids).getObject() instanceof COSDictionary) {
            COSBase grandkids;
            COSDictionary dict = (COSDictionary)((COSObject)kids).getObject();
            COSName type = dict.getCOSName(COSName.TYPE);
            if (COSName.OBJR.equals(type)) {
                // Object reference: visit the referenced object first; processing of this
                // dictionary then continues below (there is no early return here).
                this.recurse(dict.getDictionaryObject(COSName.OBJ), currentPageRef, depth + 1, paragraphs, roleMap);
            }
            // Structure type name from /S; empty string when absent.
            COSName n = dict.getCOSName(COSName.S);
            String name = "";
            if (n != null) {
                name = n.getName();
            }
            // An element without /K has no children, so nothing is written for it.
            if ((grandkids = dict.getItem(COSName.K)) == null) {
                return;
            }
            // An indirect /Pg entry switches the page used for MCIDs in this subtree.
            COSBase pageBase = dict.getItem(COSName.PG);
            if (pageBase instanceof COSObject) {
                currentPageRef = new ObjectRef(((COSObject)pageBase).getKey().getNumber(), ((COSObject)pageBase).getKey().getGeneration());
            }
            HtmlTag tag = this.getTag(name, roleMap);
            boolean startedLink = false;
            boolean ignoreTag = false;
            if ("link".equals(tag.clazz)) {
                // Entering a link: from here on text is buffered instead of written.
                this.state.inLink = true;
                startedLink = true;
            }
            if (!this.state.inLink) {
                // Outside a link: "span" tags and the "lbody" class get no element of their
                // own; their children are still processed.
                if ("span".equals(tag.tag)) {
                    ignoreTag = true;
                } else if ("lbody".equals(tag.clazz)) {
                    ignoreTag = true;
                }
                if (!ignoreTag) {
                    if (tag.clazz != null && !tag.clazz.isBlank()) {
                        this.xhtml.startElement(tag.tag, "class", tag.clazz);
                    } else {
                        this.xhtml.startElement(tag.tag);
                    }
                }
            }
            this.recurse(grandkids, currentPageRef, depth + 1, paragraphs, roleMap);
            if (startedLink) {
                // Emits the buffered anchor text and resets the link state.
                this.writeLink();
            }
            // The end tag is written only when no link is open, this element did not itself
            // start a link, and its start tag was not suppressed.
            if (!(this.state.inLink || startedLink || ignoreTag)) {
                this.xhtml.endElement(tag.tag);
            }
        } else if (kids instanceof COSInteger) {
            // Marked-content id, resolved against the current page reference.
            int mcidInt = ((COSInteger)kids).intValue();
            MCID mcid = new MCID(currentPageRef, mcidInt);
            if (paragraphs.containsKey(mcid)) {
                if (this.state.inLink) {
                    this.state.hrefAnchorBuilder.append(paragraphs.get(mcid));
                } else {
                    try {
                        this.writeString(paragraphs.get(mcid));
                    }
                    catch (IOException e) {
                        this.handleCatchableIOE(e);
                    }
                }
                // Remember the id so the caller can tell which text was consumed.
                this.state.processedMCIDs.add(mcid);
            }
        } else if (kids instanceof COSDictionary) {
            COSDictionary dict = (COSDictionary)kids;
            COSDictionary anchor = dict.getCOSDictionary(COSName.A);
            if (anchor != null) {
                // Dictionary with an /A entry: record its /URI for the link being built.
                this.state.uri = anchor.getString(COSName.URI);
            } else if (dict.containsKey(COSName.K)) {
                this.recurse(dict.getDictionaryObject(COSName.K), currentPageRef, depth + 1, paragraphs, roleMap);
            } else if (dict.containsKey(COSName.OBJ)) {
                this.recurse(dict.getDictionaryObject(COSName.OBJ), currentPageRef, depth + 1, paragraphs, roleMap);
            }
        }
    }

    /**
     * Writes the anchor text buffered in {@code state} and resets the link state.
     *
     * <p>With a non-blank URI the text is written as the character content of an
     * {@code a} element with that URI as {@code href}. Without one, the text is written
     * through {@code writeString}, and an {@link IOException} from that call is passed to
     * {@code handleCatchableIOE}. Afterwards the buffer is emptied, the in-link flag is
     * cleared and the URI is set to {@code null}.
     *
     * @throws SAXException from the XHTML handler
     * @throws IOException  if {@code handleCatchableIOE} rethrows
     */
    private void writeLink() throws SAXException, IOException {
        final String href = this.state.uri;
        final String anchorText = this.state.hrefAnchorBuilder.toString();
        final boolean hasHref = href != null && !href.isBlank();

        if (hasHref) {
            this.xhtml.startElement("a", "href", href);
            this.xhtml.characters(anchorText);
            this.xhtml.endElement("a");
        } else {
            try {
                this.writeString(anchorText);
            } catch (IOException e) {
                this.handleCatchableIOE(e);
            }
        }

        // Reset everything a link accumulates so the next link starts clean.
        this.state.uri = null;
        this.state.inLink = false;
        this.state.hrefAnchorBuilder.setLength(0);
    }

    /**
     * Resolves the tag to emit for a structure type name.
     *
     * <p>Lookup order: an entry for {@code name} in {@code roleMap}; then an entry for the
     * lower-cased ({@link Locale#US}) name in {@code COMMON_TAG_MAP}; otherwise a
     * {@code div} whose class is the lower-cased name. The fallback is stored in
     * {@code roleMap} under {@code name} before being returned, so {@code roleMap} must be
     * mutable.
     *
     * @param name    structure type name
     * @param roleMap role-name to tag lookup; may be modified by this call
     * @return the tag for {@code name}
     */
    private HtmlTag getTag(String name, Map<String, HtmlTag> roleMap) {
        if (!roleMap.containsKey(name)) {
            String lowerCased = name.toLowerCase(Locale.US);
            if (COMMON_TAG_MAP.containsKey(lowerCased)) {
                return COMMON_TAG_MAP.get(lowerCased);
            }
            // Unknown type: remember a div fallback for subsequent lookups.
            roleMap.put(name, new HtmlTag(DIV, lowerCased));
        }
        return roleMap.get(name);
    }

    /**
     * Extracts the text of every marked-content sequence of every page, keyed by the page's
     * object reference and the sequence's MCID.
     *
     * <p>Pages are paired with {@code pageRefs} by position. A page whose marked content
     * cannot be extracted is reported through {@code handleCatchableIOE} and skipped; the
     * position still advances, so later pages keep their own reference.
     *
     * <p>For each sequence the Unicode of its {@link TextPosition} contents is
     * concatenated. Text of sequences tagged {@code P} is trimmed. Sequences with a
     * negative MCID on the same page are joined with a newline; for non-negative MCIDs a
     * later sequence replaces an earlier one with the same key.
     *
     * @param pageTree pages to read
     * @param pageRefs object references of the pages, in the iteration order of
     *                 {@code pageTree}
     * @return text by page reference and MCID
     * @throws IOException if {@code handleCatchableIOE} rethrows
     */
    private Map<MCID, String> loadTextByMCID(PDPageTree pageTree, List<ObjectRef> pageRefs) throws IOException {
        Map<MCID, String> paragraphs = new HashMap<MCID, String>();
        int pageIndex = 0;
        for (PDPage page : pageTree) {
            // Advance for every page, including ones that fail below; otherwise all pages
            // after a failure would be keyed under the previous page's reference.
            ObjectRef pageRef = pageRefs.get(pageIndex++);
            PDFMarkedContentExtractor ex = new PDFMarkedContentExtractor();
            try {
                ex.processPage(page);
            }
            catch (IOException e) {
                this.handleCatchableIOE(e);
                continue;
            }
            for (PDMarkedContent c : ex.getMarkedContents()) {
                StringBuilder sb = new StringBuilder();
                for (Object o : c.getContents()) {
                    if (!(o instanceof TextPosition)) {
                        continue;
                    }
                    String unicode = ((TextPosition)o).getUnicode();
                    if (unicode != null) {
                        sb.append(unicode);
                    }
                }
                int mcidInt = c.getMCID();
                MCID mcid = new MCID(pageRef, mcidInt);
                String text = sb.toString();
                // Constant-first comparison tolerates a sequence without a tag.
                if ("P".equals(c.getTag())) {
                    text = text.trim();
                }
                if (mcidInt < 0 && paragraphs.containsKey(mcid)) {
                    text = paragraphs.get(mcid) + "\n" + text;
                }
                paragraphs.put(mcid, text);
            }
        }
        return paragraphs;
    }

    static {
        COMMON_TAG_MAP.put("document", new HtmlTag("body"));
        COMMON_TAG_MAP.put(DIV, new HtmlTag(DIV));
        COMMON_TAG_MAP.put("p", new HtmlTag("p"));
        COMMON_TAG_MAP.put("span", new HtmlTag("span"));
        COMMON_TAG_MAP.put("table", new HtmlTag("table"));
        COMMON_TAG_MAP.put("thead", new HtmlTag("thead"));
        COMMON_TAG_MAP.put("tbody", new HtmlTag("tbody"));
        COMMON_TAG_MAP.put("tr", new HtmlTag("tr"));
        COMMON_TAG_MAP.put("th", new HtmlTag("th"));
        COMMON_TAG_MAP.put("td", new HtmlTag("td"));
        COMMON_TAG_MAP.put("l", new HtmlTag("ul"));
        COMMON_TAG_MAP.put("li", new HtmlTag("li"));
        COMMON_TAG_MAP.put("h1", new HtmlTag("h1"));
        COMMON_TAG_MAP.put("h2", new HtmlTag("h2"));
        COMMON_TAG_MAP.put("h3", new HtmlTag("h3"));
        COMMON_TAG_MAP.put("h4", new HtmlTag("h4"));
        COMMON_TAG_MAP.put("h5", new HtmlTag("h5"));
        COMMON_TAG_MAP.put("h6", new HtmlTag("h6"));
    }

    /**
     * Key identifying one marked-content sequence: the reference of the page it is on plus
     * its marked-content id. Equality and hash code cover both components.
     */
    private static class MCID {
        private final ObjectRef objectRef;
        private final int mcid;

        public MCID(ObjectRef objectRef, int mcid) {
            this.objectRef = objectRef;
            this.mcid = mcid;
        }

        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (o == null || o.getClass() != this.getClass()) {
                return false;
            }
            MCID other = (MCID)o;
            if (this.mcid != other.mcid) {
                return false;
            }
            return Objects.equals(this.objectRef, other.objectRef);
        }

        @Override
        public int hashCode() {
            return Objects.hash(this.objectRef, this.mcid);
        }

        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("MCID{objectRef=");
            text.append(this.objectRef);
            text.append(", mcid=").append(this.mcid);
            text.append("}");
            return text.toString();
        }
    }

    /**
     * Value object holding an object id and a version number. Two instances are equal when
     * both numbers match.
     */
    private static class ObjectRef {
        private final long objId;
        private final int version;

        public ObjectRef(long objId, int version) {
            this.objId = objId;
            this.version = version;
        }

        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (o == null || o.getClass() != this.getClass()) {
                return false;
            }
            ObjectRef other = (ObjectRef)o;
            if (this.objId != other.objId) {
                return false;
            }
            return this.version == other.version;
        }

        @Override
        public int hashCode() {
            return Objects.hash(this.objId, this.version);
        }

        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("ObjectRef{objId=");
            text.append(this.objId);
            text.append(", version=").append(this.version);
            text.append("}");
            return text.toString();
        }
    }

    /**
     * An HTML element name together with the value for its {@code class} attribute.
     * The shorter constructors default the missing parts to the empty string.
     */
    private static class HtmlTag {
        private final String tag;
        private final String clazz;

        HtmlTag() {
            this("");
        }

        HtmlTag(String tag) {
            this(tag, "");
        }

        HtmlTag(String tag, String clazz) {
            this.tag = tag;
            this.clazz = clazz;
        }

        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (o == null || o.getClass() != this.getClass()) {
                return false;
            }
            HtmlTag other = (HtmlTag)o;
            return Objects.equals(this.tag, other.tag) && Objects.equals(this.clazz, other.clazz);
        }

        @Override
        public int hashCode() {
            // Objects.hashCode yields 0 for null, matching the explicit null checks this replaces.
            return 31 * Objects.hashCode(this.tag) + Objects.hashCode(this.clazz);
        }
    }

    /**
     * Mutable bookkeeping for one extraction run. Numeric and boolean fields start at their
     * Java defaults (0 / {@code false}) and {@code uri} starts as {@code null}.
     */
    private static class State {
        /** Marked-content keys recorded as processed. */
        Set<MCID> processedMCIDs;
        /** Whether a link is currently open. */
        boolean inLink;
        int tableDepth;
        /** Anchor text buffered for the current link. */
        private StringBuilder hrefAnchorBuilder;
        /** URI of the current link, or {@code null} when none has been recorded. */
        private String uri;
        private int tdDepth;

        private State() {
            this.processedMCIDs = new HashSet<MCID>();
            this.hrefAnchorBuilder = new StringBuilder();
        }
    }
}

