# Source code for PyPDF2.generic

# -*- coding: utf-8 -*-
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


"""
Implementation of generic PDF objects (dictionary, number, string, and so on).
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"

import codecs
import decimal
import logging
import re
import sys
import warnings
from sys import version_info

from PyPDF2._security import RC4_encrypt
from PyPDF2._utils import DEPR_MSG
from PyPDF2.constants import FilterTypes as FT
from PyPDF2.constants import StreamAttributes as SA
from PyPDF2.errors import (
    STREAM_TRUNCATED_PREMATURELY,
    PdfReadError,
    PdfReadWarning,
    PdfStreamError,
)

from . import _utils, filters
from ._utils import b_, chr_, ord_, readNonWhitespace, skipOverComment, u_

if version_info < (3, 0):
    from cStringIO import StringIO

    BytesIO = StringIO
else:
    from io import BytesIO, StringIO

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# First bytes that read_object() dispatches on.  The index of a byte inside
# this string selects the reader: "/" name, "<" hex string or dictionary,
# "[" array, "t"/"f" boolean, "(" literal string, "n" null, "%" comment.
ObjectPrefix = b_("/<[tf(n%")
# The plus and minus sign bytes.
NumberSigns = b_("+-")
# Matches the start of an indirect reference such as "12 0 R": an optional
# sign, two whitespace-separated integers, "R", then one non-letter byte.
# read_object() applies it to a 20-byte look-ahead.
IndirectPattern = re.compile(b_(r"[+-]?(\d+)\s+(\d+)\s+R[^a-zA-Z]"))


def read_object(stream, pdf):
    """
    Read the next PDF object from ``stream`` and return it.

    The first byte is peeked (and un-read) and looked up in ``ObjectPrefix``;
    its index there selects the reader that parses the object.  Bytes that are
    not in ``ObjectPrefix`` are treated as the start of a number or of an
    indirect reference.

    :param stream: seekable binary stream positioned at the object.
    :param pdf: passed through to the readers that take it.
    :raises PdfStreamError: if the stream ends inside a comment.
    """
    first = stream.read(1)
    stream.seek(-1, 1)  # un-read the peeked byte
    kind = ObjectPrefix.find(first)

    if kind == 0:
        return NameObject.read_from_stream(stream, pdf)

    if kind == 1:
        # "<" opens a hexadecimal string, "<<" opens a dictionary.
        opener = stream.read(2)
        stream.seek(-2, 1)  # un-read the peeked bytes
        if opener == b_("<<"):
            return DictionaryObject.read_from_stream(stream, pdf)
        return readHexStringFromStream(stream)

    if kind == 2:
        return ArrayObject.read_from_stream(stream, pdf)

    if kind in (3, 4):
        return BooleanObject.read_from_stream(stream)

    if kind == 5:
        return readStringFromStream(stream)

    if kind == 6:
        return NullObject.read_from_stream(stream)

    if kind == 7:
        # Comment: discard everything up to the end of the line, then parse
        # whatever follows the next run of whitespace.
        line_ends = (b_("\r"), b_("\n"))
        while first not in line_ends:
            first = stream.read(1)
            # An empty read means EOF; raise instead of looping forever.
            if len(first) <= 0:
                raise PdfStreamError("File ended unexpectedly.")
        readNonWhitespace(stream)
        stream.seek(-1, 1)
        return read_object(stream, pdf)

    # Anything else: a number object or an indirect reference.
    lookahead = stream.read(20)
    stream.seek(-len(lookahead), 1)  # un-read the look-ahead
    if IndirectPattern.match(lookahead) is not None:
        return IndirectObject.read_from_stream(stream, pdf)
    return NumberObject.read_from_stream(stream)


def readObject(stream, pdf):
    """
    Deprecated alias of :func:`read_object`.

    Emits a ``PendingDeprecationWarning`` attributed to the caller
    (``stacklevel=2``) and then returns ``read_object(stream, pdf)``.
    """
    warnings.warn(
        "readObject will be deprecated with PyPDF2 2.0.0, use read_object instead",
        PendingDeprecationWarning,
        stacklevel=2,
    )
    return read_object(stream, pdf)


class PdfObject(object):
    """Common base class of the PDF object types defined in this module."""

    def get_object(self):
        """
        Return the object this one stands for.

        The base implementation returns ``self``; ``IndirectObject`` overrides
        it to resolve the reference.
        """
        return self

    def getObject(self):
        """Deprecated alias of :meth:`get_object`; warns, then delegates."""
        message = "getObject will be removed in PyPDF2 2.0.0. Use get_object instead."
        warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
        return self.get_object()


class NullObject(PdfObject):
    """The PDF ``null`` object."""

    @staticmethod
    def read_from_stream(stream):
        """
        Consume four bytes from ``stream`` and return a new ``NullObject``.

        :raises PdfReadError: if those bytes are not ``null``.
        """
        if stream.read(4) != b_("null"):
            raise PdfReadError("Could not read Null object")
        return NullObject()

    @staticmethod
    def readFromStream(stream):
        """Deprecated alias of :meth:`read_from_stream`."""
        message = (
            "readFromStream will be removed in PyPDF2 2.0.0. "
            "Use read_from_stream instead."
        )
        warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
        return NullObject.read_from_stream(stream)

    def write_to_stream(self, stream, encryption_key):
        """Write ``null`` to ``stream``; ``encryption_key`` is not used."""
        stream.write(b_("null"))

    def writeToStream(self, stream, encryption_key):
        """Deprecated alias of :meth:`write_to_stream`."""
        message = (
            "writeToStream will be removed in PyPDF2 2.0.0. "
            "Use write_to_stream instead."
        )
        warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
        self.write_to_stream(stream, encryption_key)


class BooleanObject(PdfObject):
    """A PDF boolean; the Python value is kept in ``self.value``."""

    def __init__(self, value):
        self.value = value

    @staticmethod
    def read_from_stream(stream):
        """
        Parse ``true`` or ``false`` from ``stream``.

        Four bytes are read first; for ``fals`` one more byte is consumed
        without being checked.

        :raises PdfReadError: if the four bytes are neither ``true`` nor
            ``fals``.
        """
        head = stream.read(4)
        if head == b_("true"):
            return BooleanObject(True)
        if head == b_("fals"):
            stream.read(1)  # the fifth byte (expected to be "e") is discarded
            return BooleanObject(False)
        raise PdfReadError("Could not read Boolean object")

    @staticmethod
    def readFromStream(stream):
        """Deprecated alias of :meth:`read_from_stream`."""
        message = (
            "readFromStream will be removed in PyPDF2 2.0.0. "
            "Use read_from_stream instead."
        )
        warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
        return BooleanObject.read_from_stream(stream)

    def write_to_stream(self, stream, encryption_key):
        """Write ``true`` or ``false`` depending on the truthiness of ``value``."""
        stream.write(b_("true") if self.value else b_("false"))

    def writeToStream(self, stream, encryption_key):
        """Deprecated alias of :meth:`write_to_stream`."""
        message = (
            "writeToStream will be removed in PyPDF2 2.0.0. "
            "Use write_to_stream instead."
        )
        warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
        self.write_to_stream(stream, encryption_key)


class ArrayObject(list, PdfObject):
    """A PDF array: a ``list`` whose items are written out as PDF objects."""

    def write_to_stream(self, stream, encryption_key):
        """
        Write the array as ``[ item item ... ]``.

        Each item is written through its own ``write_to_stream`` and receives
        the same ``encryption_key``.
        """
        stream.write(b_("["))
        for data in self:
            stream.write(b_(" "))
            data.write_to_stream(stream, encryption_key)
        stream.write(b_(" ]"))

    def writeToStream(self, stream, encryption_key):
        """Deprecated alias of :meth:`write_to_stream`."""
        warnings.warn(
            "writeToStream will be removed in PyPDF2 2.0.0. "
            "Use write_to_stream instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.write_to_stream(stream, encryption_key)

    @staticmethod
    def read_from_stream(stream, pdf):
        """
        Parse an array starting at the current position of ``stream``.

        :param stream: seekable binary stream positioned at the ``[``.
        :param pdf: passed through to ``read_object`` for every element.
        :return: an ``ArrayObject`` holding the parsed elements.
        :raises PdfReadError: if the first byte is not ``[``.
        :raises PdfStreamError: if the stream ends before the closing ``]``.
        """
        arr = ArrayObject()
        if stream.read(1) != b_("["):
            raise PdfReadError("Could not read array")
        while True:
            # Skip whitespace in front of the next element (or the "]").
            tok = stream.read(1)
            while tok.isspace():
                tok = stream.read(1)
            if not tok:
                # EOF.  Seeking back one byte here would re-parse the previous
                # byte; right after "[" that means recursing into this method
                # without end.
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok == b_("]"):
                break
            # Not the end: un-read the byte and parse one element.
            stream.seek(-1, 1)
            arr.append(read_object(stream, pdf))
        return arr

    @staticmethod
    def readFromStream(stream, pdf):
        """Deprecated alias of :meth:`read_from_stream`."""
        warnings.warn(
            "readFromStream will be removed in PyPDF2 2.0.0. "
            "Use read_from_stream instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return ArrayObject.read_from_stream(stream, pdf)


class IndirectObject(PdfObject):
    """
    A reference (``<idnum> <generation> R``) to an object stored in ``pdf``.

    Two references are equal when they have the same object number, the same
    generation and point into the very same ``pdf`` object.
    """

    def __init__(self, idnum, generation, pdf):
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def get_object(self):
        """Ask ``self.pdf`` for the referenced object and resolve it in turn."""
        return self.pdf.get_object(self).get_object()

    def __repr__(self):
        return "IndirectObject(%r, %r)" % (self.idnum, self.generation)

    def __eq__(self, other):
        return (
            isinstance(other, IndirectObject)
            and self.idnum == other.idnum
            and self.generation == other.generation
            and self.pdf is other.pdf
        )

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable on Python 3.  Hash
        # exactly what __eq__ compares; id() mirrors the identity test on pdf.
        return hash((self.idnum, self.generation, id(self.pdf)))

    def write_to_stream(self, stream, encryption_key):
        """Write ``<idnum> <generation> R``; ``encryption_key`` is not used."""
        stream.write(b_("%s %s R" % (self.idnum, self.generation)))

    def writeToStream(self, stream, encryption_key):
        """Deprecated alias of :meth:`write_to_stream`."""
        warnings.warn(
            "writeToStream will be removed in PyPDF2 2.0.0. "
            "Use write_to_stream instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.write_to_stream(stream, encryption_key)

    @staticmethod
    def read_from_stream(stream, pdf):
        """
        Parse ``<idnum> <generation> R`` from ``stream``.

        :param stream: binary stream positioned at the object number.
        :param pdf: stored on the returned reference as ``pdf``.
        :raises PdfStreamError: if the stream ends inside either number.
        :raises PdfReadError: if the token after the numbers is not ``R``.
        """
        # Object number: every byte up to the first whitespace.
        idnum = b_("")
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                break
            idnum += tok
        # Generation: skip further leading whitespace, then read up to the
        # next whitespace.
        generation = b_("")
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                if not generation:
                    continue
                break
            generation += tok
        r = readNonWhitespace(stream)
        if r != b_("R"):
            raise PdfReadError(
                "Error reading indirect object reference at byte %s"
                % _utils.hexStr(stream.tell())
            )
        return IndirectObject(int(idnum), int(generation), pdf)

    @staticmethod
    def readFromStream(stream, pdf):
        """Deprecated alias of :meth:`read_from_stream`."""
        warnings.warn(
            "readFromStream will be removed in PyPDF2 2.0.0. "
            "Use read_from_stream instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return IndirectObject.read_from_stream(stream, pdf)


class FloatObject(decimal.Decimal, PdfObject):
    """A PDF real number, stored as a ``decimal.Decimal``."""

    def __new__(cls, value="0", context=None):
        """
        Build the number from ``value``.

        The value is first converted with ``_utils.str_``; if that attempt
        fails for any reason, ``str(value)`` is tried.  A value that is still
        not a valid decimal is logged and replaced by ``0``.
        """
        try:
            return decimal.Decimal.__new__(cls, _utils.str_(value), context)
        except Exception:
            try:
                return decimal.Decimal.__new__(cls, str(value))
            except decimal.InvalidOperation:
                # Malformed PDFs can contain text that is not a number at
                # all; fall back to zero.
                logger.warning("Invalid FloatObject {}".format(value))
                return decimal.Decimal.__new__(cls, "0")

    def __repr__(self):
        is_whole = self == self.to_integral()
        if not is_whole:
            # Five decimals, then drop the trailing zeros that the fixed
            # format pads with.
            return ("%.5f" % self).rstrip("0")
        return str(self.quantize(decimal.Decimal(1)))

    def as_numeric(self):
        """Return the value as a built-in ``float``, parsed from ``repr(self)``."""
        return float(b_(repr(self)))

    def write_to_stream(self, stream, encryption_key):
        """Write ``repr(self)`` to ``stream``; ``encryption_key`` is not used."""
        stream.write(b_(repr(self)))

    def writeToStream(self, stream, encryption_key):
        """Deprecated alias of :meth:`write_to_stream`."""
        message = (
            "writeToStream will be removed in PyPDF2 2.0.0. "
            "Use write_to_stream instead."
        )
        warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
        self.write_to_stream(stream, encryption_key)


class NumberObject(int, PdfObject):
    """A PDF integer, stored as an ``int``."""

    # First byte that cannot belong to a number.
    # NOTE(review): inside the character class ``+-.`` is a range from "+" to
    # ".", so "," is accepted as part of a number as well.
    NumberPattern = re.compile(b_("[^+-.0-9]"))
    ByteDot = b_(".")

    def __new__(cls, value):
        # The int() conversion sits outside the try block, so a value that
        # cannot be converted propagates its exception to the caller.
        as_int = int(value)
        try:
            return int.__new__(cls, as_int)
        except OverflowError:
            return int.__new__(cls, 0)

    def as_numeric(self):
        """Return the value as a built-in ``int``, parsed from ``repr(self)``."""
        return int(b_(repr(self)))

    def write_to_stream(self, stream, encryption_key):
        """Write ``repr(self)`` to ``stream``; ``encryption_key`` is not used."""
        stream.write(b_(repr(self)))

    def writeToStream(self, stream, encryption_key):
        """Deprecated alias of :meth:`write_to_stream`."""
        message = (
            "writeToStream will be removed in PyPDF2 2.0.0. "
            "Use write_to_stream instead."
        )
        warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
        self.write_to_stream(stream, encryption_key)

    @staticmethod
    def read_from_stream(stream):
        """
        Read a number from ``stream``.

        Bytes are collected until ``NumberPattern`` matches.  The result is a
        ``FloatObject`` when the collected bytes contain a dot and a
        ``NumberObject`` otherwise.
        """
        digits = _utils.readUntilRegex(stream, NumberObject.NumberPattern)
        if NumberObject.ByteDot in digits:
            return FloatObject(digits)
        return NumberObject(digits)

    @staticmethod
    def readFromStream(stream):
        """Deprecated alias of :meth:`read_from_stream`."""
        message = (
            "readFromStream will be removed in PyPDF2 2.0.0. "
            "Use read_from_stream instead."
        )
        warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
        return NumberObject.read_from_stream(stream)


def createStringObject(string):
    """
    Wrap ``string`` in a ``TextStringObject`` or a ``ByteStringObject``.

    Text input becomes a ``TextStringObject`` directly.  Byte input is decoded
    as UTF-16 when it starts with the UTF-16BE byte order mark and with
    ``decode_pdfdocencoding`` otherwise; the matching ``autodetect_*`` flag is
    set on the result.  Bytes that cannot be decoded are returned as a
    ``ByteStringObject``.

    :raises TypeError: if ``string`` is neither text nor bytes.
    """
    if isinstance(string, _utils.string_type):
        return TextStringObject(string)
    if not isinstance(string, _utils.bytes_type):
        raise TypeError("createStringObject should have str or unicode arg")

    try:
        if string.startswith(codecs.BOM_UTF16_BE):
            text = TextStringObject(string.decode("utf-16"))
            text.autodetect_utf16 = True
        else:
            # Trying the decode is the only way to find out whether these
            # bytes are text; some PDF strings are plain byte arrays.
            text = TextStringObject(decode_pdfdocencoding(string))
            text.autodetect_pdfdocencoding = True
        return text
    except UnicodeDecodeError:
        return ByteStringObject(string)


def readHexStringFromStream(stream):
    """
    Read a hexadecimal string (``<48656C6C6F>``) from ``stream``.

    Whitespace between digits is skipped.  A trailing single digit is padded
    with ``0``.  The decoded bytes are handed to ``createStringObject``.

    :raises PdfStreamError: if the stream ends before the closing ``>``.
    """
    stream.read(1)  # consume the opening "<"
    decoded = []
    pair = b_("")
    while True:
        digit = readNonWhitespace(stream)
        if not digit:
            raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
        if digit == b_(">"):
            break
        pair += digit
        if len(pair) == 2:
            decoded.append(chr(int(pair, base=16)))
            pair = b_("")
    # An odd number of digits: the missing last digit counts as zero.
    if len(pair) == 1:
        pair += b_("0")
    if len(pair) == 2:
        decoded.append(chr(int(pair, base=16)))
    return createStringObject(b_("".join(decoded)))


# Single-byte escapes understood inside a literal string, keyed by the byte
# that follows the backslash.  Built once instead of on every escape.
_STRING_ESCAPES = {
    b_("n"): b_("\n"),
    b_("r"): b_("\r"),
    b_("t"): b_("\t"),
    b_("b"): b_("\b"),
    b_("f"): b_("\f"),
    b_("c"): b_(r"\c"),
    b_("("): b_("("),
    b_(")"): b_(")"),
    b_("/"): b_("/"),
    b_("\\"): b_("\\"),
    b_(" "): b_(" "),
    b_("%"): b_("%"),
    b_("<"): b_("<"),
    b_(">"): b_(">"),
    b_("["): b_("["),
    b_("]"): b_("]"),
    b_("#"): b_("#"),
    b_("_"): b_("_"),
    b_("&"): b_("&"),
    b_("$"): b_("$"),
}
_OCTAL_DIGITS = b_("01234567")


def _read_string_escape(stream):
    """Decode the escape that follows a backslash; return the bytes to append."""
    tok = stream.read(1)
    if not tok:
        raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
    if tok in _STRING_ESCAPES:
        return _STRING_ESCAPES[tok]
    if tok in _OCTAL_DIGITS:
        # "The number ddd may consist of one, two, or three
        # octal digits; high-order overflow shall be ignored.
        # Three octal digits shall be used, with leading zeros
        # as needed, if the next character of the string is also
        # a digit." (PDF reference 7.3.4.2, p 16)
        for _ in range(2):
            ntok = stream.read(1)
            if ntok and ntok in _OCTAL_DIGITS:
                tok += ntok
                continue
            # Not part of the escape: hand the byte back so the caller sees
            # it (it may be the closing parenthesis).  Nothing to hand back
            # at EOF.
            if ntok:
                stream.seek(-1, 1)
            break
        # Masking drops the high-order overflow of values such as \777.
        return b_(chr(int(tok, base=8) & 0xFF))
    if tok in b_("\n\r"):
        # A backslash followed by a line break.  If it is a multi-char EOL,
        # consume the second character as well.
        tok = stream.read(1)
        if tok not in b_("\n\r"):
            stream.seek(-1, 1)
        # The escaped line break contributes nothing to the string.
        return b_("")
    # Unknown escape: warn and keep the byte without the backslash.  Bytes
    # that are not valid UTF-8 are left out of the message instead of raising
    # UnicodeDecodeError.
    msg = r"Unexpected escaped string: {}".format(tok.decode("utf8", "ignore"))
    # if.strict: PdfReadError(msg)
    logger.warning(msg)
    return tok


def readStringFromStream(stream):
    """
    Read a literal string (``(...)``) from ``stream``.

    Balanced, unescaped parentheses are kept as part of the string.  Backslash
    escapes are decoded: the single-byte escapes in ``_STRING_ESCAPES``, octal
    escapes of one to three digits, and an escaped line break, which is
    dropped.  An unknown escape is logged and the escaped byte is kept.  The
    collected bytes are handed to ``createStringObject``.

    :param stream: seekable binary stream positioned at the opening ``(``.
    :raises PdfStreamError: if the stream ends before the string is closed.
    """
    stream.read(1)  # consume the opening "("
    parens = 1
    txt = b_("")
    while True:
        tok = stream.read(1)
        if not tok:
            raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
        if tok == b_("("):
            parens += 1
        elif tok == b_(")"):
            parens -= 1
            if parens == 0:
                break
        elif tok == b_("\\"):
            tok = _read_string_escape(stream)
        txt += tok
    return createStringObject(txt)


class ByteStringObject(_utils.bytes_type, PdfObject):  # type: ignore
    """
    A string object whose text encoding could not be determined.

    The PDF format has only one string type, so data that is clearly not text
    (the encryption data under /O, for example) still arrives as a string and
    is kept here as raw bytes.
    """

    @property
    def original_bytes(self):
        """The bytes themselves; mirrors ``TextStringObject.original_bytes``."""
        return self

    def write_to_stream(self, stream, encryption_key):
        """
        Write the bytes as a hexadecimal string (``<...>``).

        When ``encryption_key`` is truthy the bytes are first passed through
        ``RC4_encrypt`` with that key.
        """
        payload = RC4_encrypt(encryption_key, self) if encryption_key else self
        stream.write(b_("<"))
        stream.write(_utils.hexencode(payload))
        stream.write(b_(">"))

    def writeToStream(self, stream, encryption_key):
        """Deprecated alias of :meth:`write_to_stream`."""
        message = (
            "writeToStream will be removed in PyPDF2 2.0.0. "
            "Use write_to_stream instead."
        )
        warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
        self.write_to_stream(stream, encryption_key)


class TextStringObject(_utils.string_type, PdfObject):  # type: ignore
    """
    A string object that has been decoded into a real unicode string.

    When read from a PDF document, the string either looked like
    PDFDocEncoding or started with a UTF-16BE byte order mark, which triggered
    UTF-16 decoding.
    """

    # Set by createStringObject() to record how the text was decoded.
    autodetect_pdfdocencoding = False
    autodetect_utf16 = False

    @property
    def original_bytes(self):
        """
        The encoded bytes this text was decoded from.

        Autodetection can turn what was meant to be a byte string into a text
        string; this property recovers the bytes in that case.
        """
        return self.get_original_bytes()

    def get_original_bytes(self):
        """
        Re-encode the text according to the recorded autodetect flag.

        :raises Exception: if neither autodetect flag is set.
        """
        # The library wants raw bytes although this object holds text, which
        # happens whenever autodetection guessed "text" wrongly.  Undo the
        # decoding that was applied.
        if self.autodetect_utf16:
            return codecs.BOM_UTF16_BE + self.encode("utf-16be")
        if self.autodetect_pdfdocencoding:
            return encode_pdfdocencoding(self)
        raise Exception("no information about original bytes")

    def write_to_stream(self, stream, encryption_key):
        """
        Write the text as a PDF string.

        PDFDocEncoding is preferred; text it cannot represent is written as
        UTF-16BE with a byte order mark.  With a truthy ``encryption_key`` the
        encoded bytes are RC4-encrypted and written through a
        ``ByteStringObject``; otherwise they are written as a literal string
        in which every byte that is not kept literally becomes a three-digit
        octal escape.
        """
        # PDFDocEncoding output is nicer to look at in the file, at the cost
        # of a failed encode attempt for text outside that encoding.
        try:
            encoded = encode_pdfdocencoding(self)
        except UnicodeEncodeError:
            encoded = codecs.BOM_UTF16_BE + self.encode("utf-16be")

        if encryption_key:
            encrypted = RC4_encrypt(encryption_key, encoded)
            ByteStringObject(encrypted).write_to_stream(stream, None)
            return

        stream.write(b_("("))
        for char in encoded:
            keep_literal = chr_(char).isalnum() or char == b_(" ")
            if keep_literal:
                stream.write(b_(chr_(char)))
            else:
                stream.write(b_("\\%03o" % ord_(char)))
        stream.write(b_(")"))

    def writeToStream(self, stream, encryption_key):
        """Deprecated alias of :meth:`write_to_stream`."""
        message = (
            "writeToStream will be removed in PyPDF2 2.0.0. "
            "Use write_to_stream instead."
        )
        warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
        self.write_to_stream(stream, encryption_key)


class NameObject(str, PdfObject):
    """A PDF name (``/Name``), stored as a ``str`` including the slash."""

    # Whitespace or any byte that ends a name token.
    delimiterPattern = re.compile(b_(r"\s+|[\(\)<>\[\]{}/%]"))
    # The byte every name has to start with.
    surfix = b_("/")

    def write_to_stream(self, stream, encryption_key):
        """Write the name to ``stream``; ``encryption_key`` is not used."""
        stream.write(b_(self))

    def writeToStream(self, stream, encryption_key):
        """Deprecated alias of :meth:`write_to_stream`."""
        warnings.warn(
            "writeToStream will be removed in PyPDF2 2.0.0. "
            "Use write_to_stream instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.write_to_stream(stream, encryption_key)

    @staticmethod
    def read_from_stream(stream, pdf):
        """
        Parse a name starting at the current position of ``stream``.

        The bytes up to the next delimiter are decoded as UTF-8 and, failing
        that, as GBK.  If neither works, ``pdf.strict`` decides: a strict
        reader gets a ``PdfReadError``; otherwise a ``PdfReadWarning`` is
        issued and the raw bytes are kept one-to-one.

        :param stream: binary stream positioned at the leading ``/``.
        :param pdf: object whose ``strict`` attribute selects error handling.
        :raises PdfReadError: if the first byte is not ``/``, or if the name
            cannot be decoded and ``pdf.strict`` is true.
        """
        name = stream.read(1)
        if name != NameObject.surfix:
            raise PdfReadError("name read error")
        name += _utils.readUntilRegex(
            stream, NameObject.delimiterPattern, ignore_eof=True
        )
        try:
            try:
                ret = name.decode("utf-8")
            except (UnicodeEncodeError, UnicodeDecodeError):
                ret = name.decode("gbk")
            return NameObject(ret)
        except (UnicodeEncodeError, UnicodeDecodeError):
            # Name objects should represent irregular characters
            # with a '#' followed by the symbol's hex number
            if pdf.strict:
                raise PdfReadError("Illegal character in Name Object")
            # Use the PdfReadWarning imported by this module rather than
            # reaching through _utils, which may not re-export it.
            warnings.warn("Illegal character in Name Object", PdfReadWarning)
            if isinstance(name, str):
                # Python 2: the bytes already are a ``str``.
                return NameObject(name)
            # Python 3: str(bytes) would produce the literal "b'/...'".
            # latin-1 maps every byte to one character, so the bytes survive.
            return NameObject(name.decode("latin-1"))

    @staticmethod
    def readFromStream(stream, pdf):
        """Deprecated alias of :meth:`read_from_stream`."""
        warnings.warn(
            "readFromStream will be removed in PyPDF2 2.0.0. "
            "Use read_from_stream instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return NameObject.read_from_stream(stream, pdf)


class DictionaryObject(dict, PdfObject):
    """
    A PDF dictionary: a ``dict`` whose keys and values are ``PdfObject``s.

    Item access through ``[]`` resolves the stored value with
    ``get_object()``; use :meth:`raw_get` to obtain the stored value itself.
    """

    def raw_get(self, key):
        """Return the stored value for ``key`` without resolving it."""
        return dict.__getitem__(self, key)

    def __setitem__(self, key, value):
        # Both key and value have to be PDF objects.
        if not isinstance(key, PdfObject):
            raise ValueError("key must be PdfObject")
        if not isinstance(value, PdfObject):
            raise ValueError("value must be PdfObject")
        return dict.__setitem__(self, key, value)

    def setdefault(self, key, value=None):
        """
        Like ``dict.setdefault``, with the same type checks as item assignment.

        The default ``value`` of ``None`` is not a ``PdfObject``, so calling
        this without an explicit value raises ``ValueError``.
        """
        if not isinstance(key, PdfObject):
            raise ValueError("key must be PdfObject")
        if not isinstance(value, PdfObject):
            raise ValueError("value must be PdfObject")
        return dict.setdefault(self, key, value)

    def __getitem__(self, key):
        # Resolve the stored value (e.g. an IndirectObject) before returning.
        return dict.__getitem__(self, key).get_object()

    @property
    def xmp_metadata(self):
        """
        Retrieve XMP (Extensible Metadata Platform) data relevant to
        this object, if available.

        Stability: Added in v1.12, will exist for all future v1.x releases.
        @return Returns a {@link #xmp.XmpInformation XmlInformation} instance
        that can be used to access XMP metadata from the document.  Can also
        return None if no metadata was found on the document root.
        """
        metadata = self.get("/Metadata", None)
        if metadata is None:
            return None
        metadata = metadata.get_object()
        # Imported here rather than at module level.
        from . import xmp

        # Wrap the metadata once and store the wrapper back under /Metadata,
        # so later calls find an XmpInformation there.
        if not isinstance(metadata, xmp.XmpInformation):
            metadata = xmp.XmpInformation(metadata)
            self[NameObject("/Metadata")] = metadata
        return metadata

    def getXmpMetadata(self):  # XmpInformation
        """
        .. deprecated:: 1.28.3
            Use :meth:`xmp_metadata` instead.
        """
        warnings.warn(
            "getXmpMetadata will be removed in PyPDF2 2.0.0. "
            "Use xmp_metadata instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.xmp_metadata

    @property
    def xmpMetadata(self):
        """
        .. deprecated:: 1.28.3
            Use :meth:`xmp_metadata` instead.
        """
        warnings.warn(
            "xmpMetadata will be removed in PyPDF2 2.0.0. Use xmp_metadata instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.xmp_metadata

    def write_to_stream(self, stream, encryption_key):
        """
        Write the dictionary as ``<<``, one ``key value`` line per entry, ``>>``.

        Keys and values are written through their own ``write_to_stream`` and
        receive the same ``encryption_key``.
        """
        stream.write(b_("<<\n"))
        for key, value in list(self.items()):
            key.write_to_stream(stream, encryption_key)
            stream.write(b_(" "))
            value.write_to_stream(stream, encryption_key)
            stream.write(b_("\n"))
        stream.write(b_(">>"))

    def writeToStream(self, stream, encryption_key):
        """Deprecated alias of :meth:`write_to_stream`."""
        warnings.warn(
            "writeToStream will be removed in PyPDF2 2.0.0. "
            "Use write_to_stream instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.write_to_stream(stream, encryption_key)

    @staticmethod
    def read_from_stream(stream, pdf):
        """
        Parse a dictionary, and the stream data following it if there is any.

        Entries are read as key/value pairs with ``read_object`` until ``>>``.
        If the ``stream`` keyword follows the dictionary, the number of bytes
        given by the length entry is read as stream data and ``endstream`` is
        expected afterwards.

        :param stream: seekable binary stream positioned at ``<<``.
        :param pdf: reader used for ``strict`` and to resolve an indirect
            length; also passed through to ``read_object``.
        :return: the result of ``StreamObject.initializeFromDictionary`` when
            stream data was read, otherwise a ``DictionaryObject``.
        :raises PdfReadError: if the input does not start with ``<<``, if a
            key is defined twice while ``pdf.strict`` is true, or if
            ``endstream`` cannot be found.
        :raises PdfStreamError: if the input ends inside the dictionary, if
            the ``stream`` keyword is not followed by a line break, or if the
            length entry is missing.
        """
        tmp = stream.read(2)
        if tmp != b_("<<"):
            raise PdfReadError(
                "Dictionary read error at byte %s: stream must begin with '<<'"
                % _utils.hexStr(stream.tell())
            )
        # Entries are collected in a plain dict first, which skips the type
        # checks of DictionaryObject.__setitem__ ("__streamdata__" below is a
        # plain string key).
        data = {}
        while True:
            tok = readNonWhitespace(stream)
            if tok == b_("\x00"):
                # NUL bytes between entries are skipped.
                continue
            elif tok == b_("%"):
                # A comment between entries: un-read the "%" and skip it.
                stream.seek(-1, 1)
                skipOverComment(stream)
                continue
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)

            if tok == b_(">"):
                # First ">" of the closing ">>"; consume the second one.
                stream.read(1)
                break
            stream.seek(-1, 1)
            key = read_object(stream, pdf)
            # Skip the whitespace between key and value, then un-read the
            # first byte of the value.
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
            value = read_object(stream, pdf)
            # NOTE(review): this is a truthiness test, not a membership test.
            # A key whose stored value is falsy (e.g. NumberObject(0)) counts
            # as absent, so a repeated definition replaces it silently.
            if not data.get(key):
                data[key] = value
            elif pdf.strict:
                # multiple definitions of key not permitted
                raise PdfReadError(
                    "Multiple definitions in dictionary at byte %s for key %s"
                    % (_utils.hexStr(stream.tell()), key)
                )
            else:
                # Non-strict: warn and keep the first definition.
                warnings.warn(
                    "Multiple definitions in dictionary at byte %s for key %s"
                    % (_utils.hexStr(stream.tell()), key),
                    PdfReadWarning,
                )

        # Remember where the dictionary ended so the look-ahead for the
        # "stream" keyword can be undone.
        pos = stream.tell()
        s = readNonWhitespace(stream)
        if s == b_("s") and stream.read(5) == b_("tream"):
            eol = stream.read(1)
            # odd PDF file output has spaces after 'stream' keyword but before EOL.
            # patch provided by Danial Sandler
            while eol == b_(" "):
                eol = stream.read(1)
            if eol not in (b_("\n"), b_("\r")):
                raise PdfStreamError("Stream data must be followed by a newline")
            if eol == b_("\r"):
                # read \n after
                if stream.read(1) != b_("\n"):
                    stream.seek(-1, 1)
            # this is a stream object, not a dictionary
            if SA.LENGTH not in data:
                raise PdfStreamError("Stream length not defined")
            length = data[SA.LENGTH]
            if isinstance(length, IndirectObject):
                # Resolve the length through the reader and restore the read
                # position afterwards (presumably because resolving moves the
                # stream; confirm against pdf.get_object).
                t = stream.tell()
                length = pdf.get_object(length)
                stream.seek(t, 0)
            data["__streamdata__"] = stream.read(length)
            e = readNonWhitespace(stream)
            ndstream = stream.read(8)
            if (e + ndstream) != b_("endstream"):
                # (sigh) - the odd PDF file has a length that is too long, so
                # we need to read backwards to find the "endstream" ending.
                # ReportLab (unknown version) generates files with this bug,
                # and Python users into PDF files tend to be our audience.
                # we need to do this to correct the streamdata and chop off
                # an extra character.
                pos = stream.tell()
                stream.seek(-10, 1)
                end = stream.read(9)
                if end == b_("endstream"):
                    # we found it by looking back one character further.
                    data["__streamdata__"] = data["__streamdata__"][:-1]
                else:
                    stream.seek(pos, 0)
                    raise PdfReadError(
                        "Unable to find 'endstream' marker after stream at byte %s."
                        % _utils.hexStr(stream.tell())
                    )
        else:
            # No stream follows: undo the look-ahead.
            stream.seek(pos, 0)
        if "__streamdata__" in data:
            return StreamObject.initializeFromDictionary(data)
        else:
            retval = DictionaryObject()
            retval.update(data)
            return retval

    @staticmethod
    def readFromStream(stream, pdf):
        """Deprecated alias of :meth:`read_from_stream`."""
        warnings.warn(
            "readFromStream will be removed in PyPDF2 2.0.0. "
            "Use read_from_stream instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return DictionaryObject.read_from_stream(stream, pdf)


class TreeObject(DictionaryObject):
    """
    Dictionary whose children are chained together as a doubly linked list.

    The tree itself stores ``/First``, ``/Last`` and ``/Count``; every child
    stores ``/Parent`` and, where applicable, ``/Next`` and ``/Prev``.
    Iterating over a ``TreeObject`` yields its children, not its keys.
    """

    def __init__(self):
        DictionaryObject.__init__(self)

    def hasChildren(self):
        """Return ``True`` if the tree has a ``/First`` entry."""
        return "/First" in self

    def __iter__(self):
        """Iterate over the children (see :meth:`children`)."""
        return self.children()

    def children(self):
        """
        Generate the children by following ``/Next`` from ``/First``.

        The walk stops after yielding the child that compares equal to
        ``self["/Last"]``.  Nothing is yielded when the tree has no children.
        """
        if not self.hasChildren():
            # A bare ``return`` ends a generator on every Python version, so
            # no explicit StopIteration (forbidden by PEP 479) is needed.
            return

        child = self["/First"]
        while True:
            yield child
            if child == self["/Last"]:
                return
            child = child["/Next"]

    def addChild(self, child, pdf):
        """Deprecated alias of :meth:`add_child`."""
        warnings.warn(
            DEPR_MSG.format("addChild", "add_child"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.add_child(child, pdf)

    def add_child(self, child, pdf):  # PdfReader
        """
        Append ``child`` as the new last child of this tree.

        :param child: Object to add; ``child.get_object()`` is the dictionary
            that receives the ``/Parent`` (and ``/Prev``) entries.
        :param pdf: Object providing ``getReference``, used to obtain the
            indirect references stored in the linked list.
        """
        child_obj = child.get_object()
        child = pdf.getReference(child_obj)
        assert isinstance(child, IndirectObject)

        if "/First" not in self:
            # First child: initialise the list head and the counter.
            self[NameObject("/First")] = child
            self[NameObject("/Count")] = NumberObject(0)
            prev = None
        else:
            prev = self["/Last"]

        self[NameObject("/Last")] = child
        self[NameObject("/Count")] = NumberObject(self[NameObject("/Count")] + 1)

        if prev:
            # Link the former last child and the new child to each other.
            prev_ref = pdf.getReference(prev)
            assert isinstance(prev_ref, IndirectObject)
            child_obj[NameObject("/Prev")] = prev_ref
            prev[NameObject("/Next")] = child

        parent_ref = pdf.getReference(self)
        assert isinstance(parent_ref, IndirectObject)
        child_obj[NameObject("/Parent")] = parent_ref

    def removeChild(self, child):
        """Deprecated alias of :meth:`remove_child`."""
        warnings.warn(
            DEPR_MSG.format("removeChild", "remove_child"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.remove_child(child)

    def remove_child(self, child):
        """
        Unlink ``child`` from this tree.

        The neighbours' ``/Next``/``/Prev`` entries and this tree's
        ``/First``, ``/Last`` and ``/Count`` are updated, and the child's own
        ``/Parent``, ``/Next`` and ``/Prev`` entries are removed.

        :param child: Object to remove; ``child.get_object()`` is compared
            against the children of this tree.
        :raises ValueError: If the child has no ``/Parent``, if its
            ``/Parent`` is not this tree, or if it is not found in the list.
        """
        child_obj = child.get_object()

        if NameObject("/Parent") not in child_obj:
            raise ValueError("Removed child does not appear to be a tree item")
        elif child_obj[NameObject("/Parent")] != self:
            raise ValueError("Removed child is not a member of this tree")

        found = False
        prev_ref = None
        prev = None
        cur_ref = self[NameObject("/First")]
        cur = cur_ref.get_object()
        last_ref = self[NameObject("/Last")]
        last = last_ref.get_object()
        while cur is not None:
            if cur == child_obj:
                if prev is None:
                    if NameObject("/Next") in cur:
                        # Removing first tree node
                        next_ref = cur[NameObject("/Next")]
                        next_obj = next_ref.get_object()
                        del next_obj[NameObject("/Prev")]
                        self[NameObject("/First")] = next_ref
                        # Re-wrap the decremented value, as add_child does,
                        # so that /Count stays a NumberObject.
                        self[NameObject("/Count")] = NumberObject(
                            self[NameObject("/Count")] - 1
                        )
                    else:
                        # Removing only tree node
                        assert self[NameObject("/Count")] == 1
                        del self[NameObject("/Count")]
                        del self[NameObject("/First")]
                        if NameObject("/Last") in self:
                            del self[NameObject("/Last")]
                else:
                    if NameObject("/Next") in cur:
                        # Removing middle tree node
                        next_ref = cur[NameObject("/Next")]
                        next_obj = next_ref.get_object()
                        next_obj[NameObject("/Prev")] = prev_ref
                        prev[NameObject("/Next")] = next_ref
                        self[NameObject("/Count")] = NumberObject(
                            self[NameObject("/Count")] - 1
                        )
                    else:
                        # Removing last tree node
                        assert cur == last
                        del prev[NameObject("/Next")]
                        self[NameObject("/Last")] = prev_ref
                        self[NameObject("/Count")] = NumberObject(
                            self[NameObject("/Count")] - 1
                        )
                found = True
                break

            # Advance to the next sibling, or stop at the end of the list.
            prev_ref = cur_ref
            prev = cur
            if NameObject("/Next") in cur:
                cur_ref = cur[NameObject("/Next")]
                cur = cur_ref.get_object()
            else:
                cur_ref = None
                cur = None

        if not found:
            raise ValueError("Removal couldn't find item in tree")

        del child_obj[NameObject("/Parent")]
        if NameObject("/Next") in child_obj:
            del child_obj[NameObject("/Next")]
        if NameObject("/Prev") in child_obj:
            del child_obj[NameObject("/Prev")]

    def emptyTree(self):
        """
        Detach every child and remove ``/Count``, ``/First`` and ``/Last``.

        Each child loses its ``/Parent``, ``/Next`` and ``/Prev`` entries.
        """
        # Snapshot the children first: children() follows /Next lazily, and
        # the loop body deletes exactly that entry from each child.
        for child in list(self.children()):
            child_obj = child.get_object()
            del child_obj[NameObject("/Parent")]
            if NameObject("/Next") in child_obj:
                del child_obj[NameObject("/Next")]
            if NameObject("/Prev") in child_obj:
                del child_obj[NameObject("/Prev")]

        if NameObject("/Count") in self:
            del self[NameObject("/Count")]
        if NameObject("/First") in self:
            del self[NameObject("/First")]
        if NameObject("/Last") in self:
            del self[NameObject("/Last")]


class StreamObject(DictionaryObject):
    """
    Dictionary with an attached block of stream data.

    ``_data`` holds the stream bytes as stored; ``decoded_self`` is a slot
    for a decoded counterpart of this stream (``None`` until one is set).
    """

    def __init__(self):
        self._data = None
        self.decoded_self = None

    @property
    def decodedSelf(self):
        """Deprecated alias of the ``decoded_self`` attribute."""
        warnings.warn(
            DEPR_MSG.format("decodedSelf", "decoded_self"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.decoded_self

    @decodedSelf.setter
    def decodedSelf(self, value):
        warnings.warn(
            DEPR_MSG.format("decodedSelf", "decoded_self"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.decoded_self = value

    def write_to_stream(self, stream, encryption_key):
        """
        Write the dictionary followed by ``stream`` ... ``endstream``.

        :param stream: Writable binary stream.
        :param encryption_key: If truthy, the data is passed through
            ``RC4_encrypt`` with this key before being written.
        """
        # /Length is only present while the dictionary is serialised; the
        # ``finally`` makes sure it is removed again even if writing fails.
        self[NameObject(SA.LENGTH)] = NumberObject(len(self._data))
        try:
            DictionaryObject.write_to_stream(self, stream, encryption_key)
        finally:
            del self[SA.LENGTH]
        stream.write(b_("\nstream\n"))
        data = self._data
        if encryption_key:
            data = RC4_encrypt(encryption_key, data)
        stream.write(data)
        stream.write(b_("\nendstream"))

    @staticmethod
    def initializeFromDictionary(data):
        """
        Build a stream object from a parsed dictionary.

        :param data: Mapping containing ``"__streamdata__"`` and the stream
            length entry; both are removed from ``data`` by this call.
        :return: An ``EncodedStreamObject`` if ``data`` has a filter entry,
            otherwise a ``DecodedStreamObject``, holding the remaining items.
        """
        if SA.FILTER in data:
            retval = EncodedStreamObject()
        else:
            retval = DecodedStreamObject()
        retval._data = data["__streamdata__"]
        del data["__streamdata__"]
        del data[SA.LENGTH]
        retval.update(data)
        return retval

    def flateEncode(self):
        """Deprecated alias of :meth:`flate_encode`."""
        warnings.warn(
            DEPR_MSG.format("flateEncode", "flate_encode"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.flate_encode()

    def flate_encode(self):
        """
        Return a new ``EncodedStreamObject`` with the data flate-compressed.

        The new stream's filter entry is ``/FlateDecode`` followed by any
        filter(s) this stream already declares.  This stream is not modified.
        """
        if SA.FILTER in self:
            f = self[SA.FILTER]
            if isinstance(f, ArrayObject):
                # Build a fresh array instead of inserting into ``f``: an
                # in-place insert would also change the array obtained from
                # this stream, whose data is not flate-encoded.
                newf = ArrayObject()
                newf.append(NameObject(FT.FLATE_DECODE))
                newf.extend(f)
                f = newf
            else:
                newf = ArrayObject()
                newf.append(NameObject("/FlateDecode"))
                newf.append(f)
                f = newf
        else:
            f = NameObject("/FlateDecode")
        retval = EncodedStreamObject()
        retval[NameObject(SA.FILTER)] = f
        retval._data = filters.FlateDecode.encode(self._data)
        return retval


class DecodedStreamObject(StreamObject):
    """Stream object whose ``_data`` is handed out and replaced as-is."""

    def get_data(self):
        """Return the stored stream data."""
        return self._data

    def set_data(self, data):
        """Replace the stored stream data with ``data``."""
        self._data = data

    def getData(self):
        """Deprecated alias of :meth:`get_data`."""
        # The warning names this alias and its replacement.
        warnings.warn(
            DEPR_MSG.format("getData", "get_data"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.get_data()

    def setData(self, data):
        """Deprecated alias of :meth:`set_data`."""
        warnings.warn(
            DEPR_MSG.format("setData", "set_data"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.set_data(data)


class EncodedStreamObject(StreamObject):
    """
    Stream object whose data is stored encoded (it carries a filter entry).

    The decoded form is built on first access by :meth:`get_data` and cached
    in ``decoded_self``.
    """

    def __init__(self):
        self.decoded_self = None

    @property
    def decodedSelf(self):
        """Deprecated alias of the ``decoded_self`` attribute."""
        warnings.warn(
            DEPR_MSG.format("decodedSelf", "decoded_self"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.decoded_self

    @decodedSelf.setter
    def decodedSelf(self, value):
        warnings.warn(
            DEPR_MSG.format("decodedSelf", "decoded_self"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.decoded_self = value

    def get_data(self):
        """
        Return the decoded stream data.

        On the first call the data is decoded with
        ``filters.decode_stream_data`` and a ``DecodedStreamObject`` carrying
        it (plus every entry except length, filter and decode parameters) is
        cached in ``decoded_self``; later calls reuse that object.
        """
        # Compare against None explicitly: the cached object is a dictionary
        # subclass, so one without any entries would be falsy and the data
        # would be decoded again on every call.
        if self.decoded_self is not None:
            # cached version of decoded object
            return self.decoded_self.get_data()

        # create decoded object
        decoded = DecodedStreamObject()
        decoded._data = filters.decode_stream_data(self)
        for key, value in self.items():
            if key not in (SA.LENGTH, SA.FILTER, SA.DECODE_PARMS):
                decoded[key] = value
        self.decoded_self = decoded
        return decoded._data

    def set_data(self, data):
        """
        Not supported for encoded streams.

        :raises PdfReadError: Always.
        """
        raise PdfReadError("Creating EncodedStreamObject is not currently supported")


class ContentStream(DecodedStreamObject):
    """
    Content stream parsed into a list of operations.

    ``operations`` is a list of ``(operands, operator)`` tuples.  Inline
    images are stored as ``(image, b"INLINE IMAGE")`` where ``image`` is the
    dictionary returned by :meth:`_readInlineImage`.  ``_data`` is a property:
    reading it serialises ``operations``, assigning to it parses the value.
    """

    def __init__(self, stream, pdf):
        """
        :param stream: A stream object, or an array of stream objects whose
            data is concatenated before parsing; ``get_object()`` is called
            on it (and on every array element) first.
        :param pdf: Stored as ``self.pdf`` and passed to ``read_object`` when
            inline image settings are parsed.
        """
        # NOTE: StreamObject.__init__ is not called here; it assigns
        # ``_data = None``, which in this class goes through the parsing setter.
        self.pdf = pdf
        self.operations = []
        # stream may be a StreamObject or an ArrayObject containing
        # multiple StreamObjects to be cat'd together.
        stream = stream.get_object()
        if isinstance(stream, ArrayObject):
            # Join once instead of growing a bytes object with ``+=``.
            data = b_("").join(b_(s.get_object().get_data()) for s in stream)
            stream = BytesIO(b_(data))
        else:
            stream = BytesIO(b_(stream.get_data()))
        self.__parseContentStream(stream)

    def __parseContentStream(self, stream):
        """
        Read operands and operators from ``stream`` and append them to
        ``self.operations``.

        Parsing stops at end of stream or at a NUL byte.
        """
        stream.seek(0, 0)
        operands = []
        while True:
            peek = readNonWhitespace(stream)
            if peek == b_("") or ord_(peek) == 0:
                break
            stream.seek(-1, 1)
            if peek.isalpha() or peek == b_("'") or peek == b_('"'):
                operator = _utils.readUntilRegex(
                    stream, NameObject.delimiterPattern, True
                )
                if operator == b_("BI"):
                    # begin inline image - a completely different parsing
                    # mechanism is required, of course... thanks buddy...
                    assert operands == []
                    ii = self._readInlineImage(stream)
                    self.operations.append((ii, b_("INLINE IMAGE")))
                else:
                    self.operations.append((operands, operator))
                    operands = []
            elif peek == b_("%"):
                # If we encounter a comment in the content stream, we have to
                # handle it here.  Typically, readObject will handle
                # encountering a comment -- but readObject assumes that
                # following the comment must be the object we're trying to
                # read.  In this case, it could be an operator instead.
                #
                # An empty read means end of stream; it has to end the loop
                # as well, otherwise a comment without a trailing line break
                # would be skipped forever.
                while peek not in (b_("\r"), b_("\n"), b_("")):
                    peek = stream.read(1)
            else:
                operands.append(read_object(stream, None))

    def _readInlineImage(self, stream):
        """
        Parse an inline image; ``stream`` is positioned just after ``BI``.

        :return: ``{"settings": DictionaryObject, "data": bytes}`` where
            ``settings`` holds the key/value pairs found before ``ID`` and
            ``data`` the bytes between ``ID`` and the terminating ``EI``.
        :raises PdfReadError: If the stream ends before ``EI`` is found.
        """
        # begin reading just after the "BI" - begin image
        # first read the dictionary of settings.
        settings = DictionaryObject()
        while True:
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
            if tok == b_("I"):
                # "ID" - begin of image data
                break
            key = read_object(stream, self.pdf)
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
            value = read_object(stream, self.pdf)
            settings[key] = value
        # left at beginning of ID
        tmp = stream.read(3)
        assert tmp[:2] == b_("ID")
        data = BytesIO()
        # Read the inline image, while checking for EI (End Image) operator.
        while True:
            # Read 8 kB at a time and check if the chunk contains the E operator.
            buf = stream.read(8192)
            # We have reached the end of the stream, but haven't found the EI operator.
            if not buf:
                raise PdfReadError("Unexpected end of stream")
            loc = buf.find(b_("E"))

            if loc == -1:
                data.write(buf)
            else:
                # Write out everything before the E.
                data.write(buf[0:loc])

                # Seek back in the stream to read the E next.
                stream.seek(loc - len(buf), 1)
                tok = stream.read(1)
                # Check for End Image
                tok2 = stream.read(1)
                if tok2 == b_("I"):
                    # Data can contain EI, so check for the Q operator.
                    tok3 = stream.read(1)
                    info = tok + tok2
                    # We need to find whitespace between EI and Q.
                    has_q_whitespace = False
                    while tok3 in _utils.WHITESPACES:
                        has_q_whitespace = True
                        info += tok3
                        tok3 = stream.read(1)
                    if tok3 == b_("Q") and has_q_whitespace:
                        # Real end of image: leave the Q for the caller.
                        stream.seek(-1, 1)
                        break
                    else:
                        # "EI" was part of the image data; keep it.
                        stream.seek(-1, 1)
                        data.write(info)
                else:
                    # A lone "E" inside the image data.
                    stream.seek(-1, 1)
                    data.write(tok)
        return {"settings": settings, "data": data.getvalue()}

    def _getData(self):
        """Serialise ``self.operations`` into bytes, one operation per line."""
        newdata = BytesIO()
        for operands, operator in self.operations:
            if operator == b_("INLINE IMAGE"):
                newdata.write(b_("BI"))
                dicttext = BytesIO()
                operands["settings"].write_to_stream(dicttext, None)
                # Drop the first and last two bytes of the serialised
                # dictionary (presumably its "<<" / ">>" delimiters).
                newdata.write(dicttext.getvalue()[2:-2])
                newdata.write(b_("ID "))
                newdata.write(operands["data"])
                newdata.write(b_("EI"))
            else:
                for op in operands:
                    op.write_to_stream(newdata, None)
                    newdata.write(b_(" "))
                newdata.write(b_(operator))
            newdata.write(b_("\n"))
        return newdata.getvalue()

    def _setData(self, value):
        """Parse ``value`` and append its operations to ``self.operations``."""
        self.__parseContentStream(BytesIO(b_(value)))

    _data = property(_getData, _setData)


class RectangleObject(ArrayObject):
    """
    This class is used to represent *page boxes* in PyPDF2. These boxes include:

        * :attr:`artbox <PyPDF2._page.PageObject.artbox>`
        * :attr:`bleedbox <PyPDF2._page.PageObject.bleedbox>`
        * :attr:`cropbox <PyPDF2._page.PageObject.cropbox>`
        * :attr:`mediabox <PyPDF2._page.PageObject.mediabox>`
        * :attr:`trimbox <PyPDF2._page.PageObject.trimbox>`

    The four entries are stored in the order left, bottom, right, top.
    """

    def __init__(self, arr):
        # must have four points
        assert len(arr) == 4
        # automatically convert arr[x] into NumberObject(arr[x]) if necessary
        ArrayObject.__init__(self, [self._ensure_is_number(x) for x in arr])

    def _ensure_is_number(self, value):
        """Return ``value`` unchanged if it is a number object, else wrap it
        in a ``FloatObject``."""
        if not isinstance(value, (NumberObject, FloatObject)):
            value = FloatObject(value)
        return value

    def ensureIsNumber(self, value):
        """Deprecated alias of :meth:`_ensure_is_number`."""
        warnings.warn(
            "ensureIsNumber will be removed in PyPDF2 2.0.0. ",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self._ensure_is_number(value)

    def __repr__(self):
        return "RectangleObject(%s)" % repr(list(self))

    @property
    def left(self):
        return self[0]

    @property
    def bottom(self):
        return self[1]

    @property
    def right(self):
        return self[2]

    @property
    def top(self):
        return self[3]

    def getLowerLeft_x(self):
        """Deprecated; use :attr:`left`."""
        warnings.warn(
            DEPR_MSG.format("getLowerLeft_x", "left"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.left

    def getLowerLeft_y(self):
        """Deprecated; use :attr:`bottom`."""
        warnings.warn(
            DEPR_MSG.format("getLowerLeft_y", "bottom"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.bottom

    def getUpperRight_x(self):
        """Deprecated; use :attr:`right`."""
        warnings.warn(
            DEPR_MSG.format("getUpperRight_x", "right"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.right

    def getUpperRight_y(self):
        """Deprecated; use :attr:`top`."""
        warnings.warn(
            DEPR_MSG.format("getUpperRight_y", "top"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.top

    def getUpperLeft_x(self):
        """Deprecated; use :attr:`left`."""
        warnings.warn(
            DEPR_MSG.format("getUpperLeft_x", "left"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.left

    def getUpperLeft_y(self):
        """Deprecated; use :attr:`top`."""
        warnings.warn(
            DEPR_MSG.format("getUpperLeft_y", "top"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.top

    def getLowerRight_x(self):
        """Deprecated; use :attr:`right`."""
        warnings.warn(
            DEPR_MSG.format("getLowerRight_x", "right"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.right

    def getLowerRight_y(self):
        """Deprecated; use :attr:`bottom`."""
        warnings.warn(
            DEPR_MSG.format("getLowerRight_y", "bottom"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.bottom

    @property
    def lower_left(self):
        """
        Property to read and modify the lower left coordinate of this box
        in (x,y) form.
        """
        return self.left, self.bottom

    @lower_left.setter
    def lower_left(self, value):
        self[0], self[1] = [self._ensure_is_number(x) for x in value]

    @property
    def lower_right(self):
        """
        Property to read and modify the lower right coordinate of this box
        in (x,y) form.
        """
        return self.right, self.bottom

    @lower_right.setter
    def lower_right(self, value):
        self[2], self[1] = [self._ensure_is_number(x) for x in value]

    @property
    def upper_left(self):
        """
        Property to read and modify the upper left coordinate of this box
        in (x,y) form.
        """
        return self.left, self.top

    @upper_left.setter
    def upper_left(self, value):
        self[0], self[3] = [self._ensure_is_number(x) for x in value]

    @property
    def upper_right(self):
        """
        Property to read and modify the upper right coordinate of this box
        in (x,y) form.
        """
        return self.right, self.top

    @upper_right.setter
    def upper_right(self, value):
        self[2], self[3] = [self._ensure_is_number(x) for x in value]

    def getLowerLeft(self):
        """Deprecated; use :attr:`lower_left`."""
        warnings.warn(
            DEPR_MSG.format("getLowerLeft", "lower_left"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.lower_left

    def getLowerRight(self):
        """Deprecated; use :attr:`lower_right`."""
        warnings.warn(
            DEPR_MSG.format("getLowerRight", "lower_right"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.lower_right

    def getUpperLeft(self):
        """Deprecated; use :attr:`upper_left`."""
        warnings.warn(
            DEPR_MSG.format("getUpperLeft", "upper_left"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.upper_left

    def getUpperRight(self):
        """Deprecated; use :attr:`upper_right`."""
        warnings.warn(
            DEPR_MSG.format("getUpperRight", "upper_right"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.upper_right

    # The deprecated setters all delegate to the matching property so the
    # conversion logic lives in one place.

    def setLowerLeft(self, value):
        """Deprecated; assign to :attr:`lower_left`."""
        warnings.warn(
            DEPR_MSG.format("setLowerLeft", "lower_left"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.lower_left = value

    def setLowerRight(self, value):
        """Deprecated; assign to :attr:`lower_right`."""
        warnings.warn(
            DEPR_MSG.format("setLowerRight", "lower_right"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.lower_right = value

    def setUpperLeft(self, value):
        """Deprecated; assign to :attr:`upper_left`."""
        warnings.warn(
            DEPR_MSG.format("setUpperLeft", "upper_left"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.upper_left = value

    def setUpperRight(self, value):
        """Deprecated; assign to :attr:`upper_right`."""
        warnings.warn(
            DEPR_MSG.format("setUpperRight", "upper_right"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.upper_right = value

    @property
    def width(self):
        """Difference between the right and left coordinates."""
        return self.right - self.left

    def getWidth(self):
        """Deprecated; use :attr:`width`."""
        # stacklevel=2 (as in the other aliases) reports the caller's line.
        warnings.warn(
            DEPR_MSG.format("getWidth", "width"),
            DeprecationWarning,
            stacklevel=2,
        )
        return self.width

    @property
    def height(self):
        """Difference between the top and bottom coordinates."""
        return self.top - self.bottom

    def getHeight(self):
        """Deprecated; use :attr:`height`."""
        warnings.warn(
            DEPR_MSG.format("getHeight", "height"),
            DeprecationWarning,
            stacklevel=2,
        )
        return self.height

    @property
    def lowerLeft(self):
        """Deprecated alias of :attr:`lower_left`."""
        warnings.warn(
            DEPR_MSG.format("lowerLeft", "lower_left"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.lower_left

    @lowerLeft.setter
    def lowerLeft(self, value):
        warnings.warn(
            DEPR_MSG.format("lowerLeft", "lower_left"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.lower_left = value

    @property
    def lowerRight(self):
        """Deprecated alias of :attr:`lower_right`."""
        warnings.warn(
            DEPR_MSG.format("lowerRight", "lower_right"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.lower_right

    @lowerRight.setter
    def lowerRight(self, value):
        warnings.warn(
            DEPR_MSG.format("lowerRight", "lower_right"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.lower_right = value

    @property
    def upperLeft(self):
        """Deprecated alias of :attr:`upper_left`."""
        warnings.warn(
            DEPR_MSG.format("upperLeft", "upper_left"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.upper_left

    @upperLeft.setter
    def upperLeft(self, value):
        warnings.warn(
            DEPR_MSG.format("upperLeft", "upper_left"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.upper_left = value

    @property
    def upperRight(self):
        """Deprecated alias of :attr:`upper_right`."""
        warnings.warn(
            DEPR_MSG.format("upperRight", "upper_right"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.upper_right

    @upperRight.setter
    def upperRight(self, value):
        warnings.warn(
            DEPR_MSG.format("upperRight", "upper_right"),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self.upper_right = value
class Field(TreeObject):
    """
    A class representing a field dictionary. This class is accessed through
    :meth:`get_fields()<PyPDF2.PdfReader.get_fields>`
    """

    def __init__(self, data):
        """
        :param data: Mapping from which the known field attributes are
            copied; attributes missing from ``data`` are skipped.
        """
        DictionaryObject.__init__(self)
        attributes = (
            "/FT",
            "/Parent",
            "/Kids",
            "/T",
            "/TU",
            "/TM",
            "/Ff",
            "/V",
            "/DV",
            "/AA",
        )
        for attr in attributes:
            try:
                self[NameObject(attr)] = data[attr]
            except KeyError:
                pass

    @property
    def field_type(self):
        """Read-only property accessing the type of this field."""
        return self.get("/FT")

    @property
    def fieldType(self):
        """
        .. deprecated:: 1.28.3

            Use :py:attr:`field_type` instead.
        """
        warnings.warn(
            "fieldType will be removed in PyPDF2 2.0.0. "
            "Use the field_type property instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.field_type

    @property
    def parent(self):
        """Read-only property accessing the parent of this field."""
        return self.get("/Parent")

    @property
    def kids(self):
        """Read-only property accessing the kids of this field."""
        return self.get("/Kids")

    @property
    def name(self):
        """Read-only property accessing the name of this field."""
        return self.get("/T")

    @property
    def alternate_name(self):
        """Read-only property accessing the alternate name of this field."""
        return self.get("/TU")

    @property
    def altName(self):
        """
        .. deprecated:: 1.28.3

            Use :py:attr:`alternate_name` instead.
        """
        warnings.warn(
            "altName will be removed in PyPDF2 2.0.0. "
            "Use the alternate_name property instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.alternate_name

    @property
    def mapping_name(self):
        """
        Read-only property accessing the mapping name of this field. This
        name is used by PyPDF2 as a key in the dictionary returned by
        :meth:`get_fields()<PyPDF2.PdfReader.get_fields>`
        """
        return self.get("/TM")

    @property
    def mappingName(self):
        """
        .. deprecated:: 1.28.3

            Use :py:attr:`mapping_name` instead.
        """
        warnings.warn(
            "mappingName will be removed in PyPDF2 2.0.0. "
            "Use the mapping_name property instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.mapping_name

    @property
    def flags(self):
        """
        Read-only property accessing the field flags, specifying various
        characteristics of the field (see Table 8.70 of the PDF 1.7 reference).
        """
        return self.get("/Ff")

    @property
    def value(self):
        """
        Read-only property accessing the value of this field. Format
        varies based on field type.
        """
        return self.get("/V")

    @property
    def default_value(self):
        """Read-only property accessing the default value of this field."""
        return self.get("/DV")

    @property
    def defaultValue(self):
        """
        .. deprecated:: 1.28.3

            Use :py:attr:`default_value` instead.
        """
        warnings.warn(
            "defaultValue will be removed in PyPDF2 2.0.0. "
            "Use the default_value property instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.default_value

    @property
    def additional_actions(self):
        """
        Read-only property accessing the additional actions dictionary.
        This dictionary defines the field's behavior in response to trigger events.
        See Section 8.5.2 of the PDF 1.7 reference.
        """
        # The looked-up value must be returned; without ``return`` the
        # property (and its deprecated alias below) always yields None.
        return self.get("/AA")

    @property
    def additionalActions(self):
        """
        .. deprecated:: 1.28.3

            Use :py:attr:`additional_actions` instead.
        """
        warnings.warn(
            "additionalActions will be removed in PyPDF2 2.0.0. "
            "Use the additional_actions property instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.additional_actions
class Destination(TreeObject):
    """
    A class representing a destination within a PDF file.
    See section 8.2.1 of the PDF 1.6 reference.

    :param str title: Title of this destination.
    :param IndirectObject page: Reference to the page of this destination.
        Should be an instance of
        :class:`IndirectObject<PyPDF2.generic.IndirectObject>`.
    :param str typ: How the destination is displayed.
    :param args: Additional arguments may be necessary depending on the type.
    :raises PdfReadError: If destination type is invalid.

    .. list-table:: Valid ``typ`` arguments (see PDF spec for details)
       :widths: 50 50

       * - /Fit
         - No additional arguments
       * - /XYZ
         - [left] [top] [zoomFactor]
       * - /FitH
         - [top]
       * - /FitV
         - [left]
       * - /FitR
         - [left] [bottom] [right] [top]
       * - /FitB
         - No additional arguments
       * - /FitBH
         - [top]
       * - /FitBV
         - [left]
    """

    def __init__(self, title, page, typ, *args):
        DictionaryObject.__init__(self)
        self[NameObject("/Title")] = title
        self[NameObject("/Page")] = page
        self[NameObject("/Type")] = typ

        from PyPDF2.constants import TypArguments as TA
        from PyPDF2.constants import TypFitArguments as TF

        # from table 8.2 of the PDF 1.7 reference.
        # ``args`` is unpacked into locals first, so a wrong argument count
        # raises before any coordinate entry is stored.
        if typ == "/XYZ":
            left, top, zoom = args
            self[NameObject(TA.LEFT)] = left
            self[NameObject(TA.TOP)] = top
            self[NameObject("/Zoom")] = zoom
        elif typ == TF.FIT_R:
            left, bottom, right, top = args
            self[NameObject(TA.LEFT)] = left
            self[NameObject(TA.BOTTOM)] = bottom
            self[NameObject(TA.RIGHT)] = right
            self[NameObject(TA.TOP)] = top
        elif typ in [TF.FIT_H, TF.FIT_BH]:
            (top,) = args
            self[NameObject(TA.TOP)] = top
        elif typ in [TF.FIT_V, TF.FIT_BV]:
            (left,) = args
            self[NameObject(TA.LEFT)] = left
        elif typ not in [TF.FIT, TF.FIT_B]:
            raise PdfReadError("Unknown Destination Type: %r" % typ)
        # /Fit and /FitB store no coordinates; ``args`` is ignored for them.

    @property
    def dest_array(self):
        """
        ``ArrayObject`` made of the raw ``/Page`` entry, the ``/Type`` entry
        and whichever of ``/Left``, ``/Bottom``, ``/Right``, ``/Top`` and
        ``/Zoom`` are present, in that order.
        """
        parts = [self.raw_get("/Page"), self["/Type"]]
        for key in ["/Left", "/Bottom", "/Right", "/Top", "/Zoom"]:
            if key in self:
                parts.append(self[key])
        return ArrayObject(parts)

    def getDestArray(self):
        """
        .. deprecated:: 1.28.3

            Use :py:attr:`dest_array` instead.
        """
        message = (
            "getDestArray will be removed in PyPDF2 2.0.0. "
            "Use the dest_array property instead."
        )
        warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
        return self.dest_array

    def write_to_stream(self, stream, encryption_key):
        """
        Write this destination as a dictionary holding ``/D`` (the
        destination array) and ``/S /GoTo``.

        :param stream: Writable binary stream.
        :param encryption_key: Passed through to the written objects.
        """
        stream.write(b_("<<\n"))

        # /D <destination array>
        NameObject("/D").write_to_stream(stream, encryption_key)
        stream.write(b_(" "))
        self.dest_array.write_to_stream(stream, encryption_key)

        # /S /GoTo
        NameObject("/S").write_to_stream(stream, encryption_key)
        stream.write(b_(" "))
        NameObject("/GoTo").write_to_stream(stream, encryption_key)

        stream.write(b_("\n"))
        stream.write(b_(">>"))

    @property
    def title(self):
        """
        Read-only property accessing the destination title.

        :rtype: str
        """
        return self.get("/Title")

    @property
    def page(self):
        """
        Read-only property accessing the ``/Page`` entry of this destination.
        """
        return self.get("/Page")

    @property
    def typ(self):
        """
        Read-only property accessing the destination type.

        :rtype: str
        """
        return self.get("/Type")

    @property
    def zoom(self):
        """
        Read-only property accessing the zoom factor.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Zoom", None)

    @property
    def left(self):
        """
        Read-only property accessing the left horizontal coordinate.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Left", None)

    @property
    def right(self):
        """
        Read-only property accessing the right horizontal coordinate.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Right", None)

    @property
    def top(self):
        """
        Read-only property accessing the top vertical coordinate.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Top", None)

    @property
    def bottom(self):
        """
        Read-only property accessing the bottom vertical coordinate.

        :rtype: int, or ``None`` if not available.
        """
        return self.get("/Bottom", None)
class Bookmark(Destination):
    """
    A destination that is serialized as a bookmark entry: its tree links
    followed by a ``/Dest`` array.
    """

    def write_to_stream(self, stream, encryption_key):
        """
        Write this bookmark to ``stream`` as a dictionary.

        Of ``/Title``, ``/Parent``, ``/First``, ``/Last``, ``/Next`` and
        ``/Prev``, the entries that are present are written first (using
        their raw stored values), followed by ``/Dest`` with the
        destination array.

        :param stream: Object with a ``write`` method accepting bytes.
        :param encryption_key: Passed through unchanged to the
            ``write_to_stream`` call of every object that is written.
        """
        stream.write(b_("<<\n"))

        link_names = ["/Title", "/Parent", "/First", "/Last", "/Next", "/Prev"]
        present_keys = [NameObject(x) for x in link_names if x in self]
        for key in present_keys:
            key.write_to_stream(stream, encryption_key)
            stream.write(b_(" "))
            # raw_get: write the stored value itself.
            self.raw_get(key).write_to_stream(stream, encryption_key)
            stream.write(b_("\n"))

        NameObject("/Dest").write_to_stream(stream, encryption_key)
        stream.write(b_(" "))
        self.dest_array.write_to_stream(stream, encryption_key)
        stream.write(b_("\n"))
        stream.write(b_(">>"))


def encode_pdfdocencoding(unicode_string):
    """
    Encode a text string to bytes using the PDFDocEncoding table.

    :param unicode_string: Text to encode, processed character by character.
    :return: The encoded byte string (empty for empty input).
    :raises UnicodeEncodeError: If a character has no entry in the reverse
        translation table.  U+0000 never has one, because the reverse table
        skips every U+0000 entry.
    """
    # Collect the single-byte pieces and join once at the end; repeated
    # concatenation copies the growing result on every character.
    pieces = []
    for c in unicode_string:
        try:
            pieces.append(b_(chr(_pdfDocEncoding_rev[c])))
        except KeyError:
            raise UnicodeEncodeError(
                "pdfdocencoding", c, -1, -1, "does not exist in translation table"
            )
    return b_("").join(pieces)


def decode_pdfdocencoding(byte_array):
    """
    Decode a byte sequence to text using the PDFDocEncoding table.

    :param byte_array: Bytes to decode; each item is converted to a table
        index with ``ord_``.
    :return: The decoded text (empty for empty input).
    :raises UnicodeDecodeError: If a byte maps to U+0000 in the table.
        U+0000 is the "no mapping" marker, so with the table below the
        bytes 0, 22, 127, 159 and 173 cannot be decoded.
    """
    no_mapping = u_("\u0000")
    # Collect the characters and join once instead of concatenating in the
    # loop.
    chars = []
    for b in byte_array:
        c = _pdfDocEncoding[ord_(b)]
        if c == no_mapping:
            raise UnicodeDecodeError(
                "pdfdocencoding",
                _utils.barray(b),
                -1,
                -1,
                "does not exist in translation table",
            )
        chars.append(c)
    return u_("").join(chars)


# PDFDocEncoding Character Set: Table D.2 of PDF Reference 1.7
# C.1 Predefined encodings sorted by character name of another PDF reference
# Some indices have '\u0000' although they should have something else:
# 22: should be '\u0017'
# NOTE(review): index 23 already holds '\u0017' and the reverse table built
# below asserts that every character occurs only once, so index 22 is
# presumably left at '\u0000' to keep that table unambiguous -- confirm.
# fmt: off
_pdfDocEncoding = (
    # 0 - 7
    u_("\u0000"), u_("\u0001"), u_("\u0002"), u_("\u0003"),
    u_("\u0004"), u_("\u0005"), u_("\u0006"), u_("\u0007"),
    # 8 - 15
    u_("\u0008"), u_("\u0009"), u_("\u000a"), u_("\u000b"),
    u_("\u000c"), u_("\u000d"), u_("\u000e"), u_("\u000f"),
    # 16 - 23
    u_("\u0010"), u_("\u0011"), u_("\u0012"), u_("\u0013"),
    u_("\u0014"), u_("\u0015"), u_("\u0000"), u_("\u0017"),
    # 24 - 31
    u_("\u02d8"), u_("\u02c7"), u_("\u02c6"), u_("\u02d9"),
    u_("\u02dd"), u_("\u02db"), u_("\u02da"), u_("\u02dc"),
    # 32 - 39
    u_("\u0020"), u_("\u0021"), u_("\u0022"), u_("\u0023"),
    u_("\u0024"), u_("\u0025"), u_("\u0026"), u_("\u0027"),
    # 40 - 47
    u_("\u0028"), u_("\u0029"), u_("\u002a"), u_("\u002b"),
    u_("\u002c"), u_("\u002d"), u_("\u002e"), u_("\u002f"),
    # 48 - 55
    u_("\u0030"), u_("\u0031"), u_("\u0032"), u_("\u0033"),
    u_("\u0034"), u_("\u0035"), u_("\u0036"), u_("\u0037"),
    # 56 - 63
    u_("\u0038"), u_("\u0039"), u_("\u003a"), u_("\u003b"),
    u_("\u003c"), u_("\u003d"), u_("\u003e"), u_("\u003f"),
    # 64 - 71
    u_("\u0040"), u_("\u0041"), u_("\u0042"), u_("\u0043"),
    u_("\u0044"), u_("\u0045"), u_("\u0046"), u_("\u0047"),
    # 72 - 79
    u_("\u0048"), u_("\u0049"), u_("\u004a"), u_("\u004b"),
    u_("\u004c"), u_("\u004d"), u_("\u004e"), u_("\u004f"),
    # 80 - 87
    u_("\u0050"), u_("\u0051"), u_("\u0052"), u_("\u0053"),
    u_("\u0054"), u_("\u0055"), u_("\u0056"), u_("\u0057"),
    # 88 - 95
    u_("\u0058"), u_("\u0059"), u_("\u005a"), u_("\u005b"),
    u_("\u005c"), u_("\u005d"), u_("\u005e"), u_("\u005f"),
    # 96 - 103
    u_("\u0060"), u_("\u0061"), u_("\u0062"), u_("\u0063"),
    u_("\u0064"), u_("\u0065"), u_("\u0066"), u_("\u0067"),
    # 104 - 111
    u_("\u0068"), u_("\u0069"), u_("\u006a"), u_("\u006b"),
    u_("\u006c"), u_("\u006d"), u_("\u006e"), u_("\u006f"),
    # 112 - 119
    u_("\u0070"), u_("\u0071"), u_("\u0072"), u_("\u0073"),
    u_("\u0074"), u_("\u0075"), u_("\u0076"), u_("\u0077"),
    # 120 - 127
    u_("\u0078"), u_("\u0079"), u_("\u007a"), u_("\u007b"),
    u_("\u007c"), u_("\u007d"), u_("\u007e"), u_("\u0000"),
    # 128 - 135
    u_("\u2022"), u_("\u2020"), u_("\u2021"), u_("\u2026"),
    u_("\u2014"), u_("\u2013"), u_("\u0192"), u_("\u2044"),
    # 136 - 143
    u_("\u2039"), u_("\u203a"), u_("\u2212"), u_("\u2030"),
    u_("\u201e"), u_("\u201c"), u_("\u201d"), u_("\u2018"),
    # 144 - 151
    u_("\u2019"), u_("\u201a"), u_("\u2122"), u_("\ufb01"),
    u_("\ufb02"), u_("\u0141"), u_("\u0152"), u_("\u0160"),
    # 152 - 159
    u_("\u0178"), u_("\u017d"), u_("\u0131"), u_("\u0142"),
    u_("\u0153"), u_("\u0161"), u_("\u017e"), u_("\u0000"),
    # 160 - 167
    u_("\u20ac"), u_("\u00a1"), u_("\u00a2"), u_("\u00a3"),
    u_("\u00a4"), u_("\u00a5"), u_("\u00a6"), u_("\u00a7"),
    # 168 - 175
    u_("\u00a8"), u_("\u00a9"), u_("\u00aa"), u_("\u00ab"),
    u_("\u00ac"), u_("\u0000"), u_("\u00ae"), u_("\u00af"),
    # 176 - 183
    u_("\u00b0"), u_("\u00b1"), u_("\u00b2"), u_("\u00b3"),
    u_("\u00b4"), u_("\u00b5"), u_("\u00b6"), u_("\u00b7"),
    # 184 - 191
    u_("\u00b8"), u_("\u00b9"), u_("\u00ba"), u_("\u00bb"),
    u_("\u00bc"), u_("\u00bd"), u_("\u00be"), u_("\u00bf"),
    # 192 - 199
    u_("\u00c0"), u_("\u00c1"), u_("\u00c2"), u_("\u00c3"),
    u_("\u00c4"), u_("\u00c5"), u_("\u00c6"), u_("\u00c7"),
    # 200 - 207
    u_("\u00c8"), u_("\u00c9"), u_("\u00ca"), u_("\u00cb"),
    u_("\u00cc"), u_("\u00cd"), u_("\u00ce"), u_("\u00cf"),
    # 208 - 215
    u_("\u00d0"), u_("\u00d1"), u_("\u00d2"), u_("\u00d3"),
    u_("\u00d4"), u_("\u00d5"), u_("\u00d6"), u_("\u00d7"),
    # 216 - 223
    u_("\u00d8"), u_("\u00d9"), u_("\u00da"), u_("\u00db"),
    u_("\u00dc"), u_("\u00dd"), u_("\u00de"), u_("\u00df"),
    # 224 - 231
    u_("\u00e0"), u_("\u00e1"), u_("\u00e2"), u_("\u00e3"),
    u_("\u00e4"), u_("\u00e5"), u_("\u00e6"), u_("\u00e7"),
    # 232 - 239
    u_("\u00e8"), u_("\u00e9"), u_("\u00ea"), u_("\u00eb"),
    u_("\u00ec"), u_("\u00ed"), u_("\u00ee"), u_("\u00ef"),
    # 240 - 247
    u_("\u00f0"), u_("\u00f1"), u_("\u00f2"), u_("\u00f3"),
    u_("\u00f4"), u_("\u00f5"), u_("\u00f6"), u_("\u00f7"),
    # 248 - 255
    u_("\u00f8"), u_("\u00f9"), u_("\u00fa"), u_("\u00fb"),
    u_("\u00fc"), u_("\u00fd"), u_("\u00fe"), u_("\u00ff"),
)
# fmt: on

assert len(_pdfDocEncoding) == 256

# Reverse table (character -> byte value) used by encode_pdfdocencoding.
# Entries equal to '\u0000' mean "no mapping" and are left out, so those
# characters cannot be encoded.
_pdfDocEncoding_rev = {}
for i, char in enumerate(_pdfDocEncoding):
    if char == u_("\u0000"):
        continue
    # Each mapped character must occur only once, otherwise the reverse
    # lookup would be ambiguous.
    assert char not in _pdfDocEncoding_rev, (
        str(char) + " at " + str(i) + " already at " + str(_pdfDocEncoding_rev[char])
    )
    _pdfDocEncoding_rev[char] = i