Spaces:
Paused
Paused
| # Copyright (c) 2006, Mathieu Fenniak | |
| # Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com> | |
| # | |
| # All rights reserved. | |
| # | |
| # Redistribution and use in source and binary forms, with or without | |
| # modification, are permitted provided that the following conditions are | |
| # met: | |
| # | |
| # * Redistributions of source code must retain the above copyright notice, | |
| # this list of conditions and the following disclaimer. | |
| # * Redistributions in binary form must reproduce the above copyright notice, | |
| # this list of conditions and the following disclaimer in the documentation | |
| # and/or other materials provided with the distribution. | |
| # * The name of the author may not be used to endorse or promote products | |
| # derived from this software without specific prior written permission. | |
| # | |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
| # POSSIBILITY OF SUCH DAMAGE. | |
| import codecs | |
| import collections | |
| import decimal | |
| import logging | |
| import random | |
| import re | |
| import struct | |
| import time | |
| import uuid | |
| import warnings | |
| from hashlib import md5 | |
| from io import BytesIO, FileIO, IOBase | |
| from pathlib import Path | |
| from types import TracebackType | |
| from typing import ( | |
| IO, | |
| Any, | |
| Callable, | |
| Deque, | |
| Dict, | |
| Iterable, | |
| List, | |
| Optional, | |
| Pattern, | |
| Tuple, | |
| Type, | |
| Union, | |
| cast, | |
| ) | |
| from ._encryption import Encryption | |
| from ._page import PageObject, _VirtualList | |
| from ._reader import PdfReader | |
| from ._security import _alg33, _alg34, _alg35 | |
| from ._utils import ( | |
| StrByteType, | |
| StreamType, | |
| _get_max_pdf_version_header, | |
| b_, | |
| deprecate_with_replacement, | |
| deprecation_bookmark, | |
| deprecation_with_replacement, | |
| logger_warning, | |
| ) | |
| from .constants import AnnotationDictionaryAttributes | |
| from .constants import CatalogAttributes as CA | |
| from .constants import CatalogDictionary | |
| from .constants import Core as CO | |
| from .constants import EncryptionDictAttributes as ED | |
| from .constants import ( | |
| FieldDictionaryAttributes, | |
| FieldFlag, | |
| FileSpecificationDictionaryEntries, | |
| GoToActionArguments, | |
| InteractiveFormDictEntries, | |
| ) | |
| from .constants import PageAttributes as PG | |
| from .constants import PagesAttributes as PA | |
| from .constants import StreamAttributes as SA | |
| from .constants import TrailerKeys as TK | |
| from .constants import TypFitArguments, UserAccessPermissions | |
| from .generic import ( | |
| PAGE_FIT, | |
| AnnotationBuilder, | |
| ArrayObject, | |
| BooleanObject, | |
| ByteStringObject, | |
| ContentStream, | |
| DecodedStreamObject, | |
| Destination, | |
| DictionaryObject, | |
| Fit, | |
| FloatObject, | |
| IndirectObject, | |
| NameObject, | |
| NullObject, | |
| NumberObject, | |
| PdfObject, | |
| RectangleObject, | |
| StreamObject, | |
| TextStringObject, | |
| TreeObject, | |
| create_string_object, | |
| hex_to_rgb, | |
| ) | |
| from .pagerange import PageRange, PageRangeSpec | |
| from .types import ( | |
| BorderArrayType, | |
| FitType, | |
| LayoutType, | |
| OutlineItemType, | |
| OutlineType, | |
| PagemodeType, | |
| ZoomArgType, | |
| ) | |
# Logger named after this module.
logger = logging.getLogger(__name__)

# Default ``flags`` of PdfWriter.update_page_form_field_values: no flag bit set.
OPTIONAL_READ_WRITE_FIELD = FieldFlag(0)

# Default ``permissions_flag`` of PdfWriter.encrypt: every bit of a 31-bit
# value set except the two lowest ones (same value as (2**31 - 1) - 3).
ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions(0x7FFFFFFF & ~0b11)
| class PdfWriter: | |
| """ | |
| This class supports writing PDF files out, given pages produced by another | |
| class (typically :class:`PdfReader<PyPDF2.PdfReader>`). | |
| """ | |
def __init__(self, fileobj: StrByteType = "") -> None:
    """
    Create an empty document: page tree root, info dictionary and catalog.

    :param fileobj: Kept on the writer as ``fileobj``. When it is truthy,
        ``__exit__`` writes the document to it.
    """
    self._header = b"%PDF-1.3"
    self._objects: List[PdfObject] = []  # array of indirect objects
    self._idnum_hash: Dict[bytes, IndirectObject] = {}
    self._id_translated: Dict[int, Dict[int, int]] = {}

    # Root node of the page tree; it starts without any kid.
    page_tree = DictionaryObject()
    page_tree[NameObject(PA.TYPE)] = NameObject("/Pages")
    page_tree[NameObject(PA.COUNT)] = NumberObject(0)
    page_tree[NameObject(PA.KIDS)] = ArrayObject()
    self._pages = self._add_object(page_tree)

    # Info dictionary; the producer string is UTF-16BE text prefixed by a BOM.
    producer = codecs.BOM_UTF16_BE + "PyPDF2".encode("utf-16be")
    info_dict = DictionaryObject()
    info_dict[NameObject("/Producer")] = create_string_object(producer)
    self._info = self._add_object(info_dict)

    # Catalog (root object) pointing at the page tree.
    catalog = DictionaryObject()
    catalog[NameObject(PA.TYPE)] = NameObject(CO.CATALOG)
    catalog[NameObject(CO.PAGES)] = self._pages
    self._root_object = catalog
    self._root = self._add_object(catalog)

    self.fileobj = fileobj
    self.with_as_usage = False
def __enter__(self) -> "PdfWriter":
    """Record that the writer is used through ``with`` and hand it back."""
    self.with_as_usage = True
    return self
def __exit__(
    self,
    exc_type: Optional[Type[BaseException]],
    exc: Optional[BaseException],
    traceback: Optional[TracebackType],
) -> None:
    """
    Write the document to ``fileobj`` when leaving the ``with`` block.

    Nothing is written when ``fileobj`` is falsy. The three exception
    arguments are not inspected.
    """
    target = self.fileobj
    if not target:
        return
    self.write(target)
@property
def pdf_header(self) -> bytes:
    """
    Header of the PDF document that is written.

    This should be something like b'%PDF-1.5'. It is recommended to set the
    lowest version that supports all features which are used within the
    PDF file.
    """
    return self._header

# Getter and setter must be bound as one property: without the decorators
# the second ``def`` replaces the first, although the rest of the class
# reads and assigns ``self.pdf_header`` as an attribute.
@pdf_header.setter
def pdf_header(self, new_header: bytes) -> None:
    # The value is stored as given; no validation is done here.
    self._header = new_header
def _add_object(self, obj: PdfObject) -> IndirectObject:
    """
    Register ``obj`` with this writer and return its indirect reference.

    An object whose ``indirect_reference`` already points to this writer is
    not registered again; its existing reference is returned.
    """
    if hasattr(obj, "indirect_reference"):
        known_ref = obj.indirect_reference  # type: ignore
        if known_ref.pdf == self:
            return known_ref
    self._objects.append(obj)
    # Object numbers are 1-based: the new number is the list length.
    new_ref = IndirectObject(len(self._objects), 0, self)
    obj.indirect_reference = new_ref
    return new_ref
def get_object(
    self,
    indirect_reference: Union[None, int, IndirectObject] = None,
    ido: Optional[IndirectObject] = None,
) -> PdfObject:
    """
    Return the object registered under an object number or a reference.

    :param indirect_reference: Either a 1-based object number or an
        indirect reference whose ``pdf`` is this writer.
    :param ido: Deprecated alias of ``indirect_reference``.
    :return: The matching entry of the writer's object list.
    :raises ValueError: If both parameters are given, or if the reference
        belongs to another document.
    """
    if ido is not None:  # deprecated
        if indirect_reference is not None:
            raise ValueError(
                "Please only set 'indirect_reference'. The 'ido' argument is deprecated."
            )
        indirect_reference = ido
        # stacklevel=2 attributes the warning to the caller's line.
        warnings.warn(
            "The parameter 'ido' is deprecated and will be removed in PyPDF2 4.0.0.",
            DeprecationWarning,
            stacklevel=2,
        )
    assert (
        indirect_reference is not None
    )  # the None value is only there to keep the deprecated name
    if isinstance(indirect_reference, int):
        return self._objects[indirect_reference - 1]
    if indirect_reference.pdf != self:
        raise ValueError("pdf must be self")
    return self._objects[indirect_reference.idnum - 1]  # type: ignore
def getObject(
    self, ido: Union[int, IndirectObject]
) -> PdfObject:  # pragma: no cover
    """
    Legacy camelCase spelling of :meth:`get_object`.

    .. deprecated:: 1.28.0
        Use :meth:`get_object` instead.
    """
    deprecation_with_replacement("getObject", "get_object", "3.0.0")
    resolved = self.get_object(ido)
    return resolved
def _add_page(
    self,
    page: PageObject,
    action: Callable[[Any, IndirectObject], None],
    excluded_keys: Iterable[str] = (),
) -> PageObject:
    """
    Clone ``page`` into this writer and hook the clone into the page tree.

    :param page: Page to copy; its type entry must be the page type.
    :param action: Called with the kids array of the page tree and the
        indirect reference of the clone; it decides where the reference
        is placed.
    :param excluded_keys: Keys handed to ``clone`` as exclusions, on top of
        the parent key and ``/StructParents``.
    :return: The cloned page.
    """
    assert cast(str, page[PA.TYPE]) == CO.PAGE
    source_page = page
    skip_keys = [*excluded_keys, PA.PARENT, "/StructParents"]
    # acrobat does not accept to have two indirect ref pointing on the same page;
    # therefore in order to add easily multiple copies of the same page, we need to create a new
    # dictionary for the page, however the objects below (including content) is not duplicated
    try:  # delete an already existing page
        source_ref = source_page.indirect_reference
        del self._id_translated[id(source_ref.pdf)][source_ref.idnum]  # type: ignore
    except Exception:
        pass
    cloned = cast("PageObject", source_page.clone(self, False, skip_keys))
    if source_page.pdf is not None:
        source_header = source_page.pdf.pdf_header
        if isinstance(source_header, str):
            source_header = source_header.encode()  # type: ignore
        # Keep whichever of the two headers _get_max_pdf_version_header picks.
        self.pdf_header = _get_max_pdf_version_header(self.pdf_header, source_header)  # type: ignore
    cloned[NameObject(PA.PARENT)] = self._pages
    page_tree = cast(DictionaryObject, self.get_object(self._pages))
    assert cloned.indirect_reference is not None
    action(page_tree[PA.KIDS], cloned.indirect_reference)
    previous_count = cast(int, page_tree[PA.COUNT])
    page_tree[NameObject(PA.COUNT)] = NumberObject(previous_count + 1)
    return cloned
def set_need_appearances_writer(self) -> None:
    """
    Set the NeedAppearances entry of the catalog's AcroForm dictionary to true.

    When the catalog has no AcroForm entry, an empty dictionary is
    registered with the writer and referenced from the catalog first.
    Any exception raised on the way is logged, not propagated.
    """
    # See 12.7.2 and 7.7.2 for more information:
    # http://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
    try:
        # get the AcroForm tree
        if CatalogDictionary.ACRO_FORM not in self._root_object:
            # Register a dedicated dictionary. A reference numbered
            # len(self._objects) without adding an object would alias
            # whichever object happened to be registered last.
            self._root_object[
                NameObject(CatalogDictionary.ACRO_FORM)
            ] = self._add_object(DictionaryObject())
        need_appearances = NameObject(InteractiveFormDictEntries.NeedAppearances)
        self._root_object[CatalogDictionary.ACRO_FORM][need_appearances] = BooleanObject(True)  # type: ignore
    except Exception as exc:
        # The placeholder is required: logging formats the message with its
        # arguments, and an argument without a placeholder is a format error.
        logger.error("set_need_appearances_writer() catch : %s", repr(exc))
def add_page(
    self,
    page: PageObject,
    excluded_keys: Iterable[str] = (),
) -> PageObject:
    """
    Append a page at the end of this PDF file.

    The page is usually acquired from a :class:`PdfReader<PyPDF2.PdfReader>`
    instance.

    :param PageObject page: The page to add to the document. Should be
        an instance of :class:`PageObject<PyPDF2._page.PageObject>`
    :param excluded_keys: Forwarded unchanged to ``_add_page``; meant for
        advanced usage.
    :return: Whatever ``_add_page`` returns for this page.
    """
    append_to_kids = list.append
    return self._add_page(page, append_to_kids, excluded_keys)
def addPage(
    self,
    page: PageObject,
    excluded_keys: Iterable[str] = (),
) -> PageObject:  # pragma: no cover
    """
    Legacy camelCase spelling of :meth:`add_page`.

    .. deprecated:: 1.28.0
        Use :meth:`add_page` instead.
    """
    deprecation_with_replacement("addPage", "add_page", "3.0.0")
    added = self.add_page(page, excluded_keys)
    return added
def insert_page(
    self,
    page: PageObject,
    index: int = 0,
    excluded_keys: Iterable[str] = (),
) -> PageObject:
    """
    Insert a page in this PDF file. The page is usually acquired from a
    :class:`PdfReader<PyPDF2.PdfReader>` instance.

    :param PageObject page: The page to add to the document.
    :param int index: Position at which the page will be inserted.
    :param excluded_keys: Forwarded unchanged to ``_add_page``, exactly as
        :meth:`add_page` does.
    :return: Whatever ``_add_page`` returns for this page.
    """

    def insert_into_kids(kids: Any, page_ref: Any) -> None:
        kids.insert(index, page_ref)

    # excluded_keys must be passed on; dropping it silently ignored the
    # caller's exclusions.
    return self._add_page(page, insert_into_kids, excluded_keys)
def insertPage(
    self,
    page: PageObject,
    index: int = 0,
    excluded_keys: Iterable[str] = (),
) -> PageObject:  # pragma: no cover
    """
    Legacy camelCase spelling of :meth:`insert_page`.

    .. deprecated:: 1.28.0
        Use :meth:`insert_page` instead.
    """
    deprecation_with_replacement("insertPage", "insert_page", "3.0.0")
    inserted = self.insert_page(page, index, excluded_keys)
    return inserted
def get_page(
    self, page_number: Optional[int] = None, pageNumber: Optional[int] = None
) -> PageObject:
    """
    Retrieve a page by number from this PDF file.

    :param int page_number: The page number to retrieve
        (pages begin at zero)
    :param int pageNumber: Deprecated spelling of ``page_number``.
    :return: the page at the index given by *page_number*
    :raises ValueError: If both spellings are given, or none of them.
    """
    if pageNumber is not None:  # pragma: no cover
        if page_number is not None:
            raise ValueError("Please only use the page_number parameter")
        deprecate_with_replacement(
            "get_page(pageNumber)", "get_page(page_number)", "4.0.0"
        )
        page_number = pageNumber
    # At this point page_number is only still None when neither spelling
    # was supplied.
    if page_number is None:  # pragma: no cover
        raise ValueError("Please specify the page_number")
    page_tree = cast(Dict[str, Any], self.get_object(self._pages))
    kids = page_tree[PA.KIDS]
    # TODO: crude hack
    return cast(PageObject, kids[page_number].get_object())
def getPage(self, pageNumber: int) -> PageObject:  # pragma: no cover
    """
    Legacy camelCase accessor for a single page.

    .. deprecated:: 1.28.0
        Use :code:`writer.pages[page_number]` instead.
    """
    deprecation_with_replacement("getPage", "writer.pages[page_number]", "3.0.0")
    found = self.get_page(pageNumber)
    return found
def _get_num_pages(self) -> int:
    """Return the ``/Count`` entry of the page tree root as an ``int``."""
    page_tree = cast(Dict[str, Any], self.get_object(self._pages))
    count_entry = page_tree[NameObject("/Count")]
    return int(count_entry)
def getNumPages(self) -> int:  # pragma: no cover
    """
    Legacy camelCase accessor for the page count.

    .. deprecated:: 1.28.0
        Use :code:`len(writer.pages)` instead.
    """
    deprecation_with_replacement("getNumPages", "len(writer.pages)", "3.0.0")
    page_count = self._get_num_pages()
    return page_count
# Must be a property: other methods of the class index it directly
# (``self.pages[index]``) and the docstring describes it as one.
@property
def pages(self) -> List[PageObject]:
    """
    Property that emulates a list of :class:`PageObject<PyPDF2._page.PageObject>`.

    The returned object is built from the writer's page counter and its
    ``get_page`` method.
    """
    return _VirtualList(self._get_num_pages, self.get_page)  # type: ignore
def add_blank_page(
    self, width: Optional[float] = None, height: Optional[float] = None
) -> PageObject:
    """
    Append a blank page to this PDF file and returns it. If no page size
    is specified, use the size of the last page.

    :param float width: The width of the new page expressed in default user
        space units.
    :param float height: The height of the new page expressed in default
        user space units.
    :return: the newly appended page
    :raises PageSizeNotDefinedError: if width and height are not defined
        and previous page does not exist.
    """
    blank = PageObject.create_blank_page(self, width, height)
    # add_page hands back the page object that was actually linked into the
    # page tree (a clone made by _add_page); return that one rather than
    # the template it was cloned from.
    return self.add_page(blank)
def addBlankPage(
    self, width: Optional[float] = None, height: Optional[float] = None
) -> PageObject:  # pragma: no cover
    """
    Legacy camelCase spelling of :meth:`add_blank_page`.

    .. deprecated:: 1.28.0
        Use :meth:`add_blank_page` instead.
    """
    deprecation_with_replacement("addBlankPage", "add_blank_page", "3.0.0")
    blank = self.add_blank_page(width, height)
    return blank
def insert_blank_page(
    self,
    width: Optional[decimal.Decimal] = None,
    height: Optional[decimal.Decimal] = None,
    index: int = 0,
) -> PageObject:
    """
    Insert a blank page to this PDF file and returns it. If the page size
    is not fully specified and a page already exists at ``index``, the size
    of that page is used.

    :param float width: The width of the new page expressed in default user
        space units.
    :param float height: The height of the new page expressed in default
        user space units.
    :param int index: Position to add the page.
    :return: the newly inserted page
    :raises PageSizeNotDefinedError: if width and height are not defined
        and previous page does not exist.
    """
    num_pages = self._get_num_pages()
    # The parentheses matter: ``a or b and c`` groups as ``a or (b and c)``,
    # which skipped the bounds check whenever width was None and then failed
    # with an IndexError on an empty document.
    if (width is None or height is None) and -num_pages <= index < num_pages:
        oldpage = self.pages[index]
        width = oldpage.mediabox.width
        height = oldpage.mediabox.height
    blank = PageObject.create_blank_page(self, width, height)
    # Return the page that insert_page actually linked into the page tree.
    return self.insert_page(blank, index)
def insertBlankPage(
    self,
    width: Optional[decimal.Decimal] = None,
    height: Optional[decimal.Decimal] = None,
    index: int = 0,
) -> PageObject:  # pragma: no cover
    """
    Legacy camelCase spelling of :meth:`insert_blank_page`.

    .. deprecated:: 1.28.0
        Use :meth:`insert_blank_page` instead.
    """
    deprecation_with_replacement("insertBlankPage", "insert_blank_page", "3.0.0")
    blank = self.insert_blank_page(width, height, index)
    return blank
@property
def open_destination(
    self,
) -> Union[None, Destination, TextStringObject, ByteStringObject]:
    """
    Property to access the opening destination ("/OpenAction" entry in the
    PDF catalog).

    Reading returns ``None`` when the entry does not exist or holds neither
    a string nor an array. A string entry is returned as a string object,
    an array entry as a :class:`Destination` named "OpenAction".

    The property can be set to a Destination, a page, a string (named
    destination) or ``None`` (which removes "/OpenAction").

    :raises Exception: When an array entry cannot be turned into a
        Destination; the original error is chained as the cause.
    """
    if "/OpenAction" not in self._root_object:
        return None
    oa = self._root_object["/OpenAction"]
    if isinstance(oa, (str, bytes)):
        return create_string_object(str(oa))
    elif isinstance(oa, ArrayObject):
        try:
            # First two items: target page and fit type; the rest are the
            # arguments of the fit.
            page, typ = oa[0:2]  # type: ignore
            array = oa[2:]
            fit = Fit(typ, tuple(array))
            return Destination("OpenAction", page, fit)
        except Exception as exc:
            raise Exception(f"Invalid Destination {oa}: {exc}") from exc
    else:
        return None

# Getter and setter must form one property; as two plain methods the
# second definition would simply replace the first.
@open_destination.setter
def open_destination(self, dest: Union[None, str, Destination, PageObject]) -> None:
    if dest is None:
        try:
            del self._root_object["/OpenAction"]
        except KeyError:
            pass
    elif isinstance(dest, str):
        self._root_object[NameObject("/OpenAction")] = TextStringObject(dest)
    elif isinstance(dest, Destination):
        self._root_object[NameObject("/OpenAction")] = dest.dest_array
    elif isinstance(dest, PageObject):
        # A page without an indirect reference is stored as a null target.
        self._root_object[NameObject("/OpenAction")] = Destination(
            "Opening",
            dest.indirect_reference
            if dest.indirect_reference is not None
            else NullObject(),
            PAGE_FIT,
        ).dest_array
def add_js(self, javascript: str) -> None:
    """
    Add Javascript which will launch upon opening this PDF.

    Each call registers one more script under the catalog's
    ``/Names`` -> ``/JavaScript`` -> ``/Names`` array, keyed by a random name.

    :param str javascript: Your Javascript.

    >>> output.add_js("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
    # Example: This will launch the print window when the PDF is opened.
    """
    # Names / JavaScript is used so that several scripts can be added.
    catalog = self._root_object
    if "/Names" not in catalog:
        catalog[NameObject(CA.NAMES)] = DictionaryObject()
    name_trees = cast(DictionaryObject, catalog[CA.NAMES])
    if "/JavaScript" not in name_trees:
        name_trees[NameObject("/JavaScript")] = DictionaryObject(
            {NameObject("/Names"): ArrayObject()}
        )
    js_tree = cast(DictionaryObject, name_trees["/JavaScript"])
    script_names = cast(ArrayObject, js_tree["/Names"])

    action = DictionaryObject()
    action[NameObject(PA.TYPE)] = NameObject("/Action")
    action[NameObject("/S")] = NameObject("/JavaScript")
    action[NameObject("/JS")] = TextStringObject(f"{javascript}")

    # The script needs a name in the pdf file, but it can be anything.
    script_names.append(create_string_object(str(uuid.uuid4())))
    script_names.append(self._add_object(action))
def addJS(self, javascript: str) -> None:  # pragma: no cover
    """
    Legacy camelCase spelling of :meth:`add_js`.

    .. deprecated:: 1.28.0
        Use :meth:`add_js` instead.
    """
    deprecation_with_replacement("addJS", "add_js", "3.0.0")
    outcome = self.add_js(javascript)
    return outcome
def add_attachment(self, filename: str, data: Union[str, bytes]) -> None:
    """
    Embed a file inside the PDF.

    The file is appended to the catalog's ``/Names`` -> ``/EmbeddedFiles``
    -> ``/Names`` array. Existing entries of the catalog's ``/Names``
    dictionary (earlier attachments, JavaScript) are kept.

    :param str filename: The filename to display.
    :param str data: The data in the file.

    Reference:
    https://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
    Section 7.11.3
    """
    # We need three entries:
    # * The file's data
    # * The /Filespec entry
    # * The file's name, which goes in the Catalog

    # The entry for the file
    # Sample:
    # 8 0 obj
    # <<
    #  /Length 12
    #  /Type /EmbeddedFile
    # >>
    # stream
    # Hello world!
    # endstream
    # endobj
    file_entry = DecodedStreamObject()
    file_entry.set_data(data)
    file_entry.update({NameObject(PA.TYPE): NameObject("/EmbeddedFile")})

    # The Filespec entry
    # Sample:
    # 7 0 obj
    # <<
    #  /Type /Filespec
    #  /F (hello.txt)
    #  /EF << /F 8 0 R >>
    # >>
    ef_entry = DictionaryObject()
    ef_entry.update({NameObject("/F"): file_entry})

    filespec = DictionaryObject()
    filespec.update(
        {
            NameObject(PA.TYPE): NameObject("/Filespec"),
            NameObject(FileSpecificationDictionaryEntries.F): create_string_object(
                filename
            ),  # Perhaps also try TextStringObject
            NameObject(FileSpecificationDictionaryEntries.EF): ef_entry,
        }
    )

    # Then hook the Filespec into the catalog.
    # Sample:
    # 1 0 obj
    # <<
    #  /Type /Catalog
    #  /Outlines 2 0 R
    #  /Pages 3 0 R
    #  /Names << /EmbeddedFiles << /Names [(hello.txt) 7 0 R] >> >>
    # >>
    # endobj
    #
    # Extend what is already there instead of replacing the catalog's
    # /Names entry: replacing it dropped every earlier attachment and any
    # /JavaScript entry stored in the same dictionary.
    if CA.NAMES not in self._root_object:
        self._root_object[NameObject(CA.NAMES)] = DictionaryObject()
    names = cast(DictionaryObject, self._root_object[CA.NAMES])
    if "/EmbeddedFiles" not in names:
        names[NameObject("/EmbeddedFiles")] = DictionaryObject(
            {NameObject(CA.NAMES): ArrayObject()}
        )
    embedded_files = cast(DictionaryObject, names["/EmbeddedFiles"])
    # NOTE(review): an existing /EmbeddedFiles tree organised with /Kids is
    # not walked; the pair is then stored in a fresh /Names array.
    if CA.NAMES not in embedded_files:
        embedded_files[NameObject(CA.NAMES)] = ArrayObject()
    cast(ArrayObject, embedded_files[CA.NAMES]).extend(
        [create_string_object(filename), filespec]
    )
def addAttachment(
    self, fname: str, fdata: Union[str, bytes]
) -> None:  # pragma: no cover
    """
    Legacy camelCase spelling of :meth:`add_attachment`.

    .. deprecated:: 1.28.0
        Use :meth:`add_attachment` instead.
    """
    deprecation_with_replacement("addAttachment", "add_attachment", "3.0.0")
    outcome = self.add_attachment(fname, fdata)
    return outcome
def append_pages_from_reader(
    self,
    reader: PdfReader,
    after_page_append: Optional[Callable[[PageObject], None]] = None,
) -> None:
    """
    Copy pages from reader to writer. Includes an optional callback parameter
    which is invoked after pages are appended to the writer.

    :param PdfReader reader: The reader whose pages are added, in order,
        through :meth:`add_page`.
    :param Callable[[PageObject], None] after_page_append:
        Callback function that is invoked after each page is appended to
        the writer. Its single parameter is the page returned by
        :meth:`add_page` for the page just appended. Values that are not
        callable are ignored.
    """
    notify = after_page_append if callable(after_page_append) else None
    for source_page in reader.pages:
        appended = self.add_page(source_page)
        if notify is not None:
            notify(appended)
def appendPagesFromReader(
    self,
    reader: PdfReader,
    after_page_append: Optional[Callable[[PageObject], None]] = None,
) -> None:  # pragma: no cover
    """
    Legacy camelCase spelling of :meth:`append_pages_from_reader`.

    .. deprecated:: 1.28.0
        Use :meth:`append_pages_from_reader` instead.
    """
    old_name, new_name = "appendPagesFromReader", "append_pages_from_reader"
    deprecation_with_replacement(old_name, new_name, "3.0.0")
    self.append_pages_from_reader(reader, after_page_append)
def update_page_form_field_values(
    self,
    page: PageObject,
    fields: Dict[str, Any],
    flags: FieldFlag = OPTIONAL_READ_WRITE_FIELD,
) -> None:
    """
    Update the form field values for a given page from a fields dictionary.

    Copy field texts and values from fields to page.
    If the field links to a parent object, add the information to the parent.

    :param PageObject page: Page reference from PDF writer where the
        annotations and field data will be updated.
    :param dict fields: a Python dictionary of field names (/T) and text
        values (/V)
    :param int flags: An integer (0 to 7). The first bit sets ReadOnly, the
        second bit sets Required, the third bit sets NoExport. See
        PDF Reference Table 8.70 for details.
    """
    self.set_need_appearances_writer()
    if PG.ANNOTS not in page:
        logger_warning("No fields to update on this page", __name__)
        return

    name_key = FieldDictionaryAttributes.T
    for annot_ref in page[PG.ANNOTS]:  # type: ignore
        writer_annot = annot_ref.get_object()
        # Parent field, or an empty dict when the annotation has none.
        writer_parent_annot = (
            writer_annot[PG.PARENT] if PG.PARENT in writer_annot else {}
        )
        for field, value in fields.items():
            if writer_annot.get(name_key) == field:
                if writer_annot.get(FieldDictionaryAttributes.FT) == "/Btn":
                    # Button fields also get the value as appearance state.
                    writer_annot[
                        NameObject(AnnotationDictionaryAttributes.AS)
                    ] = NameObject(value)
                writer_annot[NameObject(FieldDictionaryAttributes.V)] = TextStringObject(
                    value
                )
                if flags:
                    writer_annot[NameObject(FieldDictionaryAttributes.Ff)] = NumberObject(
                        flags
                    )
            elif writer_parent_annot.get(name_key) == field:
                writer_parent_annot[
                    NameObject(FieldDictionaryAttributes.V)
                ] = TextStringObject(value)
def updatePageFormFieldValues(
    self,
    page: PageObject,
    fields: Dict[str, Any],
    flags: FieldFlag = OPTIONAL_READ_WRITE_FIELD,
) -> None:  # pragma: no cover
    """
    Legacy camelCase spelling of :meth:`update_page_form_field_values`.

    .. deprecated:: 1.28.0
        Use :meth:`update_page_form_field_values` instead.
    """
    old_name, new_name = "updatePageFormFieldValues", "update_page_form_field_values"
    deprecation_with_replacement(old_name, new_name, "3.0.0")
    outcome = self.update_page_form_field_values(page, fields, flags)
    return outcome
def clone_reader_document_root(self, reader: PdfReader) -> None:
    """
    Make the reader's document root this writer's root object.

    The root entry of the reader's trailer is assigned directly; no copy
    is made here.

    :param reader: PdfReader whose document root is taken over.
    """
    reader_root = reader.trailer[TK.ROOT]
    self._root_object = cast(DictionaryObject, reader_root)
def cloneReaderDocumentRoot(self, reader: PdfReader) -> None:  # pragma: no cover
    """
    Legacy camelCase spelling of :meth:`clone_reader_document_root`.

    .. deprecated:: 1.28.0
        Use :meth:`clone_reader_document_root` instead.
    """
    old_name, new_name = "cloneReaderDocumentRoot", "clone_reader_document_root"
    deprecation_with_replacement(old_name, new_name, "3.0.0")
    self.clone_reader_document_root(reader)
def clone_document_from_reader(
    self,
    reader: PdfReader,
    after_page_append: Optional[Callable[[PageObject], None]] = None,
) -> None:
    """
    Create a copy (clone) of a document from a PDF file reader.

    The reader's document root is taken over first, then its pages are
    appended.

    :param reader: PDF file reader instance from which the clone
        should be created.
    :param Callable[[PageObject], None] after_page_append:
        Callback function that is invoked after each page is appended to
        the writer (delegates to append_pages_from_reader). The single
        parameter of the callback is a reference to the page just appended
        to the document.
    """
    # TODO : ppZZ may be limited because we do not copy all info...
    steps = (
        lambda: self.clone_reader_document_root(reader),
        lambda: self.append_pages_from_reader(reader, after_page_append),
    )
    for step in steps:
        step()
def cloneDocumentFromReader(
    self,
    reader: PdfReader,
    after_page_append: Optional[Callable[[PageObject], None]] = None,
) -> None:  # pragma: no cover
    """
    Legacy camelCase spelling of :meth:`clone_document_from_reader`.

    .. deprecated:: 1.28.0
        Use :meth:`clone_document_from_reader` instead.
    """
    old_name, new_name = "cloneDocumentFromReader", "clone_document_from_reader"
    deprecation_with_replacement(old_name, new_name, "3.0.0")
    self.clone_document_from_reader(reader, after_page_append)
def encrypt(
    self,
    user_password: Optional[str] = None,
    owner_password: Optional[str] = None,
    use_128bit: bool = True,
    permissions_flag: UserAccessPermissions = ALL_DOCUMENT_PERMISSIONS,
    user_pwd: Optional[str] = None,  # deprecated
    owner_pwd: Optional[str] = None,  # deprecated
) -> None:
    """
    Encrypt this PDF file with the PDF Standard encryption handler.

    :param str user_password: The "user password", which allows for opening
        and reading the PDF file with the restrictions provided.
    :param str owner_password: The "owner password", which allows for
        opening the PDF files without any restrictions. By default,
        the owner password is the same as the user password.
    :param bool use_128bit: flag as to whether to use 128bit
        encryption. When false, 40bit encryption will be used. By default,
        this flag is on.
    :param unsigned int permissions_flag: permissions as described in
        TABLE 3.20 of the PDF 1.7 specification. A bit value of 1 means the
        permission is granted. Hence an integer value of -1 will set all
        flags.
        Bit position 3 is for printing, 4 is for modifying content, 5 and 6
        control annotations, 9 for form fields, 10 for extraction of
        text and graphics.
    :param str user_pwd: Deprecated alias of ``user_password``.
    :param str owner_pwd: Deprecated alias of ``owner_password``.
    :raises ValueError: If a password is given under both its current and
        its deprecated name, or if no user password is given at all.
    """
    if user_pwd is not None:  # deprecated
        if user_password is not None:
            raise ValueError(
                "Please only set 'user_password'. "
                "The 'user_pwd' argument is deprecated."
            )
        # Same category as the owner_pwd warning below; stacklevel=2 makes
        # the warning point at the caller.
        warnings.warn(
            "Please use 'user_password' instead of 'user_pwd'. "
            "The 'user_pwd' argument is deprecated and "
            "will be removed in PyPDF2 4.0.0.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        user_password = user_pwd
    if user_password is None:
        # user_password is only Optional because of the deprecated user_pwd
        raise ValueError("user_password may not be None")
    if owner_pwd is not None:  # deprecated
        if owner_password is not None:
            raise ValueError(
                "The argument owner_pwd of encrypt is deprecated. Use owner_password only."
            )
        old_term = "owner_pwd"
        new_term = "owner_password"
        warnings.warn(
            message=(
                f"{old_term} is deprecated as an argument and will be "
                f"removed in PyPDF2 4.0.0. Use {new_term} instead"
            ),
            category=DeprecationWarning,
            stacklevel=2,
        )
        owner_password = owner_pwd
    if owner_password is None:
        owner_password = user_password

    # Algorithm version, handler revision and key length in bytes.
    if use_128bit:
        V = 2
        rev = 3
        keylen = 128 // 8
    else:
        V = 1
        rev = 2
        keylen = 40 // 8
    P = permissions_flag
    O = ByteStringObject(_alg33(owner_password, user_password, rev, keylen))  # type: ignore[arg-type]
    # The two file identifiers are MD5 digests of the current time and of a
    # random number.
    ID_1 = ByteStringObject(md5((repr(time.time())).encode("utf8")).digest())
    ID_2 = ByteStringObject(md5((repr(random.random())).encode("utf8")).digest())
    self._ID = ArrayObject((ID_1, ID_2))
    if rev == 2:
        U, key = _alg34(user_password, O, P, ID_1)
    else:
        assert rev == 3
        U, key = _alg35(user_password, rev, keylen, O, P, ID_1, False)  # type: ignore[arg-type]

    encrypt_dict = DictionaryObject()
    encrypt_dict[NameObject(SA.FILTER)] = NameObject("/Standard")
    encrypt_dict[NameObject("/V")] = NumberObject(V)
    if V == 2:
        # The key length entry is expressed in bits.
        encrypt_dict[NameObject(SA.LENGTH)] = NumberObject(keylen * 8)
    encrypt_dict[NameObject(ED.R)] = NumberObject(rev)
    encrypt_dict[NameObject(ED.O)] = ByteStringObject(O)
    encrypt_dict[NameObject(ED.U)] = ByteStringObject(U)
    encrypt_dict[NameObject(ED.P)] = NumberObject(P)
    self._encrypt = self._add_object(encrypt_dict)
    self._encrypt_key = key
def write_stream(self, stream: StreamType) -> None:
    """
    Serialize the document into an already opened stream.

    The catalog is registered on first use, indirect references are swept,
    and then the header/objects, the cross-reference table, the trailer and
    the ``startxref`` footer are written in that order.

    :param stream: Destination object. A warning is logged when it exposes a
        ``mode`` attribute that does not contain ``"b"``.
    """
    opened_as_text = hasattr(stream, "mode") and "b" not in stream.mode
    if opened_as_text:
        logger_warning(
            f"File <{stream.name}> to write to is not in binary mode. "  # type: ignore
            "It may not be written to correctly.",
            __name__,
        )

    # Register the catalog lazily, the first time the document is written.
    if not self._root:
        self._root = self._add_object(self._root_object)

    # PDF objects sometimes have circular references to their /Page objects
    # inside their object tree (for example, annotations). Those will be
    # indirect references to objects that we've recreated in this PDF. To
    # address this problem, PageObject's store their original object
    # reference number, and we add it to the external reference map before
    # we sweep for indirect references. This forces self-page-referencing
    # trees to reference the correct new object location, rather than
    # copying in a new copy of the page object.
    self._sweep_indirect_references(self._root)

    positions = self._write_header(stream)
    xref_location = self._write_xref_table(stream, positions)
    self._write_trailer(stream)
    footer = b_(f"\nstartxref\n{xref_location}\n%%EOF\n")
    stream.write(footer)  # eof
def write(self, stream: Union[Path, StrByteType]) -> Tuple[bool, IO]:
    """
    Write the collection of pages added to this object out as a PDF file.

    :param stream: An object to write the file to. The object can support
        the write method and the tell method, similar to a file object, or
        be a file path, just like the fileobj, just named it stream to keep
        existing workflow.
    :return: A tuple ``(my_file, stream)``. ``my_file`` is ``True`` when
        ``stream`` was a path and the file was opened (and closed) here;
        ``stream`` is the file object that was written to.
    :raises ValueError: If ``stream`` is an empty string.
    """
    if stream == "":
        raise ValueError(f"Output(stream={stream}) is empty.")

    my_file = isinstance(stream, (str, Path))
    if my_file:
        stream = FileIO(stream, "wb")

    written = False
    try:
        self.write_stream(stream)
        written = True
    finally:
        # A file opened here is always closed, even when writing fails.
        # A caller-supplied stream is only closed after a successful write
        # in context-manager mode (``with_as_usage``). The flag itself is no
        # longer switched on by path writes, so writing to a path does not
        # cause later caller-owned streams to be closed.
        if my_file or (written and self.with_as_usage):
            stream.close()

    return my_file, stream
def _write_header(self, stream: StreamType) -> List[int]:
    """
    Write the PDF header followed by every non-``None`` object.

    Objects are numbered from 1 in ``self._objects`` order and always use
    generation 0. When an ``_encrypt`` attribute exists, every object except
    the encryption dictionary itself is written with a per-object key.

    :param stream: Destination supporting ``write`` and ``tell``.
    :return: The stream offset at which each written object starts.
    """
    stream.write(self.pdf_header + b"\n")
    stream.write(b"%\xE2\xE3\xCF\xD3\n")

    offsets = []
    for idnum, obj in enumerate(self._objects, start=1):
        # If the obj is None we can't write anything
        if obj is None:
            continue
        offsets.append(stream.tell())
        stream.write(b_(str(idnum)) + b" 0 obj\n")
        key = None
        if hasattr(self, "_encrypt") and idnum != self._encrypt.idnum:
            # Per-object key: md5(file key + 3 low bytes of the object
            # number + 2 low bytes of generation 0), truncated below.
            id_bytes = struct.pack("<i", idnum)[:3]
            generation_bytes = struct.pack("<i", 0)[:2]
            key = self._encrypt_key + id_bytes + generation_bytes
            assert len(key) == (len(self._encrypt_key) + 5)
            digest = md5(key).digest()
            key = digest[: min(16, len(self._encrypt_key) + 5)]
        obj.write_to_stream(stream, key)
        stream.write(b"\nendobj\n")
    return offsets
def _write_xref_table(self, stream: StreamType, object_positions: List[int]) -> int:
    """
    Write the cross-reference table.

    A single subsection starting at object 0 is emitted: the free entry for
    object 0, then one in-use entry (generation 0) per recorded offset.

    :param stream: Destination supporting ``write`` and ``tell``.
    :param object_positions: Stream offsets to emit, in order.
    :return: The stream offset at which the table begins.
    """
    table_start = stream.tell()
    stream.write(b"xref\n")
    stream.write(b_(f"0 {len(self._objects) + 1}\n"))
    stream.write(b_(f"{0:0>10} {65535:0>5} f \n"))
    for offset in object_positions:
        entry = b_(f"{offset:0>10} {0:0>5} n \n")
        stream.write(entry)
    return table_start
def _write_trailer(self, stream: StreamType) -> None:
    """
    Write the ``trailer`` keyword and the trailer dictionary.

    The dictionary always carries the size, root and info entries; the ID
    and encrypt entries are added only when the writer has ``_ID`` /
    ``_encrypt`` attributes.

    :param stream: Destination to write to.
    """
    stream.write(b"trailer\n")

    mandatory = {
        NameObject(TK.SIZE): NumberObject(len(self._objects) + 1),
        NameObject(TK.ROOT): self._root,
        NameObject(TK.INFO): self._info,
    }
    trailer = DictionaryObject()
    trailer.update(mandatory)

    if hasattr(self, "_ID"):
        trailer[NameObject(TK.ID)] = self._ID
    if hasattr(self, "_encrypt"):
        trailer[NameObject(TK.ENCRYPT)] = self._encrypt

    # The trailer is written with no encryption key.
    trailer.write_to_stream(stream, None)
def add_metadata(self, infos: Dict[str, Any]) -> None:
    """
    Add custom metadata to the output.

    Each key is wrapped in a ``NameObject`` and each value is converted with
    ``create_string_object`` before being merged into the info dictionary.

    :param dict infos: a Python dictionary where each key is a field
        and each value is your new metadata.
    """
    converted = {
        NameObject(field): create_string_object(text)
        for field, text in infos.items()
    }
    self.get_object(self._info).update(converted)  # type: ignore
def addMetadata(self, infos: Dict[str, Any]) -> None:  # pragma: no cover
    """
    Deprecated camelCase spelling of :meth:`add_metadata`.

    .. deprecated:: 1.28.0

        Use :meth:`add_metadata` instead.
    """
    # Report the deprecation first, then delegate unchanged.
    deprecation_with_replacement("addMetadata", "add_metadata", "3.0.0")
    self.add_metadata(infos)
def _sweep_indirect_references(
    self,
    root: Union[
        ArrayObject,
        BooleanObject,
        DictionaryObject,
        FloatObject,
        IndirectObject,
        NameObject,
        PdfObject,
        NumberObject,
        TextStringObject,
        NullObject,
    ],
) -> None:
    """
    Walk the object graph from ``root`` and localize what it references.

    The walk is iterative (explicit stack). Indirect references owned by
    another document are resolved through :meth:`_resolve_indirect_object`,
    stream objects found directly inside a container are registered as
    indirect objects, and containers are patched in place. When a container
    is patched, the stale ``self._idnum_hash`` entries of that container and
    of its recorded ancestors are re-keyed with their new hash values.

    :param root: Object from which the traversal starts.
    """
    stack: Deque[
        Tuple[
            Any,
            Optional[Any],
            Any,
            List[PdfObject],
        ]
    ] = collections.deque()
    # String forms of the resolved references already queued for traversal.
    # A set keeps the membership test constant-time.
    discovered = set()
    parent = None
    grant_parents: List[PdfObject] = []
    key_or_id = None

    # Start from root
    stack.append((root, parent, key_or_id, grant_parents))

    while stack:
        data, parent, key_or_id, grant_parents = stack.pop()

        # Build stack for a processing depth-first
        if isinstance(data, (ArrayObject, DictionaryObject)):
            for key, value in data.items():
                stack.append(
                    (
                        value,
                        data,
                        key,
                        # Parses as ``(grant_parents + [parent]) if parent
                        # is not None else []``.
                        grant_parents + [parent] if parent is not None else [],
                    )
                )
        elif isinstance(data, IndirectObject):
            if data.pdf != self:
                data = self._resolve_indirect_object(data)

                reference_id = str(data)
                if reference_id not in discovered:
                    discovered.add(reference_id)
                    # The target is queued without parent information.
                    stack.append((data.get_object(), None, None, []))

        # Check if data has a parent and if it is a dict or an array update the value
        if isinstance(parent, (DictionaryObject, ArrayObject)):
            if isinstance(data, StreamObject):
                # a dictionary value is a stream. streams must be indirect
                # objects, so we need to change this value.
                data = self._resolve_indirect_object(self._add_object(data))

            update_hashes = []

            # Data changed and thus the hash value changed
            if parent[key_or_id] != data:
                # Capture the hashes BEFORE mutating the parent.
                update_hashes = [parent.hash_value()] + [
                    grant_parent.hash_value() for grant_parent in grant_parents
                ]
                parent[key_or_id] = data

            # Update old hash value to new hash value
            for old_hash in update_hashes:
                indirect_reference = self._idnum_hash.pop(old_hash, None)

                if indirect_reference is not None:
                    indirect_reference_obj = indirect_reference.get_object()

                    if indirect_reference_obj is not None:
                        self._idnum_hash[
                            indirect_reference_obj.hash_value()
                        ] = indirect_reference
| def _resolve_indirect_object(self, data: IndirectObject) -> IndirectObject: | |
| """ | |
| Resolves indirect object to this pdf indirect objects. | |
| If it is a new object then it is added to self._objects | |
| and new idnum is given and generation is always 0. | |
| """ | |
| if hasattr(data.pdf, "stream") and data.pdf.stream.closed: | |
| raise ValueError(f"I/O operation on closed file: {data.pdf.stream.name}") | |
| if data.pdf == self: | |
| return data | |
| # Get real object indirect object | |
| real_obj = data.pdf.get_object(data) | |
| if real_obj is None: | |
| logger_warning( | |
| f"Unable to resolve [{data.__class__.__name__}: {data}], " | |
| "returning NullObject instead", | |
| __name__, | |
| ) | |
| real_obj = NullObject() | |
| hash_value = real_obj.hash_value() | |
| # Check if object is handled | |
| if hash_value in self._idnum_hash: | |
| return self._idnum_hash[hash_value] | |
| if data.pdf == self: | |
| self._idnum_hash[hash_value] = IndirectObject(data.idnum, 0, self) | |
| # This is new object in this pdf | |
| else: | |
| self._idnum_hash[hash_value] = self._add_object(real_obj) | |
| return self._idnum_hash[hash_value] | |
def get_reference(self, obj: PdfObject) -> IndirectObject:
    """
    Build the indirect reference of an object stored in this writer.

    :param obj: An object present in ``self._objects``.
    :return: Reference with generation 0 whose id is the 1-based position
        of ``obj`` in ``self._objects``.
    """
    position = self._objects.index(obj)
    ref = IndirectObject(position + 1, 0, self)
    # Sanity check: the reference must resolve back to the given object.
    assert ref.get_object() == obj
    return ref
def getReference(self, obj: PdfObject) -> IndirectObject:  # pragma: no cover
    """
    Deprecated camelCase spelling of :meth:`get_reference`.

    .. deprecated:: 1.28.0

        Use :meth:`get_reference` instead.
    """
    # Report the deprecation first, then delegate unchanged.
    deprecation_with_replacement("getReference", "get_reference", "3.0.0")
    return self.get_reference(obj)
def get_outline_root(self) -> TreeObject:
    """
    Return the outline tree root, creating and registering it if missing.

    :return: The ``TreeObject`` stored under the catalog's outlines key.
    """
    if CO.OUTLINES not in self._root_object:
        # No outline yet: create an empty tree and hook it into the catalog.
        outline = TreeObject()
        outline.update({})
        outline_ref = self._add_object(outline)
        self._root_object[NameObject(CO.OUTLINES)] = outline_ref
        return outline

    # TABLE 3.25 Entries in the catalog dictionary
    outline = cast(TreeObject, self._root_object[CO.OUTLINES])
    idnum = self._objects.index(outline) + 1
    outline_ref = IndirectObject(idnum, 0, self)
    # Sanity check: the existing outline must be one of this writer's objects.
    assert outline_ref.get_object() == outline
    return outline
def get_threads_root(self) -> ArrayObject:
    """
    Return the list of threads, see §8.3.2 from PDF 1.7 spec.

    An empty array is created and stored in the catalog when none exists.

    :return: an Array (possibly empty) of Dictionaries with "/F" and "/I" properties
    """
    if CO.THREADS not in self._root_object:
        threads = ArrayObject()
        self._root_object[NameObject(CO.THREADS)] = threads
        return threads
    # TABLE 3.25 Entries in the catalog dictionary
    return cast(ArrayObject, self._root_object[CO.THREADS])
def threads(self) -> ArrayObject:
    """
    Return the list of threads, see §8.3.2 from PDF 1.7 spec.

    NOTE(review): the previous docstring called this a read-only property,
    but no ``@property`` decorator is visible on this definition — confirm.

    :return: an Array (possibly empty) of Dictionaries with "/F" and "/I" properties
    """
    threads_root = self.get_threads_root()
    return threads_root
def getOutlineRoot(self) -> TreeObject:  # pragma: no cover
    """
    Deprecated camelCase spelling of :meth:`get_outline_root`.

    .. deprecated:: 1.28.0

        Use :meth:`get_outline_root` instead.
    """
    # Report the deprecation first, then delegate unchanged.
    deprecation_with_replacement("getOutlineRoot", "get_outline_root", "3.0.0")
    return self.get_outline_root()
def get_named_dest_root(self) -> ArrayObject:
    """
    Return the names array of the destinations name tree.

    Missing (or non-dictionary) levels along the path catalog names entry →
    dests entry → names array are created and stored on the way.

    :return: The array found or created at the end of that path.
    """
    root = self._root_object

    if CA.NAMES in root and isinstance(root[CA.NAMES], DictionaryObject):
        names = cast(DictionaryObject, root[CA.NAMES])
        names_ref = names.indirect_reference  # read as before; value unused
        if CA.DESTS in names and isinstance(names[CA.DESTS], DictionaryObject):
            # 3.6.3 Name Dictionary (PDF spec 1.7)
            dests = cast(DictionaryObject, names[CA.DESTS])
            dests_ref = dests.indirect_reference  # read as before; value unused
            if CA.NAMES in dests:
                # TABLE 3.33 Entries in a name tree node dictionary
                return cast(ArrayObject, dests[CA.NAMES])
            nd = ArrayObject()
            dests[NameObject(CA.NAMES)] = nd
            return nd
    else:
        # No usable names dictionary: register a fresh one in the catalog.
        names = DictionaryObject()
        names_ref = self._add_object(names)
        root[NameObject(CA.NAMES)] = names_ref

    # Reached whenever no usable dests dictionary exists under ``names``.
    dests = DictionaryObject()
    dests_ref = self._add_object(dests)
    names[NameObject(CA.DESTS)] = dests_ref
    nd = ArrayObject()
    dests[NameObject(CA.NAMES)] = nd
    return nd
def getNamedDestRoot(self) -> ArrayObject:  # pragma: no cover
    """
    Deprecated camelCase spelling of :meth:`get_named_dest_root`.

    .. deprecated:: 1.28.0

        Use :meth:`get_named_dest_root` instead.
    """
    # Report the deprecation first, then delegate unchanged.
    deprecation_with_replacement("getNamedDestRoot", "get_named_dest_root", "3.0.0")
    return self.get_named_dest_root()
def add_outline_item_destination(
    self,
    page_destination: Union[None, PageObject, TreeObject] = None,
    parent: Union[None, TreeObject, IndirectObject] = None,
    before: Union[None, TreeObject, IndirectObject] = None,
    dest: Union[None, PageObject, TreeObject] = None,  # deprecated
) -> IndirectObject:
    """
    Register an outline item object and insert it under a parent item.

    :param page_destination: The object to add and insert.
    :param parent: Parent outline item; the outline root is used when omitted.
    :param before: Optional sibling; its ``indirect_reference`` is forwarded
        to ``insert_child``.
    :param dest: Deprecated spelling of ``page_destination``.
    :return: The indirect reference of the added object.
    :raises ValueError: If both ``page_destination`` and ``dest`` are given,
        or if neither is.
    """
    if dest is not None:  # deprecated
        if page_destination is not None:
            raise ValueError(
                "The argument dest of add_outline_item_destination is deprecated. Use page_destination only."
            )
        old_term = "dest"
        new_term = "page_destination"
        warnings.warn(
            message=(
                f"{old_term} is deprecated as an argument and will be "
                f"removed in PyPDF2 4.0.0. Use {new_term} instead"
            ),
            category=DeprecationWarning,
        )
        page_destination = dest
    if page_destination is None:  # deprecated
        # argument is only Optional due to deprecated argument.
        raise ValueError("page_destination may not be None")

    parent_item = self.get_outline_root() if parent is None else parent
    parent_item = cast(TreeObject, parent_item.get_object())

    page_destination_ref = self._add_object(page_destination)
    before_ref = before.indirect_reference if before is not None else before
    parent_item.insert_child(page_destination_ref, before_ref, self)
    return page_destination_ref
def add_bookmark_destination(
    self,
    dest: Union[PageObject, TreeObject],
    parent: Union[None, TreeObject, IndirectObject] = None,
) -> IndirectObject:  # pragma: no cover
    """
    Deprecated name of :meth:`add_outline_item_destination`.

    .. deprecated:: 2.9.0

        Use :meth:`add_outline_item_destination` instead.
    """
    # Report the deprecation first, then delegate unchanged.
    deprecation_with_replacement(
        "add_bookmark_destination", "add_outline_item_destination", "3.0.0"
    )
    return self.add_outline_item_destination(dest, parent)
def addBookmarkDestination(
    self, dest: PageObject, parent: Optional[TreeObject] = None
) -> IndirectObject:  # pragma: no cover
    """
    Deprecated camelCase name of :meth:`add_outline_item_destination`.

    .. deprecated:: 1.28.0

        Use :meth:`add_outline_item_destination` instead.
    """
    # Report the deprecation first, then delegate unchanged.
    deprecation_with_replacement(
        "addBookmarkDestination", "add_outline_item_destination", "3.0.0"
    )
    return self.add_outline_item_destination(dest, parent)
def add_outline_item_dict(
    self,
    outline_item: OutlineItemType,
    parent: Union[None, TreeObject, IndirectObject] = None,
    before: Union[None, TreeObject, IndirectObject] = None,
) -> IndirectObject:
    """
    Add an outline item described by a plain mapping.

    The mapping is copied into a new ``TreeObject``. An ``"/A"`` entry, when
    present, is copied into its own dictionary, registered as an indirect
    object, and referenced from the item.

    :param outline_item: Mapping describing the outline item.
    :param parent: Parent outline item, forwarded unchanged.
    :param before: Sibling before which to insert, forwarded unchanged.
    :return: The reference returned by :meth:`add_outline_item_destination`.
    """
    outline_item_object = TreeObject()
    for entry_key, entry_value in outline_item.items():
        outline_item_object[NameObject(str(entry_key))] = entry_value
    outline_item_object.update(outline_item)

    if "/A" in outline_item:
        source_action = cast(DictionaryObject, outline_item["/A"])
        action = DictionaryObject()
        for action_key, action_value in source_action.items():
            action[NameObject(str(action_key))] = action_value
        outline_item_object[NameObject("/A")] = self._add_object(action)

    return self.add_outline_item_destination(outline_item_object, parent, before)
def add_bookmark_dict(
    self, outline_item: OutlineItemType, parent: Optional[TreeObject] = None
) -> IndirectObject:  # pragma: no cover
    """
    Deprecated name of :meth:`add_outline_item_dict`.

    .. deprecated:: 2.9.0

        Use :meth:`add_outline_item_dict` instead.
    """
    # Report the deprecation first, then delegate unchanged.
    deprecation_with_replacement(
        "add_bookmark_dict", "add_outline_item_dict", "3.0.0"
    )
    return self.add_outline_item_dict(outline_item, parent)
def addBookmarkDict(
    self, outline_item: OutlineItemType, parent: Optional[TreeObject] = None
) -> IndirectObject:  # pragma: no cover
    """
    Deprecated camelCase name of :meth:`add_outline_item_dict`.

    .. deprecated:: 1.28.0

        Use :meth:`add_outline_item_dict` instead.
    """
    # Report the deprecation first, then delegate unchanged.
    deprecation_with_replacement(
        "addBookmarkDict", "add_outline_item_dict", "3.0.0"
    )
    return self.add_outline_item_dict(outline_item, parent)
def add_outline_item(
    self,
    title: str,
    page_number: Union[None, PageObject, IndirectObject, int],
    parent: Union[None, TreeObject, IndirectObject] = None,
    before: Union[None, TreeObject, IndirectObject] = None,
    color: Optional[Union[Tuple[float, float, float], str]] = None,
    bold: bool = False,
    italic: bool = False,
    fit: Fit = PAGE_FIT,
    pagenum: Optional[int] = None,  # deprecated
) -> IndirectObject:
    """
    Add an outline item (commonly referred to as a "Bookmark") to this PDF file.

    :param str title: Title to use for this outline item.
    :param page_number: Page this outline item will point to: a page index,
        a page object or an indirect reference. With ``None`` the item is
        created without a GoTo action.
    :param parent: A reference to a parent outline item to create nested
        outline items.
    :param before: Sibling outline item before which the new item is
        inserted.
    :param tuple color: Color of the outline item's font as a red, green, blue tuple
        from 0.0 to 1.0 or as a Hex String (#RRGGBB)
    :param bool bold: Outline item font is bold
    :param bool italic: Outline item font is italic
    :param Fit fit: The fit of the destination page.
    :param int pagenum: Deprecated spelling of ``page_number``.
    :return: The indirect reference of the new outline item.
    :raises ValueError: If both ``page_number`` and ``pagenum`` are given.
    """
    page_ref: Union[None, NullObject, IndirectObject, NumberObject]
    if isinstance(italic, Fit):  # it means that we are on the old params
        # Legacy positional order (title, pagenum, parent, color, bold,
        # italic, fit): shift the values into the current order and retry.
        if fit is not None and page_number is None:
            page_number = fit  # type: ignore
        return self.add_outline_item(
            title, page_number, parent, None, before, color, bold, italic  # type: ignore
        )
    if page_number is not None and pagenum is not None:
        raise ValueError(
            "The argument pagenum of add_outline_item is deprecated. Use page_number only."
        )
    if pagenum is not None:  # deprecated
        # Honour the deprecated argument instead of silently dropping it.
        old_term = "pagenum"
        new_term = "page_number"
        warnings.warn(
            message=(
                f"{old_term} is deprecated as an argument and will be "
                f"removed in PyPDF2 4.0.0. Use {new_term} instead"
            ),
            category=DeprecationWarning,
        )
        page_number = pagenum
    if page_number is None:
        action_ref = None
    else:
        if isinstance(page_number, IndirectObject):
            page_ref = page_number
        elif isinstance(page_number, PageObject):
            page_ref = page_number.indirect_reference
        elif isinstance(page_number, int):
            try:
                page_ref = self.pages[page_number].indirect_reference
            except IndexError:
                # Unknown index: keep the raw number as the target.
                page_ref = NumberObject(page_number)
        if page_ref is None:
            logger_warning(
                f"can not find reference of page {page_number}",
                __name__,
            )
            page_ref = NullObject()
        dest = Destination(
            NameObject("/" + title + " outline item"),
            page_ref,
            fit,
        )

        action_ref = self._add_object(
            DictionaryObject(
                {
                    NameObject(GoToActionArguments.D): dest.dest_array,
                    NameObject(GoToActionArguments.S): NameObject("/GoTo"),
                }
            )
        )
    outline_item = _create_outline_item(action_ref, title, color, italic, bold)

    if parent is None:
        parent = self.get_outline_root()
    return self.add_outline_item_destination(outline_item, parent, before)
def add_bookmark(
    self,
    title: str,
    pagenum: int,  # deprecated, but the whole method is deprecated
    parent: Union[None, TreeObject, IndirectObject] = None,
    color: Optional[Tuple[float, float, float]] = None,
    bold: bool = False,
    italic: bool = False,
    fit: FitType = "/Fit",
    *args: ZoomArgType,
) -> IndirectObject:  # pragma: no cover
    """
    Deprecated name of :meth:`add_outline_item`.

    The arguments are forwarded positionally in this method's own order,
    with ``fit`` and ``args`` combined into a single ``Fit`` value.

    .. deprecated:: 2.9.0

        Use :meth:`add_outline_item` instead.
    """
    deprecation_with_replacement("add_bookmark", "add_outline_item", "3.0.0")
    destination_fit = Fit(fit_type=fit, fit_args=args)  # type: ignore
    return self.add_outline_item(
        title,
        pagenum,
        parent,
        color,  # type: ignore
        bold,  # type: ignore
        italic,
        destination_fit,  # type: ignore
    )
def addBookmark(
    self,
    title: str,
    pagenum: int,
    parent: Union[None, TreeObject, IndirectObject] = None,
    color: Optional[Tuple[float, float, float]] = None,
    bold: bool = False,
    italic: bool = False,
    fit: FitType = "/Fit",
    *args: ZoomArgType,
) -> IndirectObject:  # pragma: no cover
    """
    Deprecated camelCase name of :meth:`add_outline_item`.

    ``None`` is passed for the ``before`` position and ``fit``/``args`` are
    combined into a single ``Fit`` value.

    .. deprecated:: 1.28.0

        Use :meth:`add_outline_item` instead.
    """
    deprecation_with_replacement("addBookmark", "add_outline_item", "3.0.0")
    destination_fit = Fit(fit_type=fit, fit_args=args)
    return self.add_outline_item(
        title,
        pagenum,
        parent,
        None,
        color,
        bold,
        italic,
        destination_fit,
    )
def add_outline(self) -> None:
    """
    Placeholder that always fails.

    :raises NotImplementedError: Always.
    """
    message = (
        "This method is not yet implemented. Use :meth:`add_outline_item` instead."
    )
    raise NotImplementedError(message)
def add_named_destination_array(
    self, title: TextStringObject, destination: Union[IndirectObject, ArrayObject]
) -> None:
    """
    Insert a ``(title, destination)`` pair into the named destinations array.

    The array holds alternating titles and destinations. The pair goes in
    front of the first stored title that compares greater than ``title``,
    or at the end when there is none.

    :param title: Name of the destination.
    :param destination: Destination stored right after the title.
    """
    nd = self.get_named_dest_root()
    # Titles sit at the even positions.
    for position in range(0, len(nd), 2):
        if title < nd[position]:
            # Destination first, then the title at the same index, so the
            # title ends up in front of its destination.
            nd.insert(position, destination)
            nd.insert(position, TextStringObject(title))
            return
    nd.extend([TextStringObject(title), destination])
    return
def add_named_destination_object(
    self,
    page_destination: Optional[PdfObject] = None,
    dest: Optional[PdfObject] = None,
) -> IndirectObject:
    """
    Register a destination object as a named destination.

    The object's ``dest_array`` is added as an indirect object and filed
    under the object's ``"/Title"`` value.

    :param page_destination: Destination providing ``dest_array`` and a
        ``"/Title"`` entry.
    :param dest: Deprecated spelling of ``page_destination``.
    :return: The indirect reference of the added destination array.
    :raises ValueError: If both arguments are given, or if neither is.
    """
    if dest is not None:  # deprecated
        if page_destination is not None:
            raise ValueError(
                "The argument dest of add_named_destination_object is deprecated. Use page_destination only."
            )
        old_term = "dest"
        new_term = "page_destination"
        warnings.warn(
            message=(
                f"{old_term} is deprecated as an argument and will be "
                f"removed in PyPDF2 4.0.0. Use {new_term} instead"
            ),
            category=DeprecationWarning,
        )
        page_destination = dest
    if page_destination is None:  # deprecated
        raise ValueError("page_destination may not be None")

    page_destination_ref = self._add_object(page_destination.dest_array)  # type: ignore
    destination_title = cast("TextStringObject", page_destination["/Title"])  # type: ignore
    self.add_named_destination_array(destination_title, page_destination_ref)
    return page_destination_ref
def addNamedDestinationObject(
    self, dest: Destination
) -> IndirectObject:  # pragma: no cover
    """
    Deprecated camelCase name of :meth:`add_named_destination_object`.

    .. deprecated:: 1.28.0

        Use :meth:`add_named_destination_object` instead.
    """
    # Report the deprecation first, then delegate unchanged.
    deprecation_with_replacement(
        "addNamedDestinationObject", "add_named_destination_object", "3.0.0"
    )
    return self.add_named_destination_object(dest)
def add_named_destination(
    self,
    title: str,
    page_number: Optional[int] = None,
    pagenum: Optional[int] = None,  # deprecated
) -> IndirectObject:
    """
    Add a named destination pointing at a page.

    A GoTo action targeting the page is registered as an indirect object
    and filed under ``title`` in the named destinations array.

    :param str title: Name of the destination.
    :param int page_number: Index into the kids of the pages root.
    :param int pagenum: Deprecated spelling of ``page_number``.
    :return: The indirect reference of the added action dictionary.
    :raises ValueError: If both ``page_number`` and ``pagenum`` are given,
        or if neither is.
    """
    if page_number is not None and pagenum is not None:
        # The message used to name add_outline_item by mistake.
        raise ValueError(
            "The argument pagenum of add_named_destination is deprecated. Use page_number only."
        )
    if pagenum is not None:
        old_term = "pagenum"
        new_term = "page_number"
        warnings.warn(
            message=(
                f"{old_term} is deprecated as an argument and will be "
                f"removed in PyPDF2 4.0.0. Use {new_term} instead"
            ),
            category=DeprecationWarning,
        )
        page_number = pagenum
    if page_number is None:
        raise ValueError("page_number may not be None")
    page_ref = self.get_object(self._pages)[PA.KIDS][page_number]  # type: ignore
    dest = DictionaryObject()
    dest.update(
        {
            NameObject(GoToActionArguments.D): ArrayObject(
                [page_ref, NameObject(TypFitArguments.FIT_H), NumberObject(826)]
            ),
            NameObject(GoToActionArguments.S): NameObject("/GoTo"),
        }
    )

    dest_ref = self._add_object(dest)
    if not isinstance(title, TextStringObject):
        title = TextStringObject(str(title))

    # Insert through the ordering helper instead of appending at the end,
    # so the names array stays sorted by title.
    self.add_named_destination_array(title, dest_ref)
    return dest_ref
def addNamedDestination(
    self, title: str, pagenum: int
) -> IndirectObject:  # pragma: no cover
    """
    Deprecated camelCase name of :meth:`add_named_destination`.

    .. deprecated:: 1.28.0

        Use :meth:`add_named_destination` instead.
    """
    # Report the deprecation first, then delegate unchanged.
    deprecation_with_replacement(
        "addNamedDestination", "add_named_destination", "3.0.0"
    )
    return self.add_named_destination(title, pagenum)
def remove_links(self) -> None:
    """
    Remove links and annotations from this output.

    The annotations entry is deleted from every page listed directly in the
    kids array of the pages root.
    """
    pages_root = cast(DictionaryObject, self.get_object(self._pages))
    for kid in cast(ArrayObject, pages_root[PA.KIDS]):
        page_dict = cast(DictionaryObject, self.get_object(kid))
        if PG.ANNOTS in page_dict:
            del page_dict[PG.ANNOTS]
def removeLinks(self) -> None:  # pragma: no cover
    """
    Deprecated camelCase name of :meth:`remove_links`.

    .. deprecated:: 1.28.0

        Use :meth:`remove_links` instead.
    """
    # Report the deprecation first, then delegate unchanged.
    deprecation_with_replacement("removeLinks", "remove_links", "3.0.0")
    return self.remove_links()
def remove_images(self, ignore_byte_string_object: bool = False) -> None:
    """
    Remove images from this output.

    For every page listed directly under the pages root, the content stream
    is rebuilt without ``re`` operations and without the operators listed
    in ``jump_operators`` while a ``q`` is open (until the next ``Q``).

    :param bool ignore_byte_string_object: optional parameter
        to ignore ByteString Objects. When set, text operands that are not
        ``TextStringObject`` instances are replaced by an empty
        ``TextStringObject``.
    """
    pg_dict = cast(DictionaryObject, self.get_object(self._pages))
    pages = cast(ArrayObject, pg_dict[PA.KIDS])
    jump_operators = (
        b"cm",
        b"w",
        b"J",
        b"j",
        b"M",
        b"d",
        b"ri",
        b"i",
        b"gs",
        b"W",
        b"b",
        b"s",
        b"S",
        b"f",
        b"F",
        b"n",
        b"m",
        b"l",
        b"c",
        b"v",
        b"y",
        b"h",
        b"B",
        b"Do",
        b"sh",
    )
    for page in pages:
        page_ref = cast(DictionaryObject, self.get_object(page))
        content = page_ref["/Contents"].get_object()
        if not isinstance(content, ContentStream):
            content = ContentStream(content, page_ref)

        kept_operations = []
        # Plain flag, not a depth counter: any ``Q`` clears it.
        inside_q = False
        for operands, operator in content.operations:
            if operator in (b"Tj", b"'"):
                text = operands[0]
                if ignore_byte_string_object and not isinstance(
                    text, TextStringObject
                ):
                    operands[0] = TextStringObject()
            elif operator == b'"':
                text = operands[2]
                if ignore_byte_string_object and not isinstance(
                    text, TextStringObject
                ):
                    operands[2] = TextStringObject()
            elif operator == b"TJ":
                pieces = operands[0]
                for index, piece in enumerate(pieces):
                    if ignore_byte_string_object and not isinstance(
                        piece, TextStringObject
                    ):
                        pieces[index] = TextStringObject()

            if operator == b"q":
                inside_q = True
            elif operator == b"Q":
                inside_q = False

            dropped = operator == b"re" or (
                inside_q and operator in jump_operators
            )
            if not dropped:
                kept_operations.append((operands, operator))

        content.operations = kept_operations
        page_ref[NameObject("/Contents")] = content
def removeImages(
    self, ignoreByteStringObject: bool = False
) -> None:  # pragma: no cover
    """
    Deprecated camelCase name of :meth:`remove_images`.

    .. deprecated:: 1.28.0

        Use :meth:`remove_images` instead.
    """
    # Report the deprecation first, then delegate unchanged.
    deprecation_with_replacement("removeImages", "remove_images", "3.0.0")
    return self.remove_images(ignoreByteStringObject)
def remove_text(self, ignore_byte_string_object: bool = False) -> None:
    """
    Remove text from this output.

    For every page listed directly under the pages root, the string
    operands of the ``Tj``, ``'``, ``"`` and ``TJ`` operators are replaced
    by empty ``TextStringObject`` instances.

    :param bool ignore_byte_string_object: optional parameter
        to ignore ByteString Objects. When set, ``ByteStringObject``
        operands are blanked as well; otherwise only ``TextStringObject``
        operands are.
    """
    pg_dict = cast(DictionaryObject, self.get_object(self._pages))
    pages = cast(List[IndirectObject], pg_dict[PA.KIDS])

    # Operand types that get blanked out.
    blanked_types = (
        (TextStringObject, ByteStringObject)
        if ignore_byte_string_object
        else (TextStringObject,)
    )

    for page in pages:
        page_ref = cast(PageObject, self.get_object(page))
        content = page_ref["/Contents"].get_object()
        if not isinstance(content, ContentStream):
            content = ContentStream(content, page_ref)

        for operands, operator in content.operations:
            if operator in (b"Tj", b"'"):
                if isinstance(operands[0], blanked_types):
                    operands[0] = TextStringObject()
            elif operator == b'"':
                if isinstance(operands[2], blanked_types):
                    operands[2] = TextStringObject()
            elif operator == b"TJ":
                pieces = operands[0]
                for index, piece in enumerate(pieces):
                    if isinstance(piece, blanked_types):
                        pieces[index] = TextStringObject()

        page_ref[NameObject("/Contents")] = content
def removeText(
    self, ignoreByteStringObject: bool = False
) -> None:  # pragma: no cover
    """
    Deprecated camelCase name of :meth:`remove_text`.

    .. deprecated:: 1.28.0

        Use :meth:`remove_text` instead.
    """
    # Report the deprecation first, then delegate unchanged.
    deprecation_with_replacement("removeText", "remove_text", "3.0.0")
    return self.remove_text(ignoreByteStringObject)
def add_uri(
    self,
    page_number: int,
    uri: str,
    rect: RectangleObject,
    border: Optional[ArrayObject] = None,
    pagenum: Optional[int] = None,
) -> None:
    """
    Add an URI from a rectangular area to the specified page.
    This uses the basic structure of :meth:`add_link`

    :param int page_number: index of the page on which to place the URI action.
    :param str uri: URI of resource to link to.
    :param Tuple[int, int, int, int] rect: :class:`RectangleObject<PyPDF2.generic.RectangleObject>` or array of four
        integers specifying the clickable rectangular area
        ``[xLL, yLL, xUR, yUR]``, or string in the form ``"[ xLL yLL xUR yUR ]"``.
    :param ArrayObject border: if provided, an array describing border-drawing
        properties. See the PDF spec for details. No border will be
        drawn if this argument is omitted.
    :param int pagenum: Deprecated spelling of ``page_number``; when given
        it overrides ``page_number``.
    """
    if pagenum is not None:
        warnings.warn(
            "The 'pagenum' argument of add_uri is deprecated and will be "
            "removed in PyPDF2 4.0.0. Use 'page_number' instead.",
            category=DeprecationWarning,
        )
        page_number = pagenum
    page_link = self.get_object(self._pages)[PA.KIDS][page_number]  # type: ignore
    page_ref = cast(Dict[str, Any], self.get_object(page_link))

    border_arr: BorderArrayType
    if border is not None:
        border_arr = [NameObject(n) for n in border[:3]]
        if len(border) == 4:
            # A fourth element is treated as the dash pattern array.
            dash_pattern = ArrayObject([NameObject(n) for n in border[3]])
            border_arr.append(dash_pattern)
    else:
        border_arr = [NumberObject(2)] * 3

    if isinstance(rect, str):
        rect = NameObject(rect)
    elif isinstance(rect, RectangleObject):
        pass
    else:
        rect = RectangleObject(rect)

    lnk2 = DictionaryObject()
    lnk2.update(
        {
            NameObject("/S"): NameObject("/URI"),
            NameObject("/URI"): TextStringObject(uri),
        }
    )
    lnk = DictionaryObject()
    lnk.update(
        {
            # The /Type of an annotation dictionary is /Annot. PG.ANNOTS,
            # used here before, is the key of the page's annotation array
            # (see its use below), not a type name.
            NameObject(AnnotationDictionaryAttributes.Type): NameObject("/Annot"),
            NameObject(AnnotationDictionaryAttributes.Subtype): NameObject("/Link"),
            NameObject(AnnotationDictionaryAttributes.P): page_link,
            NameObject(AnnotationDictionaryAttributes.Rect): rect,
            NameObject("/H"): NameObject("/I"),
            NameObject(AnnotationDictionaryAttributes.Border): ArrayObject(
                border_arr
            ),
            NameObject("/A"): lnk2,
        }
    )
    lnk_ref = self._add_object(lnk)

    # Append to the page's annotation array, creating it when missing.
    if PG.ANNOTS in page_ref:
        page_ref[PG.ANNOTS].append(lnk_ref)
    else:
        page_ref[NameObject(PG.ANNOTS)] = ArrayObject([lnk_ref])
def addURI(
    self,
    pagenum: int,  # deprecated, but method is deprecated already
    uri: str,
    rect: RectangleObject,
    border: Optional[ArrayObject] = None,
) -> None:  # pragma: no cover
    """
    Deprecated camelCase name of :meth:`add_uri`.

    .. deprecated:: 1.28.0

        Use :meth:`add_uri` instead.
    """
    # Report the deprecation first, then delegate unchanged.
    deprecation_with_replacement("addURI", "add_uri", "3.0.0")
    return self.add_uri(pagenum, uri, rect, border)
def add_link(
    self,
    pagenum: int,  # deprecated, but method is deprecated already
    page_destination: int,
    rect: RectangleObject,
    border: Optional[ArrayObject] = None,
    fit: FitType = "/Fit",
    *args: ZoomArgType,
) -> None:
    """
    Add an internal link annotation to page ``pagenum``.

    The deprecation is reported first; the link itself is built with
    ``AnnotationBuilder.link`` and attached through :meth:`add_annotation`.

    :param int pagenum: index of the page that receives the annotation.
    :param int page_destination: index of the page the link points to.
    :param rect: a ``RectangleObject``, a string holding space-separated
        numbers wrapped in one leading and one trailing character, or any
        value accepted by the ``RectangleObject`` constructor.
    :param border: forwarded unchanged to ``AnnotationBuilder.link``.
    :param str fit: fit type handed to ``Fit``.
    :param args: extra positional values handed to ``Fit`` as ``fit_args``.
    """
    deprecation_with_replacement(
        "add_link", "add_annotation(AnnotationBuilder.link(...))"
    )

    if isinstance(rect, str):
        # Drop the first and last character (presumably the enclosing
        # brackets) and parse what is left as space-separated floats.
        inner = rect.strip()[1:-1]
        coords = [float(token) for token in inner.split(" ") if len(token) > 0]
        rect = RectangleObject(coords)
    elif not isinstance(rect, RectangleObject):
        rect = RectangleObject(rect)

    link_fit = Fit(fit_type=fit, fit_args=args)
    annotation = AnnotationBuilder.link(
        rect=rect,
        border=border,
        target_page_index=page_destination,
        fit=link_fit,
    )
    return self.add_annotation(page_number=pagenum, annotation=annotation)
def addLink(
    self,
    pagenum: int,  # deprecated, but method is deprecated already
    page_destination: int,
    rect: RectangleObject,
    border: Optional[ArrayObject] = None,
    fit: FitType = "/Fit",
    *args: ZoomArgType,
) -> None:  # pragma: no cover
    """
    Deprecated camelCase spelling of :meth:`add_link`.

    Reports the deprecation through ``deprecate_with_replacement`` and then
    forwards every argument, including the extra ``args``, to
    :meth:`add_link`.

    .. deprecated:: 1.28.0

        Use :meth:`add_link` instead.
    """
    deprecate_with_replacement(
        "addLink", "add_annotation(AnnotationBuilder.link(...))", "4.0.0"
    )
    positional = (pagenum, page_destination, rect, border, fit)
    return self.add_link(*positional, *args)
# Layout names that _set_page_layout accepts without logging a warning.
_valid_layouts = (
    "/NoLayout", "/SinglePage", "/OneColumn",
    "/TwoColumnLeft", "/TwoColumnRight",
    "/TwoPageLeft", "/TwoPageRight",
)
def _get_page_layout(self) -> Optional[LayoutType]:
    """
    Return the ``/PageLayout`` entry of the root object.

    :return: the stored layout, or ``None`` when the root object has no
        ``/PageLayout`` key.
    """
    try:
        stored = self._root_object["/PageLayout"]
    except KeyError:
        return None
    return cast(LayoutType, stored)
def getPageLayout(self) -> Optional[LayoutType]:  # pragma: no cover
    """
    Deprecated accessor for the page layout.

    Reports the deprecation, then returns whatever
    ``_get_page_layout`` returns.

    .. deprecated:: 1.28.0

        Use :py:attr:`page_layout` instead.
    """
    deprecation_with_replacement("getPageLayout", "page_layout", "3.0.0")
    current_layout = self._get_page_layout()
    return current_layout
def _set_page_layout(self, layout: Union[NameObject, LayoutType]) -> None:
    """
    Set the page layout.

    A value that is not already a ``NameObject`` is wrapped in one before
    it is stored under ``/PageLayout`` in the root object. If such a value
    is not listed in ``_valid_layouts`` a warning is logged, but the value
    is stored anyway.

    :param str layout: The page layout to be used.

    .. list-table:: Valid ``layout`` arguments
       :widths: 50 200

       * - /NoLayout
         - Layout explicitly not specified
       * - /SinglePage
         - Show one page at a time
       * - /OneColumn
         - Show one column at a time
       * - /TwoColumnLeft
         - Show pages in two columns, odd-numbered pages on the left
       * - /TwoColumnRight
         - Show pages in two columns, odd-numbered pages on the right
       * - /TwoPageLeft
         - Show two pages at a time, odd-numbered pages on the left
       * - /TwoPageRight
         - Show two pages at a time, odd-numbered pages on the right
    """
    if not isinstance(layout, NameObject):
        if layout not in self._valid_layouts:
            # Join the names with ", "; the previous expression built a
            # two-element tuple and rendered its repr into the message.
            logger_warning(
                f"Layout should be one of: {', '.join(self._valid_layouts)}",
                __name__,
            )
        layout = NameObject(layout)
    self._root_object.update({NameObject("/PageLayout"): layout})
def set_page_layout(self, layout: LayoutType) -> None:
    """
    Set the page layout by delegating to ``_set_page_layout``.

    :param str layout: The page layout to be used

    .. list-table:: Valid ``layout`` arguments
       :widths: 50 200

       * - /NoLayout
         - Layout explicitly not specified
       * - /SinglePage
         - Show one page at a time
       * - /OneColumn
         - Show one column at a time
       * - /TwoColumnLeft
         - Show pages in two columns, odd-numbered pages on the left
       * - /TwoColumnRight
         - Show pages in two columns, odd-numbered pages on the right
       * - /TwoPageLeft
         - Show two pages at a time, odd-numbered pages on the left
       * - /TwoPageRight
         - Show two pages at a time, odd-numbered pages on the right
    """
    # All validation and storage lives in the private helper.
    self._set_page_layout(layout)
    return None
def setPageLayout(self, layout: LayoutType) -> None:  # pragma: no cover
    """
    Deprecated setter for the page layout.

    Reports the deprecation, then passes ``layout`` to
    ``_set_page_layout`` and returns its result.

    .. deprecated:: 1.28.0

        Use :py:attr:`page_layout` instead.
    """
    old_usage = "writer.setPageLayout(val)"
    new_usage = "writer.page_layout = val"
    deprecation_with_replacement(old_usage, new_usage, "3.0.0")
    return self._set_page_layout(layout)
@property
def page_layout(self) -> Optional[LayoutType]:
    """
    Page layout property.

    Reading returns the result of ``_get_page_layout``; assigning passes
    the new value to ``_set_page_layout``.

    .. list-table:: Valid ``layout`` values
       :widths: 50 200

       * - /NoLayout
         - Layout explicitly not specified
       * - /SinglePage
         - Show one page at a time
       * - /OneColumn
         - Show one column at a time
       * - /TwoColumnLeft
         - Show pages in two columns, odd-numbered pages on the left
       * - /TwoColumnRight
         - Show pages in two columns, odd-numbered pages on the right
       * - /TwoPageLeft
         - Show two pages at a time, odd-numbered pages on the left
       * - /TwoPageRight
         - Show two pages at a time, odd-numbered pages on the right
    """
    return self._get_page_layout()


# Without the property/setter decorators the second definition simply
# replaced the first, so attribute assignment never reached the setter.
@page_layout.setter
def page_layout(self, layout: LayoutType) -> None:
    self._set_page_layout(layout)
@property
def pageLayout(self) -> Optional[LayoutType]:  # pragma: no cover
    """
    Deprecated camelCase spelling of :py:attr:`page_layout`.

    Reading reports the deprecation and returns ``self.page_layout``;
    assigning reports the deprecation and assigns to ``self.page_layout``.

    .. deprecated:: 1.28.0

        Use :py:attr:`page_layout` instead.
    """
    deprecation_with_replacement("pageLayout", "page_layout", "3.0.0")
    return self.page_layout


# The getter/setter pair must be registered as a property; as two plain
# same-named functions the second one silently replaced the first.
@pageLayout.setter
def pageLayout(self, layout: LayoutType) -> None:  # pragma: no cover
    """
    .. deprecated:: 1.28.0

        Use :py:attr:`page_layout` instead.
    """
    deprecation_with_replacement("pageLayout", "page_layout", "3.0.0")
    self.page_layout = layout
# Mode names that set_page_mode accepts without logging a warning.
_valid_modes = (
    "/UseNone", "/UseOutlines", "/UseThumbs",
    "/FullScreen", "/UseOC", "/UseAttachments",
)
def _get_page_mode(self) -> Optional[PagemodeType]:
    """
    Return the ``/PageMode`` entry of the root object.

    :return: the stored mode, or ``None`` when the root object has no
        ``/PageMode`` key.
    """
    try:
        stored = self._root_object["/PageMode"]
    except KeyError:
        return None
    return cast(PagemodeType, stored)
def getPageMode(self) -> Optional[PagemodeType]:  # pragma: no cover
    """
    Deprecated accessor for the page mode.

    Reports the deprecation, then returns whatever ``_get_page_mode``
    returns.

    .. deprecated:: 1.28.0

        Use :py:attr:`page_mode` instead.
    """
    deprecation_with_replacement("getPageMode", "page_mode", "3.0.0")
    current_mode = self._get_page_mode()
    return current_mode
def set_page_mode(self, mode: PagemodeType) -> None:
    """
    Store ``mode`` under ``/PageMode`` in the root object.

    A value that is not already a ``NameObject`` is wrapped in one. If
    such a value is not listed in ``_valid_modes`` a warning is logged
    first; the value is stored regardless.

    .. deprecated:: 1.28.0

        Use :py:attr:`page_mode` instead.
    """
    if not isinstance(mode, NameObject):
        if mode not in self._valid_modes:
            logger_warning(
                f"Mode should be one of: {', '.join(self._valid_modes)}", __name__
            )
        mode = NameObject(mode)
    self._root_object.update({NameObject("/PageMode"): mode})
def setPageMode(self, mode: PagemodeType) -> None:  # pragma: no cover
    """
    Deprecated setter for the page mode.

    Reports the deprecation, then passes ``mode`` to
    :meth:`set_page_mode`.

    .. deprecated:: 1.28.0

        Use :py:attr:`page_mode` instead.
    """
    old_usage = "writer.setPageMode(val)"
    new_usage = "writer.page_mode = val"
    deprecation_with_replacement(old_usage, new_usage, "3.0.0")
    self.set_page_mode(mode)
@property
def page_mode(self) -> Optional[PagemodeType]:
    """
    Page mode property.

    Reading returns the result of ``_get_page_mode``; assigning passes the
    new value to :meth:`set_page_mode`.

    .. list-table:: Valid ``mode`` values
       :widths: 50 200

       * - /UseNone
         - Do not show outline or thumbnails panels
       * - /UseOutlines
         - Show outline (aka bookmarks) panel
       * - /UseThumbs
         - Show page thumbnails panel
       * - /FullScreen
         - Fullscreen view
       * - /UseOC
         - Show Optional Content Group (OCG) panel
       * - /UseAttachments
         - Show attachments panel
    """
    return self._get_page_mode()


# Without the property/setter decorators the second definition simply
# replaced the first, so attribute assignment never reached the setter.
@page_mode.setter
def page_mode(self, mode: PagemodeType) -> None:
    self.set_page_mode(mode)
@property
def pageMode(self) -> Optional[PagemodeType]:  # pragma: no cover
    """
    Deprecated camelCase spelling of :py:attr:`page_mode`.

    Reading reports the deprecation and returns ``self.page_mode``;
    assigning reports the deprecation and assigns to ``self.page_mode``.

    .. deprecated:: 1.28.0

        Use :py:attr:`page_mode` instead.
    """
    deprecation_with_replacement("pageMode", "page_mode", "3.0.0")
    return self.page_mode


# The getter/setter pair must be registered as a property; as two plain
# same-named functions the second one silently replaced the first.
@pageMode.setter
def pageMode(self, mode: PagemodeType) -> None:  # pragma: no cover
    """
    .. deprecated:: 1.28.0

        Use :py:attr:`page_mode` instead.
    """
    deprecation_with_replacement("pageMode", "page_mode", "3.0.0")
    self.page_mode = mode
def add_annotation(self, page_number: int, annotation: Dict[str, Any]) -> None:
    """
    Attach ``annotation`` to the page at index ``page_number``.

    The annotation dictionary is converted with ``_pdf_objectify``, given a
    ``/P`` entry taken from the page tree's ``/Kids`` array, registered
    with ``_add_object`` and appended to the page's annotations. The page
    receives an empty ``/Annots`` array first if it has no annotations.

    :param int page_number: index of the target page.
    :param annotation: plain dictionary describing the annotation.
    """
    to_add = cast(DictionaryObject, _pdf_objectify(annotation))
    kids = self.get_object(self._pages)["/Kids"]  # type: ignore
    to_add[NameObject("/P")] = kids[page_number]

    page = self.pages[page_number]
    if page.annotations is None:
        page[NameObject("/Annots")] = ArrayObject()
    assert page.annotations is not None

    # Internal link annotations need the correct object type for the
    # destination
    dest_key = NameObject("/Dest")
    if to_add.get("/Subtype") == "/Link" and dest_key in to_add:
        raw_dest = cast(dict, to_add[dest_key])
        target_index = raw_dest["target_page_index"]
        # I have no clue why this dict-hack is necessary
        link_fit = Fit(
            fit_type=raw_dest["fit"], fit_args=dict(raw_dest)["fit_args"]
        )
        destination = Destination(NameObject("/LinkName"), target_index, link_fit)
        to_add[dest_key] = destination.dest_array

    page.annotations.append(self._add_object(to_add))
def clean_page(self, page: Union[PageObject, IndirectObject]) -> PageObject:
    """
    Perform some clean up in the page.

    Currently: for every entry of the page's ``/Annots``, a ``/Dest`` that
    is a ``NameObject`` is replaced by a ``TextStringObject``; otherwise,
    if the annotation has an ``/A`` action whose ``/D`` is a
    ``NameObject``, that ``/D`` is replaced the same way (required for
    names/dests list).

    :param page: the page, or a reference resolving to it.
    :return: the resolved page object.
    """
    page = cast("PageObject", page.get_object())
    for annot_ref in page.get("/Annots", []):
        annot = annot_ref.get_object()
        dest = annot.get("/Dest", None)
        action = annot.get("/A", None)

        if isinstance(dest, NameObject):
            annot[NameObject("/Dest")] = TextStringObject(dest)
            continue
        if action is None:
            continue

        action = action.get_object()
        action_dest = action.get("/D", None)
        if isinstance(action_dest, NameObject):
            action[NameObject("/D")] = TextStringObject(action_dest)
    return page
def _create_stream(
    self, fileobj: Union[Path, StrByteType, PdfReader]
) -> Tuple[IOBase, Optional[Encryption]]:
    """
    Copy the content behind ``fileobj`` into an in-memory ``BytesIO``.

    * ``str`` / ``Path``: treated as a file path that is opened and read.
    * ``PdfReader``: its ``stream`` is read from the start and then put
      back at the position it had before; the reader's ``_encryption`` is
      returned alongside when it is truthy.
    * any object with ``seek`` and ``read``: rewound and read.

    :return: ``(stream, encryption_obj)``; ``encryption_obj`` is ``None``
        unless it was taken from a ``PdfReader``.
    :raises NotImplementedError: for any other kind of ``fileobj``.
    """
    if isinstance(fileobj, (str, Path)):
        with FileIO(fileobj, "rb") as handle:
            return BytesIO(handle.read()), None

    if isinstance(fileobj, PdfReader):
        encryption_obj = fileobj._encryption if fileobj._encryption else None
        saved_position = fileobj.stream.tell()
        fileobj.stream.seek(0)
        copied = BytesIO(fileobj.stream.read())
        # reset the stream to its original location
        fileobj.stream.seek(saved_position)
        return copied, encryption_obj

    if hasattr(fileobj, "seek") and hasattr(fileobj, "read"):
        fileobj.seek(0)
        return BytesIO(fileobj.read()), None

    raise NotImplementedError(
        "PdfMerger.merge requires an object that PdfReader can parse. "
        "Typically, that is a Path or a string representing a Path, "
        "a file object, or an object implementing .seek and .read. "
        "Passing a PdfReader directly works as well."
    )
def append(
    self,
    fileobj: Union[StrByteType, PdfReader, Path],
    outline_item: Union[
        str, None, PageRange, Tuple[int, int], Tuple[int, int, int], List[int]
    ] = None,
    pages: Union[
        None, PageRange, Tuple[int, int], Tuple[int, int, int], List[int]
    ] = None,
    import_outline: bool = True,
    excluded_fields: Optional[Union[List[str], Tuple[str, ...]]] = None,
) -> None:
    """
    Identical to the :meth:`merge()<merge>` method, but assumes you want to
    concatenate all pages onto the end of the file instead of specifying a
    position.

    When ``outline_item`` is a tuple, list or ``PageRange`` it is taken as
    the page selection: no outline title is used, and a boolean ``pages``
    is then taken as ``import_outline`` (with a non-boolean
    ``import_outline`` taken as ``excluded_fields``).

    :param fileobj: A File Object or an object that supports the standard
        read and seek methods similar to a File Object. Could also be a
        string representing a path to a PDF file.
    :param str outline_item: Optionally, you may specify a string to build
        an outline (aka 'bookmark') to identify the beginning of the
        included file.
    :param pages: can be a :class:`PageRange<PyPDF2.pagerange.PageRange>`
        or a ``(start, stop[, step])`` tuple
        or a list of pages to be processed
        to merge only the specified range of pages from the source
        document into the output document.
    :param bool import_outline: You may prevent the source document's
        outline (collection of outline items, previously referred to as
        'bookmarks') from being imported by specifying this as ``False``.
    :param List excluded_fields: provide the list of fields/keys to be
        ignored;
        if "/Annots" is part of the list, the annotation will be ignored;
        if "/B" is part of the list, the articles will be ignored
    """
    if excluded_fields is None:
        excluded_fields = ()

    title = outline_item
    if isinstance(outline_item, (tuple, list, PageRange)):
        # The page selection arrived in the outline_item slot: shift the
        # following arguments one place to the left.
        if isinstance(pages, bool):
            if not isinstance(import_outline, bool):
                excluded_fields = import_outline
            import_outline = pages
        pages = outline_item
        title = None

    self.merge(None, fileobj, title, pages, import_outline, excluded_fields)
def merge(
    self,
    position: Optional[int],
    fileobj: Union[Path, StrByteType, PdfReader],
    outline_item: Optional[str] = None,
    pages: Optional[PageRangeSpec] = None,
    import_outline: bool = True,
    excluded_fields: Optional[Union[List[str], Tuple[str, ...]]] = (),
) -> None:
    """
    Merge the pages from the given file into the output file at the
    specified page number.

    After the pages are copied, the source's named destinations that point
    at a copied page are added, an optional outline item is created, the
    source outline is imported (unless disabled), and link annotations and
    article threads are re-created for the copied pages unless excluded.

    :param int position: The *page number* to insert this file. The value
        is passed to ``insert_page`` for the first merged page and
        incremented for each following page; ``None`` adds the pages with
        ``add_page`` instead.
    :param fileobj: A File Object or an object that supports the standard
        read and seek methods similar to a File Object. Could also be a
        string representing a path to a PDF file, or a ``PdfReader``
        (which is used directly).
    :param str outline_item: Optionally, you may specify a string to build
        an outline (aka 'bookmark') to identify the beginning of the
        included file.
    :param pages: can be a :class:`PageRange<PyPDF2.pagerange.PageRange>`
        or a ``(start, stop[, step])`` tuple
        or a list of pages to be processed
        to merge only the specified range of pages from the source
        document into the output document.
    :param bool import_outline: You may prevent the source document's
        outline (collection of outline items, previously referred to as
        'bookmarks') from being imported by specifying this as ``False``.
    :param List excluded_fields: provide the list of fields/keys to be
        ignored;
        if "/Annots" is part of the list, the annotation will be ignored;
        if "/B" is part of the list, the articles will be ignored
    :raises TypeError: if ``pages`` is not ``None``, a ``PageRange``, a
        list or a tuple.
    """
    if isinstance(fileobj, PdfReader):
        reader = fileobj
    else:
        # NOTE(review): encryption_obj is unpacked but not used further in
        # this method.
        stream, encryption_obj = self._create_stream(fileobj)
        # Create a new PdfReader instance using the stream
        # (either file or BytesIO or StringIO) created above
        reader = PdfReader(stream, strict=False)  # type: ignore[arg-type]

    if excluded_fields is None:
        excluded_fields = ()
    # Find the range of pages to merge.
    if pages is None:
        pages = list(range(0, len(reader.pages)))
    elif isinstance(pages, PageRange):
        pages = list(range(*pages.indices(len(reader.pages))))
    elif isinstance(pages, list):
        pass  # keep unchanged
    elif isinstance(pages, tuple) and len(pages) <= 3:
        pages = list(range(*pages))
    elif not isinstance(pages, tuple):
        # NOTE(review): a tuple with more than three elements reaches
        # neither this branch nor the one above and is iterated as-is.
        raise TypeError(
            '"pages" must be a tuple of (start, stop[, step]) or a list'
        )

    # Maps the idnum of each source page to the page object added to this
    # writer.
    srcpages = {}
    for i in pages:
        pg = reader.pages[i]
        assert pg.indirect_reference is not None
        # "/B" and "/Annots" are always excluded while copying the page;
        # they are re-created further down against the copied pages.
        if position is None:
            srcpages[pg.indirect_reference.idnum] = self.add_page(
                pg, list(excluded_fields) + ["/B", "/Annots"]  # type: ignore
            )
        else:
            srcpages[pg.indirect_reference.idnum] = self.insert_page(
                pg, position, list(excluded_fields) + ["/B", "/Annots"]  # type: ignore
            )
            position += 1
        # Keep a handle on the source page for the annotation and article
        # passes below.
        srcpages[pg.indirect_reference.idnum].original_page = pg

    reader._namedDests = (
        reader.named_destinations
    )  # need for the outline processing below
    for dest in reader._namedDests.values():
        arr = dest.dest_array
        # try:
        if isinstance(dest["/Page"], NullObject):
            pass  # self.add_named_destination_array(dest["/Title"],arr)
        elif dest["/Page"].indirect_reference.idnum in srcpages:
            # Re-point the destination at the copied page before adding it.
            arr[NumberObject(0)] = srcpages[
                dest["/Page"].indirect_reference.idnum
            ].indirect_reference
            self.add_named_destination_array(dest["/Title"], arr)
        # except Exception as e:
        #    logger_warning(f"can not insert {dest} : {e.msg}",__name__)

    # Parent under which the imported outline is inserted: a new outline
    # item targeting the first copied page, or the outline root.
    outline_item_typ: TreeObject
    if outline_item is not None:
        # NOTE(review): list(srcpages.values())[0] raises IndexError when
        # no page was selected.
        outline_item_typ = cast(
            "TreeObject",
            self.add_outline_item(
                TextStringObject(outline_item),
                list(srcpages.values())[0].indirect_reference,
                fit=PAGE_FIT,
            ).get_object(),
        )
    else:
        outline_item_typ = self.get_outline_root()

    _ro = cast("DictionaryObject", reader.trailer[TK.ROOT])
    if import_outline and CO.OUTLINES in _ro:
        outline = self._get_filtered_outline(
            _ro.get(CO.OUTLINES, None), srcpages, reader
        )
        self._insert_filtered_outline(
            outline, outline_item_typ, None
        )  # TODO : use before parameter

    if "/Annots" not in excluded_fields:
        for pag in srcpages.values():
            lst = self._insert_filtered_annotations(
                pag.original_page.get("/Annots", ()), pag, srcpages, reader
            )
            if len(lst) > 0:
                pag[NameObject("/Annots")] = lst
            self.clean_page(pag)

    if "/B" not in excluded_fields:
        # An empty filter string is compiled to a pattern by
        # add_filtered_articles.
        self.add_filtered_articles("", srcpages, reader)

    return
def _add_articles_thread(
    self,
    thread: DictionaryObject,  # thread entry from the reader's array of threads
    pages: Dict[int, PageObject],
    reader: PdfReader,
) -> IndirectObject:
    """
    Clone the thread with only the applicable articles.

    The thread dictionary is cloned without its ``/F`` entry and appended
    to ``self.threads``. The source articles are then walked through their
    ``/N`` links, starting at ``thread["/F"]``, until the walk returns to
    the first article. For every article whose ``/P`` page resolves via
    ``_get_cloned_page``, a new article dictionary is created, linked to
    the previously created one (``/V`` / ``/N``), given ``/P``, ``/T`` and
    ``/R``, and appended to the page's ``/B`` array.

    :param thread: thread dictionary taken from the reader.
    :param pages: mapping handed on to ``_get_cloned_page``.
    :param reader: the reader the thread comes from.
    :return: indirect reference of the cloned thread.
    """
    nthread = thread.clone(
        self, force_duplicate=True, ignore_fields=("/F",)
    )  # use of clone to keep link between reader and writer
    self.threads.append(nthread.indirect_reference)
    first_article = cast("DictionaryObject", thread["/F"])
    current_article: Optional[DictionaryObject] = first_article
    new_article: Optional[DictionaryObject] = None
    while current_article is not None:
        pag = self._get_cloned_page(
            cast("PageObject", current_article["/P"]), pages, reader
        )
        if pag is not None:
            if new_article is None:
                # First kept article: it becomes the thread's /F entry.
                new_article = cast(
                    "DictionaryObject",
                    self._add_object(DictionaryObject()).get_object(),
                )
                new_first = new_article
                nthread[NameObject("/F")] = new_article.indirect_reference
            else:
                # Later kept article: /V points back at the previous one,
                # whose /N is set to point at this one.
                new_article2 = cast(
                    "DictionaryObject",
                    self._add_object(
                        DictionaryObject(
                            {NameObject("/V"): new_article.indirect_reference}
                        )
                    ).get_object(),
                )
                new_article[NameObject("/N")] = new_article2.indirect_reference
                new_article = new_article2
            new_article[NameObject("/P")] = pag
            new_article[NameObject("/T")] = nthread.indirect_reference
            new_article[NameObject("/R")] = current_article["/R"]
            pag_obj = cast("PageObject", pag.get_object())
            if "/B" not in pag_obj:
                pag_obj[NameObject("/B")] = ArrayObject()
            cast("ArrayObject", pag_obj["/B"]).append(
                new_article.indirect_reference
            )
        current_article = cast("DictionaryObject", current_article["/N"])
        if current_article == first_article:
            # Back at the start: close the ring between the last and the
            # first new article and stop the walk.
            # NOTE(review): new_first is only bound once an article was
            # kept; if none was, this line fails - confirm callers
            # guarantee at least one match.
            new_article[NameObject("/N")] = new_first.indirect_reference  # type: ignore
            new_first[NameObject("/V")] = new_article.indirect_reference  # type: ignore
            current_article = None
    assert nthread.indirect_reference is not None
    return nthread.indirect_reference
def add_filtered_articles(
    self,
    fltr: Union[Pattern, str],  # thread entry from the reader's array of threads
    pages: Dict[int, PageObject],
    reader: PdfReader,
) -> None:
    """
    Add articles matching the defined criteria.

    For every page in ``pages``, the ``/B`` entries of its
    ``original_page`` are inspected. The thread (``/T``) of each entry is
    added through ``_add_articles_thread`` when its ``idnum`` is not yet in
    ``self._id_translated[id(reader)]`` and ``fltr`` matches the thread's
    ``/I`` ``/Title``.

    :param fltr: a compiled pattern, or a string that gets compiled; any
        other value is replaced by the empty pattern.
    :param pages: mapping whose values carry an ``original_page``.
    :param reader: the reader the pages come from.
    """
    if isinstance(fltr, str):
        pattern = re.compile(fltr)
    elif isinstance(fltr, Pattern):
        pattern = fltr
    else:
        pattern = re.compile("")

    for cloned_page in pages.values():
        source_page = cloned_page.original_page
        for article in source_page.get("/B", ()):
            thread = article.get_object()["/T"]
            already_done = (
                thread.indirect_reference.idnum in self._id_translated[id(reader)]
            )
            if already_done:
                continue
            if pattern.search(thread["/I"]["/Title"]):
                self._add_articles_thread(thread, pages, reader)
def _get_cloned_page(
    self,
    page: Union[None, int, IndirectObject, PageObject, NullObject],
    pages: Dict[int, PageObject],
    reader: PdfReader,
) -> Optional[IndirectObject]:
    """
    Look up the entry of ``pages`` that corresponds to a source page.

    ``page`` may be a page index into ``reader.pages``, a dictionary object
    whose ``/Type`` is ``/Page``, or an indirect reference. The ``idnum``
    of the resulting source reference is used as the key into ``pages``.

    :return: the ``indirect_reference`` of the matching entry, or ``None``
        when ``page`` is a ``NullObject``, is of no supported kind, or the
        lookup fails for any reason.
    """
    if isinstance(page, NullObject):
        return None

    source_ref = None
    if isinstance(page, int):
        source_ref = reader.pages[page].indirect_reference
    elif isinstance(page, DictionaryObject) and page.get("/Type", "") == "/Page":
        source_ref = page.indirect_reference
    elif isinstance(page, IndirectObject):
        source_ref = page

    # Best effort: an unsupported input or an unknown page yields None.
    try:
        return pages[source_ref.idnum].indirect_reference  # type: ignore
    except Exception:
        return None
def _insert_filtered_annotations(
    self,
    annots: Union[IndirectObject, List[DictionaryObject]],
    page: PageObject,
    pages: Dict[int, PageObject],
    reader: PdfReader,
) -> List[Destination]:
    """
    Clone the annotations of a source page that remain valid here.

    Annotations without a destination are cloned unchanged. Annotations
    with a destination (``/Dest``, or ``/D`` of a ``/GoTo`` action) are
    kept only if the destination is a name found in
    ``get_named_dest_root()`` or an array whose first element resolves via
    ``_get_cloned_page``; in the array case the first element is replaced
    by the resolved page reference.

    :param annots: the source annotations, or a reference to them.
    :param page: not used in the body of this method.
    :param pages: mapping handed on to ``_get_cloned_page``.
    :param reader: the reader the annotations come from.
    :return: an ``ArrayObject`` holding the ``indirect_reference`` of each
        cloned annotation (NOTE(review): the declared return type says
        ``List[Destination]``).
    """
    outlist = ArrayObject()
    if isinstance(annots, IndirectObject):
        annots = cast("List", annots.get_object())
    for an in annots:
        ano = cast("DictionaryObject", an.get_object())
        # First branch: everything except a /Link with a /GoTo action and
        # no /Dest entry.
        if (
            ano["/Subtype"] != "/Link"
            or "/A" not in ano
            or cast("DictionaryObject", ano["/A"])["/S"] != "/GoTo"
            or "/Dest" in ano
        ):
            if "/Dest" not in ano:
                outlist.append(ano.clone(self).indirect_reference)
            else:
                d = ano["/Dest"]
                if isinstance(d, str):
                    # it is a named dest
                    if str(d) in self.get_named_dest_root():
                        outlist.append(ano.clone(self).indirect_reference)
                else:
                    d = cast("ArrayObject", d)
                    p = self._get_cloned_page(d[0], pages, reader)
                    if p is not None:
                        # Clone without /Dest, then set it with the page
                        # reference swapped in.
                        anc = ano.clone(self, ignore_fields=("/Dest",))
                        anc[NameObject("/Dest")] = ArrayObject([p] + d[1:])
                        outlist.append(anc.indirect_reference)
        else:
            # /Link with a /GoTo action: the destination is the action's /D.
            d = cast("DictionaryObject", ano["/A"])["/D"]
            if isinstance(d, str):
                # it is a named dest
                if str(d) in self.get_named_dest_root():
                    outlist.append(ano.clone(self).indirect_reference)
            else:
                d = cast("ArrayObject", d)
                p = self._get_cloned_page(d[0], pages, reader)
                if p is not None:
                    anc = ano.clone(self, ignore_fields=("/D",))
                    anc = cast("DictionaryObject", anc)
                    cast("DictionaryObject", anc["/A"])[
                        NameObject("/D")
                    ] = ArrayObject([p] + d[1:])
                    outlist.append(anc.indirect_reference)
    return outlist
def _get_filtered_outline(
    self,
    node: Any,
    pages: Dict[int, PageObject],
    reader: PdfReader,
) -> List[Destination]:
    """
    Extract outline item entries that are part of the specified page set.

    A node that is an ``/Outlines`` container, or has no ``/Title``, is
    replaced by the result for its ``/First`` child. Otherwise the node and
    its ``/Next`` siblings are converted with
    ``reader._build_outline_item``; each item's ``/Page`` is replaced by
    the matching cloned page (or a ``NullObject``), its children are
    filtered recursively into ``childs``, and the item is kept when it has
    a page or at least one kept child.
    """
    kept = []
    node = node.get_object()

    is_container = node.get("/Type", "") == "/Outlines" or "/Title" not in node
    if is_container:
        node = node.get("/First", None)
        if node is not None:
            node = node.get_object()
            kept += self._get_filtered_outline(node, pages, reader)
        return kept

    cloned_ref: Union[None, IndirectObject, NullObject]
    while node is not None:
        node = node.get_object()
        item = cast("Destination", reader._build_outline_item(node))
        cloned_ref = self._get_cloned_page(
            cast("PageObject", item["/Page"]), pages, reader
        )
        if cloned_ref is None:
            cloned_ref = NullObject()
        item[NameObject("/Page")] = cloned_ref

        if "/First" in node:
            item.childs = self._get_filtered_outline(node["/First"], pages, reader)
        else:
            item.childs = []

        has_page = not isinstance(item["/Page"], NullObject)
        if has_page or len(item.childs) > 0:
            kept.append(item)
        node = node.get("/Next", None)
    return kept
def _clone_outline(self, dest: Destination) -> TreeObject:
    """
    Create a new outline ``TreeObject`` in this writer from ``dest``.

    The new object is registered with ``_add_object`` and receives the
    title of ``dest``. Unless the destination page is a ``NullObject``, it
    receives either a clone of the source node's ``/A`` action or the
    destination array. When a source node is available, its ``/F`` and
    ``/C`` entries are copied, defaulting to ``0`` and three ``0.0``
    values.
    """
    clone = TreeObject()
    self._add_object(clone)
    clone[NameObject("/Title")] = TextStringObject(dest["/Title"])

    has_page = not isinstance(dest["/Page"], NullObject)
    if has_page:
        if dest.node is not None and "/A" in dest.node:
            clone[NameObject("/A")] = dest.node["/A"].clone(self)
        # elif "/D" in dest.node:
        #    clone[NameObject("/Dest")] = dest.node["/D"].clone(self)
        # elif "/Dest" in dest.node:
        #    clone[NameObject("/Dest")] = dest.node["/Dest"].clone(self)
        else:
            clone[NameObject("/Dest")] = dest.dest_array

    # TODO: /SE
    if dest.node is not None:
        clone[NameObject("/F")] = NumberObject(dest.node.get("/F", 0))
        default_color = [FloatObject(0.0), FloatObject(0.0), FloatObject(0.0)]
        clone[NameObject("/C")] = ArrayObject(dest.node.get("/C", default_color))
    return clone
def _insert_filtered_outline(
    self,
    outlines: List[Destination],
    parent: Union[TreeObject, IndirectObject],
    before: Union[None, TreeObject, IndirectObject] = None,
) -> None:
    """
    Insert ``outlines`` and, recursively, their ``childs`` under ``parent``.

    An entry that is an ``/Outlines`` container or has no ``/Title`` is not
    inserted itself; its children are attached to ``parent`` directly.
    Every other entry is cloned with ``_clone_outline`` and inserted into
    ``parent`` ahead of ``before``. Children are always inserted with
    ``before=None``.
    """
    for entry in outlines:
        # TODO : can be improved to keep A and SE entries (ignored for the moment)
        # np=self.add_outline_item_destination(dest,parent,before)
        is_container = entry.get("/Type", "") == "/Outlines" or "/Title" not in entry
        if is_container:
            child_parent = parent
        else:
            child_parent = self._clone_outline(entry)
            parent_tree = cast(TreeObject, parent.get_object())
            parent_tree.insert_child(child_parent, before, self)
        self._insert_filtered_outline(entry.childs, child_parent, None)
def close(self) -> None:
    """
    Do nothing.

    Present only to match the functions from Merger.
    """
    return None
| # @deprecation_bookmark(bookmark="outline_item") | |
def find_outline_item(
    self,
    outline_item: Dict[str, Any],
    root: Optional[OutlineType] = None,
) -> Optional[List[int]]:
    """
    Locate ``outline_item`` in the outline tree.

    The search starts at ``root`` (or at ``get_outline_root()`` when
    ``root`` is ``None``) and follows ``/Next`` siblings, descending into
    ``/First`` children. A node matches when its ``indirect_reference`` or
    its ``/Title`` equals ``outline_item``.

    :return: the list of sibling indices leading to the match (levels
        without a ``/Title`` contribute no index), or ``None`` when nothing
        matches.
    """
    node = self.get_outline_root() if root is None else cast("TreeObject", root)
    index = 0
    while node is not None:
        is_match = (
            node.indirect_reference == outline_item
            or node.get("/Title", None) == outline_item
        )
        if is_match:
            return [index]

        if "/First" in node:
            below = self.find_outline_item(
                outline_item, cast(OutlineType, node["/First"])
            )
            if below:
                prefix = [index] if "/Title" in node else []
                return prefix + below

        if "/Next" not in node:
            return None
        index += 1
        node = cast(TreeObject, node["/Next"])
    return None
def find_bookmark(
    self,
    outline_item: Dict[str, Any],
    root: Optional[OutlineType] = None,
) -> Optional[List[int]]:  # pragma: no cover
    """
    Older name of :meth:`find_outline_item`; forwards both arguments.

    .. deprecated:: 2.9.0

        Use :meth:`find_outline_item` instead.
    """
    location = self.find_outline_item(outline_item, root)
    return location
def reset_translation(
    self, reader: Union[None, PdfReader, IndirectObject] = None
) -> None:
    """
    Reset the translation table between reader and the writer object.

    Late cloning will create new independent objects.

    :param reader: PdfReader or IndirectObject referring to a PdfReader
        object. If set to None or omitted, all tables will be reset.
    :raises Exception: if ``reader`` is of any other type.
    """
    if reader is None:
        self._id_translated = {}
    elif isinstance(reader, PdfReader):
        # A reader without a table is simply ignored.
        self._id_translated.pop(id(reader), None)
    elif isinstance(reader, IndirectObject):
        self._id_translated.pop(id(reader.pdf), None)
    else:
        # f-string so the offending value actually appears in the message.
        raise Exception(f"invalid parameter {reader}")
def _pdf_objectify(obj: Union[Dict[str, Any], str, int, List[Any]]) -> PdfObject:
    """
    Convert a plain Python value into the corresponding PDF object.

    * a ``PdfObject`` is returned unchanged;
    * a ``dict`` becomes a ``DictionaryObject`` with ``NameObject`` keys
      and recursively converted values;
    * a ``list`` becomes an ``ArrayObject`` of recursively converted
      elements;
    * a ``str`` becomes a ``NameObject`` when it starts with ``/`` and a
      ``TextStringObject`` otherwise;
    * an ``int`` or ``float`` becomes a ``FloatObject``.

    :raises NotImplementedError: for any other type.
    """
    if isinstance(obj, PdfObject):
        return obj

    if isinstance(obj, dict):
        converted = DictionaryObject()
        for key, value in obj.items():
            pdf_key = NameObject(key)
            pdf_value = _pdf_objectify(value)
            converted[pdf_key] = pdf_value
        return converted

    if isinstance(obj, list):
        return ArrayObject([_pdf_objectify(element) for element in obj])

    if isinstance(obj, str):
        return NameObject(obj) if obj.startswith("/") else TextStringObject(obj)

    if isinstance(obj, (int, float)):
        return FloatObject(obj)

    raise NotImplementedError(
        f"type(obj)={type(obj)} could not be casted to PdfObject"
    )
def _create_outline_item(
    action_ref: Union[None, IndirectObject],
    title: str,
    color: Union[Tuple[float, float, float], str, None],
    italic: bool,
    bold: bool,
) -> TreeObject:
    """
    Build the ``TreeObject`` for a new outline item.

    :param action_ref: stored under ``/A`` unless it is ``None``.
    :param title: stored under ``/Title`` via ``create_string_object``.
    :param color: when truthy, stored under ``/C`` as three
        ``FloatObject`` values quantized to five decimal places; a string
        is first converted with ``hex_to_rgb``.
    :param italic: adds 1 to the ``/F`` flag value.
    :param bold: adds 2 to the ``/F`` flag value.
    :return: the populated outline item; ``/F`` is only set when
        ``italic`` or ``bold`` is truthy.
    """
    outline_item = TreeObject()
    if action_ref is not None:
        outline_item[NameObject("/A")] = action_ref

    outline_item.update({NameObject("/Title"): create_string_object(title)})

    if color:
        rgb = hex_to_rgb(color) if isinstance(color, str) else color
        precision = decimal.Decimal("1.00000")
        components = [
            FloatObject(decimal.Decimal(channel).quantize(precision))
            for channel in rgb
        ]
        outline_item.update({NameObject("/C"): ArrayObject(components)})

    format_flag = (1 if italic else 0) + (2 if bold else 0)
    if format_flag:
        outline_item.update({NameObject("/F"): NumberObject(format_flag)})
    return outline_item
class PdfFileWriter(PdfWriter):  # pragma: no cover
    """Old class name kept for ``PdfWriter``; construction reports the deprecation."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Report the rename first, then initialise exactly like PdfWriter.
        deprecation_with_replacement("PdfFileWriter", "PdfWriter", "3.0.0")
        super().__init__(*args, **kwargs)