| | """ |
| | ================================================================================ |
| | PDF MANIPULATOR - Full-Featured PDF Page Manipulation Toolkit |
| | ================================================================================ |
| | Author : algorembrant |
| | Version : 1.0.0 |
| | License : MIT |
| | |
| | USAGE COMMANDS (run from terminal): |
| | -------------------------------------------------------------------------------- |
| | |
| | MERGE |
| | python pdf_manipulator.py merge -i file1.pdf file2.pdf file3.pdf -o merged.pdf |
| | python pdf_manipulator.py merge -i file1.pdf file2.pdf -o out.pdf --interleave |
| | |
| | SPLIT |
| | python pdf_manipulator.py split -i input.pdf -o ./output_dir |
| | python pdf_manipulator.py split -i input.pdf -o ./output_dir --range 1-5 |
| | python pdf_manipulator.py split -i input.pdf -o ./output_dir --range 2,4,6 |
| | |
| | REMOVE PAGES |
| | python pdf_manipulator.py remove -i input.pdf -o output.pdf --pages 3 |
| | python pdf_manipulator.py remove -i input.pdf -o output.pdf --pages 1,3,5 |
| | python pdf_manipulator.py remove -i input.pdf -o output.pdf --pages 2-5 |
| | python pdf_manipulator.py remove -i input.pdf -o output.pdf --pages 1,3-5,7 |
| | |
| | EXTRACT PAGES |
| | python pdf_manipulator.py extract -i input.pdf -o output.pdf --pages 1-3 |
| | python pdf_manipulator.py extract -i input.pdf -o output.pdf --pages 2,4,6 |
| | |
| | REORDER PAGES |
| | python pdf_manipulator.py reorder -i input.pdf -o output.pdf --order 3,1,2,4 |
| | |
| | ROTATE PAGES |
| | python pdf_manipulator.py rotate -i input.pdf -o output.pdf --angle 90 |
| | python pdf_manipulator.py rotate -i input.pdf -o output.pdf --angle 180 --pages 1,3 |
| | python pdf_manipulator.py rotate -i input.pdf -o output.pdf --angle 270 --pages 2-4 |
| | |
| | REVERSE |
| | python pdf_manipulator.py reverse -i input.pdf -o output.pdf |
| | |
| | DUPLICATE PAGES |
| | python pdf_manipulator.py duplicate -i input.pdf -o output.pdf --pages 2 --times 3 |
| | |
| | INSERT BLANK PAGES |
| | python pdf_manipulator.py insert-blank -i input.pdf -o output.pdf --after 2 |
| | python pdf_manipulator.py insert-blank -i input.pdf -o output.pdf --before 1 |
| | |
| | INSERT PDF PAGES |
| | python pdf_manipulator.py insert -i base.pdf --insert-file extra.pdf -o output.pdf --after 3 |
| | python pdf_manipulator.py insert -i base.pdf --insert-file extra.pdf -o output.pdf --before 2 |
| | |
| | REPLACE PAGES |
| | python pdf_manipulator.py replace -i base.pdf --replace-file new.pdf -o output.pdf --pages 2 --replace-pages 1 |
| | |
| | CROP PAGES |
| | python pdf_manipulator.py crop -i input.pdf -o output.pdf --box "50,50,500,700" |
| | python pdf_manipulator.py crop -i input.pdf -o output.pdf --box "50,50,500,700" --pages 1-3 |
| | |
| | SCALE / RESIZE |
| | python pdf_manipulator.py scale -i input.pdf -o output.pdf --factor 0.5 |
| | python pdf_manipulator.py scale -i input.pdf -o output.pdf --to-size A4 |
| | python pdf_manipulator.py scale -i input.pdf -o output.pdf --to-size letter |
| | |
| | WATERMARK |
| | python pdf_manipulator.py watermark -i input.pdf -o output.pdf --text "CONFIDENTIAL" |
| | python pdf_manipulator.py watermark -i input.pdf -o output.pdf --text "DRAFT" --opacity 0.3 --angle 45 |
| | python pdf_manipulator.py watermark -i input.pdf -o output.pdf --watermark-pdf wm.pdf |
| | |
| | STAMP / OVERLAY |
| | python pdf_manipulator.py stamp -i input.pdf -o output.pdf --stamp-pdf stamp.pdf |
| | python pdf_manipulator.py stamp -i input.pdf -o output.pdf --stamp-pdf stamp.pdf --pages 1 |
| | |
| | ADD PAGE NUMBERS |
| | python pdf_manipulator.py number -i input.pdf -o output.pdf |
| | python pdf_manipulator.py number -i input.pdf -o output.pdf --position bottom-center --start 1 |
| | python pdf_manipulator.py number -i input.pdf -o output.pdf --position top-right --format "Page {n}" |
| | |
| | ENCRYPT / DECRYPT |
| | python pdf_manipulator.py encrypt -i input.pdf -o output.pdf --user-pass mypass --owner-pass ownerpass |
| | python pdf_manipulator.py encrypt -i input.pdf -o output.pdf --user-pass mypass |
| | python pdf_manipulator.py decrypt -i encrypted.pdf -o decrypted.pdf --password mypass |
| | |
| | METADATA |
| | python pdf_manipulator.py metadata -i input.pdf |
| | python pdf_manipulator.py metadata -i input.pdf -o output.pdf --set-title "My Title" --set-author "algorembrant" |
| | python pdf_manipulator.py metadata -i input.pdf -o output.pdf --set-subject "Report" --set-keywords "pdf,report" |
| | |
| | BOOKMARKS / OUTLINE |
| | python pdf_manipulator.py bookmarks -i input.pdf |
| | python pdf_manipulator.py bookmarks -i input.pdf -o output.pdf --add "Chapter 1:1,Chapter 2:5" |
| | |
| | EXTRACT TEXT |
| | python pdf_manipulator.py text -i input.pdf |
| | python pdf_manipulator.py text -i input.pdf --pages 1-3 -o extracted.txt |
| | |
| | INFO / INSPECT |
| | python pdf_manipulator.py info -i input.pdf |
| | |
| | N-UP (multiple pages per sheet) |
| | python pdf_manipulator.py nup -i input.pdf -o output.pdf --layout 2x1 |
| | python pdf_manipulator.py nup -i input.pdf -o output.pdf --layout 2x2 |
| | |
| | COMPRESS |
| | python pdf_manipulator.py compress -i input.pdf -o output.pdf |
| | |
| | BATCH OPERATIONS |
| | python pdf_manipulator.py batch-remove --dir ./pdfs --pages 1 --suffix _no_cover |
| | python pdf_manipulator.py batch-merge --dir ./pdfs -o merged_all.pdf |
| | python pdf_manipulator.py batch-split --dir ./pdfs --out-dir ./split_output |
| | |
| | -------------------------------------------------------------------------------- |
| | PAGE RANGE SYNTAX: |
| | Single page : 3 |
| | Multiple pages: 1,3,5 |
| | Range : 2-5 (inclusive) |
| | Mixed : 1,3-5,7,9-11 |
| | Pages are always 1-indexed (first page = 1) |
| | -------------------------------------------------------------------------------- |
| | """ |
| |
|
| | from __future__ import annotations |
| |
|
| | import argparse |
| | import io |
| | import os |
| | import re |
| | import sys |
| | import glob |
| | from copy import deepcopy |
| | from pathlib import Path |
| | from typing import List, Optional, Tuple |
| |
|
| | from pypdf import PdfReader, PdfWriter |
| | from pypdf.generic import NameObject, NumberObject |
| | from reportlab.lib.pagesizes import A4, letter, A3, A5, LETTER |
| | from reportlab.lib.units import mm, inch |
| | from reportlab.pdfgen import canvas as rl_canvas |
| | from reportlab.lib import colors |
| |
|
| |
|
| | |
| | |
| | |
| |
|
# Named target sizes for `scale --to-size`. Each value is a reportlab
# (width, height) pair; cmd_scale unpacks it and compares it against the
# page's mediabox dimensions.
PAGE_SIZES = {
    "a3": A3,
    "a4": A4,
    "a5": A5,
    "letter": letter,
    # NOTE(review): cmd_scale lower-cases the requested name before looking it
    # up, so this upper-case key looks unreachable from that path - confirm
    # no other caller relies on it before removing.
    "LETTER": LETTER,
}
| |
|
# Anchor-point calculators for page-number placement. Each entry maps a
# position name to a function of the page (width, height) that returns the
# (x, y) coordinate passed to the text-drawing call: 30 units in from the
# left/right edge (or the horizontal midpoint) and 20 units in from the
# bottom/top edge. The key order is also the order offered by the CLI
# `--position` choices.
NUMBER_POSITIONS = {
    "bottom-center": lambda w, h: (w / 2, 20),
    "bottom-left": lambda w, h: (30, 20),
    "bottom-right": lambda w, h: (w - 30, 20),
    "top-center": lambda w, h: (w / 2, h - 20),
    "top-left": lambda w, h: (30, h - 20),
    "top-right": lambda w, h: (w - 30, h - 20),
}
| |
|
| |
|
| | |
| | |
| | |
| |
|
def parse_page_range(spec: str, total: int) -> List[int]:
    """
    Parse a page-range string into a sorted list of 0-based indices.

    Input is 1-based, e.g. "1,3-5,7" -> [0, 2, 3, 4, 6]. Duplicates are
    collapsed and the result is always in ascending order, regardless of the
    order in which pages were written in ``spec``.

    Args:
        spec: Comma-separated page numbers and inclusive "a-b" ranges.
        total: Number of pages in the document (upper bound for any page).

    Returns:
        Sorted, de-duplicated 0-based page indices.

    Raises:
        ValueError: If an entry is empty or not numeric, a range is reversed,
            or any page lies outside 1..total.
    """
    indices: set[int] = set()
    for raw in spec.split(","):
        part = raw.strip()
        if not part:
            # Covers "", "1,,2" and trailing commas with a readable message
            # instead of int()'s "invalid literal" error.
            raise ValueError(f"Empty entry in page specification '{spec}'.")

        first, dash, last = part.partition("-")
        try:
            a_i = int(first.strip())
            b_i = int(last.strip()) if dash else a_i
        except ValueError:
            raise ValueError(
                f"Invalid page specification '{part}': expected N or A-B."
            ) from None

        if dash:
            if a_i > b_i:
                raise ValueError(
                    f"Range {a_i}-{b_i} is reversed (start must not exceed end)."
                )
            if a_i < 1 or b_i > total:
                raise ValueError(
                    f"Range {a_i}-{b_i} is out of bounds (document has {total} pages)."
                )
            indices.update(range(a_i - 1, b_i))
        else:
            if a_i < 1 or a_i > total:
                raise ValueError(
                    f"Page {a_i} is out of bounds (document has {total} pages)."
                )
            indices.add(a_i - 1)
    return sorted(indices)
| |
|
| |
|
def open_pdf(path: str, password: Optional[str] = None) -> PdfReader:
    """Open the PDF at *path*, decrypting it when it is encrypted.

    Args:
        path: Location of the PDF file.
        password: Password to try on an encrypted file. When omitted, the
            empty password is tried.

    Returns:
        A reader for the document.

    Raises:
        pypdf.errors.FileNotDecryptedError: If the file is encrypted and the
            password (or the empty password) does not unlock it.
    """
    reader = PdfReader(path)
    if reader.is_encrypted:
        attempt = "" if password is None else password
        # decrypt() reports failure through a falsy return value rather than
        # an exception; checking it here fails fast with a clear message
        # instead of erroring later on the first page access.
        if not reader.decrypt(attempt):
            from pypdf.errors import FileNotDecryptedError

            reason = (
                "a password is required."
                if password is None
                else "the supplied password is incorrect."
            )
            raise FileNotDecryptedError(f"Could not decrypt '{path}': {reason}")
    return reader
| |
|
| |
|
def save_pdf(writer: PdfWriter, output_path: str) -> None:
    """Write *writer* to *output_path* and report where it was saved.

    Missing parent directories of the destination are created first.
    """
    target = Path(output_path)
    # Ensure the destination folder exists before the file is opened.
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("wb") as stream:
        writer.write(stream)
    print(f"[OK] Saved -> {target.resolve()}")
| |
|
| |
|
def page_count(path: str) -> int:
    """Return the number of pages in the PDF at *path*."""
    document = open_pdf(path)
    return len(document.pages)
| |
|
| |
|
def make_watermark_pdf(
    text: str,
    page_width: float,
    page_height: float,
    opacity: float = 0.15,
    angle: float = 45,
    font_size: int = 60,
) -> io.BytesIO:
    """Render a single-page PDF carrying *text* as a rotated watermark.

    The text is drawn in red Helvetica-Bold, centred on the page and rotated
    by *angle* degrees about the page centre, with *opacity* as fill alpha.

    Returns:
        An in-memory buffer, rewound to position 0, holding the PDF.
    """
    buffer = io.BytesIO()
    sheet = rl_canvas.Canvas(buffer, pagesize=(page_width, page_height))
    sheet.setFont("Helvetica-Bold", font_size)
    sheet.setFillColor(colors.red, alpha=opacity)

    centre_x = page_width / 2
    centre_y = page_height / 2

    # Move the origin to the page centre so the rotation pivots there.
    sheet.saveState()
    sheet.translate(centre_x, centre_y)
    sheet.rotate(angle)
    sheet.drawCentredString(0, 0, text)
    sheet.restoreState()

    sheet.save()
    buffer.seek(0)
    return buffer
| |
|
| |
|
def make_page_number_pdf(
    number_str: str,
    page_width: float,
    page_height: float,
    position: str = "bottom-center",
    font_size: int = 10,
) -> io.BytesIO:
    """Render a single-page PDF containing only the page-number label.

    Args:
        number_str: Text to draw (already formatted).
        page_width: Width of the page the label will be merged onto.
        page_height: Height of that page.
        position: A key of NUMBER_POSITIONS; unknown values fall back to
            "bottom-center".
        font_size: Helvetica font size.

    Returns:
        An in-memory buffer, rewound to position 0, holding the PDF.
    """
    buf = io.BytesIO()
    c = rl_canvas.Canvas(buf, pagesize=(page_width, page_height))
    c.setFont("Helvetica", font_size)
    c.setFillColor(colors.black)

    if position not in NUMBER_POSITIONS:
        position = "bottom-center"
    x, y = NUMBER_POSITIONS[position](page_width, page_height)

    # Anchor the text on the side it is placed: centring a label on the
    # left/right margin anchor lets longer labels spill past the page edge.
    if position.endswith("-left"):
        c.drawString(x, y, number_str)
    elif position.endswith("-right"):
        c.drawRightString(x, y, number_str)
    else:
        c.drawCentredString(x, y, number_str)

    c.save()
    buf.seek(0)
    return buf
| |
|
| |
|
| | |
| | |
| | |
| |
|
def cmd_merge(args: argparse.Namespace) -> None:
    """Merge multiple PDFs into one.

    By default the inputs are appended one after another. With
    ``args.interleave`` the output takes page 1 of every input, then page 2 of
    every input, and so on; inputs that run out of pages are skipped.
    """
    merged = PdfWriter()
    sources = args.inputs

    if not args.interleave:
        # Plain concatenation, in the order the files were given.
        for path in sources:
            document = open_pdf(path)
            for sheet in document.pages:
                merged.add_page(sheet)
        save_pdf(merged, args.output)
        return

    documents = [open_pdf(path) for path in sources]
    longest = max(len(doc.pages) for doc in documents)
    for position in range(longest):
        for doc in documents:
            if position < len(doc.pages):
                merged.add_page(doc.pages[position])
    save_pdf(merged, args.output)
| |
|
| |
|
def cmd_split(args: argparse.Namespace) -> None:
    """Split a PDF into individual pages or ranges.

    Without ``args.range`` every page is written to its own file named
    ``<stem>_page_NNNN.pdf``. With a range, the selected pages go into one
    file named ``<stem>_pages_<range>.pdf`` (commas replaced by underscores).
    ``args.output`` is the destination directory and is created if missing.
    """
    source = open_pdf(args.input)
    page_total = len(source.pages)
    destination = Path(args.output)
    destination.mkdir(parents=True, exist_ok=True)
    base_name = Path(args.input).stem

    if not args.range:
        for number, sheet in enumerate(source.pages, start=1):
            single = PdfWriter()
            single.add_page(sheet)
            target = destination / f"{base_name}_page_{number:04d}.pdf"
            save_pdf(single, str(target))
        return

    selection = PdfWriter()
    for position in parse_page_range(args.range, page_total):
        selection.add_page(source.pages[position])
    label = args.range.replace(',', '_')
    target = destination / f"{base_name}_pages_{label}.pdf"
    save_pdf(selection, str(target))
| |
|
| |
|
def cmd_remove(args: argparse.Namespace) -> None:
    """Remove specified pages from a PDF.

    Pages listed in ``args.pages`` are dropped and the rest are written to
    ``args.output``. A warning is printed when nothing remains; the (empty)
    output is still saved.
    """
    source = open_pdf(args.input)
    doomed = set(parse_page_range(args.pages, len(source.pages)))

    survivors = [
        sheet for position, sheet in enumerate(source.pages)
        if position not in doomed
    ]

    result = PdfWriter()
    for sheet in survivors:
        result.add_page(sheet)

    if not survivors:
        print("[WARN] All pages removed - output file will have 0 pages.")
    save_pdf(result, args.output)
| |
|
| |
|
def cmd_extract(args: argparse.Namespace) -> None:
    """Extract specific pages into a new PDF.

    The pages named by ``args.pages`` are copied, in ascending page order,
    into ``args.output``.
    """
    source = open_pdf(args.input)
    wanted = parse_page_range(args.pages, len(source.pages))

    result = PdfWriter()
    for position in wanted:
        result.add_page(source.pages[position])
    save_pdf(result, args.output)
| |
|
| |
|
def cmd_reorder(args: argparse.Namespace) -> None:
    """Reorder pages according to a specified order.

    ``args.order`` is a comma-separated list of 1-based page numbers; the
    output contains exactly those pages in that sequence (pages may repeat or
    be omitted).

    Raises:
        ValueError: If any listed page is outside the document.
    """
    source = open_pdf(args.input)
    page_total = len(source.pages)

    sequence = []
    for token in args.order.split(","):
        sequence.append(int(token.strip()) - 1)

    # Reject the first out-of-range entry before anything is written.
    offender = next((p for p in sequence if not 0 <= p < page_total), None)
    if offender is not None:
        raise ValueError(f"Page {offender + 1} is out of bounds (document has {page_total} pages).")

    result = PdfWriter()
    for position in sequence:
        result.add_page(source.pages[position])
    save_pdf(result, args.output)
| |
|
| |
|
def cmd_rotate(args: argparse.Namespace) -> None:
    """Rotate pages by a given angle (90, 180, 270).

    Only the pages in ``args.pages`` are rotated; when it is not given, every
    page is. All pages are written to the output.

    Raises:
        ValueError: If the angle is not one of the supported values.
    """
    allowed_angles = (90, 180, 270)
    if args.angle not in allowed_angles:
        raise ValueError("Rotation angle must be 90, 180, or 270.")

    source = open_pdf(args.input)
    page_total = len(source.pages)
    if args.pages:
        targets = set(parse_page_range(args.pages, page_total))
    else:
        targets = set(range(page_total))

    result = PdfWriter()
    for position, sheet in enumerate(source.pages):
        if position in targets:
            sheet.rotate(args.angle)
        result.add_page(sheet)
    save_pdf(result, args.output)
| |
|
| |
|
def cmd_reverse(args: argparse.Namespace) -> None:
    """Reverse the page order of a PDF (last page first)."""
    source = open_pdf(args.input)
    sheets = list(source.pages)
    sheets.reverse()

    result = PdfWriter()
    for sheet in sheets:
        result.add_page(sheet)
    save_pdf(result, args.output)
| |
|
| |
|
def cmd_duplicate(args: argparse.Namespace) -> None:
    """Duplicate specific pages N times and insert them consecutively.

    Each page selected by ``args.pages`` appears ``args.times`` times in a row
    in the output (that count includes the original occurrence); all other
    pages are copied once.

    Raises:
        ValueError: If ``args.times`` is less than 1. A count of zero or below
            would silently delete the selected pages instead of duplicating
            them.
    """
    times = args.times
    if times < 1:
        raise ValueError("--times must be at least 1.")

    reader = open_pdf(args.input)
    total = len(reader.pages)
    indices = set(parse_page_range(args.pages, total))

    writer = PdfWriter()
    for i, page in enumerate(reader.pages):
        if i in indices:
            for _ in range(times):
                writer.add_page(deepcopy(page))
        else:
            writer.add_page(page)
    save_pdf(writer, args.output)
| |
|
| |
|
def cmd_insert_blank(args: argparse.Namespace) -> None:
    """Insert a single blank page into a PDF.

    The blank page takes its size from the first page of the input. Its
    location is given by ``args.after`` (insert after 1-based page N; 0 puts
    it first) or ``args.before`` (insert before 1-based page N). When both
    are given, ``args.after`` wins.

    Raises:
        ValueError: If the input has no pages, neither option is given, or
            the requested position is outside the document.
    """
    reader = open_pdf(args.input)
    pages_list = list(reader.pages)
    total = len(pages_list)
    if not pages_list:
        # There is no first page to copy the blank page's size from.
        raise ValueError("Input PDF has no pages; cannot size the blank page.")

    # Resolve and validate the position before building the blank page.
    if args.after is not None:
        insert_idx = args.after
        label = f"--after {args.after}"
    elif args.before is not None:
        insert_idx = args.before - 1
        label = f"--before {args.before}"
    else:
        raise ValueError("Specify --after N or --before N.")
    if insert_idx < 0 or insert_idx > total:
        raise ValueError(f"{label} is out of range.")

    ref_page = pages_list[0]
    width = float(ref_page.mediabox.width)
    height = float(ref_page.mediabox.height)

    # An empty reportlab canvas saved straight away yields one blank page.
    blank_buf = io.BytesIO()
    c = rl_canvas.Canvas(blank_buf, pagesize=(width, height))
    c.save()
    blank_buf.seek(0)
    blank_reader = PdfReader(blank_buf)
    pages_list.insert(insert_idx, blank_reader.pages[0])

    writer = PdfWriter()
    for p in pages_list:
        writer.add_page(p)
    save_pdf(writer, args.output)
| |
|
| |
|
def cmd_insert_pdf(args: argparse.Namespace) -> None:
    """Insert pages from another PDF into the base PDF.

    All pages of ``args.insert_file`` are placed after 1-based page
    ``args.after`` (0 puts them first) or before 1-based page ``args.before``.
    When both are given, ``args.after`` wins.

    Raises:
        ValueError: If neither option is given or the position is outside the
            base document.
    """
    base_reader = open_pdf(args.input)
    ins_reader = open_pdf(args.insert_file)
    base_pages = list(base_reader.pages)
    ins_pages = list(ins_reader.pages)

    if args.after is not None:
        pos = args.after
        label = f"--after {args.after}"
    elif args.before is not None:
        pos = args.before - 1
        label = f"--before {args.before}"
    else:
        raise ValueError("Specify --after N or --before N.")

    # A negative position would otherwise be treated by slicing as
    # "counted from the end" (e.g. --before 0 inserting before the last
    # page), and an oversized one would silently append.
    if pos < 0 or pos > len(base_pages):
        raise ValueError(f"{label} is out of range.")

    result = base_pages[:pos] + ins_pages + base_pages[pos:]
    writer = PdfWriter()
    for p in result:
        writer.add_page(p)
    save_pdf(writer, args.output)
| |
|
| |
|
def cmd_replace(args: argparse.Namespace) -> None:
    """Replace specific pages in the base PDF with pages from another PDF.

    ``args.pages`` selects pages in the base file and ``args.replace_pages``
    selects pages in ``args.replace_file``. Both selections are parsed into
    ascending order and paired positionally, so the lowest selected base page
    receives the lowest selected replacement page.

    Raises:
        ValueError: If the two selections differ in length.
    """
    base = open_pdf(args.input)
    donor = open_pdf(args.replace_file)
    base_total = len(base.pages)
    donor_total = len(donor.pages)

    targets = parse_page_range(args.pages, base_total)
    replacements = parse_page_range(args.replace_pages, donor_total)

    if len(targets) != len(replacements):
        raise ValueError(
            f"Number of pages to replace ({len(targets)}) must match "
            f"number of replacement pages ({len(replacements)})."
        )

    substitutions = dict(zip(targets, replacements))

    result = PdfWriter()
    for position, sheet in enumerate(base.pages):
        donor_index = substitutions.get(position)
        if donor_index is None:
            result.add_page(sheet)
        else:
            result.add_page(donor.pages[donor_index])
    save_pdf(result, args.output)
| |
|
| |
|
def cmd_crop(args: argparse.Namespace) -> None:
    """Crop pages to a specific bounding box (left,bottom,right,top).

    The box replaces the mediabox of every page in ``args.pages`` (all pages
    when it is omitted); other pages are copied unchanged.

    Raises:
        ValueError: If ``args.box`` is not four comma-separated numbers or
            does not describe a box with positive width and height.
    """
    try:
        box_vals = [float(v) for v in args.box.split(",")]
    except ValueError:
        # Replace float()'s generic message with one naming the option.
        raise ValueError("--box must be 'left,bottom,right,top'.") from None
    if len(box_vals) != 4:
        raise ValueError("--box must be 'left,bottom,right,top'.")
    left, bottom, right, top = box_vals
    if left >= right or bottom >= top:
        # An inverted or empty box would produce a page with no visible area.
        raise ValueError("--box must satisfy left < right and bottom < top.")

    reader = open_pdf(args.input)
    total = len(reader.pages)
    indices = set(parse_page_range(args.pages, total)) if args.pages else set(range(total))

    writer = PdfWriter()
    for i, page in enumerate(reader.pages):
        if i in indices:
            page.mediabox.lower_left = (left, bottom)
            page.mediabox.upper_right = (right, top)
        writer.add_page(page)
    save_pdf(writer, args.output)
| |
|
| |
|
def cmd_scale(args: argparse.Namespace) -> None:
    """Scale pages by a factor or resize to a standard page size.

    With ``args.factor`` every page is scaled uniformly. Otherwise, with
    ``args.to_size`` every page is stretched independently in x and y so its
    mediabox matches the named size from PAGE_SIZES (the aspect ratio is not
    preserved). ``args.factor`` takes precedence when both are given.

    Raises:
        ValueError: If neither option is given, the factor is not positive,
            or the size name is unknown.
    """
    # Resolve the request once, before any file is opened, so a bad argument
    # is reported even for an empty document and no lookup repeats per page.
    target = None
    if args.factor is not None:
        if args.factor <= 0:
            raise ValueError("--factor must be greater than 0.")
    elif args.to_size:
        target = PAGE_SIZES.get(args.to_size.lower())
        if target is None:
            raise ValueError(f"Unknown page size: {args.to_size}. Choose from {list(PAGE_SIZES.keys())}")
    else:
        raise ValueError("Specify --factor F or --to-size NAME.")

    reader = open_pdf(args.input)
    writer = PdfWriter()

    for page in reader.pages:
        if target is None:
            page.scale(args.factor, args.factor)
        else:
            tw, th = target
            fx = tw / float(page.mediabox.width)
            fy = th / float(page.mediabox.height)
            page.scale(fx, fy)
        writer.add_page(page)
    save_pdf(writer, args.output)
| |
|
| |
|
def cmd_watermark(args: argparse.Namespace) -> None:
    """Add a text or PDF watermark to each page.

    When ``args.watermark_pdf`` is given, its first page is merged onto every
    page. Otherwise a text watermark (``args.text``, default "WATERMARK") is
    rendered at each page's own size using ``args.opacity`` and
    ``args.angle``.
    """
    reader = open_pdf(args.input)
    writer = PdfWriter()

    # Compare against None rather than truthiness so explicit zeros
    # (--angle 0 for horizontal text, --opacity 0) are honoured.
    opacity = 0.15 if args.opacity is None else args.opacity
    angle = 45 if args.angle is None else args.angle
    text = args.text or "WATERMARK"

    # The watermark file does not change between pages: open it once.
    shared_wm_page = None
    if args.watermark_pdf:
        shared_wm_reader = open_pdf(args.watermark_pdf)
        shared_wm_page = shared_wm_reader.pages[0]

    for page in reader.pages:
        if shared_wm_page is not None:
            wm_page = shared_wm_page
        else:
            w = float(page.mediabox.width)
            h = float(page.mediabox.height)
            wm_buf = make_watermark_pdf(text, w, h, opacity=opacity, angle=angle)
            wm_page = PdfReader(wm_buf).pages[0]

        page.merge_page(wm_page)
        writer.add_page(page)
    save_pdf(writer, args.output)
| |
|
| |
|
def cmd_stamp(args: argparse.Namespace) -> None:
    """Overlay a stamp PDF on top of pages.

    The first page of ``args.stamp_pdf`` is merged onto each page listed in
    ``args.pages`` (every page when it is omitted); all pages are written to
    the output.
    """
    source = open_pdf(args.input)
    stamp_source = open_pdf(args.stamp_pdf)
    overlay = stamp_source.pages[0]
    page_total = len(source.pages)

    if args.pages:
        targets = set(parse_page_range(args.pages, page_total))
    else:
        targets = set(range(page_total))

    result = PdfWriter()
    for position, sheet in enumerate(source.pages):
        if position in targets:
            sheet.merge_page(overlay)
        result.add_page(sheet)
    save_pdf(result, args.output)
| |
|
| |
|
def cmd_number(args: argparse.Namespace) -> None:
    """Add page numbers to each page.

    The label for each page is ``args.format`` (default "{n}") with "{n}"
    replaced by the page's number, counting up from ``args.start`` (default
    1), drawn at ``args.position`` (default "bottom-center").
    """
    reader = open_pdf(args.input)
    writer = PdfWriter()
    position = args.position or "bottom-center"
    # Only fall back when no start was supplied; a truthiness test would
    # turn an explicit --start 0 into 1.
    start = 1 if args.start is None else args.start
    fmt = args.format or "{n}"

    for i, page in enumerate(reader.pages):
        w = float(page.mediabox.width)
        h = float(page.mediabox.height)
        number_str = fmt.replace("{n}", str(i + start))
        num_buf = make_page_number_pdf(number_str, w, h, position=position)
        num_reader = PdfReader(num_buf)
        page.merge_page(num_reader.pages[0])
        writer.add_page(page)
    save_pdf(writer, args.output)
| |
|
| |
|
def cmd_encrypt(args: argparse.Namespace) -> None:
    """Encrypt a PDF with user and owner passwords.

    A missing user password becomes the empty string; a missing owner
    password defaults to the user password.
    """
    source = open_pdf(args.input)
    protected = PdfWriter()
    for sheet in source.pages:
        protected.add_page(sheet)

    user_secret = args.user_pass if args.user_pass else ""
    owner_secret = args.owner_pass or user_secret
    protected.encrypt(user_secret, owner_secret)
    save_pdf(protected, args.output)
| |
|
| |
|
def cmd_decrypt(args: argparse.Namespace) -> None:
    """Decrypt / remove password from a PDF.

    The input is opened with ``args.password`` and its pages are rewritten to
    a fresh, unencrypted output. An informational line is printed when the
    file is not encrypted and no password was supplied.
    """
    source = open_pdf(args.input, password=args.password)
    if not (source.is_encrypted or args.password):
        print("[INFO] File is not encrypted.")

    plain = PdfWriter()
    for sheet in source.pages:
        plain.add_page(sheet)
    save_pdf(plain, args.output)
| |
|
| |
|
def cmd_metadata(args: argparse.Namespace) -> None:
    """View or set PDF metadata.

    Always prints the current metadata. When ``args.output`` and at least one
    ``--set-*`` value are given, the pages are copied to a new file and the
    supplied fields are written with ``add_metadata``; only those fields are
    passed to the writer.
    """
    reader = open_pdf(args.input)
    # The reader may report no metadata at all as None; guard every access
    # so such files are listed with empty fields instead of crashing.
    meta = reader.metadata

    def _attr(name: str):
        return getattr(meta, name) if meta is not None else None

    def _raw(key: str):
        return meta.get(key, '') if meta is not None else ''

    print("\n--- PDF Metadata ---")
    print(f" Title : {_attr('title')}")
    print(f" Author : {_attr('author')}")
    print(f" Subject : {_attr('subject')}")
    print(f" Keywords : {_raw('/Keywords')}")
    print(f" Creator : {_attr('creator')}")
    print(f" Producer : {_attr('producer')}")
    print(f" Created : {_raw('/CreationDate')}")
    print(f" Modified : {_raw('/ModDate')}")
    print()

    if args.output and any([args.set_title, args.set_author, args.set_subject, args.set_keywords]):
        writer = PdfWriter()
        for page in reader.pages:
            writer.add_page(page)

        # Map each provided option to its Info-dictionary key.
        requested = {
            "/Title": args.set_title,
            "/Author": args.set_author,
            "/Subject": args.set_subject,
            "/Keywords": args.set_keywords,
        }
        new_meta = {key: value for key, value in requested.items() if value}
        writer.add_metadata(new_meta)
        save_pdf(writer, args.output)
| |
|
| |
|
def cmd_bookmarks(args: argparse.Namespace) -> None:
    """List or add bookmarks/outline entries.

    Always prints the existing outline, indenting nested levels. When
    ``args.output`` and ``args.add`` are given, the pages are copied to a new
    file and one outline item is added per "Title:PAGE" entry in the
    comma-separated ``args.add`` (PAGE is 1-based).

    Raises:
        ValueError: If an ``--add`` entry is not "Title:PAGE" or its page is
            outside the document.
    """
    reader = open_pdf(args.input)
    outlines = reader.outline

    def _print_outline(items, indent=0):
        for item in items:
            if isinstance(item, list):
                _print_outline(item, indent + 2)
                continue
            title = getattr(item, "title", "(untitled)")
            # Best effort: a destination that cannot be resolved is still
            # listed (with an unknown page) rather than silently dropped.
            try:
                page_no = reader.get_destination_page_number(item)
            except Exception:
                page_no = None
            if page_no is None:
                print(f"{' ' * indent} {title} (page ?)")
            else:
                print(f"{' ' * indent} {title} (page {page_no + 1})")

    print("\n--- Bookmarks / Outline ---")
    if outlines:
        _print_outline(outlines)
    else:
        print(" (none)")
    print()

    if args.output and args.add:
        total = len(reader.pages)

        # Parse and validate every entry before anything is written.
        additions = []
        for entry in args.add.split(","):
            entry = entry.strip()
            # Split on the LAST colon so titles may themselves contain ':'.
            title, sep, pg = entry.rpartition(":")
            title = title.strip()
            if not sep or not title:
                raise ValueError(f"Invalid bookmark '{entry}': expected 'Title:PAGE'.")
            try:
                page_no = int(pg.strip())
            except ValueError:
                raise ValueError(f"Invalid bookmark '{entry}': expected 'Title:PAGE'.") from None
            if page_no < 1 or page_no > total:
                raise ValueError(f"Page {page_no} is out of bounds (document has {total} pages).")
            additions.append((title, page_no - 1))

        writer = PdfWriter()
        for page in reader.pages:
            writer.add_page(page)
        for title, page_index in additions:
            writer.add_outline_item(title, page_index)
        save_pdf(writer, args.output)
| |
|
| |
|
def cmd_text(args: argparse.Namespace) -> None:
    """Extract text from PDF pages.

    Text is taken from the pages in ``args.pages`` (all pages when omitted),
    each section prefixed by a "=== Page N ===" header. The result is written
    to ``args.output`` as UTF-8 when given, otherwise printed.
    """
    source = open_pdf(args.input)
    page_total = len(source.pages)
    if args.pages:
        wanted = parse_page_range(args.pages, page_total)
    else:
        wanted = list(range(page_total))

    sections = []
    for position in wanted:
        content = source.pages[position].extract_text() or ""
        sections.append(f"=== Page {position + 1} ===\n{content}\n")
    combined = "\n".join(sections)

    if not args.output:
        print(combined)
        return

    with open(args.output, "w", encoding="utf-8") as sink:
        sink.write(combined)
    print(f"[OK] Text saved -> {args.output}")
| |
|
| |
|
def cmd_info(args: argparse.Namespace) -> None:
    """Display detailed information about a PDF.

    Prints the file name, page count, encryption flag, title and author,
    followed by each page's mediabox size in points and inches.
    """
    reader = open_pdf(args.input)
    total = len(reader.pages)
    # The reader may report no metadata at all as None; show empty fields
    # in that case instead of failing on attribute access.
    meta = reader.metadata
    title = meta.title if meta is not None else None
    author = meta.author if meta is not None else None

    print("\n--- PDF Info ---")
    print(f" File : {args.input}")
    print(f" Pages : {total}")
    print(f" Encrypted : {reader.is_encrypted}")
    print(f" Title : {title}")
    print(f" Author : {author}")
    print()
    print(" Page Dimensions:")
    for i, page in enumerate(reader.pages):
        w = float(page.mediabox.width)
        h = float(page.mediabox.height)
        # 72 points per inch.
        print(f" Page {i + 1:4d}: {w:.1f} x {h:.1f} pt ({w/72:.2f} x {h/72:.2f} in)")
    print()
| |
|
| |
|
def cmd_nup(args: argparse.Namespace) -> None:
    """Arrange N pages per output sheet (e.g. 2x1, 2x2).

    ``args.layout`` is "CxR" (columns x rows). Every output sheet has the
    size of the input's first page and is divided into equal cells, filled
    left-to-right and top-to-bottom. Each source page is rasterised at 72 dpi
    with pdf2image and drawn as an image stretched to its cell, so the output
    is not vector content.

    Raises:
        ValueError: If the layout is malformed or has a non-positive
            dimension, or the input has no pages.
    """
    layout = args.layout.lower()
    try:
        cols, rows = [int(x) for x in layout.split("x")]
    except ValueError:
        raise ValueError("--layout must be CxR, e.g. 2x1 or 2x2") from None
    if cols < 1 or rows < 1:
        # Zero would divide by zero below; negatives give a meaningless grid.
        raise ValueError("--layout must be CxR, e.g. 2x1 or 2x2")

    reader = open_pdf(args.input)
    total = len(reader.pages)
    if total == 0:
        raise ValueError("Input PDF has no pages.")

    # Imported once, here, instead of inside the per-page loop. Kept local to
    # this function as in the original, so other commands do not import it.
    from pdf2image import convert_from_bytes

    per_sheet = cols * rows
    first_page = reader.pages[0]
    sheet_w = float(first_page.mediabox.width)
    sheet_h = float(first_page.mediabox.height)
    cell_w = sheet_w / cols
    cell_h = sheet_h / rows

    writer = PdfWriter()

    for start in range(0, total, per_sheet):
        buf = io.BytesIO()
        c = rl_canvas.Canvas(buf, pagesize=(sheet_w, sheet_h))

        for slot in range(min(per_sheet, total - start)):
            col = slot % cols
            row = slot // cols
            x_off = col * cell_w
            # PDF origin is bottom-left; row 0 is the top row of the sheet.
            y_off = sheet_h - (row + 1) * cell_h

            # Serialise the single source page so it can be rasterised.
            sub_buf = io.BytesIO()
            sub_writer = PdfWriter()
            sub_writer.add_page(reader.pages[start + slot])
            sub_writer.write(sub_buf)

            imgs = convert_from_bytes(sub_buf.getvalue(), dpi=72)
            if imgs:
                c.drawInlineImage(imgs[0], x_off, y_off, width=cell_w, height=cell_h)

        c.save()
        buf.seek(0)
        nup_reader = PdfReader(buf)
        writer.add_page(nup_reader.pages[0])

    save_pdf(writer, args.output)
| |
|
| |
|
def cmd_compress(args: argparse.Namespace) -> None:
    """Apply lossless compression to all page streams.

    Pages are copied to a new writer, each page's content streams are
    compressed, and identical or orphaned objects are then removed before
    saving.
    """
    reader = open_pdf(args.input)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)

    # The original only de-duplicated objects; this step performs the stream
    # compression the command promises. It runs on the writer's pages,
    # following pypdf's file-size-reduction guidance.
    for page in writer.pages:
        page.compress_content_streams()

    writer.compress_identical_objects(remove_identicals=True, remove_orphans=True)
    save_pdf(writer, args.output)
| |
|
| |
|
def cmd_batch_remove(args: argparse.Namespace) -> None:
    """Remove pages from all PDFs in a directory.

    Every ``*.pdf`` in ``args.dir`` is processed in sorted order and written
    next to the original as ``<stem><suffix>.pdf`` (suffix defaults to
    "_modified"). Files for which ``args.pages`` is not a valid selection are
    reported and skipped.
    """
    name_suffix = args.suffix or "_modified"
    candidates = sorted(glob.glob(os.path.join(args.dir, "*.pdf")))

    for source_path in candidates:
        target_path = os.path.join(args.dir, f"{Path(source_path).stem}{name_suffix}.pdf")
        document = open_pdf(source_path)
        page_total = len(document.pages)

        try:
            doomed = set(parse_page_range(args.pages, page_total))
        except ValueError as problem:
            # The selection does not fit this file; leave it alone.
            print(f"[SKIP] {source_path}: {problem}")
            continue

        trimmed = PdfWriter()
        for position, sheet in enumerate(document.pages):
            if position in doomed:
                continue
            trimmed.add_page(sheet)
        save_pdf(trimmed, target_path)
| |
|
| |
|
def cmd_batch_merge(args: argparse.Namespace) -> None:
    """Merge all PDFs in a directory into one.

    Every ``*.pdf`` in ``args.dir`` is appended in sorted filename order and
    the result is written to ``args.output``. If the output file already
    exists inside that directory (e.g. from an earlier run) it is excluded
    from the inputs, so a re-run does not merge the previous result into the
    new one.
    """
    output_path = Path(args.output).resolve()
    pdfs = [
        pdf_path
        for pdf_path in sorted(glob.glob(os.path.join(args.dir, "*.pdf")))
        if Path(pdf_path).resolve() != output_path
    ]

    writer = PdfWriter()
    for pdf_path in pdfs:
        reader = open_pdf(pdf_path)
        for page in reader.pages:
            writer.add_page(page)
    save_pdf(writer, args.output)
| |
|
| |
|
def cmd_batch_split(args: argparse.Namespace) -> None:
    """Split all PDFs in a directory into individual pages.

    Every ``*.pdf`` in ``args.dir`` is processed in sorted order; each page is
    written to ``args.out_dir`` (created if missing) as
    ``<stem>_page_NNNN.pdf``.
    """
    candidates = sorted(glob.glob(os.path.join(args.dir, "*.pdf")))
    destination = Path(args.out_dir)
    destination.mkdir(parents=True, exist_ok=True)

    for source_path in candidates:
        base_name = Path(source_path).stem
        document = open_pdf(source_path)
        for number, sheet in enumerate(document.pages, start=1):
            single = PdfWriter()
            single.add_page(sheet)
            target = destination / f"{base_name}_page_{number:04d}.pdf"
            save_pdf(single, str(target))
| |
|
| |
|
| | |
| | |
| | |
| |
|
| | def build_parser() -> argparse.ArgumentParser: |
| | parser = argparse.ArgumentParser( |
| | prog="pdf_manipulator", |
| | description="Full-featured PDF page manipulation toolkit by algorembrant", |
| | formatter_class=argparse.RawDescriptionHelpFormatter, |
| | ) |
| | sub = parser.add_subparsers(dest="command", required=True) |
| |
|
| | |
| | p = sub.add_parser("merge", help="Merge multiple PDFs") |
| | p.add_argument("-i", "--inputs", nargs="+", required=True, metavar="FILE") |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--interleave", action="store_true", help="Interleave pages from each file") |
| |
|
| | |
| | p = sub.add_parser("split", help="Split PDF into pages or a range") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True, help="Output directory") |
| | p.add_argument("--range", help="Page range to extract (e.g. 1-5 or 2,4,6)") |
| |
|
| | |
| | p = sub.add_parser("remove", help="Remove pages from a PDF") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--pages", required=True, help="Pages to remove, e.g. 1 or 1,3-5") |
| |
|
| | |
| | p = sub.add_parser("extract", help="Extract pages to a new PDF") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--pages", required=True, help="Pages to extract, e.g. 1-3") |
| |
|
| | |
| | p = sub.add_parser("reorder", help="Reorder pages") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--order", required=True, help="New order, e.g. 3,1,2,4") |
| |
|
| | |
| | p = sub.add_parser("rotate", help="Rotate pages") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--angle", required=True, type=int, choices=[90, 180, 270]) |
| | p.add_argument("--pages", help="Pages to rotate (all if omitted)") |
| |
|
| | |
| | p = sub.add_parser("reverse", help="Reverse page order") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| |
|
| | |
| | p = sub.add_parser("duplicate", help="Duplicate specified pages") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--pages", required=True, help="Pages to duplicate") |
| | p.add_argument("--times", type=int, default=2, help="Number of copies (default 2)") |
| |
|
| | |
| | p = sub.add_parser("insert-blank", help="Insert blank page(s)") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--after", type=int, help="Insert after page N (1-indexed)") |
| | p.add_argument("--before", type=int, help="Insert before page N (1-indexed)") |
| |
|
| | |
| | p = sub.add_parser("insert", help="Insert pages from another PDF") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--insert-file", required=True) |
| | p.add_argument("--after", type=int, help="Insert after page N") |
| | p.add_argument("--before", type=int, help="Insert before page N") |
| |
|
| | |
| | p = sub.add_parser("replace", help="Replace pages with pages from another PDF") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--replace-file", required=True) |
| | p.add_argument("--pages", required=True, help="Pages in base to replace") |
| | p.add_argument("--replace-pages", required=True, help="Pages in replacement file to use") |
| |
|
| | |
| | p = sub.add_parser("crop", help="Crop pages to a bounding box") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--box", required=True, help="left,bottom,right,top in points") |
| | p.add_argument("--pages", help="Pages to crop (all if omitted)") |
| |
|
| | |
| | p = sub.add_parser("scale", help="Scale or resize pages") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--factor", type=float, help="Scale factor, e.g. 0.5") |
| | p.add_argument("--to-size", help="Target page size: a4, a3, a5, letter") |
| |
|
| | |
| | p = sub.add_parser("watermark", help="Add watermark to pages") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--text", help="Watermark text") |
| | p.add_argument("--opacity", type=float, default=0.15) |
| | p.add_argument("--angle", type=float, default=45.0) |
| | p.add_argument("--watermark-pdf", help="Use a PDF as watermark instead of text") |
| |
|
| | |
| | p = sub.add_parser("stamp", help="Overlay a stamp PDF on pages") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--stamp-pdf", required=True) |
| | p.add_argument("--pages", help="Pages to stamp (all if omitted)") |
| |
|
| | |
| | p = sub.add_parser("number", help="Add page numbers") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--position", default="bottom-center", |
| | choices=list(NUMBER_POSITIONS.keys())) |
| | p.add_argument("--start", type=int, default=1) |
| | p.add_argument("--format", default="{n}", help="Number format, use {n} for page number") |
| |
|
| | |
| | p = sub.add_parser("encrypt", help="Encrypt a PDF") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--user-pass", required=True) |
| | p.add_argument("--owner-pass", default=None) |
| |
|
| | |
| | p = sub.add_parser("decrypt", help="Remove password from PDF") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--password", required=True) |
| |
|
| | |
| | p = sub.add_parser("metadata", help="View or edit PDF metadata") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", default=None) |
| | p.add_argument("--set-title") |
| | p.add_argument("--set-author") |
| | p.add_argument("--set-subject") |
| | p.add_argument("--set-keywords") |
| |
|
| | |
| | p = sub.add_parser("bookmarks", help="List or add bookmarks") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", default=None) |
| | p.add_argument("--add", help="Bookmarks to add: 'Title:page,Title2:page2'") |
| |
|
| | |
| | p = sub.add_parser("text", help="Extract text from PDF") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", default=None, help="Save to file instead of printing") |
| | p.add_argument("--pages", help="Pages to extract (all if omitted)") |
| |
|
| | |
| | p = sub.add_parser("info", help="Display PDF information") |
| | p.add_argument("-i", "--input", required=True) |
| |
|
| | |
| | p = sub.add_parser("nup", help="Arrange N pages per sheet") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| | p.add_argument("--layout", default="2x1", help="Layout e.g. 2x1, 2x2, 4x1") |
| |
|
| | |
| | p = sub.add_parser("compress", help="Compress PDF streams") |
| | p.add_argument("-i", "--input", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| |
|
| | |
| | p = sub.add_parser("batch-remove", help="Remove pages from all PDFs in a directory") |
| | p.add_argument("--dir", required=True) |
| | p.add_argument("--pages", required=True) |
| | p.add_argument("--suffix", default="_modified") |
| |
|
| | |
| | p = sub.add_parser("batch-merge", help="Merge all PDFs in a directory") |
| | p.add_argument("--dir", required=True) |
| | p.add_argument("-o", "--output", required=True) |
| |
|
| | |
| | p = sub.add_parser("batch-split", help="Split all PDFs in a directory into pages") |
| | p.add_argument("--dir", required=True) |
| | p.add_argument("--out-dir", required=True) |
| |
|
| | return parser |
| |
|
| |
|
# Dispatch table consulted by main(): maps each sub-command name accepted on
# the command line to the handler that is called with the parsed arguments.
COMMANDS = {
    # Combining and splitting documents
    "merge": cmd_merge,
    "split": cmd_split,
    # Page-level edits
    "remove": cmd_remove,
    "extract": cmd_extract,
    "reorder": cmd_reorder,
    "rotate": cmd_rotate,
    "reverse": cmd_reverse,
    "duplicate": cmd_duplicate,
    # Inserting and replacing pages
    "insert-blank": cmd_insert_blank,
    "insert": cmd_insert_pdf,
    "replace": cmd_replace,
    # Page geometry
    "crop": cmd_crop,
    "scale": cmd_scale,
    # Overlays
    "watermark": cmd_watermark,
    "stamp": cmd_stamp,
    "number": cmd_number,
    # Password protection
    "encrypt": cmd_encrypt,
    "decrypt": cmd_decrypt,
    # Document information
    "metadata": cmd_metadata,
    "bookmarks": cmd_bookmarks,
    "text": cmd_text,
    "info": cmd_info,
    # Sheet layout and stream compression
    "nup": cmd_nup,
    "compress": cmd_compress,
    # Directory-wide batch operations
    "batch-remove": cmd_batch_remove,
    "batch-merge": cmd_batch_merge,
    "batch-split": cmd_batch_split,
}
| |
|
| |
|
def main(argv: "list[str] | None" = None) -> None:
    """Parse the command line and run the selected sub-command.

    Args:
        argv: Argument list to parse, without the program name. When left
            as ``None`` the parser reads the process arguments, so existing
            ``main()`` callers behave exactly as before.

    Exit status:
        1   -- no known sub-command was selected (help is printed first),
               or the handler raised an exception.
        130 -- the user interrupted the handler with Ctrl-C.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    handler = COMMANDS.get(args.command)
    if handler is None:
        # No (or an unmapped) sub-command: show usage and signal failure.
        parser.print_help()
        sys.exit(1)

    try:
        handler(args)
    except KeyboardInterrupt:
        # KeyboardInterrupt is not an Exception subclass, so without this
        # clause Ctrl-C would surface as a raw traceback.
        print("[ERROR] Interrupted by user", file=sys.stderr)
        sys.exit(130)
    except Exception as exc:
        # Top-level boundary: report the failure concisely instead of
        # dumping a traceback, and fail the process.
        print(f"[ERROR] {exc}", file=sys.stderr)
        sys.exit(1)
| |
|
| |
|
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
| |
|