Spaces:
Sleeping
Sleeping
File size: 2,760 Bytes
#!/usr/bin/env python3
"""Split PDF by page ranges."""
import argparse, json, sys, os, zipfile, re
from pathlib import Path
def parse_ranges(range_str, total_pages):
    """Parse a page-range string into sorted, unique 0-indexed page numbers.

    The string is a comma-separated list of 1-indexed entries, each either a
    single page (``'5'``) or an inclusive range (``'1-3'``), for example
    ``'1-3, 5, 7-10'``.

    Args:
        range_str: The user-supplied page specification.
        total_pages: Number of pages in the document; entries are clamped to
            ``1..total_pages`` and anything fully outside is dropped.

    Returns:
        A sorted list of distinct 0-indexed page numbers. It is empty when no
        entry falls inside the document.

    Raises:
        ValueError: If an entry is not a number or a ``start-end`` pair of
            numbers. The message names the offending entry.
    """
    pages = set()
    for token in range_str.split(','):
        token = token.strip()
        if not token:
            # Tolerate stray or trailing commas such as '1,2,' instead of
            # failing on int('').
            continue

        first_text, dash, last_text = token.partition('-')
        try:
            first = int(first_text)
            # A token without a dash is a one-page range.
            last = int(last_text) if dash else first
        except ValueError:
            raise ValueError(
                f"Invalid page range '{token}'. "
                "Use numbers and ranges like '1-3,5,7-10'."
            ) from None

        # Clamp to the document bounds. A reversed or fully out-of-bounds
        # entry produces an empty range and is silently ignored.
        pages.update(range(max(1, first) - 1, min(total_pages, last)))
    return sorted(pages)
def split(input_path, output_path, page_ranges=None):
    """Split a PDF, either extracting selected pages or exploding every page.

    When ``page_ranges`` is a non-blank string, the selected pages are written
    to a single PDF. Otherwise every page is written as its own PDF inside a
    ZIP archive named ``<input stem>_pages.zip`` in the directory of
    ``output_path``.

    Args:
        input_path: Path of the PDF to read.
        output_path: Destination PDF path when page ranges are given. Only its
            parent directory is used in per-page mode. If empty or ``None``,
            output goes to the current directory under a name derived from the
            input file.
        page_ranges: Optional page specification such as ``'1-3,5,7-10'``.

    Returns:
        The path (as ``str``) of the PDF or ZIP file that was written.

    Raises:
        ValueError: If ``page_ranges`` selects no page of the document or is
            malformed.
    """
    import io

    from PyPDF2 import PdfReader, PdfWriter

    reader = PdfReader(input_path)
    total = len(reader.pages)
    base = Path(input_path).stem
    # `or ''` keeps the current-directory fallback for an empty output path
    # and stops Path(None) from raising before that fallback is reached.
    out_dir = Path(output_path or '').parent

    if page_ranges and page_ranges.strip():
        pages = parse_ranges(page_ranges, total)
        if not pages:
            raise ValueError(f"No valid pages in range '{page_ranges}'. Document has {total} pages.")
        writer = PdfWriter()
        for p in pages:
            writer.add_page(reader.pages[p])
        if not output_path:
            output_path = str(out_dir / f'{base}_split.pdf')
        with open(output_path, 'wb') as f:
            writer.write(f)
        return output_path

    # Split every page into its own single-page PDF inside one archive.
    zip_path = str(out_dir / f'{base}_pages.zip')
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for number, page in enumerate(reader.pages, start=1):
            writer = PdfWriter()
            writer.add_page(page)
            # Serialise in memory rather than through a temporary file in
            # out_dir: that avoids overwriting and deleting an existing
            # '<base>_page_N.pdf' and leaves nothing behind on failure.
            buffer = io.BytesIO()
            writer.write(buffer)
            zf.writestr(f'{base}_page_{number}.pdf', buffer.getvalue())
    return zip_path
def main():
    """Command-line entry point: parse arguments, split, report as JSON.

    Prints one JSON object to stdout with the keys ``success``, ``output``
    and ``message``. On any exception the object carries the error text and
    the process exits with status 1.
    """
    cli = argparse.ArgumentParser(description='Split PDF')
    for flag in ('--input', '--output'):
        cli.add_argument(flag, required=True)
    cli.add_argument(
        '--pages',
        default='',
        help='Page ranges, e.g. 1-3,5,7-10',
    )
    opts = cli.parse_args()

    try:
        written = split(opts.input, opts.output, opts.pages)
        report = {
            "success": True,
            "output": written,
            "message": "PDF split successfully",
        }
        print(json.dumps(report))
    except Exception as err:
        # Top-level boundary: every failure becomes a JSON error report.
        failure = {"success": False, "output": "", "message": str(err)}
        print(json.dumps(failure))
        sys.exit(1)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|