#!/usr/bin/env python3
"""Split PDF by page ranges."""
import argparse
import io
import json
import os
import re
import sys
import zipfile
from pathlib import Path
def parse_ranges(range_str, total_pages):
    """Parse a page range string like '1-3, 5, 7-10' into 0-indexed page numbers.

    Args:
        range_str: Comma-separated entries, each either a single 1-indexed
            page number or an inclusive 'start-end' range. Blank entries
            (e.g. from a trailing comma) are ignored.
        total_pages: Number of pages in the document, used as the upper bound.

    Returns:
        A sorted list of unique 0-indexed page numbers. Range bounds are
        clamped to 1..total_pages; single pages outside that span are dropped.

    Raises:
        ValueError: If a non-blank entry is neither an integer nor a
            'start-end' pair of integers.
    """
    pages = set()
    for raw_part in range_str.split(','):
        part = raw_part.strip()
        if not part:
            # Tolerate stray separators such as "1-3," or "1,,2".
            continue
        is_range = '-' in part
        try:
            if is_range:
                start_text, end_text = part.split('-', 1)
                start = int(start_text.strip())
                end = int(end_text.strip())
            else:
                num = int(part)
        except ValueError:
            # Replace int()'s generic message with one that names the entry.
            raise ValueError(
                f"Invalid page range '{part}'. "
                "Use page numbers or ranges such as 1-3,5,7-10."
            ) from None
        if is_range:
            # Clamp to the document; a reversed range yields no pages.
            start = max(1, start)
            end = min(total_pages, end)
            pages.update(range(start - 1, end))
        elif 1 <= num <= total_pages:
            pages.add(num - 1)
    return sorted(pages)
def split(input_path, output_path, page_ranges=None):
    """Split a PDF into selected pages or into one file per page.

    Args:
        input_path: Path of the PDF to read.
        output_path: Destination path. With ``page_ranges`` it is the PDF to
            write; if empty or None, ``<input stem>_split.pdf`` in the current
            directory is used. Without ``page_ranges`` only its parent
            directory is used, as the location of the ZIP archive.
        page_ranges: Optional range string such as '1-3,5,7-10'. When empty,
            blank or None, every page is split out.

    Returns:
        The path of the written PDF (ranges mode), or the path of
        ``<input stem>_pages.zip`` containing one single-page PDF per page.

    Raises:
        ValueError: If ``page_ranges`` is given but selects no valid page.
    """
    from PyPDF2 import PdfReader, PdfWriter

    reader = PdfReader(input_path)
    total = len(reader.pages)
    base = Path(input_path).stem
    # `or ''` keeps a None output_path from crashing Path(); an empty path
    # resolves to the current directory.
    out_dir = Path(output_path or '').parent

    if page_ranges and page_ranges.strip():
        pages = parse_ranges(page_ranges, total)
        if not pages:
            raise ValueError(f"No valid pages in range '{page_ranges}'. Document has {total} pages.")
        writer = PdfWriter()
        for p in pages:
            writer.add_page(reader.pages[p])
        if not output_path:
            output_path = str(out_dir / f'{base}_split.pdf')
        with open(output_path, 'wb') as f:
            writer.write(f)
        return output_path

    # Split every page into its own PDF and bundle them in a ZIP archive.
    zip_path = str(out_dir / f'{base}_pages.zip')
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for i in range(total):
            writer = PdfWriter()
            writer.add_page(reader.pages[i])
            # Render in memory rather than via a temporary file on disk: a
            # temp file could overwrite (and then delete) an existing file of
            # the same name, and would be left behind if writing failed.
            buffer = io.BytesIO()
            writer.write(buffer)
            zf.writestr(f'{base}_page_{i+1}.pdf', buffer.getvalue())
    return zip_path
def main():
    """Command-line entry point: split a PDF and report the result as JSON.

    Reads ``--input``, ``--output`` (both required) and the optional
    ``--pages`` range string, then calls ``split``. Prints one JSON object to
    stdout with the keys ``success``, ``output`` and ``message``. On any
    exception from ``split`` the failure object is printed and the process
    exits with status 1.
    """
    parser = argparse.ArgumentParser(description='Split PDF')
    parser.add_argument('--input', required=True)
    parser.add_argument('--output', required=True)
    parser.add_argument('--pages', default='', help='Page ranges, e.g. 1-3,5,7-10')
    args = parser.parse_args()
    try:
        result = split(args.input, args.output, args.pages)
        print(json.dumps({"success": True, "output": result, "message": "PDF split successfully"}))
    except Exception as e:
        # Top-level boundary: every failure is reported as JSON.
        # Some exceptions stringify to '', so fall back to the class name
        # to keep the message informative.
        message = str(e) or type(e).__name__
        print(json.dumps({"success": False, "output": "", "message": message}))
        sys.exit(1)


if __name__ == '__main__':
    main()