import os import tempfile from typing import List import gradio as gr from PyPDF2 import PdfReader, PdfWriter # Load custom CSS with open(os.path.join(os.path.dirname(__file__), "fancy.css")) as f: custom_css = f.read() # Maximum upload size (1 GB) MAX_SIZE_BYTES = 1 * 1024 * 1024 * 1024 def parse_page_ranges(ranges: str, num_pages: int) -> List[int]: """ Convert a string like "1-3,5,8-10" into a sorted list of zero-based page indices. """ pages = set() for part in ranges.split(','): part = part.strip() if '-' in part: start_str, end_str = part.split('-', 1) start = max(1, int(start_str)) end = min(num_pages, int(end_str)) pages.update(range(start - 1, end)) else: p = int(part) if 1 <= p <= num_pages: pages.add(p - 1) return sorted(pages) def split_pdf(file, page_ranges: str): # Validate file size file_size = os.path.getsize(file.name) if file_size > MAX_SIZE_BYTES: return None, f"File size exceeds 1 GB limit ({file_size / (1024*1024*1024):.2f} GB)." reader = PdfReader(file.name) num_pages = len(reader.pages) try: page_indices = parse_page_ranges(page_ranges, num_pages) except Exception as e: return None, f"Error parsing page ranges: {e}" if not page_indices: return None, "No valid pages selected." writer = PdfWriter() for idx in page_indices: writer.add_page(reader.pages[idx]) out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf").name with open(out_path, "wb") as f_out: writer.write(f_out) return out_path, None def merge_pdfs(files): # Merge multiple PDFs in upload order if not files: return None, "Please upload at least two PDF files to merge." # Validate total size total_size = sum(os.path.getsize(f.name) for f in files) if total_size > MAX_SIZE_BYTES: return None, f"Total file size exceeds 1 GB limit ({total_size / (1024*1024*1024):.2f} GB)." writer = PdfWriter() try: for f in files: reader = PdfReader(f.name) for page in reader.pages: writer.add_page(page) except Exception as e: return None, f"Error reading PDFs: {e}" out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf").name with open(out_path, "wb") as f_out: writer.write(f_out) return out_path, None # Build Gradio interface with gr.Blocks(css=custom_css) as demo: gr.HTML("