"""Gradio app that splits a PDF by page ranges or merges several PDFs."""

import os
import tempfile
from typing import List, Optional, Tuple

import gradio as gr
from PyPDF2 import PdfReader, PdfWriter

# Load custom CSS
with open(os.path.join(os.path.dirname(__file__), "fancy.css")) as f:
    custom_css = f.read()

# Maximum upload size (1 GB)
MAX_SIZE_BYTES = 1 * 1024 * 1024 * 1024


def _file_path(file) -> str:
    """Return the filesystem path of an uploaded file.

    Accepts either a plain path string or an object that exposes the path
    as ``.name`` (which is what the original code required).
    """
    return getattr(file, "name", file)


def _write_to_temp_pdf(writer: PdfWriter) -> str:
    """Write *writer* to a new persistent temporary ``.pdf`` and return its path.

    The file is created with ``delete=False`` so it still exists when the
    caller hands the path to the download component; the handle itself is
    closed deterministically by the ``with`` block.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f_out:
        writer.write(f_out)
        return f_out.name


def parse_page_ranges(ranges: str, num_pages: int) -> List[int]:
    """Convert a string like "1-3,5,8-10" into sorted zero-based page indices.

    Args:
        ranges: Comma-separated 1-based page numbers and inclusive
            ``start-end`` ranges. Empty entries (e.g. a trailing comma)
            are ignored.
        num_pages: Number of pages in the document. Ranges are clamped to
            ``1..num_pages``; single pages outside that span are dropped.

    Returns:
        Sorted list of unique zero-based page indices. A range whose start
        is greater than its end contributes no pages.

    Raises:
        ValueError: If an entry is not an integer or an integer range.
    """
    pages = set()
    for part in ranges.split(','):
        part = part.strip()
        if not part:
            # Tolerate "1-3," or "1,,2" instead of failing on int('').
            continue
        if '-' in part:
            start_str, end_str = part.split('-', 1)
            start = max(1, int(start_str))
            end = min(num_pages, int(end_str))
            pages.update(range(start - 1, end))
        else:
            p = int(part)
            if 1 <= p <= num_pages:
                pages.add(p - 1)
    return sorted(pages)


def split_pdf(file, page_ranges: str) -> Tuple[Optional[str], Optional[str]]:
    """Extract the requested pages of an uploaded PDF into a new PDF.

    Args:
        file: Uploaded file (path string or object with a ``.name`` path).
        page_ranges: Page selection in the format accepted by
            :func:`parse_page_ranges`.

    Returns:
        ``(output_path, None)`` on success, or ``(None, error_message)``
        on failure.
    """
    path = _file_path(file)

    # Validate file size
    file_size = os.path.getsize(path)
    if file_size > MAX_SIZE_BYTES:
        return None, f"File size exceeds 1 GB limit ({file_size / (1024*1024*1024):.2f} GB)."

    # Reading can fail on corrupt/unsupported files; report it the same way
    # merge_pdfs does rather than letting the exception escape the handler.
    try:
        reader = PdfReader(path)
        num_pages = len(reader.pages)
    except Exception as e:
        return None, f"Error reading PDFs: {e}"

    try:
        page_indices = parse_page_ranges(page_ranges, num_pages)
    except Exception as e:
        return None, f"Error parsing page ranges: {e}"

    if not page_indices:
        return None, "No valid pages selected."

    writer = PdfWriter()
    for idx in page_indices:
        writer.add_page(reader.pages[idx])

    return _write_to_temp_pdf(writer), None


def merge_pdfs(files) -> Tuple[Optional[str], Optional[str]]:
    """Concatenate the uploaded PDFs, in upload order, into a single PDF.

    Args:
        files: Sequence of uploaded files (path strings or objects with a
            ``.name`` path).

    Returns:
        ``(output_path, None)`` on success, or ``(None, error_message)``
        on failure.
    """
    if not files:
        return None, "Please upload at least two PDF files to merge."

    paths = [_file_path(f) for f in files]

    # Validate total size
    total_size = sum(os.path.getsize(p) for p in paths)
    if total_size > MAX_SIZE_BYTES:
        return None, f"Total file size exceeds 1 GB limit ({total_size / (1024*1024*1024):.2f} GB)."

    writer = PdfWriter()
    try:
        for p in paths:
            reader = PdfReader(p)
            for page in reader.pages:
                writer.add_page(page)
    except Exception as e:
        return None, f"Error reading PDFs: {e}"

    return _write_to_temp_pdf(writer), None


def _ui_result(out_path, error):
    """Map an ``(out_path, error)`` pair to (download file, error-box update).

    The error textbox starts hidden; its visibility has to be changed via a
    component update. Returning a bare bool to the textbox only replaces its
    value and never reveals it.
    """
    if error:
        return None, gr.update(value=error, visible=True)
    return out_path, gr.update(value="", visible=False)


# Build Gradio interface
with gr.Blocks(css=custom_css) as demo:
    # NOTE(review): the heading markup was lost from this string; it is
    # reconstructed as a plain <h1>. Confirm against the selectors in fancy.css.
    gr.HTML("<h1>PDF Splitter & Merger</h1>")
    gr.Markdown(
        "**Free & Quick Solution**: Process your PDFs in-memory instantly without storage.\n"
        "Use the **Split** tab to extract specific pages or the **Merge** tab to combine multiple PDFs into one.\n\n"
        "Maximum file size: **1 GB**. If you'd like to increase the limit, feel free to clone the space and adjust the code yourself."
    )

    with gr.Tabs():
        # Split Tab
        with gr.TabItem("Split PDF"):
            with gr.Row(elem_classes="input-row"):
                pdf_input = gr.File(label="Select PDF to split", file_types=['.pdf'])
                page_input = gr.Textbox(label="Page ranges", placeholder="e.g. 1-3,5,7-9")
            with gr.Row(elem_classes="button-row"):
                split_button = gr.Button("Split PDF", variant="primary")
            output_split = gr.File(label="Download Split PDF")
            error_split = gr.Textbox(label="Error Message", interactive=False, visible=False)

            def run_split(file, ranges):
                """Click handler for the Split tab."""
                if file is None or not ranges:
                    return _ui_result(None, "Please upload a PDF and specify page ranges.")
                return _ui_result(*split_pdf(file, ranges))

            # One output slot per component: the error box receives a single
            # update carrying both its text and its visibility.
            split_button.click(
                fn=run_split,
                inputs=[pdf_input, page_input],
                outputs=[output_split, error_split],
                api_name="split_pdf",
            )

        # Merge Tab
        with gr.TabItem("Merge PDF"):
            with gr.Row(elem_classes="input-row"):
                merge_inputs = gr.Files(label="Select PDF files to merge", file_types=['.pdf'])
            with gr.Row(elem_classes="button-row"):
                merge_button = gr.Button("Merge PDFs", variant="primary")
            output_merge = gr.File(label="Download Merged PDF")
            error_merge = gr.Textbox(label="Error Message", interactive=False, visible=False)

            def run_merge(files):
                """Click handler for the Merge tab."""
                if not files or len(files) < 2:
                    return _ui_result(None, "Please upload at least two PDF files.")
                return _ui_result(*merge_pdfs(files))

            merge_button.click(
                fn=run_merge,
                inputs=[merge_inputs],
                outputs=[output_merge, error_merge],
                api_name="merge_pdfs",
            )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))