"""Gradio app offering PDF split, compress, merge and page-removal tools."""

import os
import gradio as gr
from PyPDF2 import PdfWriter, PdfReader
import zipfile
import tempfile
import fitz  # PyMuPDF


def merge_pdfs(pdf_files):
    """Merge the uploaded PDFs into one document with every page placed on A4.

    Args:
        pdf_files: Uploaded file objects; each must expose the path of the
            uploaded PDF as ``.name``. May be ``None`` or empty.

    Returns:
        A ``(status_message, output_path)`` tuple; ``output_path`` is ``None``
        when nothing was uploaded.
    """
    if not pdf_files:
        return "❌ No PDF files uploaded.", None

    output_dir = tempfile.mkdtemp()
    output_file = os.path.join(output_dir, "merged.pdf")
    a4_rect = fitz.paper_rect("a4")

    doc_out = fitz.open()
    try:
        for file in pdf_files:
            src = fitz.open(file.name)
            try:
                for page in src:
                    # Each source page is drawn onto a fresh A4 page, scaled
                    # to fit while keeping its aspect ratio.
                    page_out = doc_out.new_page(
                        width=a4_rect.width, height=a4_rect.height
                    )
                    page_out.show_pdf_page(
                        a4_rect, src, page.number, keep_proportion=True
                    )
            finally:
                # Close the source even if a page fails to render.
                src.close()
        doc_out.save(output_file)
    finally:
        doc_out.close()

    return "✅ PDFs merged successfully.", output_file


def compress_pdf(file, dpi_threshold, dpi_target, quality):
    """Recompress the images of a PDF and save an optimized copy.

    Args:
        file: Uploaded file object exposing the PDF path as ``.name``,
            or ``None`` when nothing was uploaded.
        dpi_threshold: Images above this DPI are recompressed.
        dpi_target: DPI the selected images are resampled to.
        quality: JPEG quality used for the recompressed images.

    Returns:
        A ``(status_message, output_path)`` tuple; ``output_path`` is ``None``
        when the inputs are incomplete.
    """
    if file is None:
        return "❌ No file uploaded.", None
    if dpi_threshold is None or dpi_target is None or quality is None:
        return "❌ DPI threshold, target DPI and JPEG quality are required.", None

    # Create a unique temporary directory for the compressed output
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "compressed_output.pdf")

    doc = fitz.open(file.name)
    try:
        # Recompress images above dpi_threshold to target DPI with JPEG at quality
        doc.rewrite_images(
            dpi_threshold=dpi_threshold,
            dpi_target=dpi_target,
            quality=quality,
            lossy=True,
            lossless=True,
            bitonal=True,
            color=True,
            gray=True,
            set_to_gray=False,
        )
        # Subset fonts and apply garbage collection + stream compression
        doc.subset_fonts()
        doc.save(output_path, garbage=3, deflate=True, use_objstms=True)
    finally:
        doc.close()

    return "✅ PDF compressed successfully!", output_path


def split_pdf(file_path, start_page, end_page):
    """Write each page of a 0-based inclusive range to its own PDF and zip them.

    Out-of-range bounds are clamped to the document; a missing (``None``)
    bound is treated as 0.

    Args:
        file_path: Path of the PDF to split.
        start_page: 0-based index of the first page to extract.
        end_page: 0-based index of the last page to extract.

    Returns:
        ``(zip_path, first_page, last_page)`` where the page numbers are the
        1-based bounds actually used after clamping.

    Raises:
        ValueError: If the PDF contains no pages.
    """
    # splitext copes with any extension length (or none), unlike slicing [:-4].
    base_name = os.path.splitext(os.path.basename(file_path))[0]

    # The reader pulls page data from the stream on demand, so the stream has
    # to stay open until every page has been written out.
    with open(file_path, "rb") as pdf_stream:
        input_pdf = PdfReader(pdf_stream)
        total_pages = len(input_pdf.pages)
        if total_pages == 0:
            raise ValueError("PDF contains no pages.")

        # Clamp values within range
        start_page = max(0, min(int(start_page or 0), total_pages - 1))
        end_page = max(start_page, min(int(end_page or 0), total_pages - 1))

        output_dir = tempfile.mkdtemp()
        zip_path = os.path.join(output_dir, f"{base_name}_split_pages.zip")

        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
            for i in range(start_page, end_page + 1):
                writer = PdfWriter()
                writer.add_page(input_pdf.pages[i])
                split_pdf_path = os.path.join(
                    output_dir, f"{base_name}-page{i+1}.pdf"
                )
                with open(split_pdf_path, "wb") as f_out:
                    writer.write(f_out)
                zipf.write(split_pdf_path, arcname=os.path.basename(split_pdf_path))

    return zip_path, start_page + 1, end_page + 1


def _parse_page_ranges(spec, total_pages):
    """Parse a spec such as ``"1,3,5-7"`` into a set of valid 0-based indices.

    Ranges are clamped to ``[0, total_pages)`` while parsing so that an
    oversized range (e.g. ``0-999999999999``) never materialises a huge set.

    Raises:
        ValueError: If any comma-separated part is not ``N`` or ``N-M``.
    """
    selected = set()
    for part in (spec or "").replace(" ", "").split(","):
        if "-" in part:
            start, end = map(int, part.split("-"))
            selected.update(range(max(start, 0), min(end, total_pages - 1) + 1))
        else:
            page = int(part)
            if 0 <= page < total_pages:
                selected.add(page)
    return selected


def remove_pages(file, pages_to_remove):
    """Save a copy of a PDF without the listed 0-based pages.

    Args:
        file: Uploaded file object exposing the PDF path as ``.name``,
            or ``None`` when nothing was uploaded.
        pages_to_remove: Comma-separated pages and ranges, e.g. ``"1,3,5-7"``.
            Page numbers outside the document are ignored.

    Returns:
        A ``(status_message, output_path)`` tuple; ``output_path`` is ``None``
        on any error.
    """
    if file is None:
        return "❌ No file uploaded.", None

    # Keep the stream open until the output is written: the reader fetches
    # page data from it on demand.
    with open(file.name, "rb") as pdf_stream:
        input_pdf = PdfReader(pdf_stream)
        total_pages = len(input_pdf.pages)

        # Parse page numbers, keeping valid pages only
        try:
            remove_set = _parse_page_ranges(pages_to_remove, total_pages)
        except ValueError:
            return "❌ Invalid page format.", None

        if total_pages and len(remove_set) == total_pages:
            # Writing zero pages would only produce an unusable document.
            return "❌ Cannot remove every page of the PDF.", None

        writer = PdfWriter()
        for i in range(total_pages):
            if i not in remove_set:
                writer.add_page(input_pdf.pages[i])

        output_dir = tempfile.mkdtemp()
        output_path = os.path.join(output_dir, "pages_removed.pdf")
        with open(output_path, "wb") as f:
            writer.write(f)

    status = f"✅ Removed pages: {sorted(remove_set)}"
    return status, output_path


def process_pdf(file, start_page, end_page):
    """Gradio handler for the Split tab: split the upload and report the result.

    Args:
        file: Uploaded file object exposing the PDF path as ``.name``,
            or ``None`` when nothing was uploaded.
        start_page: 0-based index of the first page to extract.
        end_page: 0-based index of the last page to extract.

    Returns:
        A ``(status_message, zip_path)`` tuple; ``zip_path`` is ``None`` on error.
    """
    if file is None:
        return "❌ No file uploaded.", None

    try:
        zip_file_path, actual_start, actual_end = split_pdf(
            file.name, start_page, end_page
        )
    except ValueError as exc:
        # split_pdf raises ValueError for a PDF without pages.
        return f"❌ {exc}", None

    status = f"✅ File '{file.name}' split from page {actual_start} to {actual_end}."
    return status, zip_file_path


with gr.Blocks(title="PDF Utility") as demo:
    gr.Markdown("# 📄 PDF Utility App")

    with gr.Tabs():
        with gr.TabItem("Split PDF"):
            gr.Markdown(
                "Upload a PDF, select page range, and click **Split PDF** to download a ZIP of split pages."
            )
            with gr.Row():
                file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            with gr.Row():
                start_page = gr.Number(label="Start Page (0-based)", value=0, precision=0)
                end_page = gr.Number(label="End Page (0-based)", value=0, precision=0)
            split_button = gr.Button("🚀 Split PDF")
            status_text = gr.Textbox(label="Status", lines=2)
            download_link = gr.File(label="Download ZIP")
            split_button.click(
                fn=process_pdf,
                inputs=[file_input, start_page, end_page],
                outputs=[status_text, download_link]
            )

        with gr.TabItem("Compress PDF"):
            gr.Markdown(
                "Upload a PDF and click **Compress PDF** to download the compressed version."
            )
            with gr.Row():
                file_input_compress = gr.File(label="Upload PDF", file_types=[".pdf"])
            with gr.Row():
                dpi_threshold = gr.Number(label="DPI Threshold", value=100, precision=0)
                dpi_target = gr.Number(label="Target DPI", value=72, precision=0)
                quality = gr.Number(label="JPEG Quality (1-100)", value=60, precision=0)
            compress_button = gr.Button("🚀 Compress PDF")
            status_text_compress = gr.Textbox(label="Status", lines=2)
            download_link_compress = gr.File(label="Download compressed PDF")
            compress_button.click(
                fn=compress_pdf,
                inputs=[file_input_compress, dpi_threshold, dpi_target, quality],
                outputs=[status_text_compress, download_link_compress]
            )

        with gr.TabItem("Merge PDFs"):
            gr.Markdown(
                "Upload multiple PDFs and click **Merge PDFs** to download the merged version."
            )
            pdf_uploads = gr.File(
                label="Upload PDFs", file_types=[".pdf"], file_count="multiple"
            )
            merge_button = gr.Button("📎 Merge PDF Files")
            merge_status = gr.Textbox(label="Status", lines=2)
            merged_file = gr.File(label="Download Merged PDF")
            merge_button.click(
                fn=merge_pdfs,
                inputs=[pdf_uploads],
                outputs=[merge_status, merged_file]
            )

        with gr.TabItem("Remove Pages"):
            gr.Markdown(
                "Remove one or more pages from a PDF.\n\n"
                "**Examples:** `2`, `1,3,5`, `2-6`, `1,3,5-7`\n\n"
                "⚠️ Page numbers are **0-based**."
            )
            remove_file = gr.File(label="Upload PDF", file_types=[".pdf"])
            pages_input = gr.Textbox(
                label="Pages to remove",
                placeholder="e.g. 1,3,5-7"
            )
            remove_button = gr.Button("🗑 Remove Pages")
            remove_status = gr.Textbox(label="Status", lines=2)
            removed_pdf = gr.File(label="Download PDF")
            remove_button.click(
                fn=remove_pages,
                inputs=[remove_file, pages_input],
                outputs=[remove_status, removed_pdf]
            )

demo.launch()