"""Gradio app offering three PDF utilities: split, compress, and merge."""

import os
import tempfile
import zipfile

import fitz  # PyMuPDF
import gradio as gr
from PyPDF2 import PdfReader, PdfWriter


def merge_pdfs(pdf_files):
    """Merge the uploaded PDFs into one document with A4-sized pages.

    Every source page is drawn onto a new A4 page, keeping its proportions.

    Args:
        pdf_files: Uploaded file objects exposing a ``.name`` path, or a
            falsy value when nothing was uploaded.

    Returns:
        A ``(status_message, output_path)`` tuple; ``output_path`` is ``None``
        when no files were supplied.
    """
    if not pdf_files:
        return "❌ No PDF files uploaded.", None

    output_dir = tempfile.mkdtemp()
    output_file = os.path.join(output_dir, "merged.pdf")
    a4_rect = fitz.paper_rect("a4")

    # Context managers make sure documents are closed even if a page fails.
    with fitz.open() as doc_out:
        for file in pdf_files:
            with fitz.open(file.name) as src:
                for page in src:
                    page_out = doc_out.new_page(
                        width=a4_rect.width, height=a4_rect.height
                    )
                    page_out.show_pdf_page(
                        a4_rect, src, page.number, keep_proportion=True
                    )
        doc_out.save(output_file)

    return "✅ PDFs merged successfully.", output_file


def compress_pdf(file, dpi_threshold, dpi_target, quality):
    """Compress a PDF by recompressing its images and subsetting fonts.

    Args:
        file: Uploaded file object exposing a ``.name`` path, or ``None``.
        dpi_threshold: Passed to ``rewrite_images`` as ``dpi_threshold``.
        dpi_target: Passed to ``rewrite_images`` as ``dpi_target``.
        quality: Passed to ``rewrite_images`` as ``quality``.

    Returns:
        A ``(status_message, output_path)`` tuple; ``output_path`` is ``None``
        when no file was supplied.
    """
    # Same guard as the split and merge handlers: avoid an AttributeError
    # when the button is clicked before a file is uploaded.
    if file is None:
        return "❌ No file uploaded.", None

    input_path = file.name

    # A fresh temporary directory keeps concurrent outputs from colliding.
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "compressed_output.pdf")

    with fitz.open(input_path) as doc:
        # Recompress images using the given DPI threshold/target and quality.
        doc.rewrite_images(
            dpi_threshold=dpi_threshold,
            dpi_target=dpi_target,
            quality=quality,
            lossy=True,
            lossless=True,
            bitonal=True,
            color=True,
            gray=True,
            set_to_gray=False,
        )

        # Subset fonts, then save with garbage collection (level 3),
        # stream deflation and object streams.
        doc.subset_fonts()
        doc.save(output_path, garbage=3, deflate=True, use_objstms=True)

    status = "✅ PDF compressed successfully!"
    return status, output_path


def split_pdf(file_path, start_page, end_page):
    """Split a page range of a PDF into single-page PDFs bundled in a ZIP.

    Args:
        file_path: Path of the PDF to split.
        start_page: 0-based index of the first page; clamped into range.
            ``None`` is treated as 0.
        end_page: 0-based index of the last page (inclusive); clamped into
            range and never below ``start_page``. ``None`` is treated as 0.

    Returns:
        ``(zip_path, first_page, last_page)`` where the page numbers are the
        1-based bounds that were actually used after clamping.

    Raises:
        ValueError: If the PDF contains no pages.
    """
    # splitext handles any extension length, unlike slicing off 4 characters.
    base_name = os.path.splitext(os.path.basename(file_path))[0]

    output_dir = tempfile.mkdtemp()
    zip_path = os.path.join(output_dir, f"{base_name}_split_pages.zip")

    # NOTE(review): a cleared gr.Number field is assumed to arrive as None;
    # coerce so the clamping below cannot fail on it.
    start_page = int(start_page or 0)
    end_page = int(end_page or 0)

    # The reader loads pages lazily, so the file must stay open until every
    # page has been written out.
    with open(file_path, "rb") as pdf_stream:
        input_pdf = PdfReader(pdf_stream)
        total_pages = len(input_pdf.pages)
        if total_pages == 0:
            raise ValueError("PDF contains no pages.")

        # Clamp values within range
        start_page = max(0, min(start_page, total_pages - 1))
        end_page = max(start_page, min(end_page, total_pages - 1))

        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
            for i in range(start_page, end_page + 1):
                writer = PdfWriter()
                writer.add_page(input_pdf.pages[i])

                split_pdf_path = os.path.join(
                    output_dir, f"{base_name}-page{i+1}.pdf"
                )
                with open(split_pdf_path, "wb") as f_out:
                    writer.write(f_out)

                zipf.write(
                    split_pdf_path, arcname=os.path.basename(split_pdf_path)
                )

    return zip_path, start_page + 1, end_page + 1


def process_pdf(file, start_page, end_page):
    """Gradio handler for the Split tab.

    Args:
        file: Uploaded file object exposing a ``.name`` path, or ``None``.
        start_page: Requested 0-based first page.
        end_page: Requested 0-based last page (inclusive).

    Returns:
        A ``(status_message, zip_path)`` tuple; ``zip_path`` is ``None`` when
        no file was uploaded or the PDF could not be split.
    """
    if file is None:
        return "❌ No file uploaded.", None

    try:
        zip_file_path, actual_start, actual_end = split_pdf(
            file.name, start_page, end_page
        )
    except ValueError as exc:
        # Report unusable input (e.g. an empty PDF) instead of crashing.
        return f"❌ {exc}", None

    status = f"✅ File '{file.name}' split from page {actual_start} to {actual_end}."
    return status, zip_file_path


with gr.Blocks(title="PDF Utility") as demo:
    gr.Markdown("# 📄 PDF Utility App")

    with gr.Tabs():
        with gr.TabItem("Split PDF"):
            gr.Markdown("Upload a PDF, select page range, and click **Split PDF** to download a ZIP of split pages.")

            with gr.Row():
                file_input = gr.File(label="Upload PDF", file_types=[".pdf"])

            with gr.Row():
                start_page = gr.Number(label="Start Page (0-based)", value=0, precision=0)
                end_page = gr.Number(label="End Page (0-based)", value=0, precision=0)

            split_button = gr.Button("🚀 Split PDF")
            status_text = gr.Textbox(label="Status", lines=2)
            download_link = gr.File(label="Download ZIP")

            split_button.click(
                fn=process_pdf,
                inputs=[file_input, start_page, end_page],
                outputs=[status_text, download_link]
            )

        with gr.TabItem("Compress PDF"):
            gr.Markdown("Upload a PDF and click **Compress PDF** to download the compressed version.")

            with gr.Row():
                file_input_compress = gr.File(label="Upload PDF", file_types=[".pdf"])

            with gr.Row():
                dpi_threshold = gr.Number(label="DPI Threshold", value=100, precision=0)
                dpi_target = gr.Number(label="Target DPI", value=72, precision=0)
                quality = gr.Number(label="JPEG Quality (1-100)", value=60, precision=0)

            compress_button = gr.Button("🚀 Compress PDF")
            status_text_compress = gr.Textbox(label="Status", lines=2)
            download_link_compress = gr.File(label="Download compressed PDF")

            compress_button.click(
                fn=compress_pdf,
                inputs=[file_input_compress, dpi_threshold, dpi_target, quality],
                outputs=[status_text_compress, download_link_compress]
            )

        with gr.TabItem("Merge PDFs"):
            gr.Markdown("Upload multiple PDFs and click **Merge PDFs** to download the merged version.")

            pdf_uploads = gr.File(label="Upload PDFs", file_types=[".pdf"], file_count="multiple")
            merge_button = gr.Button("📎 Merge PDF Files")
            merge_status = gr.Textbox(label="Status", lines=2)
            merged_file = gr.File(label="Download Merged PDF")

            merge_button.click(
                fn=merge_pdfs,
                inputs=[pdf_uploads],
                outputs=[merge_status, merged_file]
            )


if __name__ == "__main__":
    demo.launch()