# app.py
"""Gradio front end with four PDF utilities: merge, split, compress, extract text.

Each handler receives the value of a ``gr.File`` component and returns the path
of a freshly written result file (``extract_text`` also returns the text itself
for the on-page preview).
"""

import io
import os
import tempfile
import zipfile

import fitz  # PyMuPDF
import gradio as gr
import PyPDF2
from PIL import Image


def _upload_path(file):
    """Return the filesystem path for one uploaded file.

    Accepts either a plain path string or an object that exposes the path as
    ``.name`` (the form the handlers originally relied on).

    Raises:
        gr.Error: if nothing was uploaded, so the UI shows a readable message
            instead of an ``AttributeError`` on ``None``.
    """
    if file is None:
        raise gr.Error("Please upload a PDF file first.")
    return getattr(file, "name", file)


def _new_output_dir():
    """Create and return a fresh temporary directory for one request's output.

    A per-request directory keeps simultaneous users from overwriting each
    other's result files, which fixed names in the working directory did.
    The directory is not removed here because the returned file must outlive
    the handler call.
    """
    return tempfile.mkdtemp(prefix="pdf_toolkit_")


# Merge PDFs
def merge_pdfs(files):
    """Concatenate the uploaded PDFs, in upload order, into one PDF.

    Args:
        files: list of uploads from a multi-file ``gr.File`` component.

    Returns:
        Path of the merged PDF (``merged_output.pdf`` in a new temp directory).

    Raises:
        gr.Error: if no files were uploaded.
    """
    if not files:
        raise gr.Error("Please upload at least one PDF to merge.")

    output_path = os.path.join(_new_output_dir(), "merged_output.pdf")
    merger = PyPDF2.PdfMerger()
    try:
        for file in files:
            merger.append(_upload_path(file))
        merger.write(output_path)
    finally:
        # Release the merger's open input handles even if a file is unreadable.
        merger.close()
    return output_path


# Split PDF
def split_pdf(file):
    """Split a PDF into one-page PDFs and bundle them in a ZIP archive.

    Archive members are named ``page_1.pdf``, ``page_2.pdf``, ... Pages are
    serialized in memory and written straight into the archive, so no shared
    scratch folder has to be created or wiped between requests.

    Args:
        file: upload from a single-file ``gr.File`` component.

    Returns:
        Path of the ZIP archive (``split_pages.zip`` in a new temp directory).

    Raises:
        gr.Error: if no file was uploaded.
    """
    reader = PyPDF2.PdfReader(_upload_path(file))
    zip_filename = os.path.join(_new_output_dir(), "split_pages.zip")

    with zipfile.ZipFile(zip_filename, "w") as archive:
        for number, page in enumerate(reader.pages, start=1):
            writer = PyPDF2.PdfWriter()
            writer.add_page(page)
            page_bytes = io.BytesIO()
            writer.write(page_bytes)
            archive.writestr(f"page_{number}.pdf", page_bytes.getvalue())
    return zip_filename


# Compress PDF
def compress_pdf(file, quality):
    """Rebuild a PDF from JPEG renderings of its pages to reduce file size.

    Every page is rasterized at 150 dpi and re-encoded as JPEG, so text and
    vector content in the result are no longer selectable or scalable.

    Args:
        file: upload from a single-file ``gr.File`` component.
        quality: requested JPEG quality; clamped to the range 1-95.

    Returns:
        Path of the rebuilt PDF (``compressed_output.pdf`` in a new temp
        directory).

    Raises:
        gr.Error: if no file was uploaded.
    """
    source_path = _upload_path(file)
    quality = min(max(int(quality), 1), 95)  # Safe JPEG quality range
    output_path = os.path.join(_new_output_dir(), "compressed_output.pdf")

    # Context managers close both documents even when rendering fails midway.
    with fitz.open(source_path) as doc, fitz.open() as output:
        for page in doc:
            # alpha=False: Image.frombytes("RGB", ...) needs 3 bytes per pixel.
            pix = page.get_pixmap(dpi=150, alpha=False)
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            buffer = io.BytesIO()
            img.save(buffer, format="JPEG", quality=quality)

            # Size the new page from the source page so the output keeps the
            # original page dimensions, rather than a size inferred from the
            # JPEG's pixel dimensions.
            source_rect = page.rect
            new_page = output.new_page(
                width=source_rect.width, height=source_rect.height
            )
            new_page.insert_image(new_page.rect, stream=buffer.getvalue())
        output.save(output_path)
    return output_path


# Extract Text
def extract_text(file):
    """Extract the text of every page and save it as a UTF-8 text file.

    Args:
        file: upload from a single-file ``gr.File`` component.

    Returns:
        Tuple ``(output_path, text)``: the path of ``extracted_text.txt`` in a
        new temp directory, and the extracted text for the preview box.

    Raises:
        gr.Error: if no file was uploaded.
    """
    source_path = _upload_path(file)
    with fitz.open(source_path) as doc:
        text = "".join(page.get_text() for page in doc)

    output_path = os.path.join(_new_output_dir(), "extracted_text.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(text)
    return output_path, text


# Gradio Interface
with gr.Blocks(theme=gr.themes.Base(primary_hue="orange")) as demo:
    gr.Markdown(
        "# 📁 Local PDF Toolkit\n"
        "Merge, Split, Compress, and Extract Text from PDFs — Safely inside Hugging Face"
    )

    with gr.Tab("🔗 Merge PDFs"):
        merge_input = gr.File(
            file_types=[".pdf"],
            file_count="multiple",
            label="Select PDFs to Merge",
        )
        merge_btn = gr.Button("🚀 Merge PDFs", variant="primary")
        merge_output = gr.File(label="⬇️ Download Merged PDF")
        merge_btn.click(merge_pdfs, inputs=merge_input, outputs=merge_output)

    with gr.Tab("✂️ Split PDF"):
        split_input = gr.File(file_types=[".pdf"], label="Select PDF to Split")
        split_btn = gr.Button("✂️ Split PDF", variant="primary")
        split_output = gr.File(label="⬇️ Download Split ZIP")
        split_btn.click(split_pdf, inputs=split_input, outputs=split_output)

    with gr.Tab("📉 Compress PDF"):
        compress_input = gr.File(
            file_types=[".pdf"], label="Select PDF to Compress"
        )
        # compress_pdf clamps the chosen value to at most 95.
        compress_quality = gr.Slider(
            minimum=10,
            maximum=100,
            value=60,
            label="Compression Quality (%)",
        )
        compress_btn = gr.Button("📉 Compress PDF", variant="primary")
        compress_output = gr.File(label="⬇️ Download Compressed PDF")
        compress_btn.click(
            compress_pdf,
            inputs=[compress_input, compress_quality],
            outputs=compress_output,
        )

    with gr.Tab("📜 Extract Text"):
        extract_input = gr.File(
            file_types=[".pdf"], label="Select PDF to Extract Text"
        )
        extract_btn = gr.Button("📜 Extract Text", variant="primary")
        extract_file = gr.File(label="⬇️ Download Extracted Text File")
        extract_preview = gr.Textbox(
            label="📖 Preview Text",
            lines=20,
            max_lines=100,
            interactive=False,
            show_copy_button=True,
        )
        extract_btn.click(
            extract_text,
            inputs=extract_input,
            outputs=[extract_file, extract_preview],
        )

# Launch only when run as a script, so importing the module has no side effect.
if __name__ == "__main__":
    demo.launch()