Spaces:
Sleeping
Sleeping
| # app.py | |
import io
import os
import tempfile
import zipfile

import fitz  # PyMuPDF
import gradio as gr
import PyPDF2
from PIL import Image
| # Merge PDFs | |
def merge_pdfs(files):
    """Merge the uploaded PDFs, in the given order, into a single PDF.

    Args:
        files: Sequence of uploaded files. Each item is either a filesystem
            path (``str`` / ``os.PathLike``) or an object exposing the path
            as ``.name``, which is how the ``gr.File`` input wired to this
            callback is consumed.

    Returns:
        Path of the merged document, named ``merged_output.pdf`` inside a
        freshly created temporary directory.

    Raises:
        ValueError: If ``files`` is ``None`` or empty.
    """
    if not files:
        raise ValueError("Please upload at least one PDF to merge.")

    merger = PyPDF2.PdfMerger()
    try:
        for file in files:
            source_path = (
                file if isinstance(file, (str, os.PathLike)) else file.name
            )
            merger.append(source_path)

        # A per-call directory keeps simultaneous requests from overwriting
        # each other's result, which a fixed path in the working directory
        # would allow. The directory is left in place so the returned file
        # stays readable by the caller.
        output_path = os.path.join(
            tempfile.mkdtemp(prefix="pdf_merge_"), "merged_output.pdf"
        )
        merger.write(output_path)
    finally:
        # Release the merger's resources even when a PDF fails to parse.
        merger.close()
    return output_path
| # Split PDF | |
def split_pdf(file):
    """Split a PDF into one-page PDFs and bundle them in a ZIP archive.

    Args:
        file: The uploaded PDF, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object exposing the path
            as ``.name``.

    Returns:
        Path of ``split_pages.zip`` inside a freshly created temporary
        directory. The archive members are named ``page_1.pdf``,
        ``page_2.pdf``, ... in page order.

    Raises:
        ValueError: If ``file`` is ``None``.
    """
    if file is None:
        raise ValueError("Please upload a PDF to split.")
    source_path = file if isinstance(file, (str, os.PathLike)) else file.name

    reader = PyPDF2.PdfReader(source_path)

    # Every call gets its own output directory. A shared folder that is
    # emptied at the start of each call lets concurrent requests delete or
    # overwrite one another's pages.
    zip_filename = os.path.join(
        tempfile.mkdtemp(prefix="pdf_split_"), "split_pages.zip"
    )
    with zipfile.ZipFile(zip_filename, "w") as zipf:
        for page_number, page in enumerate(reader.pages, start=1):
            writer = PyPDF2.PdfWriter()
            writer.add_page(page)
            # Serialize the page in memory and store it directly in the
            # archive; no intermediate per-page files are needed.
            page_buffer = io.BytesIO()
            writer.write(page_buffer)
            zipf.writestr(f"page_{page_number}.pdf", page_buffer.getvalue())
    return zip_filename
| # Compress PDF | |
def compress_pdf(file, quality, *, dpi=150):
    """Re-encode every page of a PDF as a JPEG image to shrink the file.

    Each page is rasterized, saved as JPEG at the requested quality and
    placed on a new page that has the same dimensions as the source page.
    Because pages become images, text in the result is no longer selectable.

    Args:
        file: The uploaded PDF, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object exposing the path
            as ``.name``.
        quality: JPEG quality; converted to ``int`` and clamped to 1-95.
        dpi: Resolution used to rasterize each page. Defaults to 150.

    Returns:
        Path of ``compressed_output.pdf`` inside a freshly created temporary
        directory.

    Raises:
        ValueError: If ``file`` is ``None``.
    """
    if file is None:
        raise ValueError("Please upload a PDF to compress.")
    source_path = file if isinstance(file, (str, os.PathLike)) else file.name

    quality = min(max(int(quality), 1), 95)  # Safe JPEG quality range

    # Per-call output directory so concurrent requests cannot overwrite each
    # other's result.
    output_path = os.path.join(
        tempfile.mkdtemp(prefix="pdf_compress_"), "compressed_output.pdf"
    )

    # Context managers close both documents even if rendering fails midway.
    with fitz.open(source_path) as doc, fitz.open() as output:
        for page in doc:
            pix = page.get_pixmap(dpi=dpi)
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            buffer = io.BytesIO()
            img.save(buffer, format="JPEG", quality=quality)

            # Size the new page from the source page rather than from the
            # rendered image: the image's pixel dimensions scale with `dpi`,
            # so using them as the page size inflates the physical page.
            new_page = output.new_page(
                width=page.rect.width, height=page.rect.height
            )
            # getvalue() returns the full JPEG regardless of the buffer's
            # current read position.
            new_page.insert_image(new_page.rect, stream=buffer.getvalue())
        output.save(output_path)
    return output_path
| # Extract Text | |
def extract_text(file):
    """Extract the text of every page of a PDF.

    Args:
        file: The uploaded PDF, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object exposing the path
            as ``.name``.

    Returns:
        A ``(output_path, text)`` tuple: the path of ``extracted_text.txt``
        (UTF-8, inside a freshly created temporary directory) and the
        extracted text itself, with pages concatenated in document order.

    Raises:
        ValueError: If ``file`` is ``None``.
    """
    if file is None:
        raise ValueError("Please upload a PDF to extract text from.")
    source_path = file if isinstance(file, (str, os.PathLike)) else file.name

    # The context manager closes the document even if a page fails to parse.
    with fitz.open(source_path) as doc:
        text = "".join(page.get_text() for page in doc)

    # Per-call output directory so concurrent requests cannot overwrite each
    # other's result.
    output_path = os.path.join(
        tempfile.mkdtemp(prefix="pdf_text_"), "extracted_text.txt"
    )
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(text)
    return output_path, text
| # Gradio Interface | |
with gr.Blocks(theme=gr.themes.Base(primary_hue="orange")) as demo:
    # Page header shown above the tabs.
    gr.Markdown("""
# π Local PDF Toolkit
Merge, Split, Compress, and Extract Text from PDFs β Safely inside Hugging Face
""")

    # Merge tab: several PDFs in, one merged PDF out.
    with gr.Tab("π Merge PDFs"):
        merge_input = gr.File(
            file_types=[".pdf"],
            file_count="multiple",
            label="Select PDFs to Merge",
        )
        merge_btn = gr.Button("π Merge PDFs", variant="primary")
        merge_output = gr.File(label="β¬οΈ Download Merged PDF")
        merge_btn.click(
            merge_pdfs,
            inputs=merge_input,
            outputs=merge_output,
        )

    # Split tab: one PDF in, a ZIP of single-page PDFs out.
    with gr.Tab("βοΈ Split PDF"):
        split_input = gr.File(
            file_types=[".pdf"],
            label="Select PDF to Split",
        )
        split_btn = gr.Button("βοΈ Split PDF", variant="primary")
        split_output = gr.File(label="β¬οΈ Download Split ZIP")
        split_btn.click(
            split_pdf,
            inputs=split_input,
            outputs=split_output,
        )

    # Compress tab: one PDF plus a quality setting in, a smaller PDF out.
    with gr.Tab("π Compress PDF"):
        compress_input = gr.File(
            file_types=[".pdf"],
            label="Select PDF to Compress",
        )
        compress_quality = gr.Slider(
            minimum=10,
            maximum=100,
            value=60,
            label="Compression Quality (%)",
        )
        compress_btn = gr.Button("π Compress PDF", variant="primary")
        compress_output = gr.File(label="β¬οΈ Download Compressed PDF")
        compress_btn.click(
            compress_pdf,
            inputs=[compress_input, compress_quality],
            outputs=compress_output,
        )

    # Extract tab: one PDF in, a text file plus an on-page preview out.
    with gr.Tab("π Extract Text"):
        extract_input = gr.File(
            file_types=[".pdf"],
            label="Select PDF to Extract Text",
        )
        extract_btn = gr.Button("π Extract Text", variant="primary")
        extract_file = gr.File(label="β¬οΈ Download Extracted Text File")
        extract_preview = gr.Textbox(
            label="π Preview Text",
            lines=20,
            max_lines=100,
            interactive=False,
            show_copy_button=True,
        )
        # extract_text returns (file path, text); the outputs are listed in
        # the same order.
        extract_btn.click(
            extract_text,
            inputs=extract_input,
            outputs=[extract_file, extract_preview],
        )

# Start the app.
demo.launch()