# Hugging Face Space status at capture time: Sleeping
| import os | |
| import gradio as gr | |
| from PyPDF2 import PdfWriter, PdfReader | |
| import zipfile | |
| import tempfile | |
| import fitz # PyMuPDF | |
def merge_pdfs(pdf_files):
    """Merge the uploaded PDFs into a single document of A4-sized pages.

    Every source page is drawn onto a fresh A4 page with its proportions
    kept, so the merged file has a uniform page size.

    Args:
        pdf_files: Uploaded file objects whose ``.name`` attribute is the
            path of the PDF on disk. May be ``None`` or empty.

    Returns:
        A ``(status, path)`` tuple. ``path`` is the merged PDF inside a new
        temporary directory, or ``None`` when nothing was uploaded.
    """
    if not pdf_files:
        return "β No PDF files uploaded.", None

    output_dir = tempfile.mkdtemp()
    output_file = os.path.join(output_dir, "merged.pdf")
    a4_rect = fitz.paper_rect("a4")

    doc_out = fitz.open()
    try:
        for file in pdf_files:
            src = fitz.open(file.name)
            try:
                for page in src:
                    page_out = doc_out.new_page(
                        width=a4_rect.width, height=a4_rect.height
                    )
                    page_out.show_pdf_page(
                        a4_rect, src, page.number, keep_proportion=True
                    )
            finally:
                # Release the source even if rendering one of its pages fails.
                src.close()
        doc_out.save(output_file)
    finally:
        # Release the output document even if a source could not be opened
        # or the save failed.
        doc_out.close()

    return "β PDFs merged successfully.", output_file
def compress_pdf(file, dpi_threshold, dpi_target, quality):
    """Shrink a PDF by recompressing its images and subsetting its fonts.

    Args:
        file: Uploaded file object whose ``.name`` attribute is the path of
            the PDF on disk, or ``None`` when nothing was uploaded.
        dpi_threshold: Passed to ``rewrite_images`` as ``dpi_threshold``.
        dpi_target: Passed to ``rewrite_images`` as ``dpi_target``.
        quality: Passed to ``rewrite_images`` as ``quality``.

    Returns:
        A ``(status, path)`` tuple. ``path`` is the compressed PDF inside a
        new temporary directory, or ``None`` when nothing was uploaded.
    """
    # Same guard and message as process_pdf, so clicking the button without
    # an upload reports a status instead of raising AttributeError.
    if file is None:
        return "β No file uploaded.", None

    # A fresh temporary directory keeps concurrent requests from colliding.
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "compressed_output.pdf")

    doc = fitz.open(file.name)
    try:
        # Recompress images above dpi_threshold to target DPI with JPEG at quality
        doc.rewrite_images(
            dpi_threshold=dpi_threshold,
            dpi_target=dpi_target,
            quality=quality,
            lossy=True,
            lossless=True,
            bitonal=True,
            color=True,
            gray=True,
            set_to_gray=False,
        )
        # Subset fonts, then save with garbage collection level 3 and
        # deflate-compressed streams packed into object streams.
        doc.subset_fonts()
        doc.save(
            output_path,
            garbage=3,
            deflate=True,
            use_objstms=True,
        )
    finally:
        # The original never closed the document; release it on every path.
        doc.close()

    status = "β PDF compressed successfully!"
    return status, output_path
def split_pdf(file_path, start_page, end_page):
    """Write each page of a range to its own PDF and bundle them in a ZIP.

    Args:
        file_path: Path of the source PDF.
        start_page: 0-based index of the first page to extract.
        end_page: 0-based index of the last page to extract (inclusive).

    Returns:
        ``(zip_path, first, last)`` where ``zip_path`` is the archive inside
        a new temporary directory and ``first``/``last`` are the 1-based
        page numbers actually used after clamping.

    Raises:
        ValueError: If the PDF contains no pages.
    """
    # splitext drops whatever extension is present; slicing off four
    # characters mangled names that do not end in a 4-character suffix.
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    output_dir = tempfile.mkdtemp()
    zip_path = os.path.join(output_dir, f"{base_name}_split_pages.zip")

    # Keep the source open only while pages are being read, and close it
    # (and the archive) even if writing a page fails.
    with open(file_path, "rb") as pdf_stream:
        input_pdf = PdfReader(pdf_stream)
        total_pages = len(input_pdf.pages)
        if total_pages == 0:
            raise ValueError(f"'{file_path}' contains no pages to split.")

        # Clamp values within range; int() lets whole-number floats reach range().
        start_page = max(0, min(int(start_page), total_pages - 1))
        end_page = max(start_page, min(int(end_page), total_pages - 1))

        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
            for i in range(start_page, end_page + 1):
                writer = PdfWriter()
                writer.add_page(input_pdf.pages[i])
                split_pdf_path = os.path.join(
                    output_dir, f"{base_name}-page{i+1}.pdf"
                )
                with open(split_pdf_path, "wb") as f_out:
                    writer.write(f_out)
                zipf.write(
                    split_pdf_path, arcname=os.path.basename(split_pdf_path)
                )

    return zip_path, start_page + 1, end_page + 1
def process_pdf(file, start_page, end_page):
    """Handle the Split button: split the upload and report the result.

    Args:
        file: Uploaded file object whose ``.name`` attribute is the path of
            the PDF on disk, or ``None`` when nothing was uploaded.
        start_page: 0-based first page, or ``None`` if the field was cleared.
        end_page: 0-based last page (inclusive), or ``None`` if the field
            was cleared.

    Returns:
        A ``(status, zip_path)`` tuple. ``zip_path`` is ``None`` when no
        file was uploaded.
    """
    if file is None:
        return "β No file uploaded.", None

    # A cleared number field arrives as None and would crash the clamping in
    # split_pdf; default to the first page, or to a single-page range.
    if start_page is None:
        start_page = 0
    if end_page is None:
        end_page = start_page

    zip_file_path, actual_start, actual_end = split_pdf(
        file.name, start_page, end_page
    )
    status = f"β File '{file.name}' split from page {actual_start} to {actual_end}."
    return status, zip_file_path
# Build the three-tab UI. Each tab wires one button to one handler above.
# NOTE(review): the source had its indentation flattened, so which widgets sit
# inside each gr.Row() is reconstructed here - confirm against the live layout.
with gr.Blocks(title="PDF Utility") as demo:
    gr.Markdown("# π PDF Utility App")

    with gr.Tabs():
        # --- Tab 1: split a page range into one PDF per page, zipped ---
        with gr.TabItem("Split PDF"):
            gr.Markdown(
                "Upload a PDF, select page range, and click **Split PDF** to download a ZIP of split pages."
            )
            with gr.Row():
                file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            with gr.Row():
                start_page = gr.Number(
                    label="Start Page (0-based)", value=0, precision=0
                )
                end_page = gr.Number(
                    label="End Page (0-based)", value=0, precision=0
                )
            split_button = gr.Button("π Split PDF")
            status_text = gr.Textbox(label="Status", lines=2)
            download_link = gr.File(label="Download ZIP")

            split_button.click(
                fn=process_pdf,
                inputs=[file_input, start_page, end_page],
                outputs=[status_text, download_link],
            )

        # --- Tab 2: recompress images and fonts of a single PDF ---
        with gr.TabItem("Compress PDF"):
            gr.Markdown(
                "Upload a PDF and click **Compress PDF** to download the compressed version."
            )
            with gr.Row():
                file_input_compress = gr.File(
                    label="Upload PDF", file_types=[".pdf"]
                )
            with gr.Row():
                dpi_threshold = gr.Number(
                    label="DPI Threshold", value=100, precision=0
                )
                dpi_target = gr.Number(label="Target DPI", value=72, precision=0)
                quality = gr.Number(
                    label="JPEG Quality (1-100)", value=60, precision=0
                )
            compress_button = gr.Button("π Compress PDF")
            status_text_compress = gr.Textbox(label="Status", lines=2)
            download_link_compress = gr.File(label="Download compressed PDF")

            compress_button.click(
                fn=compress_pdf,
                inputs=[file_input_compress, dpi_threshold, dpi_target, quality],
                outputs=[status_text_compress, download_link_compress],
            )

        # --- Tab 3: merge several PDFs into one ---
        with gr.TabItem("Merge PDFs"):
            gr.Markdown(
                "Upload multiple PDFs and click **Merge PDFs** to download the merged version."
            )
            pdf_uploads = gr.File(
                label="Upload PDFs", file_types=[".pdf"], file_count="multiple"
            )
            merge_button = gr.Button("π Merge PDF Files")
            merge_status = gr.Textbox(label="Status", lines=2)
            merged_file = gr.File(label="Download Merged PDF")

            merge_button.click(
                fn=merge_pdfs,
                inputs=[pdf_uploads],
                outputs=[merge_status, merged_file],
            )


# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()