# PDF_Utility / app.py
# HawkeyeHS's picture
# Add files
# dff2b65
import os
import gradio as gr
from PyPDF2 import PdfWriter, PdfReader
import zipfile
import tempfile
import fitz # PyMuPDF
def merge_pdfs(pdf_files):
    """Merge the uploaded PDFs into a single document laid out on A4 pages.

    Every page of every input is drawn, in upload order, onto a newly created
    A4-sized page (proportions kept). The result is saved as ``merged.pdf``
    inside a fresh temporary directory.

    Args:
        pdf_files: Iterable of uploaded file objects whose ``.name`` attribute
            holds the path on disk. May be ``None`` or empty.

    Returns:
        A ``(status_message, output_path)`` tuple. ``output_path`` is ``None``
        when nothing was uploaded.
    """
    if not pdf_files:
        return "❌ No PDF files uploaded.", None

    output_dir = tempfile.mkdtemp()
    output_file = os.path.join(output_dir, "merged.pdf")
    a4_rect = fitz.paper_rect("a4")

    # Context managers close both documents even if a page operation or the
    # final save raises, so no file handles are leaked on failure.
    with fitz.open() as doc_out:
        for file in pdf_files:
            with fitz.open(file.name) as src:
                for page in src:
                    page_out = doc_out.new_page(
                        width=a4_rect.width, height=a4_rect.height
                    )
                    page_out.show_pdf_page(
                        a4_rect, src, page.number, keep_proportion=True
                    )
        doc_out.save(output_file)

    return "✅ PDFs merged successfully.", output_file
def compress_pdf(file, dpi_threshold, dpi_target, quality):
    """Write a size-reduced copy of the uploaded PDF.

    Images are rewritten via ``Document.rewrite_images`` using the given DPI
    threshold, target DPI and quality; fonts are subset; and the file is saved
    with garbage collection, deflate compression and object streams enabled.
    The output is ``compressed_output.pdf`` in a fresh temporary directory.

    Args:
        file: Uploaded file object whose ``.name`` is the path on disk, or
            ``None`` when nothing was uploaded.
        dpi_threshold: Passed through as ``dpi_threshold`` to ``rewrite_images``.
        dpi_target: Passed through as ``dpi_target`` to ``rewrite_images``.
        quality: Passed through as ``quality`` to ``rewrite_images``.

    Returns:
        A ``(status_message, output_path)`` tuple. ``output_path`` is ``None``
        when no file was uploaded.
    """
    # Same guard as the other handlers: clicking the button with no upload
    # should produce a status message, not an AttributeError on ``file.name``.
    if file is None:
        return "❌ No file uploaded.", None

    input_path = file.name

    # Create a unique temporary directory for the compressed output
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "compressed_output.pdf")

    # The context manager closes the document even if rewriting or saving fails.
    with fitz.open(input_path) as doc:
        doc.rewrite_images(
            dpi_threshold=dpi_threshold,
            dpi_target=dpi_target,
            quality=quality,
            lossy=True,
            lossless=True,
            bitonal=True,
            color=True,
            gray=True,
            set_to_gray=False,
        )

        # Subset fonts, then save with garbage collection (level 3),
        # stream compression and object streams.
        doc.subset_fonts()
        doc.save(
            output_path,
            garbage=3,
            deflate=True,
            use_objstms=True,
        )

    return "✅ PDF compressed successfully!", output_path
def split_pdf(file_path, start_page, end_page):
    """Split a page range of a PDF into single-page PDFs bundled in a ZIP.

    Each selected page is written as ``<base>-page<N>.pdf`` (``N`` is 1-based)
    into a fresh temporary directory and added to
    ``<base>_split_pages.zip`` in that same directory.

    Args:
        file_path: Path of the PDF to split.
        start_page: 0-based index of the first page; clamped into
            ``[0, total_pages - 1]``.
        end_page: 0-based index of the last page (inclusive); clamped into
            ``[start_page, total_pages - 1]``.

    Returns:
        A ``(zip_path, first_page, last_page)`` tuple where the page numbers
        are the clamped range converted to 1-based numbering.
    """
    # splitext strips whatever extension is actually present instead of
    # blindly chopping the last four characters.
    base_name, _ = os.path.splitext(os.path.basename(file_path))

    output_dir = tempfile.mkdtemp()
    zip_path = os.path.join(output_dir, f"{base_name}_split_pages.zip")

    # Keep the input stream open for the whole loop (the reader pulls page
    # data from it while writing) and close it, and the archive, afterwards.
    with open(file_path, "rb") as pdf_stream:
        input_pdf = PdfReader(pdf_stream)
        total_pages = len(input_pdf.pages)

        # Clamp values within range
        start_page = max(0, min(start_page, total_pages - 1))
        end_page = max(start_page, min(end_page, total_pages - 1))

        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
            for i in range(start_page, end_page + 1):
                writer = PdfWriter()
                writer.add_page(input_pdf.pages[i])

                split_pdf_path = os.path.join(
                    output_dir, f"{base_name}-page{i + 1}.pdf"
                )
                with open(split_pdf_path, "wb") as f_out:
                    writer.write(f_out)

                zipf.write(split_pdf_path, arcname=os.path.basename(split_pdf_path))

    return zip_path, start_page + 1, end_page + 1
def process_pdf(file, start_page, end_page):
    """Handle the "Split PDF" action: split the upload and build a status line.

    Args:
        file: Uploaded file object whose ``.name`` is the path on disk, or
            ``None`` when nothing was uploaded.
        start_page: 0-based first page requested by the user.
        end_page: 0-based last page (inclusive) requested by the user.

    Returns:
        A ``(status_message, zip_path)`` tuple. ``zip_path`` is ``None`` when
        no file was uploaded. The status reports the range returned by
        ``split_pdf``, which is 1-based.
    """
    if file is None:
        return "❌ No file uploaded.", None

    zip_file_path, actual_start, actual_end = split_pdf(
        file.name, start_page, end_page
    )
    status = f"✅ File '{file.name}' split from page {actual_start} to {actual_end}."
    return status, zip_file_path
# Gradio UI: one tab per operation (split, compress, merge). Each tab wires a
# button to the matching handler and shows a status line plus a download slot.
with gr.Blocks(title="PDF Utility") as demo:
    gr.Markdown("# 📄 PDF Utility App")

    with gr.Tabs():
        # --- Split tab: page range in, ZIP of single-page PDFs out ---
        with gr.TabItem("Split PDF"):
            gr.Markdown("Upload a PDF, select page range, and click **Split PDF** to download a ZIP of split pages.")
            with gr.Row():
                file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            with gr.Row():
                # precision=0 makes the Number components integer-valued.
                start_page = gr.Number(label="Start Page (0-based)", value=0, precision=0)
                end_page = gr.Number(label="End Page (0-based)", value=0, precision=0)
            split_button = gr.Button("🚀 Split PDF")
            status_text = gr.Textbox(label="Status", lines=2)
            download_link = gr.File(label="Download ZIP")
            split_button.click(
                fn=process_pdf,
                inputs=[file_input, start_page, end_page],
                outputs=[status_text, download_link]
            )

        # --- Compress tab: image/DPI settings in, compressed PDF out ---
        with gr.TabItem("Compress PDF"):
            gr.Markdown("Upload a PDF and click **Compress PDF** to download the compressed version.")
            with gr.Row():
                file_input_compress = gr.File(label="Upload PDF", file_types=[".pdf"])
            with gr.Row():
                dpi_threshold = gr.Number(label="DPI Threshold", value=100, precision=0)
                dpi_target = gr.Number(label="Target DPI", value=72, precision=0)
                quality = gr.Number(label="JPEG Quality (1-100)", value=60, precision=0)
            compress_button = gr.Button("🚀 Compress PDF")
            status_text_compress = gr.Textbox(label="Status", lines=2)
            download_link_compress = gr.File(label="Download compressed PDF")
            compress_button.click(
                fn=compress_pdf,
                inputs=[file_input_compress, dpi_threshold, dpi_target, quality],
                outputs=[status_text_compress, download_link_compress]
            )

        # --- Merge tab: several PDFs in, one merged PDF out ---
        with gr.TabItem("Merge PDFs"):
            gr.Markdown("Upload multiple PDFs and click **Merge PDFs** to download the merged version.")
            pdf_uploads = gr.File(label="Upload PDFs", file_types=[".pdf"], file_count="multiple")
            merge_button = gr.Button("📎 Merge PDF Files")
            merge_status = gr.Textbox(label="Status", lines=2)
            merged_file = gr.File(label="Download Merged PDF")
            merge_button.click(
                fn=merge_pdfs,
                inputs=[pdf_uploads],
                outputs=[merge_status, merged_file]
            )

# Start the web app only when the file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()