Spaces:
Sleeping
Sleeping
File size: 5,635 Bytes
dff2b65 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
import os
import gradio as gr
from PyPDF2 import PdfWriter, PdfReader
import zipfile
import tempfile
import fitz # PyMuPDF
def merge_pdfs(pdf_files):
    """Merge the uploaded PDFs into one document made of A4 pages.

    Every page of every input is drawn onto a new A4-sized page (scaled into
    the A4 rectangle with its proportions kept), in the order the files are
    given. The result is saved as ``merged.pdf`` in a fresh temporary
    directory.

    Args:
        pdf_files: Iterable of uploads. Each item is either a path
            (``str`` / ``os.PathLike``) or an object whose ``name`` attribute
            holds the path. ``None`` or an empty list means nothing was
            uploaded.

    Returns:
        A ``(status_message, output_path)`` tuple; ``output_path`` is ``None``
        when nothing was uploaded.
    """
    if not pdf_files:
        return "❌ No PDF files uploaded.", None

    output_dir = tempfile.mkdtemp()
    output_file = os.path.join(output_dir, "merged.pdf")
    a4_rect = fitz.paper_rect("a4")

    # Context managers guarantee both documents are closed even when a source
    # file fails to open or render part-way through the merge.
    with fitz.open() as doc_out:
        for file in pdf_files:
            src_path = file if isinstance(file, (str, os.PathLike)) else file.name
            with fitz.open(src_path) as src:
                for page in src:
                    page_out = doc_out.new_page(
                        width=a4_rect.width,
                        height=a4_rect.height,
                    )
                    page_out.show_pdf_page(
                        a4_rect,
                        src,
                        page.number,
                        keep_proportion=True,
                    )
        doc_out.save(output_file)

    return "✅ PDFs merged successfully.", output_file
def compress_pdf(file, dpi_threshold, dpi_target, quality):
    """Write a size-reduced copy of the uploaded PDF.

    Images are re-encoded, fonts are subset, and the file is saved with
    garbage collection and stream compression as
    ``compressed_output.pdf`` in a fresh temporary directory.

    Args:
        file: The upload; either a path (``str`` / ``os.PathLike``) or an
            object whose ``name`` attribute holds the path. ``None`` means
            nothing was uploaded.
        dpi_threshold: Passed to ``rewrite_images``; images above this DPI
            are recompressed.
        dpi_target: Passed to ``rewrite_images``; DPI the images are
            resampled to.
        quality: Passed to ``rewrite_images``; JPEG quality (the UI labels
            the range as 1-100).

    Returns:
        A ``(status_message, output_path)`` tuple; ``output_path`` is ``None``
        when nothing was uploaded.
    """
    # Same guard as the other handlers: clicking the button without an upload
    # must produce a status message rather than an AttributeError.
    if file is None:
        return "❌ No file uploaded.", None

    input_path = file if isinstance(file, (str, os.PathLike)) else file.name

    # Create a unique temporary directory for the compressed output
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "compressed_output.pdf")

    # The context manager closes the document even if a step below raises.
    with fitz.open(input_path) as doc:
        # Recompress images above dpi_threshold to target DPI with JPEG at quality
        doc.rewrite_images(
            dpi_threshold=dpi_threshold,
            dpi_target=dpi_target,
            quality=quality,
            lossy=True,
            lossless=True,
            bitonal=True,
            color=True,
            gray=True,
            set_to_gray=False,
        )

        # Subset fonts, then save with garbage collection level 3 (one below
        # PyMuPDF's maximum of 4), deflated streams and object streams.
        doc.subset_fonts()
        doc.save(
            output_path,
            garbage=3,
            deflate=True,
            use_objstms=True,
        )

    return "✅ PDF compressed successfully!", output_path
def split_pdf(file_path, start_page, end_page):
    """Split a page range of a PDF into single-page PDFs bundled in a ZIP.

    Each selected page is written to ``<base>-page<N>.pdf`` (``N`` is
    1-based) in a fresh temporary directory and added to
    ``<base>_split_pages.zip`` in that same directory.

    Args:
        file_path: Path of the PDF to split.
        start_page: Zero-based index of the first page to extract. ``None``
            is treated as 0; values outside the document are clamped.
        end_page: Zero-based index of the last page to extract (inclusive).
            ``None`` is treated as 0; the value is clamped so that it is
            never before ``start_page`` nor past the last page.

    Returns:
        ``(zip_path, first_page, last_page)`` where the two page numbers are
        the 1-based bounds actually used after clamping.

    Raises:
        ValueError: If the PDF contains no pages.
    """
    # splitext handles any extension length/case instead of blindly chopping
    # the last four characters.
    base_name = os.path.splitext(os.path.basename(file_path))[0]

    # Keep the input open only while pages are being read, and make sure it
    # is closed afterwards.
    with open(file_path, "rb") as pdf_stream:
        input_pdf = PdfReader(pdf_stream)
        total_pages = len(input_pdf.pages)
        if total_pages == 0:
            # Clamping cannot yield a valid index for an empty document.
            raise ValueError("PDF has no pages to split.")

        # Clamp values within range; int() tolerates float input from the UI.
        last_index = total_pages - 1
        start_page = max(0, min(int(start_page or 0), last_index))
        end_page = max(start_page, min(int(end_page or 0), last_index))

        output_dir = tempfile.mkdtemp()
        zip_path = os.path.join(output_dir, f"{base_name}_split_pages.zip")

        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
            for i in range(start_page, end_page + 1):
                writer = PdfWriter()
                writer.add_page(input_pdf.pages[i])
                split_pdf_path = os.path.join(
                    output_dir, f"{base_name}-page{i+1}.pdf"
                )
                with open(split_pdf_path, "wb") as f_out:
                    writer.write(f_out)
                zipf.write(split_pdf_path, arcname=os.path.basename(split_pdf_path))

    return zip_path, start_page + 1, end_page + 1
def process_pdf(file, start_page, end_page):
    """Split the uploaded PDF and build the status shown in the UI.

    Args:
        file: The upload; either a path (``str`` / ``os.PathLike``) or an
            object whose ``name`` attribute holds the path. ``None`` means
            nothing was uploaded.
        start_page: Zero-based first page, forwarded to ``split_pdf``.
        end_page: Zero-based last page (inclusive), forwarded to
            ``split_pdf``.

    Returns:
        A ``(status_message, zip_path)`` tuple; ``zip_path`` is ``None`` when
        nothing was uploaded. The page numbers in the message are the
        1-based bounds returned by ``split_pdf``.
    """
    if file is None:
        return "❌ No file uploaded.", None

    file_path = file if isinstance(file, (str, os.PathLike)) else file.name
    zip_file_path, actual_start, actual_end = split_pdf(file_path, start_page, end_page)
    status = f"✅ File '{file_path}' split from page {actual_start} to {actual_end}."
    return status, zip_file_path
# Gradio front end: one tab per operation, each button wired to its handler.
# NOTE(review): the leading "π" in the heading and button labels looks like a
# mis-decoded emoji; the strings are kept exactly as found -- confirm the
# intended glyphs before changing them.
with gr.Blocks(title="PDF Utility") as demo:
    gr.Markdown("# π PDF Utility App")

    with gr.Tabs():
        # --- Split: page range in, ZIP of single-page PDFs out ---
        with gr.TabItem("Split PDF"):
            gr.Markdown("Upload a PDF, select page range, and click **Split PDF** to download a ZIP of split pages.")
            with gr.Row():
                split_upload = gr.File(file_types=[".pdf"], label="Upload PDF")
            with gr.Row():
                first_page_input = gr.Number(value=0, precision=0, label="Start Page (0-based)")
                last_page_input = gr.Number(value=0, precision=0, label="End Page (0-based)")
            split_btn = gr.Button("π Split PDF")
            split_status = gr.Textbox(lines=2, label="Status")
            split_download = gr.File(label="Download ZIP")
            split_btn.click(
                fn=process_pdf,
                inputs=[split_upload, first_page_input, last_page_input],
                outputs=[split_status, split_download],
            )

        # --- Compress: image/DPI settings in, smaller PDF out ---
        with gr.TabItem("Compress PDF"):
            gr.Markdown("Upload a PDF and click **Compress PDF** to download the compressed version.")
            with gr.Row():
                compress_upload = gr.File(file_types=[".pdf"], label="Upload PDF")
            with gr.Row():
                threshold_input = gr.Number(value=100, precision=0, label="DPI Threshold")
                target_input = gr.Number(value=72, precision=0, label="Target DPI")
                quality_input = gr.Number(value=60, precision=0, label="JPEG Quality (1-100)")
            compress_btn = gr.Button("π Compress PDF")
            compress_status = gr.Textbox(lines=2, label="Status")
            compress_download = gr.File(label="Download compressed PDF")
            compress_btn.click(
                fn=compress_pdf,
                inputs=[compress_upload, threshold_input, target_input, quality_input],
                outputs=[compress_status, compress_download],
            )

        # --- Merge: several PDFs in, one PDF out ---
        with gr.TabItem("Merge PDFs"):
            gr.Markdown("Upload multiple PDFs and click **Merge PDFs** to download the merged version.")
            merge_uploads = gr.File(file_count="multiple", file_types=[".pdf"], label="Upload PDFs")
            merge_btn = gr.Button("π Merge PDF Files")
            merge_result_status = gr.Textbox(lines=2, label="Status")
            merge_download = gr.File(label="Download Merged PDF")
            merge_btn.click(
                fn=merge_pdfs,
                inputs=[merge_uploads],
                outputs=[merge_result_status, merge_download],
            )


# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
|