# pdf-toolkit / app.py
# vaibhavbalar's picture
# Update app.py
# 441853b verified
# app.py
import gradio as gr
import fitz # PyMuPDF
import PyPDF2
import os
import zipfile
from PIL import Image
import io
# Merge PDFs
def merge_pdfs(files):
    """Merge the uploaded PDFs, in the order given, into a single PDF.

    Args:
        files: Sequence of uploads from the file component. Each item may be
            an object exposing its path as ``.name`` or a plain path string.

    Returns:
        Path of the merged PDF (``"merged_output.pdf"`` in the working
        directory).

    Raises:
        ValueError: If ``files`` is ``None`` or empty.
    """
    if not files:
        raise ValueError("Please select at least one PDF to merge.")

    output_path = "merged_output.pdf"
    merger = PyPDF2.PdfMerger()
    try:
        for file in files:
            # Accept both upload objects (``.name``) and bare path strings.
            merger.append(getattr(file, "name", file))
        merger.write(output_path)
    finally:
        # Close the merger even when one of the inputs fails to parse.
        merger.close()
    return output_path
# Split PDF
def split_pdf(file):
    """Split a PDF into single-page PDFs and bundle them into a ZIP archive.

    Args:
        file: Upload from the file component; either an object exposing its
            path as ``.name`` or a plain path string.

    Returns:
        Path of the archive (``"split_pages.zip"`` in the working directory).
        It contains one entry per page, named ``page_1.pdf``, ``page_2.pdf``,
        and so on.

    Raises:
        ValueError: If ``file`` is ``None``.
    """
    if file is None:
        raise ValueError("Please select a PDF to split.")

    reader = PyPDF2.PdfReader(getattr(file, "name", file))
    zip_filename = "split_pages.zip"
    with zipfile.ZipFile(zip_filename, "w") as zipf:
        for page_number, page in enumerate(reader.pages, start=1):
            writer = PyPDF2.PdfWriter()
            writer.add_page(page)
            # Serialize each page in memory and add it straight to the
            # archive: no shared scratch folder has to be wiped beforehand
            # and no intermediate files are left behind afterwards.
            page_buffer = io.BytesIO()
            writer.write(page_buffer)
            zipf.writestr(f"page_{page_number}.pdf", page_buffer.getvalue())
    return zip_filename
# Compress PDF
def compress_pdf(file, quality):
    """Shrink a PDF by re-encoding every page as a JPEG image.

    Each page is rendered at 150 dpi, saved as a JPEG at the requested
    quality, and placed on a new page with the same dimensions as the source
    page. The output pages contain only that rendered image.

    Args:
        file: Upload from the file component; either an object exposing its
            path as ``.name`` or a plain path string.
        quality: JPEG quality. Converted to ``int`` and clamped to 1-95.

    Returns:
        Path of the compressed PDF (``"compressed_output.pdf"`` in the
        working directory).

    Raises:
        ValueError: If ``file`` is ``None``.
    """
    if file is None:
        raise ValueError("Please select a PDF to compress.")

    quality = min(max(int(quality), 1), 95)  # Safe JPEG quality range
    output_path = "compressed_output.pdf"

    # Context managers close both documents even if rendering fails midway.
    with fitz.open(getattr(file, "name", file)) as doc, fitz.open() as output:
        for page in doc:
            pix = page.get_pixmap(dpi=150)
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            buffer = io.BytesIO()
            img.save(buffer, format="JPEG", quality=quality)

            # Size the new page from the source page, not from the rendered
            # image: the pixel dimensions at 150 dpi do not equal the page's
            # dimensions in points, so using them would change the page size.
            source_rect = page.rect
            new_page = output.new_page(
                width=source_rect.width, height=source_rect.height
            )
            new_page.insert_image(new_page.rect, stream=buffer.getvalue())
        output.save(output_path)
    return output_path
# Extract Text
def extract_text(file):
    """Extract the text of every page of a PDF and save it to a text file.

    Args:
        file: Upload from the file component; either an object exposing its
            path as ``.name`` or a plain path string.

    Returns:
        A tuple ``(output_path, text)``: the path of the UTF-8 text file
        (``"extracted_text.txt"`` in the working directory) and the extracted
        text itself, pages concatenated in document order.

    Raises:
        ValueError: If ``file`` is ``None``.
    """
    if file is None:
        raise ValueError("Please select a PDF to extract text from.")

    # The context manager closes the document even if a page fails to parse.
    with fitz.open(getattr(file, "name", file)) as doc:
        text = "".join(page.get_text() for page in doc)

    output_path = "extracted_text.txt"
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(text)
    return output_path, text
# Gradio Interface
# Build the UI: one tab per tool, each wiring a button to its handler.
# NOTE(review): the labels below were mis-encoded (UTF-8 mojibake) and have
# been restored. The title emoji had lost a byte; the folder emoji is a best
# guess - confirm against the intended design.
with gr.Blocks(theme=gr.themes.Base(primary_hue="orange")) as demo:
    gr.Markdown("""
# 📁 Local PDF Toolkit
Merge, Split, Compress, and Extract Text from PDFs — Safely inside Hugging Face
""")

    # Merge: several PDFs in, one merged PDF out.
    with gr.Tab("🔗 Merge PDFs"):
        merge_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Select PDFs to Merge")
        merge_btn = gr.Button("🚀 Merge PDFs", variant="primary")
        merge_output = gr.File(label="⬇️ Download Merged PDF")
        merge_btn.click(merge_pdfs, inputs=merge_input, outputs=merge_output)

    # Split: one PDF in, a ZIP of single-page PDFs out.
    with gr.Tab("✂️ Split PDF"):
        split_input = gr.File(file_types=[".pdf"], label="Select PDF to Split")
        split_btn = gr.Button("✂️ Split PDF", variant="primary")
        split_output = gr.File(label="⬇️ Download Split ZIP")
        split_btn.click(split_pdf, inputs=split_input, outputs=split_output)

    # Compress: one PDF plus a quality setting in, a re-encoded PDF out.
    with gr.Tab("📉 Compress PDF"):
        compress_input = gr.File(file_types=[".pdf"], label="Select PDF to Compress")
        compress_quality = gr.Slider(minimum=10, maximum=100, value=60, label="Compression Quality (%)")
        compress_btn = gr.Button("📉 Compress PDF", variant="primary")
        compress_output = gr.File(label="⬇️ Download Compressed PDF")
        compress_btn.click(compress_pdf, inputs=[compress_input, compress_quality], outputs=compress_output)

    # Extract: one PDF in, a text file plus an on-screen preview out.
    with gr.Tab("📜 Extract Text"):
        extract_input = gr.File(file_types=[".pdf"], label="Select PDF to Extract Text")
        extract_btn = gr.Button("📜 Extract Text", variant="primary")
        extract_file = gr.File(label="⬇️ Download Extracted Text File")
        extract_preview = gr.Textbox(label="📖 Preview Text", lines=20, max_lines=100, interactive=False, show_copy_button=True)
        extract_btn.click(extract_text, inputs=extract_input, outputs=[extract_file, extract_preview])

demo.launch()