pdf-split / app.py
plozia's picture
Update app.py
a84133d verified
import os
import tempfile
from typing import List
import gradio as gr
from PyPDF2 import PdfReader, PdfWriter
# Load the custom stylesheet that sits next to this module.
# The encoding is given explicitly so decoding does not depend on the host's
# locale default.
with open(
    os.path.join(os.path.dirname(__file__), "fancy.css"), encoding="utf-8"
) as f:
    custom_css = f.read()
# Upload size cap, in bytes: 1024 ** 3 (the "1 GB" limit quoted in the UI).
MAX_SIZE_BYTES = 1024 ** 3
def parse_page_ranges(ranges: str, num_pages: int) -> List[int]:
    """Convert a page-range string into a sorted list of zero-based indices.

    ``ranges`` is a comma-separated list of 1-based page numbers (``"5"``)
    and inclusive ranges (``"1-3"``), for example ``"1-3,5,8-10"``.
    Whitespace around tokens is ignored, and so are empty tokens such as the
    one produced by a trailing comma.

    Range bounds are clamped to ``1..num_pages``; single page numbers outside
    that interval are dropped. A range whose start exceeds its end selects
    nothing.

    Args:
        ranges: The user-supplied page selection.
        num_pages: Number of pages in the document.

    Returns:
        Sorted, de-duplicated zero-based page indices.

    Raises:
        ValueError: If a token is neither an integer nor an ``a-b`` pair of
            integers.
    """
    pages = set()
    for token in ranges.split(","):
        token = token.strip()
        if not token:
            # Tolerate "1,2," or "1,,2" rather than failing on int("").
            continue
        try:
            if "-" in token:
                start_str, end_str = token.split("-", 1)
                first = max(1, int(start_str))
                last = min(num_pages, int(end_str))
                # 1-based inclusive [first, last] -> 0-based half-open range.
                pages.update(range(first - 1, last))
            else:
                page = int(token)
                if 1 <= page <= num_pages:
                    pages.add(page - 1)
        except ValueError:
            # Name the offending token instead of surfacing int()'s message.
            raise ValueError(f"invalid page range {token!r}") from None
    return sorted(pages)
def split_pdf(file, page_ranges: str):
    """Extract the selected pages of a PDF into a new temporary PDF file.

    Args:
        file: The uploaded PDF: either an object exposing its path as
            ``.name`` or a plain path string.
        page_ranges: Page selection such as ``"1-3,5"`` (1-based, inclusive),
            interpreted by ``parse_page_ranges``.

    Returns:
        ``(output_path, None)`` on success, or ``(None, error_message)`` when
        the file is too large, cannot be read, or no valid page is selected.
        The output file is created with ``delete=False``, so it stays on disk
        after this function returns.
    """
    # Accept both file-like upload objects and bare path strings.
    src_path = getattr(file, "name", file)

    # Validate file size
    file_size = os.path.getsize(src_path)
    if file_size > MAX_SIZE_BYTES:
        return None, f"File size exceeds 1 GB limit ({file_size / (1024*1024*1024):.2f} GB)."

    # A damaged or non-PDF upload is reported as an error tuple, matching
    # the way merge_pdfs reports read failures.
    try:
        reader = PdfReader(src_path)
        num_pages = len(reader.pages)
    except Exception as e:
        return None, f"Error reading PDF: {e}"

    try:
        page_indices = parse_page_ranges(page_ranges, num_pages)
    except Exception as e:
        return None, f"Error parsing page ranges: {e}"
    if not page_indices:
        return None, "No valid pages selected."

    writer = PdfWriter()
    for idx in page_indices:
        writer.add_page(reader.pages[idx])

    # Write through the temp file's own handle so it is closed on exit
    # instead of leaking an open descriptor and reopening the path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f_out:
        writer.write(f_out)
        out_path = f_out.name
    return out_path, None
def merge_pdfs(files):
    """Concatenate several PDFs, in the given order, into one temporary PDF.

    Args:
        files: Sequence of uploaded PDFs; each item is either an object
            exposing its path as ``.name`` or a plain path string.

    Returns:
        ``(output_path, None)`` on success, or ``(None, error_message)`` when
        fewer than two files are given, the combined size exceeds the limit,
        or a file cannot be read. The output file is created with
        ``delete=False``, so it stays on disk after this function returns.
    """
    # Enforce what the message promises: merging needs at least two inputs.
    if not files or len(files) < 2:
        return None, "Please upload at least two PDF files to merge."

    # Accept both file-like upload objects and bare path strings.
    paths = [getattr(f, "name", f) for f in files]

    # Validate total size
    total_size = sum(os.path.getsize(p) for p in paths)
    if total_size > MAX_SIZE_BYTES:
        return None, f"Total file size exceeds 1 GB limit ({total_size / (1024*1024*1024):.2f} GB)."

    writer = PdfWriter()
    try:
        for path in paths:
            reader = PdfReader(path)
            for page in reader.pages:
                writer.add_page(page)
    except Exception as e:
        return None, f"Error reading PDFs: {e}"

    # Write through the temp file's own handle so it is closed on exit
    # instead of leaking an open descriptor and reopening the path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f_out:
        writer.write(f_out)
        out_path = f_out.name
    return out_path, None
# Build Gradio interface
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1 id='header'>PDF Splitter & Merger</h1>")
    gr.Markdown(
        "**Free & Quick Solution**: Process your PDFs in-memory instantly without storage.\n"
        "Use the **Split** tab to extract specific pages or the **Merge** tab to combine multiple PDFs into one.\n\n"
        "Maximum file size: **1 GB**. If you'd like to increase the limit, feel free to clone the space and adjust the code yourself."
    )

    with gr.Tabs():
        # Split Tab
        with gr.TabItem("Split PDF"):
            with gr.Row(elem_classes="input-row"):
                pdf_input = gr.File(label="Select PDF to split", file_types=['.pdf'])
                page_input = gr.Textbox(label="Page ranges", placeholder="e.g. 1-3,5,7-9")
            with gr.Row(elem_classes="button-row"):
                split_button = gr.Button("Split PDF", variant="primary")
            output_split = gr.File(label="Download Split PDF")
            # Hidden until there is an error to report.
            error_split = gr.Textbox(label="Error Message", interactive=False, visible=False)

            def run_split(file, ranges):
                """Run the split and return (output file, error-box update).

                The error box receives one ``gr.update`` carrying both its
                text and its visibility, so the message is shown on failure
                and the box is cleared and hidden on success.
                """
                if file is None or not ranges:
                    return None, gr.update(
                        value="Please upload a PDF and specify page ranges.",
                        visible=True,
                    )
                out_path, error = split_pdf(file, ranges)
                if error:
                    return None, gr.update(value=error, visible=True)
                return out_path, gr.update(value="", visible=False)

            # Each output component is listed once: repeating a component
            # would let the later value replace the error text.
            split_button.click(
                fn=run_split,
                inputs=[pdf_input, page_input],
                outputs=[output_split, error_split],
                api_name="split_pdf"
            )

        # Merge Tab
        with gr.TabItem("Merge PDF"):
            with gr.Row(elem_classes="input-row"):
                merge_inputs = gr.Files(label="Select PDF files to merge", file_types=['.pdf'])
            with gr.Row(elem_classes="button-row"):
                merge_button = gr.Button("Merge PDFs", variant="primary")
            output_merge = gr.File(label="Download Merged PDF")
            # Hidden until there is an error to report.
            error_merge = gr.Textbox(label="Error Message", interactive=False, visible=False)

            def run_merge(files):
                """Run the merge and return (output file, error-box update).

                The error box receives one ``gr.update`` carrying both its
                text and its visibility, so the message is shown on failure
                and the box is cleared and hidden on success.
                """
                if not files or len(files) < 2:
                    return None, gr.update(
                        value="Please upload at least two PDF files.",
                        visible=True,
                    )
                out_path, error = merge_pdfs(files)
                if error:
                    return None, gr.update(value=error, visible=True)
                return out_path, gr.update(value="", visible=False)

            # Each output component is listed once: repeating a component
            # would let the later value replace the error text.
            merge_button.click(
                fn=run_merge,
                inputs=[merge_inputs],
                outputs=[output_merge, error_merge],
                api_name="merge_pdfs"
            )
if __name__ == "__main__":
    # Listen on every interface; use the PORT environment variable when it
    # is set, otherwise fall back to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)