|
|
import os |
|
|
import tempfile |
|
|
from typing import List |
|
|
import gradio as gr |
|
|
from PyPDF2 import PdfReader, PdfWriter |
|
|
|
|
|
|
|
|
# Load the stylesheet that sits next to this module; the text is handed to
# gr.Blocks(css=...) further down. The encoding is pinned so the result does
# not depend on the platform's default locale.
with open(
    os.path.join(os.path.dirname(__file__), "fancy.css"), encoding="utf-8"
) as f:
    custom_css = f.read()
|
|
|
|
|
|
|
|
# Upload size ceiling in bytes (1 GiB); compared against the input size by
# both split_pdf and merge_pdfs.
MAX_SIZE_BYTES = 1024 ** 3
|
|
|
|
|
def parse_page_ranges(ranges: str, num_pages: int) -> List[int]:
    """Convert a page-range string into sorted, zero-based page indices.

    ``ranges`` is a comma-separated list of 1-based page numbers and
    inclusive ``start-end`` spans, e.g. ``"1-3,5,8-10"``.

    * Spans are clamped to ``1..num_pages``; a span whose start lies past
      its end contributes nothing.
    * Single page numbers outside ``1..num_pages`` are ignored.
    * Empty segments (trailing or doubled commas, an empty string) are
      skipped.
    * Duplicates collapse, and the result is sorted ascending.

    Args:
        ranges: The user-supplied range expression.
        num_pages: Total number of pages in the document.

    Returns:
        Sorted list of unique zero-based page indices (possibly empty).

    Raises:
        ValueError: If a non-empty segment is neither an integer nor a
            ``start-end`` pair of integers.
    """
    pages = set()
    for part in ranges.split(","):
        part = part.strip()
        if not part:
            # Tolerate "1,2," or "1,,2" instead of failing on int('').
            continue
        try:
            if "-" in part:
                start_str, end_str = part.split("-", 1)
                start = max(1, int(start_str))
                end = min(num_pages, int(end_str))
                # 1-based inclusive span -> 0-based half-open range.
                pages.update(range(start - 1, end))
            else:
                page = int(part)
                if 1 <= page <= num_pages:
                    pages.add(page - 1)
        except ValueError:
            # Name the offending segment rather than surfacing int()'s message.
            raise ValueError(f"invalid page range {part!r}") from None
    return sorted(pages)
|
|
|
|
|
|
|
|
def split_pdf(file, page_ranges: str):
    """Extract the requested pages of a PDF into a new temporary PDF.

    Args:
        file: Uploaded-file object whose ``.name`` attribute is the path of
            the PDF on disk.
        page_ranges: Range expression understood by ``parse_page_ranges``,
            e.g. ``"1-3,5"``.

    Returns:
        A ``(out_path, error)`` pair. On success ``out_path`` is the path of
        the written PDF and ``error`` is ``None``; on failure ``out_path`` is
        ``None`` and ``error`` is a user-facing message.
    """
    file_size = os.path.getsize(file.name)
    if file_size > MAX_SIZE_BYTES:
        return None, f"File size exceeds 1 GB limit ({file_size / (1024*1024*1024):.2f} GB)."

    # An unreadable or corrupt PDF is reported through the error slot rather
    # than raised, matching how merge_pdfs reports read failures.
    try:
        reader = PdfReader(file.name)
        num_pages = len(reader.pages)
    except Exception as e:
        return None, f"Error reading PDF: {e}"

    try:
        page_indices = parse_page_ranges(page_ranges, num_pages)
    except Exception as e:
        return None, f"Error parsing page ranges: {e}"

    if not page_indices:
        return None, "No valid pages selected."

    writer = PdfWriter()
    for idx in page_indices:
        writer.add_page(reader.pages[idx])

    # Write through the temp file's own handle so it is closed on exit;
    # delete=False keeps the file on disk for the caller to serve.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f_out:
        writer.write(f_out)
        out_path = f_out.name

    return out_path, None
|
|
|
|
|
|
|
|
def merge_pdfs(files):
    """Concatenate several PDFs, in the given order, into one temporary PDF.

    Args:
        files: Sequence of uploaded-file objects whose ``.name`` attribute is
            the path of each PDF on disk.

    Returns:
        A ``(out_path, error)`` pair. On success ``out_path`` is the path of
        the merged PDF and ``error`` is ``None``; on failure ``out_path`` is
        ``None`` and ``error`` is a user-facing message.
    """
    # Only an empty selection is rejected here; the run_merge UI wrapper
    # enforces the two-file minimum before calling this function.
    if not files:
        return None, "Please upload at least two PDF files to merge."

    # The size limit applies to the combined size of all inputs.
    total_size = sum(os.path.getsize(f.name) for f in files)
    if total_size > MAX_SIZE_BYTES:
        return None, f"Total file size exceeds 1 GB limit ({total_size / (1024*1024*1024):.2f} GB)."

    writer = PdfWriter()
    try:
        for f in files:
            reader = PdfReader(f.name)
            for page in reader.pages:
                writer.add_page(page)
    except Exception as e:
        return None, f"Error reading PDFs: {e}"

    # Write through the temp file's own handle so it is closed on exit;
    # delete=False keeps the file on disk for the caller to serve.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f_out:
        writer.write(f_out)
        out_path = f_out.name

    return out_path, None
|
|
|
|
|
|
|
|
def _error_box_update(message):
    """Build the update for an error textbox: show it only when there is text."""
    return gr.update(value=message, visible=bool(message))


# Two-tab UI: "Split PDF" extracts pages from one file, "Merge PDF" joins
# several. Each tab has a download slot and an error textbox that starts
# hidden and is revealed only when its handler reports a problem.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1 id='header'>PDF Splitter & Merger</h1>")
    gr.Markdown(
        "**Free & Quick Solution**: Process your PDFs in-memory instantly without storage.\n"
        "Use the **Split** tab to extract specific pages or the **Merge** tab to combine multiple PDFs into one.\n\n"
        "Maximum file size: **1 GB**. If you'd like to increase the limit, feel free to clone the space and adjust the code yourself."
    )

    with gr.Tabs():

        with gr.TabItem("Split PDF"):
            with gr.Row(elem_classes="input-row"):
                pdf_input = gr.File(label="Select PDF to split", file_types=['.pdf'])
                page_input = gr.Textbox(label="Page ranges", placeholder="e.g. 1-3,5,7-9")
            with gr.Row(elem_classes="button-row"):
                split_button = gr.Button("Split PDF", variant="primary")
            output_split = gr.File(label="Download Split PDF")
            error_split = gr.Textbox(label="Error Message", interactive=False, visible=False)

            def run_split(file, ranges):
                """Validate the inputs, call split_pdf, and map its result onto the UI."""
                if file is None or not ranges:
                    return None, _error_box_update("Please upload a PDF and specify page ranges.")
                out_path, error = split_pdf(file, ranges)
                if error:
                    return None, _error_box_update(error)
                # Success: clear and hide any earlier error.
                return out_path, _error_box_update("")

            # The error box is listed once; its text and visibility are set
            # together by the gr.update returned from run_split.
            split_button.click(
                fn=run_split,
                inputs=[pdf_input, page_input],
                outputs=[output_split, error_split],
                api_name="split_pdf"
            )

        with gr.TabItem("Merge PDF"):
            with gr.Row(elem_classes="input-row"):
                merge_inputs = gr.Files(label="Select PDF files to merge", file_types=['.pdf'])
            with gr.Row(elem_classes="button-row"):
                merge_button = gr.Button("Merge PDFs", variant="primary")
            output_merge = gr.File(label="Download Merged PDF")
            error_merge = gr.Textbox(label="Error Message", interactive=False, visible=False)

            def run_merge(files):
                """Require at least two files, call merge_pdfs, and map its result onto the UI."""
                if not files or len(files) < 2:
                    return None, _error_box_update("Please upload at least two PDF files.")
                out_path, error = merge_pdfs(files)
                if error:
                    return None, _error_box_update(error)
                # Success: clear and hide any earlier error.
                return out_path, _error_box_update("")

            merge_button.click(
                fn=run_merge,
                inputs=[merge_inputs],
                outputs=[output_merge, error_merge],
                api_name="merge_pdfs"
            )
|
|
|
|
|
if __name__ == "__main__":
    # Port comes from the PORT environment variable, defaulting to 7860;
    # binding to 0.0.0.0 makes the server listen on all interfaces.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)