File size: 5,501 Bytes
0ac70f9 742f5dc 5e3f9ac 0ac70f9 5e3f9ac 0ac70f9 2700cfa 0ac70f9 2700cfa 0ac70f9 2700cfa 0ac70f9 2700cfa 0ac70f9 2700cfa 5e3f9ac 2700cfa 5e3f9ac 2700cfa 5e3f9ac d167a56 2700cfa 0ac70f9 2700cfa d167a56 2700cfa 54f5976 a84133d 2700cfa 5e3f9ac 2700cfa 5e3f9ac 2700cfa 5e3f9ac 2700cfa 0ac70f9 2700cfa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
import os
import tempfile
from typing import List
import gradio as gr
from PyPDF2 import PdfReader, PdfWriter
# Load custom CSS from "fancy.css", which is expected to sit next to this file.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding.
_css_path = os.path.join(os.path.dirname(__file__), "fancy.css")
with open(_css_path, encoding="utf-8") as f:
    custom_css = f.read()
# Upload ceiling in bytes: 1024**3, which the user-facing messages call "1 GB".
MAX_SIZE_BYTES = 1024 ** 3
def parse_page_ranges(ranges: str, num_pages: int) -> List[int]:
    """
    Convert a string like "1-3,5,8-10" into a sorted list of zero-based page indices.

    Args:
        ranges: Comma-separated 1-based page numbers and inclusive "start-end"
            ranges. Whitespace around tokens and empty tokens (for example a
            trailing comma) are ignored.
        num_pages: Number of pages in the document; used to clamp ranges and
            to discard single page numbers that are out of bounds.

    Returns:
        Sorted, de-duplicated zero-based page indices. May be empty.

    Raises:
        ValueError: If a non-empty token (or either side of a range) is not
            an integer.
    """
    pages = set()
    for part in ranges.split(','):
        part = part.strip()
        if not part:
            # Tolerate stray separators such as "1-3," or "1,,2" instead of
            # failing the whole selection on int("").
            continue
        if '-' in part:
            start_str, end_str = part.split('-', 1)
            # Clamp the range to the document: below 1 starts at page 1,
            # beyond the last page stops at the last page.
            start = max(1, int(start_str))
            end = min(num_pages, int(end_str))
            # A reversed range (start > end) yields an empty range() here.
            pages.update(range(start - 1, end))
        else:
            p = int(part)
            # Out-of-bounds single pages are silently dropped.
            if 1 <= p <= num_pages:
                pages.add(p - 1)
    return sorted(pages)
def split_pdf(file, page_ranges: str):
    """Extract selected pages of an uploaded PDF into a new temporary PDF.

    Args:
        file: Uploaded file object; only its ``name`` attribute is used, as a
            filesystem path.
        page_ranges: 1-based page selection such as "1-3,5,8-10".

    Returns:
        ``(output_path, None)`` on success, or ``(None, error_message)`` when
        the file is too large, cannot be read, the ranges cannot be parsed,
        or no valid page is selected.
    """
    # Validate file size
    file_size = os.path.getsize(file.name)
    if file_size > MAX_SIZE_BYTES:
        return None, f"File size exceeds 1 GB limit ({file_size / (1024*1024*1024):.2f} GB)."

    # Opening a corrupt or non-PDF upload raises; report it through the
    # (None, message) contract rather than letting it propagate.
    try:
        reader = PdfReader(file.name)
        num_pages = len(reader.pages)
    except Exception as e:
        return None, f"Error reading PDF: {e}"

    try:
        page_indices = parse_page_ranges(page_ranges, num_pages)
    except ValueError as e:
        return None, f"Error parsing page ranges: {e}"
    if not page_indices:
        return None, "No valid pages selected."

    writer = PdfWriter()
    for idx in page_indices:
        writer.add_page(reader.pages[idx])

    # delete=False keeps the file on disk after the handle is closed so the
    # returned path stays valid; the context manager closes the handle
    # instead of leaving it open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f_out:
        writer.write(f_out)
        out_path = f_out.name
    return out_path, None
def merge_pdfs(files):
    """Concatenate several uploaded PDFs, in the order given, into one temporary PDF.

    Args:
        files: Sequence of uploaded file objects; only each object's ``name``
            attribute is used, as a filesystem path.

    Returns:
        ``(output_path, None)`` on success, or ``(None, error_message)`` when
        fewer than two files are supplied, the combined size exceeds the
        limit, or a PDF cannot be read.
    """
    # Enforce what the message states: a merge needs at least two inputs.
    if not files or len(files) < 2:
        return None, "Please upload at least two PDF files to merge."

    # Validate total size
    total_size = sum(os.path.getsize(f.name) for f in files)
    if total_size > MAX_SIZE_BYTES:
        return None, f"Total file size exceeds 1 GB limit ({total_size / (1024*1024*1024):.2f} GB)."

    writer = PdfWriter()
    try:
        for f in files:
            reader = PdfReader(f.name)
            for page in reader.pages:
                writer.add_page(page)
    except Exception as e:
        return None, f"Error reading PDFs: {e}"

    # delete=False keeps the file on disk after the handle is closed so the
    # returned path stays valid; the context manager closes the handle
    # instead of leaving it open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f_out:
        writer.write(f_out)
        out_path = f_out.name
    return out_path, None
# Build Gradio interface
# NOTE(review): the original indentation was lost, so the nesting of components
# inside the rows/tabs below is a reconstruction — confirm against the intended layout.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1 id='header'>PDF Splitter & Merger</h1>")
    gr.Markdown(
        "**Free & Quick Solution**: Process your PDFs in-memory instantly without storage.\n"
        "Use the **Split** tab to extract specific pages or the **Merge** tab to combine multiple PDFs into one.\n\n"
        "Maximum file size: **1 GB**. If you'd like to increase the limit, feel free to clone the space and adjust the code yourself."
    )
    with gr.Tabs():
        # Split Tab
        with gr.TabItem("Split PDF"):
            with gr.Row(elem_classes="input-row"):
                pdf_input = gr.File(label="Select PDF to split", file_types=['.pdf'])
                page_input = gr.Textbox(label="Page ranges", placeholder="e.g. 1-3,5,7-9")
            with gr.Row(elem_classes="button-row"):
                split_button = gr.Button("Split PDF", variant="primary")
            output_split = gr.File(label="Download Split PDF")
            error_split = gr.Textbox(label="Error Message", interactive=False, visible=False)

            def run_split(file, ranges):
                """Run the split and return (download path, error-box update).

                The error box is driven by a single gr.update carrying both
                the message and its visibility; a bare bool returned to a
                Textbox output would be written as its value.
                """
                if file is None or not ranges:
                    return None, gr.update(
                        value="Please upload a PDF and specify page ranges.", visible=True
                    )
                out_path, error = split_pdf(file, ranges)
                if error:
                    return None, gr.update(value=error, visible=True)
                return out_path, gr.update(value="", visible=False)

            # Each component appears once in `outputs`; the handler returns
            # one value per listed component.
            split_button.click(
                fn=run_split,
                inputs=[pdf_input, page_input],
                outputs=[output_split, error_split],
                api_name="split_pdf"
            )

        # Merge Tab
        with gr.TabItem("Merge PDF"):
            with gr.Row(elem_classes="input-row"):
                merge_inputs = gr.Files(label="Select PDF files to merge", file_types=['.pdf'])
            with gr.Row(elem_classes="button-row"):
                merge_button = gr.Button("Merge PDFs", variant="primary")
            output_merge = gr.File(label="Download Merged PDF")
            error_merge = gr.Textbox(label="Error Message", interactive=False, visible=False)

            def run_merge(files):
                """Run the merge and return (download path, error-box update)."""
                if not files or len(files) < 2:
                    return None, gr.update(
                        value="Please upload at least two PDF files.", visible=True
                    )
                out_path, error = merge_pdfs(files)
                if error:
                    return None, gr.update(value=error, visible=True)
                return out_path, gr.update(value="", visible=False)

            merge_button.click(
                fn=run_merge,
                inputs=[merge_inputs],
                outputs=[output_merge, error_merge],
                api_name="merge_pdfs"
            )
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from the PORT environment
    # variable and falls back to 7860 when it is unset.
    port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=port)