File size: 5,501 Bytes
0ac70f9
 
 
 
 
 
742f5dc
 
 
 
5e3f9ac
 
0ac70f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e3f9ac
0ac70f9
2700cfa
 
 
 
 
0ac70f9
2700cfa
0ac70f9
2700cfa
0ac70f9
2700cfa
 
0ac70f9
2700cfa
 
5e3f9ac
2700cfa
 
5e3f9ac
2700cfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e3f9ac
d167a56
2700cfa
 
 
 
 
 
 
 
 
 
 
0ac70f9
2700cfa
 
 
d167a56
2700cfa
 
 
54f5976
a84133d
2700cfa
 
5e3f9ac
2700cfa
5e3f9ac
2700cfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e3f9ac
2700cfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ac70f9
 
2700cfa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import os
import tempfile
from typing import List
import gradio as gr
from PyPDF2 import PdfReader, PdfWriter

# Load custom CSS from fancy.css located next to this module.
# The encoding is pinned so the stylesheet is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open(os.path.join(os.path.dirname(__file__), "fancy.css"), encoding="utf-8") as f:
    custom_css = f.read()

# Upload size cap, in bytes (1 GB = 1024^3 bytes)
MAX_SIZE_BYTES = 1024 ** 3

def parse_page_ranges(ranges: str, num_pages: int) -> List[int]:
    """
    Convert a string like "1-3,5,8-10" into a sorted list of zero-based page indices.

    Page numbers in ``ranges`` are one-based. Ranges are clamped to
    ``1..num_pages``; single pages outside that interval are ignored, and a
    reversed range such as "5-3" selects nothing. Empty tokens (for example
    from a trailing comma) are skipped.

    Args:
        ranges: Comma-separated page numbers and ``start-end`` ranges.
        num_pages: Total number of pages in the document.

    Returns:
        Sorted, de-duplicated zero-based page indices.

    Raises:
        ValueError: If a non-empty token is not a number or ``start-end`` pair.
    """
    pages = set()
    for part in ranges.split(','):
        part = part.strip()
        if not part:
            # Tolerate "1-3," or "1,,2" instead of failing on int('').
            continue
        try:
            if '-' in part:
                start_str, end_str = part.split('-', 1)
                start = max(1, int(start_str))
                end = min(num_pages, int(end_str))
                pages.update(range(start - 1, end))
            else:
                p = int(part)
                if 1 <= p <= num_pages:
                    pages.add(p - 1)
        except ValueError:
            # Replace the raw int() message with one that names the bad token.
            raise ValueError(f"invalid page range {part!r}") from None
    return sorted(pages)


def split_pdf(file, page_ranges: str):
    """
    Extract the pages selected by ``page_ranges`` from an uploaded PDF.

    Args:
        file: Uploaded file object whose ``.name`` attribute is the path of
            the PDF on disk.
        page_ranges: One-based page selection such as "1-3,5,8-10".

    Returns:
        A ``(path, error)`` tuple. On success ``path`` is the path of a newly
        written temporary PDF and ``error`` is ``None``; on failure ``path``
        is ``None`` and ``error`` is a human-readable message.
    """
    # Validate file size
    file_size = os.path.getsize(file.name)
    if file_size > MAX_SIZE_BYTES:
        return None, f"File size exceeds 1 GB limit ({file_size / (1024*1024*1024):.2f} GB)."

    # Opening the file and counting pages can fail on damaged or unreadable
    # PDFs; report that as an error message instead of raising.
    try:
        reader = PdfReader(file.name)
        num_pages = len(reader.pages)
    except Exception as e:
        return None, f"Error reading PDF: {e}"

    try:
        page_indices = parse_page_ranges(page_ranges, num_pages)
    except Exception as e:
        return None, f"Error parsing page ranges: {e}"

    if not page_indices:
        return None, "No valid pages selected."

    out_path = None
    try:
        writer = PdfWriter()
        for idx in page_indices:
            writer.add_page(reader.pages[idx])

        # Write through the temporary file's own handle so it is closed
        # deterministically; delete=False keeps the file for download.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f_out:
            out_path = f_out.name
            writer.write(f_out)
    except Exception as e:
        # Do not leave a partially written file behind.
        if out_path is not None and os.path.exists(out_path):
            os.remove(out_path)
        return None, f"Error writing PDF: {e}"

    return out_path, None


def merge_pdfs(files):
    """
    Merge multiple PDFs into one, in upload order.

    Args:
        files: Sequence of uploaded file objects whose ``.name`` attribute is
            the path of each PDF on disk. At least two are required.

    Returns:
        A ``(path, error)`` tuple. On success ``path`` is the path of a newly
        written temporary PDF and ``error`` is ``None``; on failure ``path``
        is ``None`` and ``error`` is a human-readable message.
    """
    # Enforce the minimum that the error message states.
    if not files or len(files) < 2:
        return None, "Please upload at least two PDF files to merge."

    # Validate total size
    total_size = sum(os.path.getsize(f.name) for f in files)
    if total_size > MAX_SIZE_BYTES:
        return None, f"Total file size exceeds 1 GB limit ({total_size / (1024*1024*1024):.2f} GB)."

    writer = PdfWriter()
    try:
        for f in files:
            reader = PdfReader(f.name)
            for page in reader.pages:
                writer.add_page(page)
    except Exception as e:
        return None, f"Error reading PDFs: {e}"

    out_path = None
    try:
        # Write through the temporary file's own handle so it is closed
        # deterministically; delete=False keeps the file for download.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f_out:
            out_path = f_out.name
            writer.write(f_out)
    except Exception as e:
        # Do not leave a partially written file behind.
        if out_path is not None and os.path.exists(out_path):
            os.remove(out_path)
        return None, f"Error writing PDF: {e}"

    return out_path, None

# Build Gradio interface
def _show_error(message):
    """Return an update that puts ``message`` in an error textbox and reveals it."""
    return gr.update(value=message, visible=True)


def _clear_error():
    """Return an update that empties an error textbox and hides it."""
    return gr.update(value="", visible=False)


with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1 id='header'>PDF Splitter & Merger</h1>")
    gr.Markdown(
        "**Free & Quick Solution**: Process your PDFs in-memory instantly without storage.\n"
        "Use the **Split** tab to extract specific pages or the **Merge** tab to combine multiple PDFs into one.\n\n"
        "Maximum file size: **1 GB**. If you'd like to increase the limit, feel free to clone the space and adjust the code yourself."
    )

    with gr.Tabs():
        # Split Tab
        with gr.TabItem("Split PDF"):
            with gr.Row(elem_classes="input-row"):
                pdf_input = gr.File(label="Select PDF to split", file_types=['.pdf'])
                page_input = gr.Textbox(label="Page ranges", placeholder="e.g. 1-3,5,7-9")
            with gr.Row(elem_classes="button-row"):
                split_button = gr.Button("Split PDF", variant="primary")
            output_split = gr.File(label="Download Split PDF")
            # Hidden until a handler reports an error.
            error_split = gr.Textbox(label="Error Message", interactive=False, visible=False)

            def run_split(file, ranges):
                """Click handler: return (output file, error textbox update)."""
                if file is None or not ranges:
                    return None, _show_error("Please upload a PDF and specify page ranges.")
                out_path, error = split_pdf(file, ranges)
                if error:
                    return None, _show_error(error)
                return out_path, _clear_error()

            # The error textbox appears once in outputs; a single update
            # carries both its text and its visibility.
            split_button.click(
                fn=run_split,
                inputs=[pdf_input, page_input],
                outputs=[output_split, error_split],
                api_name="split_pdf"
            )

        # Merge Tab
        with gr.TabItem("Merge PDF"):
            with gr.Row(elem_classes="input-row"):
                merge_inputs = gr.Files(label="Select PDF files to merge", file_types=['.pdf'])
            with gr.Row(elem_classes="button-row"):
                merge_button = gr.Button("Merge PDFs", variant="primary")
            output_merge = gr.File(label="Download Merged PDF")
            # Hidden until a handler reports an error.
            error_merge = gr.Textbox(label="Error Message", interactive=False, visible=False)

            def run_merge(files):
                """Click handler: return (output file, error textbox update)."""
                if not files or len(files) < 2:
                    return None, _show_error("Please upload at least two PDF files.")
                out_path, error = merge_pdfs(files)
                if error:
                    return None, _show_error(error)
                return out_path, _clear_error()

            merge_button.click(
                fn=run_merge,
                inputs=[merge_inputs],
                outputs=[output_merge, error_merge],
                api_name="merge_pdfs"
            )

if __name__ == "__main__":
    # Listen on all interfaces; the port is taken from $PORT, falling back to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)