plozia committed on
Commit
5e3f9ac
·
verified ·
1 Parent(s): 5bdb76d

added merge

Browse files
Files changed (1) hide show
  1. app.py +85 -36
app.py CHANGED
@@ -8,11 +8,8 @@ from PyPDF2 import PdfReader, PdfWriter
8
  with open(os.path.join(os.path.dirname(__file__), "fancy.css")) as f:
9
  custom_css = f.read()
10
 
11
- # Maximum upload size
12
- MAX_SIZE_BYTES = 1 * 1024 * 1024 * 1024 # 1 GB
13
-
14
- # Note: Uploaded PDFs are processed in-memory and not stored on the server.
15
- # This is a quick solution for splitting PDFs without any external storage.
16
 
17
  def parse_page_ranges(ranges: str, num_pages: int) -> List[int]:
18
  """
@@ -32,6 +29,7 @@ def parse_page_ranges(ranges: str, num_pages: int) -> List[int]:
32
  pages.add(p - 1)
33
  return sorted(pages)
34
 
 
35
  def split_pdf(file, page_ranges: str):
36
  # Validate file size
37
  file_size = os.path.getsize(file.name)
@@ -58,40 +56,91 @@ def split_pdf(file, page_ranges: str):
58
 
59
  return out_path, None
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  # Build Gradio interface
62
  with gr.Blocks(css=custom_css) as demo:
63
- # Header
64
- gr.HTML("<h1 id='header'>PDF Splitter</h1>")
65
- gr.Markdown("**Free & Quick Solution**: Uploaded PDFs are processed but not stored. Ideal for lazy splitting without installing anything.")
66
- gr.Markdown("Upload a PDF (up to 1 GB) and extract specific pages using ranges like `1-3,5,7-9`.")
67
-
68
- with gr.Row(elem_classes="input-row"):
69
- pdf_input = gr.File(label="Select PDF file", file_types=['.pdf'])
70
- page_input = gr.Textbox(label="Page ranges", placeholder="e.g. 1-3,5,7-9")
71
-
72
- with gr.Row(elem_classes="button-row"):
73
- split_button = gr.Button("Split PDF", variant="primary")
74
-
75
- output_file = gr.File(label="Download Split PDF")
76
- error_text = gr.Textbox(label="Error Message", interactive=False, visible=False)
77
-
78
- def run_split(file, ranges):
79
- if file is None or not ranges:
80
- return None, "Please upload a PDF and specify page ranges.", True
81
- out_path, error = split_pdf(file, ranges)
82
- if error:
83
- return None, error, True
84
- return out_path, "", False
85
-
86
- split_button.click(
87
- fn=run_split,
88
- inputs=[pdf_input, page_input],
89
- outputs=[output_file, error_text, error_text],
90
- api_name="split_pdf"
91
  )
92
 
93
- # Hide error box when no error
94
- error_text.change(lambda msg: msg != "", inputs=error_text, outputs=error_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  if __name__ == "__main__":
97
- demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
8
  with open(os.path.join(os.path.dirname(__file__), "fancy.css")) as f:
9
  custom_css = f.read()
10
 
11
+ # Maximum upload size (1 GB)
12
+ MAX_SIZE_BYTES = 1 * 1024 * 1024 * 1024
 
 
 
13
 
14
  def parse_page_ranges(ranges: str, num_pages: int) -> List[int]:
15
  """
 
29
  pages.add(p - 1)
30
  return sorted(pages)
31
 
32
+
33
  def split_pdf(file, page_ranges: str):
34
  # Validate file size
35
  file_size = os.path.getsize(file.name)
 
56
 
57
  return out_path, None
58
 
59
+
60
+ def merge_pdfs(files):
61
+ # Merge multiple PDFs in upload order
62
+ if not files:
63
+ return None, "Please upload at least two PDF files to merge."
64
+
65
+ # Validate total size
66
+ total_size = sum(os.path.getsize(f.name) for f in files)
67
+ if total_size > MAX_SIZE_BYTES:
68
+ return None, f"Total file size exceeds 1 GB limit ({total_size / (1024*1024*1024):.2f} GB)."
69
+
70
+ writer = PdfWriter()
71
+ try:
72
+ for f in files:
73
+ reader = PdfReader(f.name)
74
+ for page in reader.pages:
75
+ writer.add_page(page)
76
+ except Exception as e:
77
+ return None, f"Error reading PDFs: {e}"
78
+
79
+ out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf").name
80
+ with open(out_path, "wb") as f_out:
81
+ writer.write(f_out)
82
+
83
+ return out_path, None
84
+
85
  # Build Gradio interface
86
  with gr.Blocks(css=custom_css) as demo:
87
+ gr.HTML("<h1 id='header'>PDF Splitter & Merger</h1>")
88
+ gr.Markdown(
89
+ "**Free & Quick Solution**: Process your PDFs in-memory instantly without storage.\n"
90
+ "Use the **Split** tab to extract specific pages or the **Merge** tab to combine multiple PDFs into one."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  )
92
 
93
+ with gr.Tabs():
94
+ # Split Tab
95
+ with gr.TabItem("Split PDF"):
96
+ with gr.Row(elem_classes="input-row"):
97
+ pdf_input = gr.File(label="Select PDF to split", file_types=['.pdf'])
98
+ page_input = gr.Textbox(label="Page ranges", placeholder="e.g. 1-3,5,7-9")
99
+ with gr.Row(elem_classes="button-row"):
100
+ split_button = gr.Button("Split PDF", variant="primary")
101
+ output_split = gr.File(label="Download Split PDF")
102
+ error_split = gr.Textbox(label="Error Message", interactive=False, visible=False)
103
+
104
+ def run_split(file, ranges):
105
+ if file is None or not ranges:
106
+ return None, "Please upload a PDF and specify page ranges.", True
107
+ out_path, error = split_pdf(file, ranges)
108
+ if error:
109
+ return None, error, True
110
+ return out_path, "", False
111
+
112
+ split_button.click(
113
+ fn=run_split,
114
+ inputs=[pdf_input, page_input],
115
+ outputs=[output_split, error_split, error_split],
116
+ api_name="split_pdf"
117
+ )
118
+ error_split.change(lambda msg: msg != "", inputs=error_split, outputs=error_split)
119
+
120
+ # Merge Tab
121
+ with gr.TabItem("Merge PDF"):
122
+ with gr.Row(elem_classes="input-row"):
123
+ merge_inputs = gr.Files(label="Select PDF files to merge", file_types=['.pdf'])
124
+ with gr.Row(elem_classes="button-row"):
125
+ merge_button = gr.Button("Merge PDFs", variant="primary")
126
+ output_merge = gr.File(label="Download Merged PDF")
127
+ error_merge = gr.Textbox(label="Error Message", interactive=False, visible=False)
128
+
129
+ def run_merge(files):
130
+ if not files or len(files) < 2:
131
+ return None, "Please upload at least two PDF files.", True
132
+ out_path, error = merge_pdfs(files)
133
+ if error:
134
+ return None, error, True
135
+ return out_path, "", False
136
+
137
+ merge_button.click(
138
+ fn=run_merge,
139
+ inputs=[merge_inputs],
140
+ outputs=[output_merge, error_merge, error_merge],
141
+ api_name="merge_pdfs"
142
+ )
143
+ error_merge.change(lambda msg: msg != "", inputs=error_merge, outputs=error_merge)
144
 
145
  if __name__ == "__main__":
146
+ demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))