plozia committed on
Commit
2700cfa
·
verified ·
1 Parent(s): 190ee3a
Files changed (1) hide show
  1. app.py +98 -84
app.py CHANGED
@@ -3,7 +3,6 @@ import tempfile
3
  from typing import List
4
  import gradio as gr
5
  from PyPDF2 import PdfReader, PdfWriter
6
- import pikepdf # for advanced compression
7
 
8
  # Load custom CSS
9
  with open(os.path.join(os.path.dirname(__file__), "fancy.css")) as f:
@@ -32,101 +31,116 @@ def parse_page_ranges(ranges: str, num_pages: int) -> List[int]:
32
 
33
 
34
def split_pdf(file, page_ranges: str):
    """Write the pages selected by ``page_ranges`` into a new temporary PDF.

    Args:
        file: The uploaded PDF, either an object exposing its path as
            ``.name`` or a plain path string.
        page_ranges: Page selection text (e.g. ``"1-3,6-8"``) that is handed
            to ``parse_page_ranges``.

    Returns:
        ``(output_path, None)`` on success, ``(None, error_message)`` on
        failure.
    """
    if not file:
        return None, "No file provided."

    path = getattr(file, "name", file)
    if os.path.getsize(path) > MAX_SIZE_BYTES:
        return None, "File exceeds 1 GB limit."

    # Opening the PDF can fail just like parsing the ranges can (corrupt or
    # encrypted input), so both are reported through the error slot.
    try:
        reader = PdfReader(path)
        page_indices = parse_page_ranges(page_ranges, len(reader.pages))
    except Exception as e:
        return None, str(e)

    if not page_indices:
        return None, "No valid pages."

    writer = PdfWriter()
    for index in page_indices:
        writer.add_page(reader.pages[index])

    # Write through the temp file's own handle so it is closed
    # deterministically; delete=False keeps the file for download.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        writer.write(tmp)
    return tmp.name, None
52
-
53
-
54
def merge_pdfs(files: List, order: List[int] = None):
    """Concatenate several PDFs into one temporary PDF.

    Args:
        files: The uploaded PDFs, each either an object exposing its path as
            ``.name`` or a plain path string. At least two are required.
        order: Optional sort keys, one per file. When given and of matching
            length, files are merged in ascending key order; otherwise the
            upload order is kept.

    Returns:
        ``(output_path, None)`` on success, ``(None, error_message)`` on
        failure.
    """
    if not files or len(files) < 2:
        return None, "Upload at least two PDFs."

    paths = [getattr(f, "name", f) for f in files]
    if sum(os.path.getsize(p) for p in paths) > MAX_SIZE_BYTES:
        return None, "Total exceeds 1 GB."

    if order and len(order) == len(paths):
        ranked = sorted(zip(order, paths), key=lambda pair: pair[0])
        paths = [p for _, p in ranked]

    writer = PdfWriter()
    # A single unreadable input should produce an error message rather than
    # an unhandled exception.
    try:
        for p in paths:
            for page in PdfReader(p).pages:
                writer.add_page(page)
    except Exception as e:
        return None, str(e)

    # Write through the temp file's own handle so it is closed
    # deterministically; delete=False keeps the file for download.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        writer.write(tmp)
    return tmp.name, None
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
def compress_pdf_stream(file):
    """Re-save a PDF with its page content streams compressed.

    Args:
        file: The uploaded PDF, either an object exposing its path as
            ``.name`` or a plain path string.

    Returns:
        ``(output_path, None)`` on success, ``(None, error_message)`` on
        failure.
    """
    if not file:
        return None, "No file provided."

    path = getattr(file, "name", file)
    writer = PdfWriter()
    try:
        for page in PdfReader(path).pages:
            # Content-stream compression is a per-page operation in PyPDF2;
            # the writer itself has no such method.
            page.compress_content_streams()
            writer.add_page(page)
    except Exception as e:
        return None, str(e)

    # Write through the temp file's own handle so it is closed
    # deterministically; delete=False keeps the file for download.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        writer.write(tmp)
    return tmp.name, None
79
-
80
-
81
def compress_pdf_pike(file, quality: int = 75):
    """Re-save a PDF through pikepdf with structural compression enabled.

    Args:
        file: The uploaded PDF, either an object exposing its path as
            ``.name`` or a plain path string.
        quality: Accepted for interface compatibility only. ``Pdf.save`` has
            no JPEG re-encoding option, so this value is not used.

    Returns:
        ``(output_path, None)`` on success, ``(None, error_message)`` on
        failure.
    """
    if not file:
        return None, "No file provided."

    path = getattr(file, "name", file)

    # Reserve the output path and close the handle before pikepdf writes it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        out = tmp.name

    try:
        with pikepdf.Pdf.open(path) as pdf:
            # Only options that Pdf.save actually supports: recompress
            # existing streams, pack objects into object streams, linearize.
            pdf.save(
                out,
                compress_streams=True,
                recompress_flate=True,
                object_stream_mode=pikepdf.ObjectStreamMode.generate,
                linearize=True,
            )
    except Exception as e:
        # Do not leave an empty placeholder behind when saving failed.
        if os.path.exists(out):
            os.remove(out)
        return None, str(e)
    return out, None
93
 
94
- # Interface
 
 
95
# Interface: three tabs (split, merge, compress), each with a download slot
# and an error textbox that is hidden until there is something to report.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1>PDF Toolbox: Split, Merge & Compress</h1>")
    gr.Markdown("**Free & Quick**: Manage PDFs in-memory. Privacy first.")

    def _as_outputs(result):
        """Turn a ``(path, error)`` pair into (file value, error-box update).

        Each component appears once in ``outputs``; the error box's text and
        visibility are set together through a single ``gr.update`` because a
        bare boolean would be taken as the textbox value, not its visibility.
        """
        path, error = result
        return path, gr.update(value=error or "", visible=bool(error))

    with gr.Tabs():
        # Split
        with gr.TabItem("Split PDF"):
            inp = gr.File(label="PDF")
            ranges = gr.Textbox(label="Page ranges", placeholder="e.g. 1-3,6-8")
            btn = gr.Button("Split PDF")
            out = gr.File(label="Download Split PDF")
            err = gr.Textbox(label="Error", visible=False)
            btn.click(
                lambda f, r: _as_outputs(split_pdf(f, r)),
                inputs=[inp, ranges],
                outputs=[out, err],
            )

        # Merge
        with gr.TabItem("Merge PDF"):
            files = gr.Files(label="Upload & reorder PDFs (drag to reorder)")
            btn_m = gr.Button("Merge PDFs")
            out_m = gr.File(label="Download Merged PDF")
            err_m = gr.Textbox(label="Error", visible=False)

            def run_merge(file_list):
                """Validate the upload count, then merge and report the outcome."""
                if not file_list or len(file_list) < 2:
                    return _as_outputs((None, "Please upload at least two PDFs."))
                return _as_outputs(merge_pdfs(file_list))

            btn_m.click(run_merge, inputs=[files], outputs=[out_m, err_m])

        # Compress
        with gr.TabItem("Compress PDF"):
            comp_file = gr.File(label="PDF to compress")
            method = gr.Radio(choices=["Stream only", "PikePDF"], value="PikePDF", label="Method")
            quality = gr.Slider(10, 100, value=75, step=5, label="JPEG Quality")
            btn_c = gr.Button("Compress PDF")
            out_c = gr.File(label="Download Compressed PDF")
            err_c = gr.Textbox(label="Error", visible=False)

            def run_compress(f, m, q):
                """Dispatch to the compressor chosen in the ``method`` radio."""
                if m == "PikePDF":
                    result = compress_pdf_pike(f, q)
                else:
                    result = compress_pdf_stream(f)
                return _as_outputs(result)

            btn_c.click(
                run_compress,
                inputs=[comp_file, method, quality],
                outputs=[out_c, err_c],
            )
 
130
 
131
if __name__ == "__main__":
    # Listen on every interface; the port comes from $PORT when set, else 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)
 
3
  from typing import List
4
  import gradio as gr
5
  from PyPDF2 import PdfReader, PdfWriter
 
6
 
7
  # Load custom CSS
8
  with open(os.path.join(os.path.dirname(__file__), "fancy.css")) as f:
 
31
 
32
 
33
def split_pdf(file, page_ranges: str):
    """Write the pages selected by ``page_ranges`` into a new temporary PDF.

    Args:
        file: The uploaded PDF, either an object exposing its path as
            ``.name`` or a plain path string.
        page_ranges: Page selection text (e.g. ``"1-3,5,7-9"``) that is
            handed to ``parse_page_ranges``.

    Returns:
        ``(output_path, None)`` on success, ``(None, error_message)`` on
        failure.
    """
    if not file:
        return None, "No file provided."
    path = getattr(file, "name", file)

    # Validate file size
    file_size = os.path.getsize(path)
    if file_size > MAX_SIZE_BYTES:
        return None, f"File size exceeds 1 GB limit ({file_size / (1024*1024*1024):.2f} GB)."

    # A corrupt or encrypted upload should surface as an error message,
    # matching how merge_pdfs reports unreadable inputs.
    try:
        reader = PdfReader(path)
        num_pages = len(reader.pages)
    except Exception as e:
        return None, f"Error reading PDF: {e}"

    try:
        page_indices = parse_page_ranges(page_ranges, num_pages)
    except Exception as e:
        return None, f"Error parsing page ranges: {e}"

    if not page_indices:
        return None, "No valid pages selected."

    writer = PdfWriter()
    for idx in page_indices:
        writer.add_page(reader.pages[idx])

    # Write through the temp file's own handle instead of reopening the path,
    # so only one handle exists and it is closed deterministically.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f_out:
        writer.write(f_out)

    return f_out.name, None
58
+
59
+
60
def merge_pdfs(files):
    """Merge multiple PDFs, in upload order, into one temporary PDF.

    Args:
        files: The uploaded PDFs, each either an object exposing its path as
            ``.name`` or a plain path string. At least two are required.

    Returns:
        ``(output_path, None)`` on success, ``(None, error_message)`` on
        failure.
    """
    # The message asks for two files, so enforce two rather than merely
    # rejecting an empty upload.
    if not files or len(files) < 2:
        return None, "Please upload at least two PDF files to merge."

    paths = [getattr(f, "name", f) for f in files]

    # Validate total size
    total_size = sum(os.path.getsize(p) for p in paths)
    if total_size > MAX_SIZE_BYTES:
        return None, f"Total file size exceeds 1 GB limit ({total_size / (1024*1024*1024):.2f} GB)."

    writer = PdfWriter()
    try:
        for p in paths:
            reader = PdfReader(p)
            for page in reader.pages:
                writer.add_page(page)
    except Exception as e:
        return None, f"Error reading PDFs: {e}"

    # Write through the temp file's own handle instead of reopening the path,
    # so only one handle exists and it is closed deterministically.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f_out:
        writer.write(f_out)

    return f_out.name, None
84
+
85
+ # Build Gradio interface
86
# Two tabs (split, merge), each with a download slot and an error textbox
# that stays hidden until a handler reports a problem.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1 id='header'>PDF Splitter & Merger</h1>")
    gr.Markdown(
        "**Free & Quick Solution**: Process your PDFs in-memory instantly without storage.\n"
        "Use the **Split** tab to extract specific pages or the **Merge** tab to combine multiple PDFs into one."
    )

    def _error_update(message):
        """Build one update that sets the error text and its visibility.

        Each error box appears once in ``outputs``. A bare boolean returned
        for a textbox would be taken as its value, so the message and the
        visibility flag travel together in a single ``gr.update``.
        """
        return gr.update(value=message, visible=bool(message))

    with gr.Tabs():
        # Split Tab
        with gr.TabItem("Split PDF"):
            with gr.Row(elem_classes="input-row"):
                pdf_input = gr.File(label="Select PDF to split", file_types=['.pdf'])
                page_input = gr.Textbox(label="Page ranges", placeholder="e.g. 1-3,5,7-9")
            with gr.Row(elem_classes="button-row"):
                split_button = gr.Button("Split PDF", variant="primary")
            output_split = gr.File(label="Download Split PDF")
            error_split = gr.Textbox(label="Error Message", interactive=False, visible=False)

            def run_split(file, ranges):
                """Check the inputs, split the PDF, and report the outcome."""
                if file is None or not ranges:
                    return None, _error_update("Please upload a PDF and specify page ranges.")
                out_path, error = split_pdf(file, ranges)
                if error:
                    return None, _error_update(error)
                return out_path, _error_update("")

            split_button.click(
                fn=run_split,
                inputs=[pdf_input, page_input],
                outputs=[output_split, error_split],
                api_name="split_pdf"
            )

        # Merge Tab
        with gr.TabItem("Merge PDF"):
            with gr.Row(elem_classes="input-row"):
                merge_inputs = gr.Files(label="Select PDF files to merge", file_types=['.pdf'])
            with gr.Row(elem_classes="button-row"):
                merge_button = gr.Button("Merge PDFs", variant="primary")
            output_merge = gr.File(label="Download Merged PDF")
            error_merge = gr.Textbox(label="Error Message", interactive=False, visible=False)

            def run_merge(files):
                """Check the upload count, merge the PDFs, and report the outcome."""
                if not files or len(files) < 2:
                    return None, _error_update("Please upload at least two PDF files.")
                out_path, error = merge_pdfs(files)
                if error:
                    return None, _error_update(error)
                return out_path, _error_update("")

            merge_button.click(
                fn=run_merge,
                inputs=[merge_inputs],
                outputs=[output_merge, error_merge],
                api_name="merge_pdfs"
            )
144
 
145
if __name__ == "__main__":
    # Listen on every interface; the port comes from $PORT when set, else 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)