plozia committed on
Commit
d167a56
·
verified ·
1 Parent(s): bf53f7f

added compress

Browse files
Files changed (1) hide show
  1. app.py +79 -97
app.py CHANGED
@@ -3,6 +3,7 @@ import tempfile
3
  from typing import List
4
  import gradio as gr
5
  from PyPDF2 import PdfReader, PdfWriter
 
6
 
7
  # Load custom CSS
8
  with open(os.path.join(os.path.dirname(__file__), "fancy.css")) as f:
@@ -31,116 +32,97 @@ def parse_page_ranges(ranges: str, num_pages: int) -> List[int]:
31
 
32
 
33
  def split_pdf(file, page_ranges: str):
34
- # Validate file size
35
- file_size = os.path.getsize(file.name)
36
- if file_size > MAX_SIZE_BYTES:
37
- return None, f"File size exceeds 1 GB limit ({file_size / (1024*1024*1024):.2f} GB)."
38
-
39
  reader = PdfReader(file.name)
40
- num_pages = len(reader.pages)
41
  try:
42
- page_indices = parse_page_ranges(page_ranges, num_pages)
43
  except Exception as e:
44
- return None, f"Error parsing page ranges: {e}"
45
-
46
  if not page_indices:
47
- return None, "No valid pages selected."
48
-
49
  writer = PdfWriter()
50
- for idx in page_indices:
51
- writer.add_page(reader.pages[idx])
52
-
53
- out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf").name
54
- with open(out_path, "wb") as f_out:
55
- writer.write(f_out)
56
-
57
- return out_path, None
58
-
59
-
60
- def merge_pdfs(files):
61
- # Merge multiple PDFs in upload order
62
- if not files:
63
- return None, "Please upload at least two PDF files to merge."
64
-
65
- # Validate total size
66
- total_size = sum(os.path.getsize(f.name) for f in files)
67
- if total_size > MAX_SIZE_BYTES:
68
- return None, f"Total file size exceeds 1 GB limit ({total_size / (1024*1024*1024):.2f} GB)."
69
-
70
  writer = PdfWriter()
71
- try:
72
- for f in files:
73
- reader = PdfReader(f.name)
74
- for page in reader.pages:
75
- writer.add_page(page)
76
- except Exception as e:
77
- return None, f"Error reading PDFs: {e}"
78
-
79
- out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf").name
80
- with open(out_path, "wb") as f_out:
81
- writer.write(f_out)
82
 
83
- return out_path, None
84
 
85
- # Build Gradio interface
86
- with gr.Blocks(css=custom_css) as demo:
87
- gr.HTML("<h1 id='header'>PDF Splitter & Merger</h1>")
88
- gr.Markdown(
89
- "**Free & Quick Solution**: Process your PDFs in-memory instantly without storage.\n"
90
- "Use the **Split** tab to extract specific pages or the **Merge** tab to combine multiple PDFs into one."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  )
 
92
 
 
 
 
 
93
  with gr.Tabs():
94
- # Split Tab
95
  with gr.TabItem("Split PDF"):
96
- with gr.Row(elem_classes="input-row"):
97
- pdf_input = gr.File(label="Select PDF to split", file_types=['.pdf'])
98
- page_input = gr.Textbox(label="Page ranges", placeholder="e.g. 1-3,5,7-9")
99
- with gr.Row(elem_classes="button-row"):
100
- split_button = gr.Button("Split PDF", variant="primary")
101
- output_split = gr.File(label="Download Split PDF")
102
- error_split = gr.Textbox(label="Error Message", interactive=False, visible=False)
103
-
104
- def run_split(file, ranges):
105
- if file is None or not ranges:
106
- return None, "Please upload a PDF and specify page ranges.", True
107
- out_path, error = split_pdf(file, ranges)
108
- if error:
109
- return None, error, True
110
- return out_path, "", False
111
-
112
- split_button.click(
113
- fn=run_split,
114
- inputs=[pdf_input, page_input],
115
- outputs=[output_split, error_split, error_split],
116
- api_name="split_pdf"
117
- )
118
- error_split.change(lambda msg: msg != "", inputs=error_split, outputs=error_split)
119
 
120
- # Merge Tab
121
  with gr.TabItem("Merge PDF"):
122
- with gr.Row(elem_classes="input-row"):
123
- merge_inputs = gr.Files(label="Select PDF files to merge", file_types=['.pdf'])
124
- with gr.Row(elem_classes="button-row"):
125
- merge_button = gr.Button("Merge PDFs", variant="primary")
126
- output_merge = gr.File(label="Download Merged PDF")
127
- error_merge = gr.Textbox(label="Error Message", interactive=False, visible=False)
128
-
129
- def run_merge(files):
130
- if not files or len(files) < 2:
131
- return None, "Please upload at least two PDF files.", True
132
- out_path, error = merge_pdfs(files)
133
- if error:
134
- return None, error, True
135
- return out_path, "", False
136
-
137
- merge_button.click(
138
- fn=run_merge,
139
- inputs=[merge_inputs],
140
- outputs=[output_merge, error_merge, error_merge],
141
- api_name="merge_pdfs"
142
- )
143
- error_merge.change(lambda msg: msg != "", inputs=error_merge, outputs=error_merge)
144
 
145
  if __name__ == "__main__":
146
- demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
3
  from typing import List
4
  import gradio as gr
5
  from PyPDF2 import PdfReader, PdfWriter
6
+ import pikepdf
7
 
8
  # Load custom CSS
9
  with open(os.path.join(os.path.dirname(__file__), "fancy.css")) as f:
 
32
 
33
 
34
  def split_pdf(file, page_ranges: str):
35
+ if not file:
36
+ return None, "No file provided."
37
+ if os.path.getsize(file.name) > MAX_SIZE_BYTES:
38
+ return None, "File exceeds 1 GB limit."
 
39
  reader = PdfReader(file.name)
 
40
  try:
41
+ page_indices = parse_page_ranges(page_ranges, len(reader.pages))
42
  except Exception as e:
43
+ return None, str(e)
 
44
  if not page_indices:
45
+ return None, "No valid pages."
 
46
  writer = PdfWriter()
47
+ for i in page_indices:
48
+ writer.add_page(reader.pages[i])
49
+ out = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf").name
50
+ writer.write(out)
51
+ return out, None
52
+
53
+
54
+ def merge_pdfs(files: List, order: List[int] = None):
55
+ if not files or len(files) < 2:
56
+ return None, "Upload at least two PDFs."
57
+ if sum(os.path.getsize(f.name) for f in files) > MAX_SIZE_BYTES:
58
+ return None, "Total exceeds 1 GB."
59
+ if order and len(order) == len(files):
60
+ files = [f for _, f in sorted(zip(order, files), key=lambda x: x[0])]
 
 
 
 
 
 
61
  writer = PdfWriter()
62
+ for f in files:
63
+ for p in PdfReader(f.name).pages:
64
+ writer.add_page(p)
65
+ out = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf").name
66
+ writer.write(out)
67
+ return out, None
 
 
 
 
 
68
 
 
69
 
70
+ def compress_pdf_stream(file):
71
+ reader = PdfReader(file.name)
72
+ writer = PdfWriter()
73
+ for p in reader.pages:
74
+ writer.add_page(p)
75
+ writer.compress_content_streams()
76
+ out = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf").name
77
+ writer.write(out)
78
+ return out, None
79
+
80
+
81
+ def compress_pdf_pike(file, quality: int = 75):
82
+ pdf = pikepdf.Pdf.open(file.name)
83
+ out = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf").name
84
+ # optimize structure and recompress images with JPEG
85
+ pdf.save(
86
+ out,
87
+ optimize_version=True,
88
+ compression=pikepdf.Compression.jpeg,
89
+ jpeg_quality=quality,
90
+ linearize=True
91
  )
92
+ return out, None
93
 
94
+ # Interface
95
+ with gr.Blocks(css=custom_css) as demo:
96
+ gr.HTML("<h1>PDF Toolbox: Split, Merge & Compress</h1>")
97
+ gr.Markdown("**Free & Quick**: Manage PDFs in-memory. Privacy first.")
98
  with gr.Tabs():
99
+ # Split
100
  with gr.TabItem("Split PDF"):
101
+ inp, ranges = gr.File(label="PDF"), gr.Textbox(label="Ranges")
102
+ btn = gr.Button("Split")
103
+ out, err = gr.File(), gr.Textbox(visible=False)
104
+ btn.click(lambda f,r: (*split_pdf(f,r),), [inp,ranges],[out,err,err])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
+ # Merge
107
  with gr.TabItem("Merge PDF"):
108
+ files = gr.Files(label="PDFs")
109
+ order_df = gr.Dataframe(headers=["Filename","Order"], interactive=True)
110
+ files.change(lambda fs: [[os.path.basename(f.name), i+1] for i,f in enumerate(fs)], inputs=files, outputs=order_df)
111
+ btn_m = gr.Button("Merge")
112
+ out_m, err_m = gr.File(), gr.Textbox(visible=False)
113
+ btn_m.click(lambda fs,df: merge_pdfs(fs,[int(r[1]) for r in df] if df else None), [files,order_df], [out_m,err_m,err_m])
114
+
115
+ # Compress
116
+ with gr.TabItem("Compress PDF"):
117
+ comp_file = gr.File(label="PDF to compress")
118
+ method = gr.Radio(choices=["Stream only","PikePDF"], value="PikePDF", label="Method")
119
+ quality = gr.Slider(10,100, value=75, step=5, label="JPEG Quality")
120
+ btn_c = gr.Button("Compress")
121
+ out_c, err_c = gr.File(), gr.Textbox(visible=False)
122
+ def do_compress(f,m,q):
123
+ if m=="PikePDF": return compress_pdf_pike(f,q)
124
+ return compress_pdf_stream(f)
125
+ btn_c.click(do_compress, [comp_file,method,quality], [out_c,err_c,err_c])
 
 
 
 
126
 
127
  if __name__ == "__main__":
128
+ demo.launch(server_name="0.0.0.0", port=int(os.environ.get("PORT",7860)))