vaibhavbalar committed on
Commit
f9b11f1
·
verified ·
1 Parent(s): 36fd017

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -16
app.py CHANGED
@@ -1,26 +1,101 @@
1
- def compress_pdf(file, quality):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  doc = fitz.open(file.name)
3
  output_path = "compressed_output.pdf"
 
 
 
4
 
 
 
 
 
5
  for page in doc:
6
- img_list = page.get_images(full=True)
7
- for img in img_list:
8
- xref = img[0]
9
- base_image = doc.extract_image(xref)
10
- img_bytes = base_image["image"]
11
 
12
- # Load image from bytes
13
- pix = fitz.Pixmap(fitz.csRGB, fitz.Pixmap(img_bytes))
 
14
 
15
- temp_filename = "temp_img.jpg"
16
- pix.save(temp_filename) # No quality parameter here
17
 
18
- rect = page.get_image_rects(xref)[0]
19
- page.insert_image(rect, filename=temp_filename)
 
 
 
 
 
 
20
 
21
- os.remove(temp_filename)
 
 
 
 
22
 
23
- doc.save(output_path, garbage=4, deflate=True, clean=True)
24
- doc.close()
 
 
 
25
 
26
- return output_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
import io
import os
import tempfile
import zipfile

import fitz  # PyMuPDF
import gradio as gr
import PyPDF2
7
+
8
+ # Merge PDFs
9
def merge_pdfs(files):
    """Merge the uploaded PDFs, in the order given, into a single PDF.

    Args:
        files: Uploads from the Gradio ``File`` component. Each item is
            either an object exposing its path as ``.name`` or a plain
            path string.

    Returns:
        Path of the merged document, named ``merged_output.pdf`` inside a
        freshly created temporary directory.

    Raises:
        gr.Error: If no files were selected.
    """
    if not files:
        # Clicking the button without an upload passes None / an empty list.
        raise gr.Error("Please select at least one PDF to merge.")

    # A per-call directory keeps concurrent requests from overwriting each
    # other's output while preserving a friendly download name.
    output_path = os.path.join(
        tempfile.mkdtemp(prefix="pdf_merge_"), "merged_output.pdf"
    )

    merger = PyPDF2.PdfMerger()
    try:
        for file in files:
            merger.append(getattr(file, "name", file))
        merger.write(output_path)
    finally:
        # Release the source file handles even when a PDF fails to parse.
        merger.close()
    return output_path
17
+
18
+ # Split PDF
19
def split_pdf(file):
    """Split a PDF into single-page PDFs and bundle them into a ZIP archive.

    Pages are serialized in memory and written straight into the archive,
    so no shared scratch folder is needed and concurrent requests cannot
    delete or overwrite each other's intermediate files.

    Args:
        file: Upload from the Gradio ``File`` component: an object exposing
            its path as ``.name`` or a plain path string.

    Returns:
        Path of ``split_pages.zip`` (inside a freshly created temporary
        directory) containing ``page_1.pdf``, ``page_2.pdf``, ...

    Raises:
        gr.Error: If no file was selected.
    """
    if file is None:
        raise gr.Error("Please select a PDF to split.")

    reader = PyPDF2.PdfReader(getattr(file, "name", file))
    zip_filename = os.path.join(
        tempfile.mkdtemp(prefix="pdf_split_"), "split_pages.zip"
    )

    with zipfile.ZipFile(zip_filename, "w") as zipf:
        for page_number, page in enumerate(reader.pages, start=1):
            writer = PyPDF2.PdfWriter()
            writer.add_page(page)
            buffer = io.BytesIO()
            writer.write(buffer)
            zipf.writestr(f"page_{page_number}.pdf", buffer.getvalue())

    return zip_filename
43
+
44
+ # Compress PDF
45
def compress_pdf(file):
    """Re-save a PDF with PyMuPDF's garbage collection and stream deflation.

    Args:
        file: Upload from the Gradio ``File`` component: an object exposing
            its path as ``.name`` or a plain path string.

    Returns:
        Path of the rewritten document, named ``compressed_output.pdf``
        inside a freshly created temporary directory.

    Raises:
        gr.Error: If no file was selected.
    """
    if file is None:
        raise gr.Error("Please select a PDF to compress.")

    # Unique directory per call so parallel requests never share an output file.
    output_path = os.path.join(
        tempfile.mkdtemp(prefix="pdf_compress_"), "compressed_output.pdf"
    )

    # The context manager closes the document even if save() raises.
    with fitz.open(getattr(file, "name", file)) as doc:
        doc.save(output_path, garbage=4, deflate=True, clean=True)
    return output_path
51
 
52
+ # Extract Text
53
def extract_text(file):
    """Extract the text of every page of a PDF.

    Args:
        file: Upload from the Gradio ``File`` component: an object exposing
            its path as ``.name`` or a plain path string.

    Returns:
        A ``(output_path, text)`` tuple: the path of a UTF-8 text file
        named ``extracted_text.txt`` (inside a freshly created temporary
        directory) and the same text as a string for on-screen preview.

    Raises:
        gr.Error: If no file was selected.
    """
    if file is None:
        raise gr.Error("Please select a PDF to extract text from.")

    # The context manager closes the document even if a page fails to parse;
    # join() avoids repeated string reallocation on long documents.
    with fitz.open(getattr(file, "name", file)) as doc:
        text = "".join(page.get_text() for page in doc)

    # Unique directory per call so parallel requests never share an output file.
    output_path = os.path.join(
        tempfile.mkdtemp(prefix="pdf_text_"), "extracted_text.txt"
    )
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(text)

    return output_path, text
 
65
 
66
+ # Gradio App
67
# Gradio UI: one tab per tool. Each tab wires an upload widget and a button
# to the matching handler defined above and shows the result as a download.
with gr.Blocks(theme=gr.themes.Base(primary_hue="orange")) as demo:
    gr.Markdown(
        """
        # 📁 Local PDF Toolkit
        Merge, Split, Compress, and Extract Text from PDFs — Safely inside Hugging Face.
        """
    )

    with gr.Tab("🔗 Merge PDFs"):
        merge_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Select PDFs to Merge")
        merge_btn = gr.Button("🚀 Merge PDFs")
        merge_output = gr.File(label="⬇️ Download Merged PDF")
        merge_btn.click(merge_pdfs, inputs=merge_input, outputs=merge_output)

    with gr.Tab("✂️ Split PDF"):
        split_input = gr.File(file_types=[".pdf"], label="Select PDF to Split")
        split_btn = gr.Button("✂️ Split PDF")
        split_output = gr.File(label="⬇️ Download Split ZIP")
        split_btn.click(split_pdf, inputs=split_input, outputs=split_output)

    with gr.Tab("📉 Compress PDF"):
        compress_input = gr.File(file_types=[".pdf"], label="Select PDF to Compress")
        compress_btn = gr.Button("📉 Compress PDF")
        compress_output = gr.File(label="⬇️ Download Compressed PDF")
        compress_btn.click(compress_pdf, inputs=compress_input, outputs=compress_output)

    with gr.Tab("📜 Extract Text"):
        extract_input = gr.File(file_types=[".pdf"], label="Select PDF to Extract Text")
        extract_btn = gr.Button("📜 Extract Text")
        extract_file = gr.File(label="⬇️ Download Extracted Text File")
        extract_preview = gr.Textbox(label="📖 Preview Text", lines=20, max_lines=100, interactive=False, show_copy_button=True)

        # extract_text returns (file path, text): the first feeds the
        # download widget, the second the read-only preview box.
        extract_btn.click(extract_text, inputs=extract_input, outputs=[extract_file, extract_preview])

demo.launch()