Ramzan0553 committed on
Commit
15a335d
·
verified ·
1 Parent(s): d6af51e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fitz # PyMuPDF
3
+ import os
4
+ import tempfile
5
+ import shutil
6
+ import zipfile
7
+
8
def extract_text(pdf_file):
    """Return the plain text of every page of a PDF, in page order.

    Args:
        pdf_file: Either a binary file-like object exposing ``read()`` or a
            filesystem path to the PDF (what newer Gradio versions pass for
            ``gr.File`` uploads).

    Returns:
        A single string with the text of all pages concatenated.
    """
    # Accept both an open upload object and a plain path.
    if hasattr(pdf_file, "read"):
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    else:
        doc = fitz.open(pdf_file)

    # The context manager closes the document even if a page fails to parse.
    with doc:
        return "".join(page.get_text() for page in doc)
14
+
15
def extract_images(pdf_file):
    """Extract the embedded images of a PDF and bundle them into a zip file.

    Args:
        pdf_file: Either a binary file-like object exposing ``read()`` or a
            filesystem path to the PDF.

    Returns:
        A ``(message, zip_path)`` tuple. ``zip_path`` is the path of the
        created archive, or ``None`` when no image could be extracted.
    """
    # Accept both an open upload object and a plain path.
    if hasattr(pdf_file, "read"):
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    else:
        doc = fitz.open(pdf_file)

    output_dir = tempfile.mkdtemp()
    saved_paths = []
    with doc:
        for page_num, page in enumerate(doc, start=1):
            for img_index, img in enumerate(page.get_images(full=True), start=1):
                xref = img[0]
                try:
                    base_image = doc.extract_image(xref)
                    image_ext = base_image["ext"]
                    image_filename = os.path.join(
                        output_dir,
                        f"image_{page_num}_{img_index}_{xref}.{image_ext}",
                    )
                    with open(image_filename, "wb") as image_file:
                        image_file.write(base_image["image"])
                except Exception:
                    # Best effort: one unreadable image must not abort the
                    # whole extraction, so skip it and keep going.
                    continue
                saved_paths.append(image_filename)

    if not saved_paths:
        # Nothing to hand back, so do not leave the empty temp dir behind.
        shutil.rmtree(output_dir, ignore_errors=True)
        return "No images found in the PDF.", None

    # Zip exactly the files that were written, whatever their extension, so
    # the reported count always matches the archive contents.
    zip_path = os.path.join(output_dir, "images.zip")
    with zipfile.ZipFile(zip_path, "w") as zipf:
        for image_path in saved_paths:
            zipf.write(image_path, os.path.basename(image_path))

    return f"{len(saved_paths)} images extracted.", zip_path
46
+
47
def merge_pdfs(pdf_files):
    """Concatenate several PDFs into one new PDF file.

    Args:
        pdf_files: Iterable of PDFs in the desired output order. Each item is
            either a binary file-like object exposing ``read()`` or a
            filesystem path.

    Returns:
        The path of a newly created temporary ``.pdf`` file holding the
        merged document.
    """
    with fitz.open() as merged_pdf:
        for pdf_file in pdf_files:
            # Accept both an open upload object and a plain path.
            if hasattr(pdf_file, "read"):
                doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
            else:
                doc = fitz.open(pdf_file)
            with doc:
                merged_pdf.insert_pdf(doc)

        # mkstemp creates the file atomically, unlike the deprecated and
        # race-prone tempfile.mktemp. Only the name is needed here, so the
        # descriptor is closed right away.
        fd, temp_path = tempfile.mkstemp(suffix=".pdf")
        os.close(fd)
        merged_pdf.save(temp_path)
    return temp_path
55
+
56
def split_pdf(pdf_file):
    """Split a PDF into one single-page PDF per page and zip the results.

    Args:
        pdf_file: Either a binary file-like object exposing ``read()`` or a
            filesystem path to the PDF.

    Returns:
        The path of a zip archive containing ``page_1.pdf``, ``page_2.pdf``,
        ... in page order.
    """
    # Accept both an open upload object and a plain path.
    if hasattr(pdf_file, "read"):
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    else:
        doc = fitz.open(pdf_file)

    output_dir = tempfile.mkdtemp()
    page_paths = []
    with doc:
        for page_num in range(len(doc)):
            page_path = os.path.join(output_dir, f"page_{page_num+1}.pdf")
            # Close each single-page document as soon as it is saved.
            with fitz.open() as new_doc:
                new_doc.insert_pdf(doc, from_page=page_num, to_page=page_num)
                new_doc.save(page_path)
            page_paths.append(page_path)

    # Add pages from the tracked list so the archive is in page order rather
    # than whatever order os.listdir happens to return.
    zip_path = os.path.join(output_dir, "split_pages.zip")
    with zipfile.ZipFile(zip_path, "w") as zipf:
        for page_path in page_paths:
            zipf.write(page_path, os.path.basename(page_path))
    return zip_path
71
+
72
def pdf_tool(task, pdf_input1=None, pdf_input2=None):
    """Run the selected PDF task and report the outcome.

    Args:
        task: One of "Extract Text", "Extract Images", "Merge PDFs" or
            "Split PDF".
        pdf_input1: The primary uploaded PDF.
        pdf_input2: The second uploaded PDF; only used by "Merge PDFs".

    Returns:
        A ``(text, file_path)`` tuple. ``file_path`` is ``None`` when the
        task produces no downloadable file or an input is missing.
    """
    # Merging is the only task that needs both uploads, so handle it first.
    if task == "Merge PDFs":
        if pdf_input1 and pdf_input2:
            combined = merge_pdfs([pdf_input1, pdf_input2])
            return "PDFs merged successfully.", combined
        return "Please upload two PDF files to merge.", None

    if task not in ("Extract Text", "Extract Images", "Split PDF"):
        return "Invalid task selected.", None

    # Every remaining task works on the first upload only.
    if not pdf_input1:
        return "Please upload a PDF file.", None

    if task == "Extract Text":
        return extract_text(pdf_input1), None

    if task == "Extract Images":
        status, archive = extract_images(pdf_input1)
        return status, archive

    archive = split_pdf(pdf_input1)
    return "PDF split into separate pages.", archive
98
+
99
# Gradio UI: a task selector, up to two PDF uploads, a Run button, and a
# text / file output pair wired to pdf_tool.
with gr.Blocks() as demo:
    gr.Markdown("## 🛠️ PDF Utility Tool")
    task = gr.Radio(choices=["Extract Text", "Extract Images", "Merge PDFs", "Split PDF"], label="Select a Task")

    with gr.Row():
        pdf_input1 = gr.File(label="PDF File 1", file_types=[".pdf"])
        # Starts visible; the change handler below toggles it once a task is picked.
        pdf_input2 = gr.File(label="PDF File 2 (for Merge only)", file_types=[".pdf"], visible=True)

    # Show the second upload only while "Merge PDFs" is the selected task.
    task.change(lambda t: gr.update(visible=(t == "Merge PDFs")), inputs=task, outputs=pdf_input2)

    run_button = gr.Button("Run")
    output_text = gr.Textbox(label="Output Text")
    # Hidden until a run actually produces a file (see process below).
    output_file = gr.File(label="Download Result", file_types=[".pdf", ".zip"], visible=False)

    def process(task, pdf_input1, pdf_input2):
        """Run pdf_tool and map its result onto the two output components.

        Returns the result text plus an update for the file component that
        sets its value and makes it visible only when a file path was
        returned.
        """
        result_text, result_file = pdf_tool(task, pdf_input1, pdf_input2)
        return result_text, gr.update(value=result_file, visible=bool(result_file))

    run_button.click(process, inputs=[task, pdf_input1, pdf_input2], outputs=[output_text, output_file])

# Start the Gradio app.
demo.launch()