kivilaid committed on
Commit
eacf57c
·
verified ·
1 Parent(s): 7e19131

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fitz # PyMuPDF
3
+ import os
4
+ import shutil
5
+ from PIL import Image
6
+
7
def extract_images_pymupdf(pdf_file):
    """Render each page of an uploaded PDF to a PIL image.

    Note that this rasterizes whole pages with ``page.get_pixmap()``; it does
    not pull out the individual images embedded in the PDF.

    Args:
        pdf_file: Raw bytes of the PDF (written to disk in binary mode).

    Returns:
        A list with one RGB ``PIL.Image`` per page, or ``None`` when the
        document has no pages.
    """
    pdf_path = "extract_images/input_docs/uploaded_pdf.pdf"
    # The input directory is not created anywhere else, so make sure it
    # exists before writing; otherwise open() fails on a fresh checkout.
    os.makedirs(os.path.dirname(pdf_path), exist_ok=True)
    with open(pdf_path, "wb") as f:
        f.write(pdf_file)

    doc = fitz.open(pdf_path)
    try:
        images = []
        for page in doc:
            # Default get_pixmap() renders without an alpha channel, so the
            # sample buffer is interpreted as packed RGB.
            pix = page.get_pixmap()
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            images.append(img)
    finally:
        # Release the file handle even if rendering a page fails.
        doc.close()
    return images if images else None
19
+
20
def clear_directory(directory):
    """Delete everything inside *directory*, keeping the directory itself.

    Files and symlinks are unlinked and sub-directories are removed
    recursively. Deletion is best-effort: a failure on one entry is printed
    and the remaining entries are still processed.

    Args:
        directory: Path of the directory to empty. A path that does not
            exist is treated as already empty.
    """
    try:
        entries = os.listdir(directory)
    except FileNotFoundError:
        # Nothing to clear; the directory was never created.
        return

    for filename in entries:
        file_path = os.path.join(directory, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except OSError as e:
            print(f"Failed to delete {file_path}. Reason: {e}")
30
+
31
def handle_pymupdf_extraction(pdf_file):
    """Gradio callback: turn an uploaded PDF into a list of page images.

    Args:
        pdf_file: Raw bytes of the uploaded PDF, or ``None`` when the form
            is submitted without a file.

    Returns:
        The images produced by ``extract_images_pymupdf``, or ``None`` when
        there is nothing to process.
    """
    # Submitting with no upload (or an empty one) would otherwise fail while
    # writing/opening the file; return an empty result instead.
    if not pdf_file:
        return None

    images = extract_images_pymupdf(pdf_file)
    clear_directory("extract_images/image_outputs")
    return images
35
+
36
# Gradio UI: one binary file upload in, a gallery of images out.
interface = gr.Interface(
    handle_pymupdf_extraction,
    gr.File(type="binary", label="Upload PDF"),
    gr.Gallery(label="Extracted Images"),
    title="PDF Image Extractor - PyMuPDF",
    description="Upload a PDF to extract images using PyMuPDF.",
)

# Start the app; share=True requests a public share link.
interface.launch(share=True)