|
|
import gradio as gr |
|
|
import fitz |
|
|
import os |
|
|
import shutil |
|
|
from PIL import Image |
|
|
|
|
|
def extract_images_pymupdf(pdf_file):
    """Render every page of an uploaded PDF to a PIL image.

    Note that, despite the name, this rasterizes whole pages with
    ``page.get_pixmap()``; it does not pull out image objects embedded
    in the PDF.

    Args:
        pdf_file: Raw PDF content as ``bytes``. ``None`` or empty bytes
            (e.g. the form was submitted without an upload) is treated
            as "nothing to do".

    Returns:
        A list with one ``PIL.Image.Image`` per page, in page order, or
        ``None`` when there is no input or the document has no pages.
    """
    if not pdf_file:
        return None

    # Open the document straight from memory. Writing to a fixed on-disk
    # path would let concurrent requests overwrite each other's upload and
    # would leave the file behind after the request finishes.
    with fitz.open(stream=pdf_file, filetype="pdf") as doc:
        images = []
        for page in doc:
            # Relies on get_pixmap() defaults producing 3-channel RGB
            # samples without alpha, which is what mode "RGB" expects.
            pix = page.get_pixmap()
            images.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )

    # Keep the original contract: an empty result is reported as None.
    return images or None
|
|
|
|
|
def clear_directory(directory):
    """Empty *directory* without removing the directory itself.

    Regular files and symbolic links are unlinked; sub-directories are
    removed recursively. A failure on one entry is printed and does not
    stop the remaining entries from being processed.

    Args:
        directory: Path of the directory whose contents should be removed.
    """
    for entry_name in os.listdir(directory):
        target = os.path.join(directory, entry_name)
        try:
            # Pick the removal routine first, then apply it once.
            if os.path.isfile(target) or os.path.islink(target):
                remover = os.unlink
            elif os.path.isdir(target):
                remover = shutil.rmtree
            else:
                # Neither file, link, nor directory: leave it alone.
                continue
            remover(target)
        except Exception as e:
            # Best-effort cleanup: report the problem and keep going.
            print(f"Failed to delete {target}. Reason: {e}")
|
|
|
|
|
def handle_pymupdf_extraction(pdf_file):
    """Gradio callback: forward the uploaded PDF to the PyMuPDF renderer.

    Args:
        pdf_file: The value supplied by the upload input component.

    Returns:
        Whatever ``extract_images_pymupdf`` returns for that input.
    """
    return extract_images_pymupdf(pdf_file)
|
|
|
|
|
# Single-function Gradio UI: one PDF upload in, a gallery of images out.
interface = gr.Interface(
    title="PDF Image Extractor - PyMuPDF",
    description="Upload a PDF to extract images using PyMuPDF.",
    fn=handle_pymupdf_extraction,
    # type="binary" hands the callback the file content rather than a path.
    inputs=gr.File(type="binary", label="Upload PDF"),
    outputs=gr.Gallery(label="Extracted Images"),
)

# share=True asks Gradio for a public share link in addition to the local
# server; anyone with the link can reach this app while it is running.
interface.launch(share=True)
|
|
|