Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -55,23 +55,31 @@ def extract_image_from_pdf(pdf_url, dpi=75):
|
|
| 55 |
PIL.Image: First page as image or None
|
| 56 |
"""
|
| 57 |
try:
|
|
|
|
|
|
|
| 58 |
# Download PDF
|
| 59 |
response = requests.get(pdf_url, timeout=30)
|
| 60 |
response.raise_for_status()
|
|
|
|
| 61 |
|
| 62 |
# Open PDF from bytes
|
|
|
|
| 63 |
pdf_document = fitz.open(stream=response.content, filetype="pdf")
|
| 64 |
|
| 65 |
# Get first page
|
|
|
|
| 66 |
first_page = pdf_document[0]
|
| 67 |
|
| 68 |
# Render page to pixmap
|
|
|
|
| 69 |
pix = first_page.get_pixmap(matrix=fitz.Matrix(dpi/72, dpi/72))
|
| 70 |
|
| 71 |
# Convert to PIL Image
|
|
|
|
| 72 |
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 73 |
|
| 74 |
pdf_document.close()
|
|
|
|
| 75 |
return img
|
| 76 |
|
| 77 |
except Exception as e:
|
|
|
|
| 55 |
PIL.Image: First page as image or None
|
| 56 |
"""
|
| 57 |
try:
|
| 58 |
+
# Download PDF
|
| 59 |
+
print(f"Attempting to download PDF from: {pdf_url}")
|
| 60 |
# Download PDF
|
| 61 |
response = requests.get(pdf_url, timeout=30)
|
| 62 |
response.raise_for_status()
|
| 63 |
+
print(f"PDF download status code: {response.status_code}")
|
| 64 |
|
| 65 |
# Open PDF from bytes
|
| 66 |
+
print("Opening PDF document...")
|
| 67 |
pdf_document = fitz.open(stream=response.content, filetype="pdf")
|
| 68 |
|
| 69 |
# Get first page
|
| 70 |
+
print("Getting first page...")
|
| 71 |
first_page = pdf_document[0]
|
| 72 |
|
| 73 |
# Render page to pixmap
|
| 74 |
+
print("Rendering page to pixmap...")
|
| 75 |
pix = first_page.get_pixmap(matrix=fitz.Matrix(dpi/72, dpi/72))
|
| 76 |
|
| 77 |
# Convert to PIL Image
|
| 78 |
+
print("Converting to PIL Image...")
|
| 79 |
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 80 |
|
| 81 |
pdf_document.close()
|
| 82 |
+
print("Successfully extracted image from PDF")
|
| 83 |
return img
|
| 84 |
|
| 85 |
except Exception as e:
|