Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -99,7 +99,7 @@ def extract_invoice_data(file_data, content_type, json_schema):
|
|
| 99 |
Extracts data from a PDF (converted to images) or an image.
|
| 100 |
Only PDFs with 1 or 2 pages are allowed.
|
| 101 |
"""
|
| 102 |
-
system_prompt = "You are an expert in document data extraction."
|
| 103 |
base64_images = []
|
| 104 |
|
| 105 |
if content_type == "application/pdf":
|
|
@@ -107,13 +107,13 @@ def extract_invoice_data(file_data, content_type, json_schema):
|
|
| 107 |
images = convert_from_bytes(file_data) # Convert PDF to images
|
| 108 |
|
| 109 |
if len(images) > 2:
|
| 110 |
-
raise ValueError("PDF contains more than 2 pages.")
|
| 111 |
|
| 112 |
-
for img in images[:2]:
|
| 113 |
img_byte_arr = io.BytesIO()
|
| 114 |
-
img.save(img_byte_arr, format="PNG"
|
| 115 |
base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
|
| 116 |
-
base64_images.append(f"data:
|
| 117 |
|
| 118 |
except Exception as e:
|
| 119 |
logger.error(f"Error converting PDF to image: {e}")
|
|
|
|
| 99 |
Extracts data from a PDF (converted to images) or an image.
|
| 100 |
Only PDFs with 1 or 2 pages are allowed.
|
| 101 |
"""
|
| 102 |
+
system_prompt = "You are an expert in document data extraction. Extract relevant fields from the document and return structured JSON based on the provided schema."
|
| 103 |
base64_images = []
|
| 104 |
|
| 105 |
if content_type == "application/pdf":
|
|
|
|
| 107 |
images = convert_from_bytes(file_data) # Convert PDF to images
|
| 108 |
|
| 109 |
if len(images) > 2:
|
| 110 |
+
raise ValueError("PDF contains more than 2 pages. Only PDFs with 1 or 2 pages are supported.")
|
| 111 |
|
| 112 |
+
for img in images[:2]: # Convert up to 2 pages
|
| 113 |
img_byte_arr = io.BytesIO()
|
| 114 |
+
img.save(img_byte_arr, format="PNG")
|
| 115 |
base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
|
| 116 |
+
base64_images.append(f"data:{content_type};base64,{base64_encoded}")
|
| 117 |
|
| 118 |
except Exception as e:
|
| 119 |
logger.error(f"Error converting PDF to image: {e}")
|