Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -102,14 +102,15 @@ def extract_invoice_data(file_data, content_type, json_schema):
|
|
| 102 |
system_prompt = "You are an expert in document data extraction."
|
| 103 |
base64_images = []
|
| 104 |
base64DataResp = []
|
| 105 |
-
|
| 106 |
if content_type == "application/pdf":
|
| 107 |
try:
|
| 108 |
extracted_text = extract_pdf_text(file_data)
|
|
|
|
| 109 |
# Store PDF as Base64
|
| 110 |
base64_pdf = base64.b64encode(file_data).decode('utf-8')
|
| 111 |
base64DataResp.append(f"data:application/pdf;base64,{base64_pdf}")
|
| 112 |
-
|
| 113 |
images = convert_from_bytes(file_data) # Convert PDF to images
|
| 114 |
|
| 115 |
if len(images) > 2:
|
|
@@ -121,19 +122,16 @@ def extract_invoice_data(file_data, content_type, json_schema):
|
|
| 121 |
base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
|
| 122 |
base64_images.append(f"data:image/png;base64,{base64_encoded}")
|
| 123 |
|
| 124 |
-
# Store all images as a single JSON object
|
| 125 |
-
# base64DataResp = json.dumps(base64_images)
|
| 126 |
-
|
| 127 |
except Exception as e:
|
| 128 |
logger.error(f"Error converting PDF to image: {e}")
|
| 129 |
return {"error": "Failed to process PDF"}, None
|
| 130 |
|
| 131 |
-
|
| 132 |
# Handle direct image files
|
| 133 |
-
|
| 134 |
-
base64DataResp.append(f"data:{content_type};base64,{
|
| 135 |
-
|
| 136 |
-
|
| 137 |
|
| 138 |
# Prepare OpenAI request
|
| 139 |
openai_content = [{"type": "image_url", "image_url": {"url": img_base64}} for img_base64 in base64_images]
|
|
|
|
| 102 |
system_prompt = "You are an expert in document data extraction."
|
| 103 |
base64_images = []
|
| 104 |
base64DataResp = []
|
| 105 |
+
|
| 106 |
if content_type == "application/pdf":
|
| 107 |
try:
|
| 108 |
extracted_text = extract_pdf_text(file_data)
|
| 109 |
+
|
| 110 |
# Store PDF as Base64
|
| 111 |
base64_pdf = base64.b64encode(file_data).decode('utf-8')
|
| 112 |
base64DataResp.append(f"data:application/pdf;base64,{base64_pdf}")
|
| 113 |
+
|
| 114 |
images = convert_from_bytes(file_data) # Convert PDF to images
|
| 115 |
|
| 116 |
if len(images) > 2:
|
|
|
|
| 122 |
base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
|
| 123 |
base64_images.append(f"data:image/png;base64,{base64_encoded}")
|
| 124 |
|
|
|
|
|
|
|
|
|
|
| 125 |
except Exception as e:
|
| 126 |
logger.error(f"Error converting PDF to image: {e}")
|
| 127 |
return {"error": "Failed to process PDF"}, None
|
| 128 |
|
| 129 |
+
elif content_type.startswith("image/"):
|
| 130 |
# Handle direct image files
|
| 131 |
+
base64_pdf = base64.b64encode(file_data).decode('utf-8')
|
| 132 |
+
base64DataResp.append(f"data:{content_type};base64,{base64_pdf}")
|
| 133 |
+
else:
|
| 134 |
+
return {"error": f"Unsupported file type: {content_type}"}
|
| 135 |
|
| 136 |
# Prepare OpenAI request
|
| 137 |
openai_content = [{"type": "image_url", "image_url": {"url": img_base64}} for img_base64 in base64_images]
|