vkumartr committed on
Commit
24259cd
·
verified ·
1 Parent(s): a4b95eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -10
app.py CHANGED
@@ -102,14 +102,15 @@ def extract_invoice_data(file_data, content_type, json_schema):
102
  system_prompt = "You are an expert in document data extraction."
103
  base64_images = []
104
  base64DataResp = []
105
-
106
  if content_type == "application/pdf":
107
  try:
108
  extracted_text = extract_pdf_text(file_data)
 
109
  # Store PDF as Base64
110
  base64_pdf = base64.b64encode(file_data).decode('utf-8')
111
  base64DataResp.append(f"data:application/pdf;base64,{base64_pdf}")
112
-
113
  images = convert_from_bytes(file_data) # Convert PDF to images
114
 
115
  if len(images) > 2:
@@ -121,19 +122,16 @@ def extract_invoice_data(file_data, content_type, json_schema):
121
  base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
122
  base64_images.append(f"data:image/png;base64,{base64_encoded}")
123
 
124
- # Store all images as a single JSON object
125
- # base64DataResp = json.dumps(base64_images)
126
-
127
  except Exception as e:
128
  logger.error(f"Error converting PDF to image: {e}")
129
  return {"error": "Failed to process PDF"}, None
130
 
131
- else:
132
  # Handle direct image files
133
- base64_encoded = base64.b64encode(file_data).decode('utf-8')
134
- base64DataResp.append(f"data:{content_type};base64,{base64_encoded}")
135
- # base64_images.append(f"data:{content_type};base64,{base64_encoded}")
136
- # base64DataResp = json.dumps(base64_images) # Store as a JSON object
137
 
138
  # Prepare OpenAI request
139
  openai_content = [{"type": "image_url", "image_url": {"url": img_base64}} for img_base64 in base64_images]
 
102
  system_prompt = "You are an expert in document data extraction."
103
  base64_images = []
104
  base64DataResp = []
105
+
106
  if content_type == "application/pdf":
107
  try:
108
  extracted_text = extract_pdf_text(file_data)
109
+
110
  # Store PDF as Base64
111
  base64_pdf = base64.b64encode(file_data).decode('utf-8')
112
  base64DataResp.append(f"data:application/pdf;base64,{base64_pdf}")
113
+
114
  images = convert_from_bytes(file_data) # Convert PDF to images
115
 
116
  if len(images) > 2:
 
122
  base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
123
  base64_images.append(f"data:image/png;base64,{base64_encoded}")
124
 
 
 
 
125
  except Exception as e:
126
  logger.error(f"Error converting PDF to image: {e}")
127
  return {"error": "Failed to process PDF"}, None
128
 
129
+ elif content_type.startswith("image/"):
130
  # Handle direct image files
131
+ base64_pdf = base64.b64encode(file_data).decode('utf-8')
132
+ base64DataResp.append(f"data:{content_type};base64,{base64_pdf}")
133
+ else:
134
+ return {"error": f"Unsupported file type: {content_type}"}
135
 
136
  # Prepare OpenAI request
137
  openai_content = [{"type": "image_url", "image_url": {"url": img_base64}} for img_base64 in base64_images]