vkumartr committed on
Commit
78199be
·
verified ·
1 Parent(s): a5871da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -99,7 +99,7 @@ def extract_invoice_data(file_data, content_type, json_schema):
99
  Extracts data from a PDF (converted to images) or an image.
100
  Only PDFs with 1 or 2 pages are allowed.
101
  """
102
- system_prompt = "You are an expert in document data extraction."
103
  base64_images = []
104
 
105
  if content_type == "application/pdf":
@@ -107,13 +107,13 @@ def extract_invoice_data(file_data, content_type, json_schema):
107
  images = convert_from_bytes(file_data) # Convert PDF to images
108
 
109
  if len(images) > 2:
110
- raise ValueError("PDF contains more than 2 pages.")
111
 
112
- for img in images[:2]:
113
  img_byte_arr = io.BytesIO()
114
- img.save(img_byte_arr, format="PNG", dpi=(300, 300))
115
  base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
116
- base64_images.append(f"data:image/png;base64,{base64_encoded}")
117
 
118
  except Exception as e:
119
  logger.error(f"Error converting PDF to image: {e}")
 
99
  Extracts data from a PDF (converted to images) or an image.
100
  Only PDFs with 1 or 2 pages are allowed.
101
  """
102
+ system_prompt = "You are an expert in document data extraction. Extract relevant fields from the document and return structured JSON based on the provided schema."
103
  base64_images = []
104
 
105
  if content_type == "application/pdf":
 
107
  images = convert_from_bytes(file_data) # Convert PDF to images
108
 
109
  if len(images) > 2:
110
+ raise ValueError("PDF contains more than 2 pages. Only PDFs with 1 or 2 pages are supported.")
111
 
112
+ for img in images[:2]: # Convert up to 2 pages
113
  img_byte_arr = io.BytesIO()
114
+ img.save(img_byte_arr, format="PNG")
115
  base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
116
+ base64_images.append(f"data:{content_type};base64,{base64_encoded}")
117
 
118
  except Exception as e:
119
  logger.error(f"Error converting PDF to image: {e}")