vkumartr committed on
Commit
464cd64
·
verified ·
1 Parent(s): ff448cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -2
app.py CHANGED
@@ -80,6 +80,16 @@ def fetch_file_from_s3(file_key):
80
  except Exception as e:
81
  raise Exception(f"Failed to fetch file from S3: {str(e)}")
82
 
 
 
 
 
 
 
 
 
 
 
83
  # Function to extract invoice data from a document using OpenAI GPT
84
  def extract_invoice_data(file_data, content_type):
85
  system_prompt = "You are an expert in document data extraction."
@@ -91,7 +101,9 @@ def extract_invoice_data(file_data, content_type):
91
  if content_type.startswith("image/"):
92
  mime_type = content_type # e.g., image/png, image/jpeg
93
  elif content_type == "application/pdf":
94
- mime_type = content_type
 
 
95
  else:
96
  raise ValueError(f"Unsupported content type: {content_type}")
97
 
@@ -106,7 +118,10 @@ def extract_invoice_data(file_data, content_type):
106
  {
107
  "type": "image_url",
108
  "image_url": {
109
- "url": f"data:{mime_type};base64,{base64_encoded}"
 
 
 
110
  }
111
  }
112
  ]
 
80
  except Exception as e:
81
  raise Exception(f"Failed to fetch file from S3: {str(e)}")
82
 
83
+ # def extract_text_from_pdf(file_data):
84
+ # """Extracts text from a PDF file."""
85
+ # try:
86
+ # doc = fitz.open(stream=file_data, filetype="pdf")
87
+ # text = "\n".join([page.get_text("text") for page in doc])
88
+ # return text.strip()
89
+ # except Exception as e:
90
+ # logger.error(f"Failed to extract text from PDF: {e}")
91
+ # return ""
92
+
93
  # Function to extract invoice data from a document using OpenAI GPT
94
  def extract_invoice_data(file_data, content_type):
95
  system_prompt = "You are an expert in document data extraction."
 
101
  if content_type.startswith("image/"):
102
  mime_type = content_type # e.g., image/png, image/jpeg
103
  elif content_type == "application/pdf":
104
+ mime_type = "application/pdf"
105
+ # text = extract_text_from_pdf(file_data)
106
+ # mime_type = [{"role": "user", "content": text}]
107
  else:
108
  raise ValueError(f"Unsupported content type: {content_type}")
109
 
 
118
  {
119
  "type": "image_url",
120
  "image_url": {
121
+ "url": f"data:image/{mime_type};base64,{base64_encoded}"
122
+ },
123
+ "image_url": {
124
+ "url": f"data:application/pdf;base64,{base64_encoded}"
125
  }
126
  }
127
  ]