vkumartr committed on
Commit
6761af7
·
verified ·
1 Parent(s): acc3d5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -2
app.py CHANGED
@@ -3,7 +3,6 @@ from fastapi.staticfiles import StaticFiles
3
  import hashlib
4
  from enum import Enum
5
  from fastapi import FastAPI, Header, Query, Depends, HTTPException
6
- from PIL import Image
7
  from pdf2image import convert_from_bytes
8
  import io
9
  import fitz # PyMuPDF for PDF handling
@@ -100,7 +99,28 @@ def extract_invoice_data(file_data, content_type, json_schema):
100
  Extracts invoice data from PDFs (text-based) and images using OpenAI's GPT-4o-mini model.
101
  Ensures accurate JSON schema binding.
102
  """
103
- system_prompt = "You are an expert in invoice data extraction."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  base64_images = []
106
  base64DataResp = []
 
3
  import hashlib
4
  from enum import Enum
5
  from fastapi import FastAPI, Header, Query, Depends, HTTPException
 
6
  from pdf2image import convert_from_bytes
7
  import io
8
  import fitz # PyMuPDF for PDF handling
 
99
  Extracts invoice data from PDFs (text-based) and images using OpenAI's GPT-4o-mini model.
100
  Ensures accurate JSON schema binding.
101
  """
102
+ system_prompt = """You are an expert in invoice data extraction.
103
+ Your task is to extract key fields from an invoice image. Ensure accurate extraction and return the data in JSON format.
104
+
105
+ Extract the following fields:
106
+ 1. Line Items: A list containing:
107
+ - Product Code
108
+ - Description
109
+ - Amount (numeric)
110
+ 2. Tax Amount (if available)
111
+ 3. Vendor GST (if available)
112
+ 4. Vendor Name
113
+ 5. Invoice Date (format: "DD-MMM-YYYY")
114
+ 6. Total Amount (numeric)
115
+ 7. Invoice Number (alpha-numeric)
116
+ 8. Vendor Address
117
+ 9. Invoice Currency
118
+
119
+ Ensure that:
120
+ - All extracted fields match the invoice.
121
+ - If any field is missing, return null instead of hallucinating data.
122
+ - Do not generate synthetic values—only extract real information from the image.
123
+ """
124
 
125
  base64_images = []
126
  base64DataResp = []