Seth0330 committed on
Commit
8c52b14
·
verified ·
1 Parent(s): 0eb1833

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -5
app.py CHANGED
@@ -116,15 +116,49 @@ def fallback_supplier(text):
116
  return None
117
 
118
  def get_extraction_prompt(model_choice, txt):
119
- # New, broad prompt for all models:
120
  return (
121
- "Extract all possible metadata fields from the following invoice, including but not limited to header information, supplier and customer details, payment terms, tax details, references, and every possible line item with all available attributes. "
122
- "Return a detailed JSON object containing every field you can identify, and make sure to include all line items as an array. "
123
- "If any field is missing in the invoice, use null. Do not add any explanation or extra text outside the JSON. "
124
- "\n\nInvoice Text:\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  f"{txt}"
126
  )
127
 
 
128
  def extract_invoice_info(model_choice, text):
129
  prompt = get_extraction_prompt(model_choice, text)
130
  raw = query_llm(model_choice, prompt)
 
116
  return None
117
 
118
  def get_extraction_prompt(model_choice, txt):
119
+ # Example output shows both header & line items
120
  return (
121
+ "Extract every possible piece of metadata and detail from the following invoice text—including all header information, supplier details, customer details, addresses, invoice numbers, dates, tax information, payment terms, references, summary totals, and a full list of line items with as many columns as possible. "
122
+ "Return a structured JSON with two keys: 'invoice_header' (an object with all header fields found) and 'line_items' (an array of all detected line items and their attributes). "
123
+ "If any field is not present, use null. Do not invent/hallucinate fields not present. "
124
+ "Your output must match the format of this example (but include only fields found in the invoice):\n"
125
+ '{\n'
126
+ ' "invoice_header": {\n'
127
+ ' "invoice_number": "string or null",\n'
128
+ ' "invoice_date": "string or null",\n'
129
+ ' "supplier_name": "string or null",\n'
130
+ ' "supplier_address": "string or null",\n'
131
+ ' "customer_name": "string or null",\n'
132
+ ' "customer_address": "string or null",\n'
133
+ ' "po_number": "string or null",\n'
134
+ ' "tax_id": "string or null",\n'
135
+ ' "payment_terms": "string or null",\n'
136
+ ' "total_before_tax": "string or null",\n'
137
+ ' "tax_amount": "string or null",\n'
138
+ ' "total_due": "string or null",\n'
139
+ ' "currency": "string or null",\n'
140
+ ' "due_date": "string or null",\n'
141
+ ' "any_other_metadata": "string or null"\n'
142
+ ' },\n'
143
+ ' "line_items": [\n'
144
+ ' {\n'
145
+ ' "item_number": "string or null",\n'
146
+ ' "description": "string or null",\n'
147
+ ' "quantity": "string or null",\n'
148
+ ' "unit_price": "string or null",\n'
149
+ ' "total_price": "string or null",\n'
150
+ ' "tax_rate": "string or null",\n'
151
+ ' "sku": "string or null",\n'
152
+ ' "any_other_line_item_field": "string or null"\n'
153
+ ' }\n'
154
+ ' ]\n'
155
+ '}'
156
+ "\nReturn ONLY the JSON object, no explanations.\n"
157
+ "\nInvoice Text:\n"
158
  f"{txt}"
159
  )
160
 
161
+
162
  def extract_invoice_info(model_choice, text):
163
  prompt = get_extraction_prompt(model_choice, text)
164
  raw = query_llm(model_choice, prompt)