Seth0330 committed on
Commit
e31081a
·
verified ·
1 Parent(s): ccfa357

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -35
app.py CHANGED
@@ -36,6 +36,16 @@ MODELS = {
36
  "HTTP-Referer": "https://huggingface.co",
37
  "X-Title": "Invoice Extractor"
38
  }
 
 
 
 
 
 
 
 
 
 
39
  }
40
  }
41
 
@@ -142,6 +152,37 @@ def clean_json_response(text):
142
 
143
  def get_extraction_prompt(model_choice, text):
144
  """Return the appropriate prompt based on model choice"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  if model_choice == "DeepSeek v3":
146
  return f"""Extract complete invoice information from the text below and return ONLY a valid JSON object with these fields:
147
  {{
@@ -186,37 +227,8 @@ Invoice Text:
186
  Invoice Text:
187
  """ + text
188
 
189
- else: # Llama 4 Mavericks
190
- return f"""Extract complete invoice information and return a VALID JSON object with these fields:
191
- {{
192
- "invoice_header": {{
193
- "invoice_number": "string",
194
- "invoice_date": "YYYY-MM-DD",
195
- "po_number": "string or null",
196
- "invoice_value": "string with currency",
197
- "supplier_name": "string or null",
198
- "customer_name": "string or null"
199
- }},
200
- "line_items": [
201
- {{
202
- "item_number": "string or null",
203
- "description": "string",
204
- "quantity": "number",
205
- "unit_price": "string with currency",
206
- "total_price": "string with currency"
207
- }}
208
- ]
209
- }}
210
- Rules:
211
- 1. Return ONLY valid JSON (no additional text or markdown)
212
- 2. Use null for missing fields
213
- 3. Date format must be YYYY-MM-DD
214
- 4. All currency values must include currency symbol or code
215
- 5. Include all line items found in the invoice
216
- 6. For line items, quantity should be a number, prices as strings with currency
217
- 7. Do not include any explanations or notes
218
- Invoice Text:
219
- """ + text
220
 
221
  def format_currency(value):
222
  """Helper function to format currency values consistently"""
@@ -234,8 +246,8 @@ def display_line_items(line_items, model_choice="DeepSeek v3"):
234
 
235
  st.subheader("📋 Line Items")
236
 
237
- if model_choice == "Llama 4 Mavericks":
238
- # Display as a table for Llama
239
  items_display = []
240
  for idx, item in enumerate(line_items, 1):
241
  items_display.append({
@@ -267,7 +279,7 @@ def display_invoice_data(model_choice, invoice_data):
267
  if not invoice_data:
268
  return
269
 
270
- if model_choice == "Llama 4 Mavericks":
271
  # Display header information
272
  st.subheader("Invoice Summary")
273
  header = invoice_data.get("invoice_header", {})
@@ -326,7 +338,7 @@ def extract_invoice_info(model_choice, text):
326
  return None
327
 
328
  # Normalize data structure based on model
329
- if model_choice == "Llama 4 Mavericks":
330
  if "invoice_header" not in parsed_data:
331
  parsed_data["invoice_header"] = {}
332
  if "line_items" not in parsed_data:
 
36
  "HTTP-Referer": "https://huggingface.co",
37
  "X-Title": "Invoice Extractor"
38
  }
39
+ },
40
+ "Mistral Small": {
41
+ "api_url": "https://openrouter.ai/api/v1/chat/completions",
42
+ "model_name": "mistralai/mistral-small-3.1-24b-instruct:free",
43
+ "api_key_env": "OPENROUTER_API_KEY",
44
+ "response_format": {"type": "json_object"},
45
+ "extra_headers": {
46
+ "HTTP-Referer": "https://huggingface.co",
47
+ "X-Title": "Invoice Extractor"
48
+ }
49
  }
50
  }
51
 
 
152
 
153
  def get_extraction_prompt(model_choice, text):
154
  """Return the appropriate prompt based on model choice"""
155
+ base_prompt = """Extract complete invoice information and return a VALID JSON object with these fields:
156
+ {
157
+ "invoice_header": {
158
+ "invoice_number": "string",
159
+ "invoice_date": "YYYY-MM-DD",
160
+ "po_number": "string or null",
161
+ "invoice_value": "string with currency",
162
+ "supplier_name": "string or null",
163
+ "customer_name": "string or null"
164
+ },
165
+ "line_items": [
166
+ {
167
+ "item_number": "string or null",
168
+ "description": "string",
169
+ "quantity": "number",
170
+ "unit_price": "string with currency",
171
+ "total_price": "string with currency"
172
+ }
173
+ ]
174
+ }
175
+ Rules:
176
+ 1. Return ONLY valid JSON (no additional text or markdown)
177
+ 2. Use null for missing fields
178
+ 3. Date format must be YYYY-MM-DD
179
+ 4. All currency values must include currency symbol or code
180
+ 5. Include all line items found in the invoice
181
+ 6. For line items, quantity should be a number, prices as strings with currency
182
+ 7. Do not include any explanations or notes
183
+ Invoice Text:
184
+ """ + text
185
+
186
  if model_choice == "DeepSeek v3":
187
  return f"""Extract complete invoice information from the text below and return ONLY a valid JSON object with these fields:
188
  {{
 
227
  Invoice Text:
228
  """ + text
229
 
230
+ else: # For Llama 4 and Mistral
231
+ return base_prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
233
  def format_currency(value):
234
  """Helper function to format currency values consistently"""
 
246
 
247
  st.subheader("📋 Line Items")
248
 
249
+ if model_choice in ["Llama 4 Mavericks", "Mistral Small"]:
250
+ # Display as a table for Llama/Mistral
251
  items_display = []
252
  for idx, item in enumerate(line_items, 1):
253
  items_display.append({
 
279
  if not invoice_data:
280
  return
281
 
282
+ if model_choice in ["Llama 4 Mavericks", "Mistral Small"]:
283
  # Display header information
284
  st.subheader("Invoice Summary")
285
  header = invoice_data.get("invoice_header", {})
 
338
  return None
339
 
340
  # Normalize data structure based on model
341
+ if model_choice in ["Llama 4 Mavericks", "Mistral Small"]:
342
  if "invoice_header" not in parsed_data:
343
  parsed_data["invoice_header"] = {}
344
  if "line_items" not in parsed_data: