Seth0330 committed on
Commit
0eb1833
·
verified ·
1 Parent(s): c241ea6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -41
app.py CHANGED
@@ -68,7 +68,10 @@ def query_llm(model_choice, prompt):
68
  with st.spinner(f"🔍 Querying {model_choice}..."):
69
  r = requests.post(cfg["api_url"], headers=headers, json=payload, timeout=90)
70
  if r.status_code != 200:
71
- st.error(f"🚨 API Error {r.status_code}: {r.text}")
 
 
 
72
  return None
73
  content = r.json()["choices"][0]["message"]["content"]
74
  st.session_state.last_api = content
@@ -113,26 +116,14 @@ def fallback_supplier(text):
113
  return None
114
 
115
  def get_extraction_prompt(model_choice, txt):
116
- if model_choice.startswith("DeepSeek"):
117
- return (
118
- "Extract full invoice info and RETURN ONLY a single-line json object with fields:\n"
119
- '{"invoice_number":"string","invoice_date":"YYYY-MM-DD",'
120
- '"po_number":"string|null","invoice_value":"string with currency",'
121
- '"line_items":[{"description":"string","quantity":"number","unit_price":"string with currency","total_price":"string with currency"}]}\n'
122
- "Use null for missing. NO extra text.\n\n"
123
- f"Invoice Text:\n{txt}"
124
- )
125
- else:
126
- return (
127
- "Extract invoice data and RETURN ONLY a compact, one-line json object exactly:\n"
128
- '{"invoice_header":{"invoice_number":"string","invoice_date":"YYYY-MM-DD",'
129
- '"po_number":"string|null","invoice_value":"string with currency",'
130
- '"supplier_name":"string|null","customer_name":"string|null"},'
131
- '"line_items":[{"item_number":"string|null","description":"string","quantity":number,'
132
- '"unit_price":"string with currency","total_price":"string with currency"}]}\n'
133
- "Use null for missing. NO extras.\n\n"
134
- f"Invoice Text:\n{txt}"
135
- )
136
 
137
  def extract_invoice_info(model_choice, text):
138
  prompt = get_extraction_prompt(model_choice, text)
@@ -145,23 +136,32 @@ def extract_invoice_info(model_choice, text):
145
 
146
  # DeepSeek models: flat format
147
  if model_choice.startswith("DeepSeek"):
148
- for k in ("invoice_number","invoice_date","po_number","invoice_value"):
149
- data.setdefault(k, None)
150
- items = data.setdefault("line_items", [])
151
- for itm in items:
 
152
  for k in ("description","quantity","unit_price","total_price"):
153
  itm.setdefault(k, None)
154
  return data
155
- # Other models (OpenAI GPT-4.1, Mistral): nested format
156
- hdr = data.setdefault("invoice_header", {})
 
 
 
 
157
  for k in ("invoice_number","invoice_date","po_number","invoice_value","supplier_name","customer_name"):
158
  hdr.setdefault(k, None)
159
  if not hdr.get("supplier_name"):
160
  hdr["supplier_name"] = fallback_supplier(text)
161
- items = data.setdefault("line_items", [])
162
  for itm in items:
 
 
163
  for k in ("item_number","description","quantity","unit_price","total_price"):
164
  itm.setdefault(k, None)
 
 
165
 
166
  return data
167
 
@@ -188,25 +188,31 @@ with tab2:
188
  info = extract_invoice_info(mdl, txt)
189
  if info:
190
  st.success("Extraction Complete")
 
191
  if mdl.startswith("DeepSeek"):
 
 
192
  c1, c2 = st.columns(2)
193
- c1.metric("Invoice #", info["invoice_number"])
194
- c1.metric("PO #", info["po_number"])
195
- c2.metric("Date", info["invoice_date"])
196
- c2.metric("Value", info["invoice_value"])
197
  st.subheader("Line Items")
198
- st.table(info["line_items"])
199
  else:
200
- h = info["invoice_header"]
201
  c1, c2, c3 = st.columns(3)
202
- c1.metric("Invoice #", h["invoice_number"])
203
- c1.metric("Supplier", h["supplier_name"])
204
- c2.metric("Date", h["invoice_date"])
205
- c2.metric("Customer", h["customer_name"])
206
- c3.metric("PO #", h["po_number"])
207
- c3.metric("Total", h["invoice_value"])
 
 
 
 
 
208
  st.subheader("Line Items")
209
- st.table(info["line_items"])
210
 
211
  if "last_api" in st.session_state:
212
  with st.expander("Debug"):
 
68
  with st.spinner(f"🔍 Querying {model_choice}..."):
69
  r = requests.post(cfg["api_url"], headers=headers, json=payload, timeout=90)
70
  if r.status_code != 200:
71
+ if "No instances available" in r.text or r.status_code == 503:
72
+ st.error(f"{model_choice} is currently unavailable. Please try again later or select another model.")
73
+ else:
74
+ st.error(f"🚨 API Error {r.status_code}: {r.text}")
75
  return None
76
  content = r.json()["choices"][0]["message"]["content"]
77
  st.session_state.last_api = content
 
116
  return None
117
 
118
  def get_extraction_prompt(model_choice, txt):
119
+ # New, broad prompt for all models:
120
+ return (
121
+ "Extract all possible metadata fields from the following invoice, including but not limited to header information, supplier and customer details, payment terms, tax details, references, and every possible line item with all available attributes. "
122
+ "Return a detailed JSON object containing every field you can identify, and make sure to include all line items as an array. "
123
+ "If any field is missing in the invoice, use null. Do not add any explanation or extra text outside the JSON. "
124
+ "\n\nInvoice Text:\n"
125
+ f"{txt}"
126
+ )
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  def extract_invoice_info(model_choice, text):
129
  prompt = get_extraction_prompt(model_choice, text)
 
136
 
137
  # DeepSeek models: flat format
138
  if model_choice.startswith("DeepSeek"):
139
+ # Dynamically handle flat or semi-structured output (may contain any fields)
140
+ data.setdefault("line_items", [])
141
+ for itm in data["line_items"]:
142
+ if not isinstance(itm, dict):
143
+ continue
144
  for k in ("description","quantity","unit_price","total_price"):
145
  itm.setdefault(k, None)
146
  return data
147
+ # Other models (OpenAI GPT-4.1, Mistral): usually nested under invoice_header, but now prompt is broader, so handle flexibly
148
+ # Accepts a flexible schema as model may include extra keys
149
+ hdr = data.get("invoice_header", {})
150
+ if not hdr and any(k in data for k in ("invoice_number","supplier_name","customer_name")):
151
+ # If model returned flat, treat top-level keys as header
152
+ hdr = data
153
  for k in ("invoice_number","invoice_date","po_number","invoice_value","supplier_name","customer_name"):
154
  hdr.setdefault(k, None)
155
  if not hdr.get("supplier_name"):
156
  hdr["supplier_name"] = fallback_supplier(text)
157
+ items = data.get("line_items", [])
158
  for itm in items:
159
+ if not isinstance(itm, dict):
160
+ continue
161
  for k in ("item_number","description","quantity","unit_price","total_price"):
162
  itm.setdefault(k, None)
163
+ data["invoice_header"] = hdr
164
+ data["line_items"] = items
165
 
166
  return data
167
 
 
188
  info = extract_invoice_info(mdl, txt)
189
  if info:
190
  st.success("Extraction Complete")
191
+ # For DeepSeek, output may be flat; for others, prefer "invoice_header" nesting
192
  if mdl.startswith("DeepSeek"):
193
+ # Show all keys except line_items
194
+ non_items = {k: v for k, v in info.items() if k != "line_items"}
195
  c1, c2 = st.columns(2)
196
+ for i, (k, v) in enumerate(non_items.items()):
197
+ (c1 if i % 2 == 0 else c2).metric(k.replace("_", " ").title(), v)
 
 
198
  st.subheader("Line Items")
199
+ st.table(info.get("line_items", []))
200
  else:
201
+ h = info.get("invoice_header", {})
202
  c1, c2, c3 = st.columns(3)
203
+ c1.metric("Invoice #", h.get("invoice_number"))
204
+ c1.metric("Supplier", h.get("supplier_name"))
205
+ c2.metric("Date", h.get("invoice_date"))
206
+ c2.metric("Customer", h.get("customer_name"))
207
+ c3.metric("PO #", h.get("po_number"))
208
+ c3.metric("Total", h.get("invoice_value"))
209
+ # Show any additional header fields detected
210
+ extra_fields = {k: v for k, v in h.items() if k not in ("invoice_number", "supplier_name", "customer_name", "invoice_date", "po_number", "invoice_value")}
211
+ if extra_fields:
212
+ st.subheader("Additional Header Metadata")
213
+ st.json(extra_fields)
214
  st.subheader("Line Items")
215
+ st.table(info.get("line_items", []))
216
 
217
  if "last_api" in st.session_state:
218
  with st.expander("Debug"):