Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -68,7 +68,10 @@ def query_llm(model_choice, prompt):
|
|
| 68 |
with st.spinner(f"🔍 Querying {model_choice}..."):
|
| 69 |
r = requests.post(cfg["api_url"], headers=headers, json=payload, timeout=90)
|
| 70 |
if r.status_code != 200:
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
| 72 |
return None
|
| 73 |
content = r.json()["choices"][0]["message"]["content"]
|
| 74 |
st.session_state.last_api = content
|
|
@@ -113,26 +116,14 @@ def fallback_supplier(text):
|
|
| 113 |
return None
|
| 114 |
|
| 115 |
def get_extraction_prompt(model_choice, txt):
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
)
|
| 125 |
-
else:
|
| 126 |
-
return (
|
| 127 |
-
"Extract invoice data and RETURN ONLY a compact, one-line json object exactly:\n"
|
| 128 |
-
'{"invoice_header":{"invoice_number":"string","invoice_date":"YYYY-MM-DD",'
|
| 129 |
-
'"po_number":"string|null","invoice_value":"string with currency",'
|
| 130 |
-
'"supplier_name":"string|null","customer_name":"string|null"},'
|
| 131 |
-
'"line_items":[{"item_number":"string|null","description":"string","quantity":number,'
|
| 132 |
-
'"unit_price":"string with currency","total_price":"string with currency"}]}\n'
|
| 133 |
-
"Use null for missing. NO extras.\n\n"
|
| 134 |
-
f"Invoice Text:\n{txt}"
|
| 135 |
-
)
|
| 136 |
|
| 137 |
def extract_invoice_info(model_choice, text):
|
| 138 |
prompt = get_extraction_prompt(model_choice, text)
|
|
@@ -145,23 +136,32 @@ def extract_invoice_info(model_choice, text):
|
|
| 145 |
|
| 146 |
# DeepSeek models: flat format
|
| 147 |
if model_choice.startswith("DeepSeek"):
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
|
|
|
| 152 |
for k in ("description","quantity","unit_price","total_price"):
|
| 153 |
itm.setdefault(k, None)
|
| 154 |
return data
|
| 155 |
-
# Other models (OpenAI GPT-4.1, Mistral): nested
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
for k in ("invoice_number","invoice_date","po_number","invoice_value","supplier_name","customer_name"):
|
| 158 |
hdr.setdefault(k, None)
|
| 159 |
if not hdr.get("supplier_name"):
|
| 160 |
hdr["supplier_name"] = fallback_supplier(text)
|
| 161 |
-
items = data.
|
| 162 |
for itm in items:
|
|
|
|
|
|
|
| 163 |
for k in ("item_number","description","quantity","unit_price","total_price"):
|
| 164 |
itm.setdefault(k, None)
|
|
|
|
|
|
|
| 165 |
|
| 166 |
return data
|
| 167 |
|
|
@@ -188,25 +188,31 @@ with tab2:
|
|
| 188 |
info = extract_invoice_info(mdl, txt)
|
| 189 |
if info:
|
| 190 |
st.success("Extraction Complete")
|
|
|
|
| 191 |
if mdl.startswith("DeepSeek"):
|
|
|
|
|
|
|
| 192 |
c1, c2 = st.columns(2)
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
c2.metric("Date", info["invoice_date"])
|
| 196 |
-
c2.metric("Value", info["invoice_value"])
|
| 197 |
st.subheader("Line Items")
|
| 198 |
-
st.table(info
|
| 199 |
else:
|
| 200 |
-
h = info
|
| 201 |
c1, c2, c3 = st.columns(3)
|
| 202 |
-
c1.metric("Invoice #", h
|
| 203 |
-
c1.metric("Supplier", h
|
| 204 |
-
c2.metric("Date", h
|
| 205 |
-
c2.metric("Customer", h
|
| 206 |
-
c3.metric("PO #", h
|
| 207 |
-
c3.metric("Total", h
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
st.subheader("Line Items")
|
| 209 |
-
st.table(info
|
| 210 |
|
| 211 |
if "last_api" in st.session_state:
|
| 212 |
with st.expander("Debug"):
|
|
|
|
| 68 |
with st.spinner(f"🔍 Querying {model_choice}..."):
|
| 69 |
r = requests.post(cfg["api_url"], headers=headers, json=payload, timeout=90)
|
| 70 |
if r.status_code != 200:
|
| 71 |
+
if "No instances available" in r.text or r.status_code == 503:
|
| 72 |
+
st.error(f"{model_choice} is currently unavailable. Please try again later or select another model.")
|
| 73 |
+
else:
|
| 74 |
+
st.error(f"🚨 API Error {r.status_code}: {r.text}")
|
| 75 |
return None
|
| 76 |
content = r.json()["choices"][0]["message"]["content"]
|
| 77 |
st.session_state.last_api = content
|
|
|
|
| 116 |
return None
|
| 117 |
|
| 118 |
def get_extraction_prompt(model_choice, txt):
    """Build the invoice-extraction prompt sent to the LLM.

    One broad prompt is used for every model. ``model_choice`` is kept in
    the signature so existing callers keep working, but it is not consulted.

    Args:
        model_choice: Name of the selected model (unused).
        txt: Invoice text appended after the instructions.

    Returns:
        The instruction text followed by an "Invoice Text:" section that
        contains ``txt``.
    """
    # Each sentence ends with a space so the pieces join into one paragraph.
    instructions = (
        "Extract all possible metadata fields from the following invoice, including but not limited to header information, supplier and customer details, payment terms, tax details, references, and every possible line item with all available attributes. "
        "Return a detailed JSON object containing every field you can identify, and make sure to include all line items as an array. "
        "If any field is missing in the invoice, use null. Do not add any explanation or extra text outside the JSON. "
    )
    return f"{instructions}\n\nInvoice Text:\n{txt}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
def extract_invoice_info(model_choice, text):
|
| 129 |
prompt = get_extraction_prompt(model_choice, text)
|
|
|
|
| 136 |
|
| 137 |
# DeepSeek models: flat format
|
| 138 |
if model_choice.startswith("DeepSeek"):
|
| 139 |
+
# Dynamically handle flat or semi-structured output (may contain any fields)
|
| 140 |
+
data.setdefault("line_items", [])
|
| 141 |
+
for itm in data["line_items"]:
|
| 142 |
+
if not isinstance(itm, dict):
|
| 143 |
+
continue
|
| 144 |
for k in ("description","quantity","unit_price","total_price"):
|
| 145 |
itm.setdefault(k, None)
|
| 146 |
return data
|
| 147 |
+
# Other models (OpenAI GPT-4.1, Mistral): usually nested under invoice_header, but now prompt is broader, so handle flexibly
|
| 148 |
+
# Accepts a flexible schema as model may include extra keys
|
| 149 |
+
hdr = data.get("invoice_header", {})
|
| 150 |
+
if not hdr and any(k in data for k in ("invoice_number","supplier_name","customer_name")):
|
| 151 |
+
# If model returned flat, treat top-level keys as header
|
| 152 |
+
hdr = data
|
| 153 |
for k in ("invoice_number","invoice_date","po_number","invoice_value","supplier_name","customer_name"):
|
| 154 |
hdr.setdefault(k, None)
|
| 155 |
if not hdr.get("supplier_name"):
|
| 156 |
hdr["supplier_name"] = fallback_supplier(text)
|
| 157 |
+
items = data.get("line_items", [])
|
| 158 |
for itm in items:
|
| 159 |
+
if not isinstance(itm, dict):
|
| 160 |
+
continue
|
| 161 |
for k in ("item_number","description","quantity","unit_price","total_price"):
|
| 162 |
itm.setdefault(k, None)
|
| 163 |
+
data["invoice_header"] = hdr
|
| 164 |
+
data["line_items"] = items
|
| 165 |
|
| 166 |
return data
|
| 167 |
|
|
|
|
| 188 |
info = extract_invoice_info(mdl, txt)
|
| 189 |
if info:
|
| 190 |
st.success("Extraction Complete")
|
| 191 |
+
# For DeepSeek, output may be flat; for others, prefer "invoice_header" nesting
|
| 192 |
if mdl.startswith("DeepSeek"):
|
| 193 |
+
# Show all keys except line_items
|
| 194 |
+
non_items = {k: v for k, v in info.items() if k != "line_items"}
|
| 195 |
c1, c2 = st.columns(2)
|
| 196 |
+
for i, (k, v) in enumerate(non_items.items()):
|
| 197 |
+
(c1 if i % 2 == 0 else c2).metric(k.replace("_", " ").title(), v)
|
|
|
|
|
|
|
| 198 |
st.subheader("Line Items")
|
| 199 |
+
st.table(info.get("line_items", []))
|
| 200 |
else:
|
| 201 |
+
h = info.get("invoice_header", {})
|
| 202 |
c1, c2, c3 = st.columns(3)
|
| 203 |
+
c1.metric("Invoice #", h.get("invoice_number"))
|
| 204 |
+
c1.metric("Supplier", h.get("supplier_name"))
|
| 205 |
+
c2.metric("Date", h.get("invoice_date"))
|
| 206 |
+
c2.metric("Customer", h.get("customer_name"))
|
| 207 |
+
c3.metric("PO #", h.get("po_number"))
|
| 208 |
+
c3.metric("Total", h.get("invoice_value"))
|
| 209 |
+
# Show any additional header fields detected
|
| 210 |
+
extra_fields = {k: v for k, v in h.items() if k not in ("invoice_number", "supplier_name", "customer_name", "invoice_date", "po_number", "invoice_value")}
|
| 211 |
+
if extra_fields:
|
| 212 |
+
st.subheader("Additional Header Metadata")
|
| 213 |
+
st.json(extra_fields)
|
| 214 |
st.subheader("Line Items")
|
| 215 |
+
st.table(info.get("line_items", []))
|
| 216 |
|
| 217 |
if "last_api" in st.session_state:
|
| 218 |
with st.expander("Debug"):
|