Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -71,7 +71,7 @@ def query_llm(model_choice, prompt):
|
|
| 71 |
|
| 72 |
try:
|
| 73 |
with st.spinner(f"🔍 Analyzing with {model_choice}..."):
|
| 74 |
-
response = requests.post(config["api_url"], headers=headers, json=payload, timeout=90)
|
| 75 |
|
| 76 |
if response.status_code != 200:
|
| 77 |
st.error(f"🚨 API Error {response.status_code}: {response.text}")
|
|
@@ -81,17 +81,6 @@ def query_llm(model_choice, prompt):
|
|
| 81 |
content = response.json()["choices"][0]["message"]["content"]
|
| 82 |
st.session_state.last_api_response = content
|
| 83 |
st.session_state.last_api_response_raw = response.text
|
| 84 |
-
|
| 85 |
-
# Enhanced JSON repair for Llama 4
|
| 86 |
-
if model_choice == "Llama 4 Mavericks":
|
| 87 |
-
if '"line_items":' in content and not content.strip().endswith('}}'):
|
| 88 |
-
st.warning("⚠️ Detected incomplete response. Attempting repair...")
|
| 89 |
-
# Try to salvage what we have
|
| 90 |
-
if '"line_items": [' in content:
|
| 91 |
-
content = content.split('"line_items": [')[0] + '"line_items": []}}'
|
| 92 |
-
else:
|
| 93 |
-
content = content.split('"line_items":')[0] + '"line_items": []}}'
|
| 94 |
-
|
| 95 |
return content
|
| 96 |
except KeyError as e:
|
| 97 |
st.error(f"KeyError in response: {e}\nFull response: {response.json()}")
|
|
@@ -102,36 +91,55 @@ def query_llm(model_choice, prompt):
|
|
| 102 |
return None
|
| 103 |
|
| 104 |
def clean_json_response(text):
|
| 105 |
-
"""Improved JSON extraction with
|
| 106 |
if not text:
|
| 107 |
return None
|
| 108 |
|
| 109 |
-
#
|
| 110 |
-
if '"line_items":' in text:
|
| 111 |
-
if not text.strip().endswith('}}'):
|
| 112 |
-
# Case 1: Line items started but not finished
|
| 113 |
-
if '"line_items": [' in text and not text.strip().endswith(']}'):
|
| 114 |
-
text = text.split('"line_items": [')[0] + '"line_items": []}}'
|
| 115 |
-
# Case 2: Line items key exists but no array
|
| 116 |
-
else:
|
| 117 |
-
text = text.split('"line_items":')[0] + '"line_items": []}}'
|
| 118 |
-
|
| 119 |
-
# Try parsing the (possibly repaired) JSON
|
| 120 |
try:
|
| 121 |
data = json.loads(text)
|
| 122 |
-
|
| 123 |
-
# Ensure line_items exists even if empty
|
| 124 |
-
if "line_items" not in data:
|
| 125 |
-
if "invoice_header" in data: # Llama 4 structure
|
| 126 |
-
data["line_items"] = []
|
| 127 |
-
else: # DeepSeek structure
|
| 128 |
-
data["line_items"] = []
|
| 129 |
-
|
| 130 |
return data
|
| 131 |
-
except json.JSONDecodeError
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
return None
|
| 134 |
|
|
|
|
|
|
|
| 135 |
def get_extraction_prompt(model_choice, text):
|
| 136 |
"""Return the appropriate prompt based on model choice"""
|
| 137 |
if model_choice == "DeepSeek v3":
|
|
@@ -210,6 +218,8 @@ Rules:
|
|
| 210 |
Invoice Text:
|
| 211 |
""" + text
|
| 212 |
|
|
|
|
|
|
|
| 213 |
def format_currency(value):
|
| 214 |
"""Helper function to format currency values consistently"""
|
| 215 |
if not value:
|
|
|
|
| 71 |
|
| 72 |
try:
|
| 73 |
with st.spinner(f"🔍 Analyzing with {model_choice}..."):
|
| 74 |
+
response = requests.post(config["api_url"], headers=headers, json=payload, timeout=90)
|
| 75 |
|
| 76 |
if response.status_code != 200:
|
| 77 |
st.error(f"🚨 API Error {response.status_code}: {response.text}")
|
|
|
|
| 81 |
content = response.json()["choices"][0]["message"]["content"]
|
| 82 |
st.session_state.last_api_response = content
|
| 83 |
st.session_state.last_api_response_raw = response.text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
return content
|
| 85 |
except KeyError as e:
|
| 86 |
st.error(f"KeyError in response: {e}\nFull response: {response.json()}")
|
|
|
|
| 91 |
return None
|
| 92 |
|
| 93 |
def clean_json_response(text):
    """Extract a JSON value from an LLM response, repairing truncation if needed.

    Strategies are tried in order and the first one that parses wins:

    1. Parse ``text`` directly.
    2. Parse the object inside a Markdown code fence (with or without a
       ``json`` language tag).
    3. Parse the slice from the first ``{`` to the last ``}``.
    4. If the text mentions both ``"invoice_header":`` and ``"line_items":``,
       treat it as a truncated response: cut it back to the last complete
       value and close every container that is still open.

    Args:
        text: Raw response text. Falsy values (``None``, ``""``) yield ``None``.

    Returns:
        The parsed JSON value, or ``None`` when nothing parseable was found.
        When strategy 4 is attempted and fails, a Streamlit warning is shown
        before ``None`` is returned.
    """
    if not text:
        return None

    # 1. First attempt to parse directly.
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # 2. Try to extract JSON from a Markdown fence. ``\s*`` tolerates CRLF line
    #    endings and stray spaces around the fence markers.
    fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
    if fenced:
        try:
            return json.loads(fenced.group(1))
        except json.JSONDecodeError:
            pass

    # 3. Try the outermost brace-delimited slice (JSON embedded in prose).
    start_idx = text.find('{')
    end_idx = text.rfind('}')
    if start_idx != -1 and end_idx > start_idx:
        try:
            return json.loads(text[start_idx:end_idx + 1])
        except json.JSONDecodeError:
            pass

    # 4. Final fallback - repair a truncated invoice payload. Guarded by the
    #    same two markers as before so unrelated text still yields None quietly.
    if start_idx != -1 and '"invoice_header":' in text and '"line_items":' in text:
        try:
            return json.loads(_close_truncated_json(text[start_idx:]))
        except json.JSONDecodeError as e:
            st.warning(f"Could not fully reconstruct JSON: {str(e)}")
            return None

    return None


def _close_truncated_json(fragment):
    """Cut ``fragment`` back to its last complete value and close open brackets.

    The scan tracks string literals (so brackets inside strings are ignored)
    and a stack of pending closers. A "safe" cut position is recorded after
    every closing bracket and after every opening ``[``; cutting there and
    appending the pending closers drops any half-written trailing element.
    An opening ``{`` is deliberately not a safe point, so a half-written
    object is discarded instead of being kept as an empty ``{}``.

    Returns the repaired text; it is the caller's job to parse it.
    """
    closer_for = {'{': '}', '[': ']'}
    pending = []          # closers still owed, innermost last
    in_string = False
    escaped = False
    cut = 0               # end of the last safe prefix
    tail = ''             # closers to append after that prefix

    for pos, ch in enumerate(fragment):
        if in_string:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
            continue

        if ch == '"':
            in_string = True
        elif ch in closer_for:
            pending.append(closer_for[ch])
            if ch == '[':
                cut, tail = pos + 1, ''.join(reversed(pending))
        elif ch in '}]':
            if not pending or pending[-1] != ch:
                break  # mismatched closer: keep what was safe so far
            pending.pop()
            cut, tail = pos + 1, ''.join(reversed(pending))
            if not pending:
                break  # top-level value is complete; ignore trailing text

    return fragment[:cut] + tail
|
| 142 |
+
|
| 143 |
def get_extraction_prompt(model_choice, text):
|
| 144 |
"""Return the appropriate prompt based on model choice"""
|
| 145 |
if model_choice == "DeepSeek v3":
|
|
|
|
| 218 |
Invoice Text:
|
| 219 |
""" + text
|
| 220 |
|
| 221 |
+
# [Rest of the code remains exactly the same from the previous version...]
|
| 222 |
+
|
| 223 |
def format_currency(value):
|
| 224 |
"""Helper function to format currency values consistently"""
|
| 225 |
if not value:
|