Seth0330 committed on
Commit
0be1fe6
·
verified ·
1 Parent(s): a025f40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -34
app.py CHANGED
@@ -71,7 +71,7 @@ def query_llm(model_choice, prompt):
71
 
72
  try:
73
  with st.spinner(f"🔍 Analyzing with {model_choice}..."):
74
- response = requests.post(config["api_url"], headers=headers, json=payload, timeout=90) # Increased timeout
75
 
76
  if response.status_code != 200:
77
  st.error(f"🚨 API Error {response.status_code}: {response.text}")
@@ -81,17 +81,6 @@ def query_llm(model_choice, prompt):
81
  content = response.json()["choices"][0]["message"]["content"]
82
  st.session_state.last_api_response = content
83
  st.session_state.last_api_response_raw = response.text
84
-
85
- # Enhanced JSON repair for Llama 4
86
- if model_choice == "Llama 4 Mavericks":
87
- if '"line_items":' in content and not content.strip().endswith('}}'):
88
- st.warning("⚠️ Detected incomplete response. Attempting repair...")
89
- # Try to salvage what we have
90
- if '"line_items": [' in content:
91
- content = content.split('"line_items": [')[0] + '"line_items": []}}'
92
- else:
93
- content = content.split('"line_items":')[0] + '"line_items": []}}'
94
-
95
  return content
96
  except KeyError as e:
97
  st.error(f"KeyError in response: {e}\nFull response: {response.json()}")
@@ -102,36 +91,55 @@ def query_llm(model_choice, prompt):
102
  return None
103
 
104
  def clean_json_response(text):
105
- """Improved JSON extraction with better Llama 4 handling"""
106
  if not text:
107
  return None
108
 
109
- # Enhanced repair for Llama 4's common truncation points
110
- if '"line_items":' in text:
111
- if not text.strip().endswith('}}'):
112
- # Case 1: Line items started but not finished
113
- if '"line_items": [' in text and not text.strip().endswith(']}'):
114
- text = text.split('"line_items": [')[0] + '"line_items": []}}'
115
- # Case 2: Line items key exists but no array
116
- else:
117
- text = text.split('"line_items":')[0] + '"line_items": []}}'
118
-
119
- # Try parsing the (possibly repaired) JSON
120
  try:
121
  data = json.loads(text)
122
-
123
- # Ensure line_items exists even if empty
124
- if "line_items" not in data:
125
- if "invoice_header" in data: # Llama 4 structure
126
- data["line_items"] = []
127
- else: # DeepSeek structure
128
- data["line_items"] = []
129
-
130
  return data
131
- except json.JSONDecodeError as e:
132
- st.warning(f"JSON parsing failed after repair attempts: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  return None
134
 
 
 
135
  def get_extraction_prompt(model_choice, text):
136
  """Return the appropriate prompt based on model choice"""
137
  if model_choice == "DeepSeek v3":
@@ -210,6 +218,8 @@ Rules:
210
  Invoice Text:
211
  """ + text
212
 
 
 
213
  def format_currency(value):
214
  """Helper function to format currency values consistently"""
215
  if not value:
 
71
 
72
  try:
73
  with st.spinner(f"🔍 Analyzing with {model_choice}..."):
74
+ response = requests.post(config["api_url"], headers=headers, json=payload, timeout=90)
75
 
76
  if response.status_code != 200:
77
  st.error(f"🚨 API Error {response.status_code}: {response.text}")
 
81
  content = response.json()["choices"][0]["message"]["content"]
82
  st.session_state.last_api_response = content
83
  st.session_state.last_api_response_raw = response.text
 
 
 
 
 
 
 
 
 
 
 
84
  return content
85
  except KeyError as e:
86
  st.error(f"KeyError in response: {e}\nFull response: {response.json()}")
 
91
  return None
92
 
93
  def clean_json_response(text):
94
+ """Improved JSON extraction with comprehensive error handling"""
95
  if not text:
96
  return None
97
 
98
+ # First attempt to parse directly
 
 
 
 
 
 
 
 
 
 
99
  try:
100
  data = json.loads(text)
 
 
 
 
 
 
 
 
101
  return data
102
+ except json.JSONDecodeError:
103
+ pass
104
+
105
+ # Try to extract JSON from potential markdown
106
+ json_match = re.search(r'```(?:json)?\n({.*?})\n```', text, re.DOTALL)
107
+ if json_match:
108
+ try:
109
+ return json.loads(json_match.group(1))
110
+ except json.JSONDecodeError:
111
+ pass
112
+
113
+ # Try to find any JSON-like structure
114
+ try:
115
+ start_idx = text.find('{')
116
+ end_idx = text.rfind('}') + 1
117
+ if start_idx != -1 and end_idx != 0:
118
+ return json.loads(text[start_idx:end_idx])
119
+ except:
120
+ pass
121
+
122
+ # Final fallback - manual reconstruction
123
+ try:
124
+ if '"invoice_header":' in text and '"line_items":' in text:
125
+ header_part = text.split('"line_items":')[0]
126
+ line_items_part = text.split('"line_items":')[1]
127
+
128
+ # Ensure proper closing of JSON
129
+ if not header_part.strip().endswith('{'):
130
+ header_part += '{'
131
+
132
+ if not line_items_part.strip().endswith('}}'):
133
+ line_items_part = line_items_part.split('}')[0] + ']}}'
134
+
135
+ reconstructed = header_part + '"line_items":' + line_items_part
136
+ return json.loads(reconstructed)
137
+ except Exception as e:
138
+ st.warning(f"Could not fully reconstruct JSON: {str(e)}")
139
  return None
140
 
141
+ return None
142
+
143
  def get_extraction_prompt(model_choice, text):
144
  """Return the appropriate prompt based on model choice"""
145
  if model_choice == "DeepSeek v3":
 
218
  Invoice Text:
219
  """ + text
220
 
221
+ [Rest of the code remains exactly the same from the previous version...]
222
+
223
  def format_currency(value):
224
  """Helper function to format currency values consistently"""
225
  if not value: