Seth0330 committed on
Commit
a025f40
·
verified ·
1 Parent(s): 3abc562

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -50
app.py CHANGED
@@ -56,7 +56,6 @@ def query_llm(model_choice, prompt):
56
  "Content-Type": "application/json",
57
  }
58
 
59
- # Add extra headers if they exist (for OpenRouter)
60
  if "extra_headers" in config:
61
  headers.update(config["extra_headers"])
62
 
@@ -67,13 +66,12 @@ def query_llm(model_choice, prompt):
67
  "max_tokens": 2000,
68
  }
69
 
70
- # Add response format if specified
71
  if config["response_format"]:
72
  payload["response_format"] = config["response_format"]
73
 
74
  try:
75
  with st.spinner(f"🔍 Analyzing with {model_choice}..."):
76
- response = requests.post(config["api_url"], headers=headers, json=payload, timeout=60)
77
 
78
  if response.status_code != 200:
79
  st.error(f"🚨 API Error {response.status_code}: {response.text}")
@@ -84,12 +82,15 @@ def query_llm(model_choice, prompt):
84
  st.session_state.last_api_response = content
85
  st.session_state.last_api_response_raw = response.text
86
 
87
- # Special handling for Llama 4 Mavericks incomplete responses
88
- if model_choice == "Llama 4 Mavericks" and content.count('{') != content.count('}'):
89
- st.warning("⚠️ Received incomplete JSON response. Trying to fix...")
90
- # Try to complete the JSON by adding missing closing braces
91
- missing_braces = content.count('{') - content.count('}')
92
- content += '}' * missing_braces + ']' * (content.count('[') - content.count(']'))
 
 
 
93
 
94
  return content
95
  except KeyError as e:
@@ -101,50 +102,34 @@ def query_llm(model_choice, prompt):
101
  return None
102
 
103
  def clean_json_response(text):
104
- """Improved JSON extraction from API response with better error handling"""
105
  if not text:
106
  return None
107
-
108
- # Special handling for Llama 4 Mavericks incomplete responses
109
- if 'line_items":' in text and ']' not in text.split('line_items":')[-1]:
110
- # Try to complete the line items array
111
- text = text.split('line_items":')[0] + 'line_items": []}'
112
-
113
- # First try to parse directly as JSON
 
 
 
 
 
114
  try:
115
- return json.loads(text)
 
 
 
 
 
 
 
 
 
116
  except json.JSONDecodeError as e:
117
- st.warning(f"First JSON parse attempt failed: {str(e)}")
118
-
119
- # Try to extract JSON from markdown code blocks
120
- json_match = re.search(r'```(?:json)?\n({.*?})\n```', text, re.DOTALL)
121
- if json_match:
122
- try:
123
- return json.loads(json_match.group(1))
124
- except json.JSONDecodeError as e:
125
- st.warning(f"Markdown JSON parse failed: {str(e)}")
126
-
127
- # Try to extract any JSON-like content
128
- json_match = re.search(r'\{.*\}', text, re.DOTALL)
129
- if json_match:
130
- try:
131
- return json.loads(json_match.group(0))
132
- except json.JSONDecodeError as e:
133
- st.warning(f"Loose JSON parse failed: {str(e)}")
134
-
135
- # Fallback to simple key-value parsing
136
- try:
137
- data = {}
138
- for line in text.split('\n'):
139
- if ':' in line:
140
- parts = line.split(':', 1)
141
- if len(parts) == 2:
142
- key, val = parts
143
- key = key.strip().strip('"').lower().replace(' ', '_')
144
- data[key] = val.strip().strip('"')
145
- return data if data else None
146
- except Exception as e:
147
- st.error(f"Final fallback parse failed: {str(e)}")
148
  return None
149
 
150
  def get_extraction_prompt(model_choice, text):
@@ -236,7 +221,7 @@ def format_currency(value):
236
  def display_line_items(line_items, model_choice="DeepSeek v3"):
237
  """Display line items in a formatted table"""
238
  if not line_items:
239
- st.info("No line items found in this invoice.")
240
  return
241
 
242
  st.subheader("📋 Line Items")
 
56
  "Content-Type": "application/json",
57
  }
58
 
 
59
  if "extra_headers" in config:
60
  headers.update(config["extra_headers"])
61
 
 
66
  "max_tokens": 2000,
67
  }
68
 
 
69
  if config["response_format"]:
70
  payload["response_format"] = config["response_format"]
71
 
72
  try:
73
  with st.spinner(f"🔍 Analyzing with {model_choice}..."):
74
+ response = requests.post(config["api_url"], headers=headers, json=payload, timeout=90) # Increased timeout
75
 
76
  if response.status_code != 200:
77
  st.error(f"🚨 API Error {response.status_code}: {response.text}")
 
82
  st.session_state.last_api_response = content
83
  st.session_state.last_api_response_raw = response.text
84
 
85
+ # Enhanced JSON repair for Llama 4
86
+ if model_choice == "Llama 4 Mavericks":
87
+ if '"line_items":' in content and not content.strip().endswith('}}'):
88
+ st.warning("⚠️ Detected incomplete response. Attempting repair...")
89
+ # Try to salvage what we have
90
+ if '"line_items": [' in content:
91
+ content = content.split('"line_items": [')[0] + '"line_items": []}}'
92
+ else:
93
+ content = content.split('"line_items":')[0] + '"line_items": []}}'
94
 
95
  return content
96
  except KeyError as e:
 
102
  return None
103
 
104
  def clean_json_response(text):
105
+ """Improved JSON extraction with better Llama 4 handling"""
106
  if not text:
107
  return None
108
+
109
+ # Enhanced repair for Llama 4's common truncation points
110
+ if '"line_items":' in text:
111
+ if not text.strip().endswith('}}'):
112
+ # Case 1: Line items started but not finished
113
+ if '"line_items": [' in text and not text.strip().endswith(']}'):
114
+ text = text.split('"line_items": [')[0] + '"line_items": []}}'
115
+ # Case 2: Line items key exists but no array
116
+ else:
117
+ text = text.split('"line_items":')[0] + '"line_items": []}}'
118
+
119
+ # Try parsing the (possibly repaired) JSON
120
  try:
121
+ data = json.loads(text)
122
+
123
+ # Ensure line_items exists even if empty
124
+ if "line_items" not in data:
125
+ if "invoice_header" in data: # Llama 4 structure
126
+ data["line_items"] = []
127
+ else: # DeepSeek structure
128
+ data["line_items"] = []
129
+
130
+ return data
131
  except json.JSONDecodeError as e:
132
+ st.warning(f"JSON parsing failed after repair attempts: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  return None
134
 
135
  def get_extraction_prompt(model_choice, text):
 
221
  def display_line_items(line_items, model_choice="DeepSeek v3"):
222
  """Display line items in a formatted table"""
223
  if not line_items:
224
+ st.info("No line items found in this invoice. This may be due to incomplete data from the API.")
225
  return
226
 
227
  st.subheader("📋 Line Items")