Spaces:
Sleeping
Sleeping
Fix LLM failure visibility and abort handling
Browse files
- Update LLM status in real-time as providers fail (not just at end)
- Log individual LLM provider failures to activity log
- Mark "All LLM providers failed" as abort condition (not retryable error)
- Move LLM status update before error check so frontend sees failures
- Show "Aborted" button instead of "Failed-Retry" for LLM failures
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
- src/nodes/analyzer.py +13 -1
- src/services/workflow_store.py +17 -13
src/nodes/analyzer.py
CHANGED
|
@@ -266,15 +266,27 @@ Remember: Every bullet point must reference actual data provided above. Do not i
|
|
| 266 |
response, provider, error, providers_failed = llm.query(prompt, temperature=0)
|
| 267 |
elapsed = time.time() - start_time
|
| 268 |
|
| 269 |
-
# Log failed providers
|
| 270 |
for pf in providers_failed:
|
| 271 |
_add_activity_log(workflow_id, progress_store, "analyzer", f"LLM {pf['name']} failed: {pf['error']}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
# Track failed providers in state for frontend
|
| 274 |
if "llm_providers_failed" not in state:
|
| 275 |
state["llm_providers_failed"] = []
|
| 276 |
state["llm_providers_failed"].extend([pf["name"] for pf in providers_failed])
|
| 277 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
if error:
|
| 279 |
state["draft_report"] = f"Error generating analysis: {error}"
|
| 280 |
state["provider_used"] = None
|
|
|
|
| 266 |
response, provider, error, providers_failed = llm.query(prompt, temperature=0)
|
| 267 |
elapsed = time.time() - start_time
|
| 268 |
|
| 269 |
+
# Log failed providers and update LLM status in real-time
|
| 270 |
for pf in providers_failed:
|
| 271 |
_add_activity_log(workflow_id, progress_store, "analyzer", f"LLM {pf['name']} failed: {pf['error']}")
|
| 272 |
+
# Update LLM status in real-time for frontend
|
| 273 |
+
if workflow_id and progress_store and workflow_id in progress_store:
|
| 274 |
+
llm_status = progress_store[workflow_id].get("llm_status", {})
|
| 275 |
+
if pf["name"] in llm_status:
|
| 276 |
+
llm_status[pf["name"]] = "failed"
|
| 277 |
|
| 278 |
# Track failed providers in state for frontend
|
| 279 |
if "llm_providers_failed" not in state:
|
| 280 |
state["llm_providers_failed"] = []
|
| 281 |
state["llm_providers_failed"].extend([pf["name"] for pf in providers_failed])
|
| 282 |
|
| 283 |
+
# Update successful provider status
|
| 284 |
+
if provider and workflow_id and progress_store and workflow_id in progress_store:
|
| 285 |
+
llm_status = progress_store[workflow_id].get("llm_status", {})
|
| 286 |
+
provider_name = provider.split(":")[0]
|
| 287 |
+
if provider_name in llm_status:
|
| 288 |
+
llm_status[provider_name] = "completed"
|
| 289 |
+
|
| 290 |
if error:
|
| 291 |
state["draft_report"] = f"Error generating analysis: {error}"
|
| 292 |
state["provider_used"] = None
|
src/services/workflow_store.py
CHANGED
|
@@ -150,6 +150,7 @@ def run_workflow_background(workflow_id: str, company_name: str, ticker: str, st
|
|
| 150 |
add_activity_log(workflow_id, source, f"MCP server failed")
|
| 151 |
|
| 152 |
# Update LLM status based on failed providers and used provider
|
|
|
|
| 153 |
llm_providers_failed = result.get("llm_providers_failed", [])
|
| 154 |
provider_used = result.get("provider_used", "")
|
| 155 |
llm_status = WORKFLOWS[workflow_id]["llm_status"]
|
|
@@ -158,6 +159,7 @@ def run_workflow_background(workflow_id: str, company_name: str, ticker: str, st
|
|
| 158 |
for provider in llm_providers_failed:
|
| 159 |
if provider in llm_status:
|
| 160 |
llm_status[provider] = "failed"
|
|
|
|
| 161 |
|
| 162 |
# Mark the used provider as completed
|
| 163 |
if provider_used:
|
|
@@ -165,6 +167,18 @@ def run_workflow_background(workflow_id: str, company_name: str, ticker: str, st
|
|
| 165 |
if provider_name in llm_status:
|
| 166 |
llm_status[provider_name] = "completed"
|
| 167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
# Parse SWOT from draft report
|
| 169 |
swot_data = parse_swot_text(result.get("draft_report", ""))
|
| 170 |
|
|
@@ -186,17 +200,6 @@ def run_workflow_background(workflow_id: str, company_name: str, ticker: str, st
|
|
| 186 |
except Exception as e:
|
| 187 |
logger.warning(f"Could not merge MCP SWOT data: {e}")
|
| 188 |
|
| 189 |
-
# Check if workflow ended with an error (LLM failures etc)
|
| 190 |
-
if result.get("error"):
|
| 191 |
-
error_msg = result.get("error")
|
| 192 |
-
add_activity_log(workflow_id, "workflow", f"Workflow failed: {error_msg}")
|
| 193 |
-
WORKFLOWS[workflow_id].update({
|
| 194 |
-
"status": "aborted",
|
| 195 |
-
"error": error_msg,
|
| 196 |
-
"current_step": "aborted"
|
| 197 |
-
})
|
| 198 |
-
return
|
| 199 |
-
|
| 200 |
# Parse raw_data for MCP display
|
| 201 |
raw_data_parsed = {}
|
| 202 |
try:
|
|
@@ -238,11 +241,12 @@ def run_workflow_background(workflow_id: str, company_name: str, ticker: str, st
|
|
| 238 |
except Exception as e:
|
| 239 |
error_msg = str(e)
|
| 240 |
# Determine if this is an abort (critical) or error (retryable)
|
| 241 |
-
# Aborts: Core MCP failures, insufficient data
|
| 242 |
is_abort = any(phrase in error_msg for phrase in [
|
| 243 |
"Insufficient core data",
|
| 244 |
"All MCP servers failed",
|
| 245 |
-
"Need at least 2 of"
|
|
|
|
| 246 |
])
|
| 247 |
|
| 248 |
WORKFLOWS[workflow_id].update({
|
|
|
|
| 150 |
add_activity_log(workflow_id, source, f"MCP server failed")
|
| 151 |
|
| 152 |
# Update LLM status based on failed providers and used provider
|
| 153 |
+
# IMPORTANT: Do this BEFORE checking for errors so frontend sees failures
|
| 154 |
llm_providers_failed = result.get("llm_providers_failed", [])
|
| 155 |
provider_used = result.get("provider_used", "")
|
| 156 |
llm_status = WORKFLOWS[workflow_id]["llm_status"]
|
|
|
|
| 159 |
for provider in llm_providers_failed:
|
| 160 |
if provider in llm_status:
|
| 161 |
llm_status[provider] = "failed"
|
| 162 |
+
add_activity_log(workflow_id, "llm", f"{provider.capitalize()} provider failed")
|
| 163 |
|
| 164 |
# Mark the used provider as completed
|
| 165 |
if provider_used:
|
|
|
|
| 167 |
if provider_name in llm_status:
|
| 168 |
llm_status[provider_name] = "completed"
|
| 169 |
|
| 170 |
+
# Check if workflow ended with an error (LLM failures etc)
|
| 171 |
+
# Do this BEFORE parsing SWOT so we properly abort on errors
|
| 172 |
+
if result.get("error"):
|
| 173 |
+
error_msg = result.get("error")
|
| 174 |
+
add_activity_log(workflow_id, "workflow", f"Workflow failed: {error_msg}")
|
| 175 |
+
WORKFLOWS[workflow_id].update({
|
| 176 |
+
"status": "aborted",
|
| 177 |
+
"error": error_msg,
|
| 178 |
+
"current_step": "aborted"
|
| 179 |
+
})
|
| 180 |
+
return
|
| 181 |
+
|
| 182 |
# Parse SWOT from draft report
|
| 183 |
swot_data = parse_swot_text(result.get("draft_report", ""))
|
| 184 |
|
|
|
|
| 200 |
except Exception as e:
|
| 201 |
logger.warning(f"Could not merge MCP SWOT data: {e}")
|
| 202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
# Parse raw_data for MCP display
|
| 204 |
raw_data_parsed = {}
|
| 205 |
try:
|
|
|
|
| 241 |
except Exception as e:
|
| 242 |
error_msg = str(e)
|
| 243 |
# Determine if this is an abort (critical) or error (retryable)
|
| 244 |
+
# Aborts: Core MCP failures, insufficient data, LLM failures
|
| 245 |
is_abort = any(phrase in error_msg for phrase in [
|
| 246 |
"Insufficient core data",
|
| 247 |
"All MCP servers failed",
|
| 248 |
+
"Need at least 2 of",
|
| 249 |
+
"All LLM providers failed"
|
| 250 |
])
|
| 251 |
|
| 252 |
WORKFLOWS[workflow_id].update({
|