Spaces:
Sleeping
Sleeping
Refactor: Analyzer handles revisions directly, remove Editor node
Browse filesArchitecture change: Researcher → Analyzer → Critic → Analyzer (revision loop)
- Add conditional focus area blocks to Analyzer based on failed rubric criteria
- Add _build_revision_prompt() for structured Critic feedback integration
- Analyzer detects revision mode via revision_count > 0 and critique_details
- Route "retry" from Critic back to Analyzer instead of Editor
- Rename editor_skipped → analyzer_revision_skipped in conditions.py
- Delete redundant src/nodes/editor.py
- Update Critic to LLM-only weighted rubric evaluation
- Update workflow version to 2.0
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
- src/api/schemas.py +1 -1
- src/graph_cyclic.py +9 -13
- src/nodes/analyzer.py +435 -62
- src/nodes/critic.py +298 -284
- src/nodes/editor.py +0 -138
- src/utils/analysis_cache.py +1 -1
- src/utils/conditions.py +5 -4
- src/workflow/graph.py +6 -10
- tests/test_self_correcting_loop.py +3 -3
src/api/schemas.py
CHANGED
|
@@ -30,7 +30,7 @@ class WorkflowStartResponse(BaseModel):
|
|
| 30 |
class WorkflowStatus(BaseModel):
|
| 31 |
"""Workflow status model."""
|
| 32 |
status: str # 'running', 'completed', 'error'
|
| 33 |
-
current_step: str # 'starting', 'Researcher', 'Analyzer', 'Critic'
|
| 34 |
revision_count: int
|
| 35 |
score: int
|
| 36 |
|
|
|
|
| 30 |
class WorkflowStatus(BaseModel):
|
| 31 |
"""Workflow status model."""
|
| 32 |
status: str # 'running', 'completed', 'error'
|
| 33 |
+
current_step: str # 'starting', 'Researcher', 'Analyzer', 'Critic'
|
| 34 |
revision_count: int
|
| 35 |
score: int
|
| 36 |
|
src/graph_cyclic.py
CHANGED
|
@@ -4,18 +4,16 @@ from src.state import AgentState
|
|
| 4 |
from src.nodes.researcher import researcher_node
|
| 5 |
from src.nodes.analyzer import analyzer_node
|
| 6 |
from src.nodes.critic import critic_node
|
| 7 |
-
from src.nodes.editor import editor_node
|
| 8 |
from src.utils.conditions import should_continue
|
| 9 |
from langsmith import traceable
|
| 10 |
|
| 11 |
# Create the cyclic workflow
|
| 12 |
workflow = StateGraph(AgentState)
|
| 13 |
|
| 14 |
-
# Add
|
| 15 |
workflow.add_node("Researcher", RunnableLambda(researcher_node))
|
| 16 |
workflow.add_node("Analyzer", RunnableLambda(analyzer_node))
|
| 17 |
workflow.add_node("Critic", RunnableLambda(critic_node))
|
| 18 |
-
workflow.add_node("Editor", RunnableLambda(editor_node))
|
| 19 |
|
| 20 |
# Define the workflow edges
|
| 21 |
workflow.set_entry_point("Researcher")
|
|
@@ -23,18 +21,16 @@ workflow.add_edge("Researcher", "Analyzer")
|
|
| 23 |
workflow.add_edge("Analyzer", "Critic")
|
| 24 |
|
| 25 |
# Add conditional edges for the self-correcting loop
|
|
|
|
| 26 |
workflow.add_conditional_edges(
|
| 27 |
-
"Critic",
|
| 28 |
-
should_continue,
|
| 29 |
{
|
| 30 |
"exit": "__end__",
|
| 31 |
-
"retry": "
|
| 32 |
}
|
| 33 |
)
|
| 34 |
|
| 35 |
-
# Complete the loop: Editor → Critic
|
| 36 |
-
workflow.add_edge("Editor", "Critic")
|
| 37 |
-
|
| 38 |
# Set the finish point
|
| 39 |
workflow.set_finish_point("Critic")
|
| 40 |
|
|
@@ -43,9 +39,9 @@ workflow.config = {
|
|
| 43 |
"project_name": "AI-strategy-agent-cyclic",
|
| 44 |
"tags": ["self-correcting", "quality-loop", "swot-analysis"],
|
| 45 |
"metadata": {
|
| 46 |
-
"version": "
|
| 47 |
"environment": "development",
|
| 48 |
-
"workflow_type": "researcher-analyzer-critic
|
| 49 |
}
|
| 50 |
}
|
| 51 |
|
|
@@ -91,7 +87,7 @@ if __name__ == "__main__":
|
|
| 91 |
target_company = "Tesla"
|
| 92 |
|
| 93 |
print(f"🔍 Running Self-Correcting SWOT Analysis for {target_company}...")
|
| 94 |
-
print("📝 This workflow includes: Researcher → Analyzer → Critic →
|
| 95 |
print("🎯 Loop continues until score ≥ 7 or 3 revisions attempted\n")
|
| 96 |
|
| 97 |
# Execute the workflow
|
|
@@ -115,7 +111,7 @@ if __name__ == "__main__":
|
|
| 115 |
print(f" - Initial Quality: Improved from unknown to {final_score}/10")
|
| 116 |
print(f" - Revisions Made: {final_revision_count}")
|
| 117 |
print(f" - Final Report Length: {len(result['draft_report'])} characters")
|
| 118 |
-
print(f" - Workflow: Researcher → Analyzer → Critic →
|
| 119 |
print(f" - Tracing: Enhanced LangSmith traces available")
|
| 120 |
|
| 121 |
# Quality assessment
|
|
|
|
| 4 |
from src.nodes.researcher import researcher_node
|
| 5 |
from src.nodes.analyzer import analyzer_node
|
| 6 |
from src.nodes.critic import critic_node
|
|
|
|
| 7 |
from src.utils.conditions import should_continue
|
| 8 |
from langsmith import traceable
|
| 9 |
|
| 10 |
# Create the cyclic workflow
|
| 11 |
workflow = StateGraph(AgentState)
|
| 12 |
|
| 13 |
+
# Add nodes to the workflow (Analyzer handles both initial generation and revisions)
|
| 14 |
workflow.add_node("Researcher", RunnableLambda(researcher_node))
|
| 15 |
workflow.add_node("Analyzer", RunnableLambda(analyzer_node))
|
| 16 |
workflow.add_node("Critic", RunnableLambda(critic_node))
|
|
|
|
| 17 |
|
| 18 |
# Define the workflow edges
|
| 19 |
workflow.set_entry_point("Researcher")
|
|
|
|
| 21 |
workflow.add_edge("Analyzer", "Critic")
|
| 22 |
|
| 23 |
# Add conditional edges for the self-correcting loop
|
| 24 |
+
# Analyzer now handles revisions directly (no separate Editor node)
|
| 25 |
workflow.add_conditional_edges(
|
| 26 |
+
"Critic",
|
| 27 |
+
should_continue,
|
| 28 |
{
|
| 29 |
"exit": "__end__",
|
| 30 |
+
"retry": "Analyzer" # Route back to Analyzer for revisions
|
| 31 |
}
|
| 32 |
)
|
| 33 |
|
|
|
|
|
|
|
|
|
|
| 34 |
# Set the finish point
|
| 35 |
workflow.set_finish_point("Critic")
|
| 36 |
|
|
|
|
| 39 |
"project_name": "AI-strategy-agent-cyclic",
|
| 40 |
"tags": ["self-correcting", "quality-loop", "swot-analysis"],
|
| 41 |
"metadata": {
|
| 42 |
+
"version": "2.0",
|
| 43 |
"environment": "development",
|
| 44 |
+
"workflow_type": "researcher-analyzer-critic"
|
| 45 |
}
|
| 46 |
}
|
| 47 |
|
|
|
|
| 87 |
target_company = "Tesla"
|
| 88 |
|
| 89 |
print(f"🔍 Running Self-Correcting SWOT Analysis for {target_company}...")
|
| 90 |
+
print("📝 This workflow includes: Researcher → Analyzer → Critic → Analyzer (revision loop)")
|
| 91 |
print("🎯 Loop continues until score ≥ 7 or 3 revisions attempted\n")
|
| 92 |
|
| 93 |
# Execute the workflow
|
|
|
|
| 111 |
print(f" - Initial Quality: Improved from unknown to {final_score}/10")
|
| 112 |
print(f" - Revisions Made: {final_revision_count}")
|
| 113 |
print(f" - Final Report Length: {len(result['draft_report'])} characters")
|
| 114 |
+
print(f" - Workflow: Researcher → Analyzer → Critic → Analyzer (revision loop)")
|
| 115 |
print(f" - Tracing: Enhanced LangSmith traces available")
|
| 116 |
|
| 117 |
# Quality assessment
|
src/nodes/analyzer.py
CHANGED
|
@@ -1,10 +1,123 @@
|
|
| 1 |
-
from src.tools import get_strategy_context
|
| 2 |
from src.llm_client import get_llm_client
|
| 3 |
from langsmith import traceable
|
| 4 |
import time
|
| 5 |
import json
|
| 6 |
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
def _add_activity_log(workflow_id, progress_store, step, message):
|
| 9 |
"""Helper to add activity log entry."""
|
| 10 |
if workflow_id and progress_store:
|
|
@@ -118,8 +231,13 @@ def _get_value(metric_data) -> any:
|
|
| 118 |
return metric_data
|
| 119 |
|
| 120 |
|
| 121 |
-
def _generate_data_report(raw_data: str) -> str:
|
| 122 |
-
"""Generate complete multi-source data report with simple tables.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
try:
|
| 124 |
data = json.loads(raw_data)
|
| 125 |
except json.JSONDecodeError:
|
|
@@ -195,14 +313,17 @@ def _generate_data_report(raw_data: str) -> str:
|
|
| 195 |
("P/E Forward", "forward_pe", lambda v: _format_number(v, "x")),
|
| 196 |
("P/B Ratio", "pb_ratio", lambda v: _format_number(v, "x")),
|
| 197 |
("P/S Ratio", "ps_ratio", lambda v: _format_number(v, "x")),
|
| 198 |
-
("EV/EBITDA", "ev_ebitda", lambda v: _format_number(v, "x")),
|
| 199 |
-
("EV/Revenue", "ev_revenue", lambda v: _format_number(v, "x")),
|
| 200 |
("PEG Ratio", "trailing_peg", lambda v: _format_number(v, "x")),
|
| 201 |
("Price/FCF", "price_to_fcf", lambda v: _format_number(v, "x")),
|
| 202 |
("Revenue Growth", "revenue_growth", lambda v: _format_number(v * 100 if v and abs(v) < 10 else v, "%") if v else "N/A"),
|
| 203 |
("Earnings Growth", "earnings_growth", lambda v: _format_number(v * 100 if v and abs(v) < 10 else v, "%") if v else "N/A"),
|
| 204 |
]
|
| 205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
for name, key, fmt in val_metrics:
|
| 207 |
y = yf_val.get(key)
|
| 208 |
a = av_val.get(key)
|
|
@@ -470,8 +591,13 @@ def _extract_key_metrics(raw_data: str) -> dict:
|
|
| 470 |
return extracted
|
| 471 |
|
| 472 |
|
| 473 |
-
def _format_metrics_for_prompt(extracted: dict) -> str:
|
| 474 |
-
"""Format extracted metrics into a clear text for the LLM.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
lines = []
|
| 476 |
lines.append(f"Company: {extracted['company']} ({extracted['ticker']})")
|
| 477 |
lines.append("")
|
|
@@ -542,7 +668,7 @@ def _format_metrics_for_prompt(extracted: dict) -> str:
|
|
| 542 |
lines.append(f"- P/B Ratio: {val['pb_ratio']:.2f}")
|
| 543 |
if val.get("ps_ratio"):
|
| 544 |
lines.append(f"- P/S Ratio: {val['ps_ratio']:.2f}")
|
| 545 |
-
if val.get("ev_ebitda"):
|
| 546 |
lines.append(f"- EV/EBITDA: {val['ev_ebitda']:.1f}")
|
| 547 |
if val.get("valuation_signal"):
|
| 548 |
lines.append(f"- Overall Signal: {val['valuation_signal']}")
|
|
@@ -608,6 +734,221 @@ def _format_metrics_for_prompt(extracted: dict) -> str:
|
|
| 608 |
return "\n".join(lines)
|
| 609 |
|
| 610 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 611 |
@traceable(name="Analyzer")
|
| 612 |
def analyzer_node(state, workflow_id=None, progress_store=None):
|
| 613 |
# Extract workflow_id and progress_store from state (graph invokes with state only)
|
|
@@ -628,56 +969,56 @@ def analyzer_node(state, workflow_id=None, progress_store=None):
|
|
| 628 |
user_keys = state.get("user_api_keys", {})
|
| 629 |
llm = get_llm_client(user_keys) if user_keys else get_llm_client()
|
| 630 |
raw = state["raw_data"]
|
| 631 |
-
strategy_name = state.get("strategy_focus", "Cost Leadership")
|
| 632 |
-
strategy_context = get_strategy_context(strategy_name)
|
| 633 |
company = state["company_name"]
|
| 634 |
ticker = state.get("ticker", "")
|
| 635 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 636 |
# Extract and format metrics for better LLM understanding
|
| 637 |
extracted = _extract_key_metrics(raw)
|
| 638 |
-
formatted_data = _format_metrics_for_prompt(extracted)
|
| 639 |
|
| 640 |
# Generate detailed data report (shown before SWOT)
|
| 641 |
-
data_report = _generate_data_report(raw)
|
| 642 |
-
|
| 643 |
-
#
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
- [Single sentence with metric, under 25 words]
|
| 673 |
-
|
| 674 |
-
Opportunities:
|
| 675 |
-
- [Single sentence citing macro/market data, under 25 words]
|
| 676 |
-
|
| 677 |
-
Threats:
|
| 678 |
-
- [Single sentence citing risks, under 25 words]
|
| 679 |
|
| 680 |
-
Remember: Every bullet must cite actual data. Keep each point brief and impactful."""
|
| 681 |
start_time = time.time()
|
| 682 |
response, provider, error, providers_failed = llm.query(prompt, temperature=0)
|
| 683 |
elapsed = time.time() - start_time
|
|
@@ -704,18 +1045,50 @@ Remember: Every bullet must cite actual data. Keep each point brief and impactfu
|
|
| 704 |
llm_status[provider_name] = "completed"
|
| 705 |
|
| 706 |
if error:
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 712 |
else:
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 720 |
|
| 721 |
return state
|
|
|
|
|
|
|
| 1 |
from src.llm_client import get_llm_client
|
| 2 |
from langsmith import traceable
|
| 3 |
import time
|
| 4 |
import json
|
| 5 |
|
| 6 |
|
| 7 |
+
# Financial institution detection for EV/EBITDA exclusion
|
| 8 |
+
FINANCIAL_SECTORS = {
|
| 9 |
+
"financial services", "financial", "banking", "banks",
|
| 10 |
+
"insurance", "real estate investment trust", "reit",
|
| 11 |
+
"investment management", "capital markets", "diversified financial services",
|
| 12 |
+
"consumer finance", "asset management", "mortgage finance",
|
| 13 |
+
}
|
| 14 |
+
|
| 15 |
+
FINANCIAL_INDUSTRIES = {
|
| 16 |
+
"banks", "regional banks", "diversified banks", "money center banks",
|
| 17 |
+
"insurance", "life insurance", "property insurance", "reinsurance",
|
| 18 |
+
"real estate", "reit", "mortgage reits", "equity reits",
|
| 19 |
+
"asset management", "investment banking", "capital markets",
|
| 20 |
+
"consumer finance", "specialty finance",
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
# Fallback: known financial tickers when sector data unavailable
|
| 24 |
+
FINANCIAL_TICKERS = {
|
| 25 |
+
"JPM", "BAC", "WFC", "GS", "MS", "C", "USB", "PNC", "TFC", "COF",
|
| 26 |
+
"AXP", "BLK", "SCHW", "CME", "ICE", "SPGI", "MCO",
|
| 27 |
+
"BRK.A", "BRK.B", "MET", "PRU", "AIG", "ALL", "TRV", "PGR", "CB",
|
| 28 |
+
"AMT", "PLD", "CCI", "EQIX", "PSA", "O", "WELL", "AVB", "EQR",
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
# =============================================================================
|
| 32 |
+
# REVISION MODE: Conditional Focus Area Blocks
|
| 33 |
+
# These are included in revision prompts based on which rubric criteria failed
|
| 34 |
+
# =============================================================================
|
| 35 |
+
|
| 36 |
+
EVIDENCE_GROUNDING_BLOCK = """
|
| 37 |
+
**EVIDENCE GROUNDING (Critical)**
|
| 38 |
+
- Every claim must cite a specific metric from the input data
|
| 39 |
+
- Use exact field names: `revenue`, `net_margin_pct`, `trailing_pe`, etc.
|
| 40 |
+
- Format citations as: "[Metric]: [Value] ([Source], [Period])"
|
| 41 |
+
- If a metric was flagged as fabricated, remove it entirely or replace with actual data
|
| 42 |
+
"""
|
| 43 |
+
|
| 44 |
+
CONSTRAINT_COMPLIANCE_BLOCK = """
|
| 45 |
+
**CONSTRAINT COMPLIANCE (Critical)**
|
| 46 |
+
- Remove any language that sounds like investment advice
|
| 47 |
+
- Check all temporal labels — TTM vs FY vs Q must match the source
|
| 48 |
+
- Add confidence levels to key conclusions: (High/Medium/Low)
|
| 49 |
+
- Do not use EV/EBITDA for financial institutions
|
| 50 |
+
- For missing data, state "DATA NOT PROVIDED" — do not estimate
|
| 51 |
+
"""
|
| 52 |
+
|
| 53 |
+
SPECIFICITY_BLOCK = """
|
| 54 |
+
**SPECIFICITY & ACTIONABILITY**
|
| 55 |
+
- Replace generic statements with company-specific observations
|
| 56 |
+
- Quantify every claim possible: not "strong margins" but "31.0% operating margin"
|
| 57 |
+
- Remove business clichés: "leveraging," "best-in-class," "synergies"
|
| 58 |
+
"""
|
| 59 |
+
|
| 60 |
+
INSIGHT_BLOCK = """
|
| 61 |
+
**STRATEGIC INSIGHT**
|
| 62 |
+
- Connect observations across data baskets (e.g., link margin trends to macro rates)
|
| 63 |
+
- Go beyond restating metrics — explain WHY they matter
|
| 64 |
+
- Identify non-obvious relationships in the data
|
| 65 |
+
"""
|
| 66 |
+
|
| 67 |
+
COMPLETENESS_BLOCK = """
|
| 68 |
+
**COMPLETENESS & BALANCE**
|
| 69 |
+
- Ensure ALL required sections are present (Strengths, Weaknesses, Opportunities, Threats, Data Quality Notes)
|
| 70 |
+
- Balance quadrants — no section should be filler or disproportionately thin
|
| 71 |
+
"""
|
| 72 |
+
|
| 73 |
+
CLARITY_BLOCK = """
|
| 74 |
+
**CLARITY & STRUCTURE**
|
| 75 |
+
- Use consistent formatting throughout
|
| 76 |
+
- Ensure no contradictions across sections
|
| 77 |
+
- Make output scannable — executives should grasp key points in 30 seconds
|
| 78 |
+
"""
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _is_financial_institution(sector: str, industry: str, ticker: str) -> bool:
|
| 82 |
+
"""Detect if company is a financial institution (EV/EBITDA not meaningful)."""
|
| 83 |
+
sector_lower = (sector or "").lower().strip()
|
| 84 |
+
industry_lower = (industry or "").lower().strip()
|
| 85 |
+
|
| 86 |
+
if any(fs in sector_lower for fs in FINANCIAL_SECTORS):
|
| 87 |
+
return True
|
| 88 |
+
if any(fi in industry_lower for fi in FINANCIAL_INDUSTRIES):
|
| 89 |
+
return True
|
| 90 |
+
if ticker and ticker.upper() in FINANCIAL_TICKERS:
|
| 91 |
+
return True
|
| 92 |
+
return False
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _extract_company_profile(raw_data: str) -> dict:
|
| 96 |
+
"""Extract sector/industry from Yahoo Finance data if available."""
|
| 97 |
+
try:
|
| 98 |
+
data = json.loads(raw_data)
|
| 99 |
+
except json.JSONDecodeError:
|
| 100 |
+
return {}
|
| 101 |
+
|
| 102 |
+
multi_source = data.get("multi_source", {})
|
| 103 |
+
|
| 104 |
+
# Try valuation Yahoo Finance data first
|
| 105 |
+
yf_val = multi_source.get("valuation_all", {}).get("yahoo_finance", {}).get("data", {})
|
| 106 |
+
profile = yf_val.get("profile", {})
|
| 107 |
+
|
| 108 |
+
if profile.get("sector"):
|
| 109 |
+
return {"sector": profile.get("sector"), "industry": profile.get("industry")}
|
| 110 |
+
|
| 111 |
+
# Fallback to fundamentals Yahoo Finance
|
| 112 |
+
yf_fund = multi_source.get("fundamentals_all", {}).get("yahoo_finance", {}).get("data", {})
|
| 113 |
+
fund_profile = yf_fund.get("profile", {})
|
| 114 |
+
|
| 115 |
+
return {
|
| 116 |
+
"sector": fund_profile.get("sector", ""),
|
| 117 |
+
"industry": fund_profile.get("industry", "")
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
|
| 121 |
def _add_activity_log(workflow_id, progress_store, step, message):
|
| 122 |
"""Helper to add activity log entry."""
|
| 123 |
if workflow_id and progress_store:
|
|
|
|
| 231 |
return metric_data
|
| 232 |
|
| 233 |
|
| 234 |
+
def _generate_data_report(raw_data: str, is_financial: bool = False) -> str:
|
| 235 |
+
"""Generate complete multi-source data report with simple tables.
|
| 236 |
+
|
| 237 |
+
Args:
|
| 238 |
+
raw_data: JSON string of research data
|
| 239 |
+
is_financial: If True, exclude EV/EBITDA for financial institutions
|
| 240 |
+
"""
|
| 241 |
try:
|
| 242 |
data = json.loads(raw_data)
|
| 243 |
except json.JSONDecodeError:
|
|
|
|
| 313 |
("P/E Forward", "forward_pe", lambda v: _format_number(v, "x")),
|
| 314 |
("P/B Ratio", "pb_ratio", lambda v: _format_number(v, "x")),
|
| 315 |
("P/S Ratio", "ps_ratio", lambda v: _format_number(v, "x")),
|
|
|
|
|
|
|
| 316 |
("PEG Ratio", "trailing_peg", lambda v: _format_number(v, "x")),
|
| 317 |
("Price/FCF", "price_to_fcf", lambda v: _format_number(v, "x")),
|
| 318 |
("Revenue Growth", "revenue_growth", lambda v: _format_number(v * 100 if v and abs(v) < 10 else v, "%") if v else "N/A"),
|
| 319 |
("Earnings Growth", "earnings_growth", lambda v: _format_number(v * 100 if v and abs(v) < 10 else v, "%") if v else "N/A"),
|
| 320 |
]
|
| 321 |
|
| 322 |
+
# Only include EV/EBITDA for non-financial companies
|
| 323 |
+
if not is_financial:
|
| 324 |
+
val_metrics.insert(6, ("EV/EBITDA", "ev_ebitda", lambda v: _format_number(v, "x")))
|
| 325 |
+
val_metrics.insert(7, ("EV/Revenue", "ev_revenue", lambda v: _format_number(v, "x")))
|
| 326 |
+
|
| 327 |
for name, key, fmt in val_metrics:
|
| 328 |
y = yf_val.get(key)
|
| 329 |
a = av_val.get(key)
|
|
|
|
| 591 |
return extracted
|
| 592 |
|
| 593 |
|
| 594 |
+
def _format_metrics_for_prompt(extracted: dict, is_financial: bool = False) -> str:
|
| 595 |
+
"""Format extracted metrics into a clear text for the LLM.
|
| 596 |
+
|
| 597 |
+
Args:
|
| 598 |
+
extracted: Extracted metrics dictionary
|
| 599 |
+
is_financial: If True, exclude EV/EBITDA from valuation metrics
|
| 600 |
+
"""
|
| 601 |
lines = []
|
| 602 |
lines.append(f"Company: {extracted['company']} ({extracted['ticker']})")
|
| 603 |
lines.append("")
|
|
|
|
| 668 |
lines.append(f"- P/B Ratio: {val['pb_ratio']:.2f}")
|
| 669 |
if val.get("ps_ratio"):
|
| 670 |
lines.append(f"- P/S Ratio: {val['ps_ratio']:.2f}")
|
| 671 |
+
if val.get("ev_ebitda") and not is_financial:
|
| 672 |
lines.append(f"- EV/EBITDA: {val['ev_ebitda']:.1f}")
|
| 673 |
if val.get("valuation_signal"):
|
| 674 |
lines.append(f"- Overall Signal: {val['valuation_signal']}")
|
|
|
|
| 734 |
return "\n".join(lines)
|
| 735 |
|
| 736 |
|
| 737 |
+
# New institutional-grade prompt
|
| 738 |
+
ANALYZER_SYSTEM_PROMPT = """You are a senior financial analyst producing institutional-grade SWOT analyses.
|
| 739 |
+
|
| 740 |
+
## DATA GROUNDING RULES (CRITICAL)
|
| 741 |
+
1. USE ONLY the provided data. Never invent or assume metrics not given.
|
| 742 |
+
2. CITE specific numbers for every finding (e.g., "Net margin: 24.3%", "P/E: 21.3x").
|
| 743 |
+
3. If data is missing, state "Insufficient data" - do NOT fabricate.
|
| 744 |
+
4. Distinguish trailing (historical) vs forward (projected) metrics.
|
| 745 |
+
|
| 746 |
+
## AVAILABLE DATA BASKETS
|
| 747 |
+
|
| 748 |
+
### Fundamentals (SEC EDGAR + Yahoo Finance)
|
| 749 |
+
revenue, net_income, net_margin_pct, gross_margin_pct, operating_margin_pct,
|
| 750 |
+
total_assets, total_liabilities, stockholders_equity, free_cash_flow,
|
| 751 |
+
operating_cash_flow, long_term_debt, debt_to_equity, eps
|
| 752 |
+
|
| 753 |
+
### Valuation (Yahoo Finance)
|
| 754 |
+
market_cap, enterprise_value, trailing_pe, forward_pe, pb_ratio, ps_ratio,
|
| 755 |
+
trailing_peg, price_to_fcf, revenue_growth, earnings_growth
|
| 756 |
+
{ev_ebitda_note}
|
| 757 |
+
|
| 758 |
+
### Volatility (FRED + Yahoo)
|
| 759 |
+
vix, vxn, beta, historical_volatility, implied_volatility
|
| 760 |
+
|
| 761 |
+
### Macro (BEA/BLS/FRED)
|
| 762 |
+
gdp_growth, interest_rate, cpi_inflation, unemployment
|
| 763 |
+
|
| 764 |
+
### News & Sentiment
|
| 765 |
+
News articles with title, source, url
|
| 766 |
+
Sentiment scores from Finnhub and Reddit
|
| 767 |
+
|
| 768 |
+
## WHAT YOU DO NOT DO
|
| 769 |
+
- Provide buy/sell/hold recommendations
|
| 770 |
+
- Compare to sector/peer benchmarks (data not provided)
|
| 771 |
+
- Speculate beyond provided data
|
| 772 |
+
- Use vague hedge words without quantification"""
|
| 773 |
+
|
| 774 |
+
|
| 775 |
+
def _build_revision_prompt(
|
| 776 |
+
critique_details: dict,
|
| 777 |
+
company_data: str,
|
| 778 |
+
current_draft: str,
|
| 779 |
+
is_financial: bool
|
| 780 |
+
) -> str:
|
| 781 |
+
"""Build revision prompt with conditional focus areas based on failed criteria.
|
| 782 |
+
|
| 783 |
+
Args:
|
| 784 |
+
critique_details: Structured dict from Critic with scores and feedback
|
| 785 |
+
company_data: Formatted metrics string for reference
|
| 786 |
+
current_draft: The current SWOT draft to be revised
|
| 787 |
+
is_financial: Whether the company is a financial institution
|
| 788 |
+
|
| 789 |
+
Returns:
|
| 790 |
+
Complete revision prompt string
|
| 791 |
+
"""
|
| 792 |
+
scores = critique_details.get("scores", {})
|
| 793 |
+
|
| 794 |
+
# Determine which focus areas to include based on failed criteria
|
| 795 |
+
focus_areas = []
|
| 796 |
+
if scores.get("evidence_grounding", 10) < 7:
|
| 797 |
+
focus_areas.append(EVIDENCE_GROUNDING_BLOCK)
|
| 798 |
+
if scores.get("constraint_compliance", 10) < 6:
|
| 799 |
+
focus_areas.append(CONSTRAINT_COMPLIANCE_BLOCK)
|
| 800 |
+
if scores.get("specificity_actionability", 10) < 7:
|
| 801 |
+
focus_areas.append(SPECIFICITY_BLOCK)
|
| 802 |
+
if scores.get("strategic_insight", 10) < 7:
|
| 803 |
+
focus_areas.append(INSIGHT_BLOCK)
|
| 804 |
+
if scores.get("completeness_balance", 10) < 7:
|
| 805 |
+
focus_areas.append(COMPLETENESS_BLOCK)
|
| 806 |
+
if scores.get("clarity_structure", 10) < 7:
|
| 807 |
+
focus_areas.append(CLARITY_BLOCK)
|
| 808 |
+
|
| 809 |
+
# Format critic feedback components
|
| 810 |
+
deficiencies = critique_details.get("key_deficiencies", [])
|
| 811 |
+
strengths = critique_details.get("strengths_to_preserve", [])
|
| 812 |
+
feedback = critique_details.get("actionable_feedback", [])
|
| 813 |
+
|
| 814 |
+
# Build deficiencies section
|
| 815 |
+
deficiencies_text = "\n".join(f"- {d}" for d in deficiencies) if deficiencies else "- None specified"
|
| 816 |
+
|
| 817 |
+
# Build strengths section
|
| 818 |
+
strengths_text = "\n".join(f"- {s}" for s in strengths) if strengths else "- None specified"
|
| 819 |
+
|
| 820 |
+
# Build feedback section
|
| 821 |
+
feedback_text = "\n".join(f"{i+1}. {f}" for i, f in enumerate(feedback)) if feedback else "- None specified"
|
| 822 |
+
|
| 823 |
+
# Build focus areas section
|
| 824 |
+
focus_areas_text = "\n".join(focus_areas) if focus_areas else "Address all deficiencies listed above."
|
| 825 |
+
|
| 826 |
+
# Add EV/EBITDA note for financial institutions
|
| 827 |
+
ev_note = ""
|
| 828 |
+
if is_financial:
|
| 829 |
+
ev_note = "\n**Note:** This is a financial institution - EV/EBITDA is excluded from analysis."
|
| 830 |
+
|
| 831 |
+
prompt = f"""## REVISION MODE ACTIVATED
|
| 832 |
+
|
| 833 |
+
You previously generated a SWOT analysis that did not meet quality standards. You are now in revision mode.
|
| 834 |
+
|
| 835 |
+
### YOUR TASK
|
| 836 |
+
|
| 837 |
+
1. **Review the Critic's feedback** carefully
|
| 838 |
+
2. **Address each deficiency** listed in priority order
|
| 839 |
+
3. **Preserve strengths** explicitly called out — do not regress on what worked
|
| 840 |
+
4. **Regenerate the complete SWOT** — not a partial patch
|
| 841 |
+
|
| 842 |
+
### CRITIC FEEDBACK
|
| 843 |
+
|
| 844 |
+
Status: {critique_details.get('status', 'REJECTED')}
|
| 845 |
+
Weighted Score: {critique_details.get('weighted_score', 0):.1f} / 10
|
| 846 |
+
|
| 847 |
+
**Key Deficiencies:**
|
| 848 |
+
{deficiencies_text}
|
| 849 |
+
|
| 850 |
+
**Strengths to Preserve:**
|
| 851 |
+
{strengths_text}
|
| 852 |
+
|
| 853 |
+
**Actionable Feedback:**
|
| 854 |
+
{feedback_text}
|
| 855 |
+
|
| 856 |
+
### FOCUS AREAS FOR THIS REVISION
|
| 857 |
+
|
| 858 |
+
{focus_areas_text}
|
| 859 |
+
|
| 860 |
+
### REVISION RULES
|
| 861 |
+
|
| 862 |
+
**DO:**
|
| 863 |
+
- Fix every item in "Key Deficiencies" — these are blocking issues
|
| 864 |
+
- Apply each point in "Actionable Feedback" — these are specific instructions
|
| 865 |
+
- Keep everything listed under "Strengths to Preserve" — do not modify these sections
|
| 866 |
+
- Re-verify all metric citations against the original input data
|
| 867 |
+
- Ensure temporal labels (TTM, FY, Q) are accurate for each metric
|
| 868 |
+
{ev_note}
|
| 869 |
+
|
| 870 |
+
**DO NOT:**
|
| 871 |
+
- Ignore lower-priority feedback items — address all of them
|
| 872 |
+
- Introduce new metrics not in the original input data
|
| 873 |
+
- Remove content that was working well
|
| 874 |
+
- Add defensive caveats or apologies about the revision
|
| 875 |
+
- Reference the revision process in your output — produce a clean SWOT as if first attempt
|
| 876 |
+
|
| 877 |
+
### REFERENCE DATA
|
| 878 |
+
|
| 879 |
+
{company_data}
|
| 880 |
+
|
| 881 |
+
### CURRENT DRAFT (to revise)
|
| 882 |
+
|
| 883 |
+
{current_draft}
|
| 884 |
+
|
| 885 |
+
### OUTPUT INSTRUCTIONS
|
| 886 |
+
|
| 887 |
+
Produce a complete, revised SWOT analysis following the original template structure.
|
| 888 |
+
|
| 889 |
+
Do not:
|
| 890 |
+
- Include any preamble about revisions
|
| 891 |
+
- Apologize or explain what you changed
|
| 892 |
+
- Reference the Critic's feedback in your output
|
| 893 |
+
|
| 894 |
+
Simply output the improved SWOT as a clean, final deliverable."""
|
| 895 |
+
|
| 896 |
+
return prompt
|
| 897 |
+
|
| 898 |
+
|
| 899 |
+
def _build_analyzer_prompt(company: str, ticker: str, formatted_data: str, is_financial: bool) -> str:
|
| 900 |
+
"""Build analyzer prompt with conditional EV/EBITDA handling."""
|
| 901 |
+
|
| 902 |
+
if is_financial:
|
| 903 |
+
ev_note = "\nNote: EV/EBITDA excluded - not meaningful for financial institutions."
|
| 904 |
+
else:
|
| 905 |
+
ev_note = ", ev_ebitda, ev_revenue"
|
| 906 |
+
|
| 907 |
+
system = ANALYZER_SYSTEM_PROMPT.format(ev_ebitda_note=ev_note)
|
| 908 |
+
|
| 909 |
+
return f"""{system}
|
| 910 |
+
|
| 911 |
+
=== DATA FOR {company} ({ticker}) ===
|
| 912 |
+
{formatted_data}
|
| 913 |
+
|
| 914 |
+
=== OUTPUT FORMAT ===
|
| 915 |
+
|
| 916 |
+
Produce a SWOT analysis with this exact structure:
|
| 917 |
+
|
| 918 |
+
## Strengths
|
| 919 |
+
For each (3-5 points):
|
| 920 |
+
- **Finding:** [One sentence with specific metric]
|
| 921 |
+
- **Strategic Implication:** [Why this matters]
|
| 922 |
+
- **Durability:** [High/Medium/Low]
|
| 923 |
+
|
| 924 |
+
## Weaknesses
|
| 925 |
+
For each (3-5 points):
|
| 926 |
+
- **Finding:** [One sentence with specific metric]
|
| 927 |
+
- **Severity:** [Critical/Moderate/Minor]
|
| 928 |
+
- **Trend:** [Improving/Stable/Deteriorating]
|
| 929 |
+
- **Remediation Levers:** [What could improve this]
|
| 930 |
+
|
| 931 |
+
## Opportunities
|
| 932 |
+
For each (3-5 points):
|
| 933 |
+
- **Catalyst:** [Description with supporting data]
|
| 934 |
+
- **Timing:** [Near-term/Medium-term/Long-term]
|
| 935 |
+
- **Execution Requirements:** [What must happen]
|
| 936 |
+
|
| 937 |
+
## Threats
|
| 938 |
+
For each (3-5 points):
|
| 939 |
+
- **Risk Factor:** [Description with supporting data]
|
| 940 |
+
- **Probability:** [High/Medium/Low]
|
| 941 |
+
- **Impact:** [Potential magnitude]
|
| 942 |
+
- **Mitigation Options:** [Possible responses]
|
| 943 |
+
|
| 944 |
+
## Data Quality Notes
|
| 945 |
+
- **Metrics Used:** [List key metrics analyzed]
|
| 946 |
+
- **Data Gaps:** [Any unavailable metrics]
|
| 947 |
+
- **Confidence Level:** [High/Medium/Low]
|
| 948 |
+
|
| 949 |
+
Every finding MUST cite a specific number from the data."""
|
| 950 |
+
|
| 951 |
+
|
| 952 |
@traceable(name="Analyzer")
|
| 953 |
def analyzer_node(state, workflow_id=None, progress_store=None):
|
| 954 |
# Extract workflow_id and progress_store from state (graph invokes with state only)
|
|
|
|
| 969 |
user_keys = state.get("user_api_keys", {})
|
| 970 |
llm = get_llm_client(user_keys) if user_keys else get_llm_client()
|
| 971 |
raw = state["raw_data"]
|
|
|
|
|
|
|
| 972 |
company = state["company_name"]
|
| 973 |
ticker = state.get("ticker", "")
|
| 974 |
|
| 975 |
+
# Extract company profile and detect financial institution
|
| 976 |
+
company_profile = _extract_company_profile(raw)
|
| 977 |
+
sector = company_profile.get("sector", "")
|
| 978 |
+
industry = company_profile.get("industry", "")
|
| 979 |
+
is_financial = _is_financial_institution(sector, industry, ticker)
|
| 980 |
+
|
| 981 |
+
if is_financial:
|
| 982 |
+
_add_activity_log(workflow_id, progress_store, "analyzer",
|
| 983 |
+
f"Financial institution detected - excluding EV/EBITDA")
|
| 984 |
+
|
| 985 |
# Extract and format metrics for better LLM understanding
|
| 986 |
extracted = _extract_key_metrics(raw)
|
| 987 |
+
formatted_data = _format_metrics_for_prompt(extracted, is_financial=is_financial)
|
| 988 |
|
| 989 |
# Generate detailed data report (shown before SWOT)
|
| 990 |
+
data_report = _generate_data_report(raw, is_financial=is_financial)
|
| 991 |
+
|
| 992 |
+
# Detect revision mode: if revision_count > 0 and critique_details exist
|
| 993 |
+
is_revision = state.get("revision_count", 0) > 0
|
| 994 |
+
critique_details = state.get("critique_details", {})
|
| 995 |
+
|
| 996 |
+
if is_revision and critique_details:
|
| 997 |
+
# REVISION MODE: Use enhanced revision prompt with Critic feedback
|
| 998 |
+
current_revision = state.get("revision_count", 0) + 1
|
| 999 |
+
_add_activity_log(workflow_id, progress_store, "analyzer",
|
| 1000 |
+
f"Revision #{current_revision} in progress...")
|
| 1001 |
+
|
| 1002 |
+
prompt = _build_revision_prompt(
|
| 1003 |
+
critique_details=critique_details,
|
| 1004 |
+
company_data=formatted_data,
|
| 1005 |
+
current_draft=state.get("draft_report", ""),
|
| 1006 |
+
is_financial=is_financial
|
| 1007 |
+
)
|
| 1008 |
+
|
| 1009 |
+
# Update progress with revision info
|
| 1010 |
+
if workflow_id and progress_store:
|
| 1011 |
+
progress_store[workflow_id].update({
|
| 1012 |
+
"current_step": "analyzer",
|
| 1013 |
+
"revision_count": current_revision,
|
| 1014 |
+
})
|
| 1015 |
+
else:
|
| 1016 |
+
# INITIAL MODE: Use standard analyzer prompt
|
| 1017 |
+
_add_activity_log(workflow_id, progress_store, "analyzer",
|
| 1018 |
+
f"Calling LLM to generate SWOT analysis...")
|
| 1019 |
+
prompt = _build_analyzer_prompt(company, ticker, formatted_data, is_financial)
|
| 1020 |
+
current_revision = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1021 |
|
|
|
|
| 1022 |
start_time = time.time()
|
| 1023 |
response, provider, error, providers_failed = llm.query(prompt, temperature=0)
|
| 1024 |
elapsed = time.time() - start_time
|
|
|
|
| 1045 |
llm_status[provider_name] = "completed"
|
| 1046 |
|
| 1047 |
if error:
|
| 1048 |
+
if is_revision:
|
| 1049 |
+
# REVISION MODE ERROR: Graceful degradation - keep previous draft
|
| 1050 |
+
_add_activity_log(workflow_id, progress_store, "analyzer", f"Revision failed: {error}")
|
| 1051 |
+
if current_revision == 1:
|
| 1052 |
+
_add_activity_log(workflow_id, progress_store, "analyzer",
|
| 1053 |
+
"Using initial draft (revision unavailable)")
|
| 1054 |
+
else:
|
| 1055 |
+
_add_activity_log(workflow_id, progress_store, "analyzer",
|
| 1056 |
+
f"Using revision #{current_revision - 1} draft (further revision unavailable)")
|
| 1057 |
+
# Don't set error - allow workflow to complete with previous draft
|
| 1058 |
+
state["analyzer_revision_skipped"] = True
|
| 1059 |
+
state["revision_count"] = current_revision
|
| 1060 |
+
else:
|
| 1061 |
+
# INITIAL MODE ERROR: Abort workflow
|
| 1062 |
+
state["draft_report"] = f"Error generating analysis: {error}"
|
| 1063 |
+
state["provider_used"] = None
|
| 1064 |
+
state["error"] = error # Signal workflow to abort
|
| 1065 |
+
_add_activity_log(workflow_id, progress_store, "analyzer", f"LLM error: {error}")
|
| 1066 |
+
_add_activity_log(workflow_id, progress_store, "analyzer",
|
| 1067 |
+
"Workflow aborted - all LLM providers unavailable")
|
| 1068 |
else:
|
| 1069 |
+
if is_revision:
|
| 1070 |
+
# REVISION MODE SUCCESS: Update draft with revision
|
| 1071 |
+
state["draft_report"] = response
|
| 1072 |
+
state["provider_used"] = provider
|
| 1073 |
+
state["analyzer_revision_skipped"] = False
|
| 1074 |
+
state["revision_count"] = current_revision
|
| 1075 |
+
_add_activity_log(workflow_id, progress_store, "analyzer",
|
| 1076 |
+
f"Revision #{current_revision} completed via {provider} ({elapsed:.1f}s)")
|
| 1077 |
+
else:
|
| 1078 |
+
# INITIAL MODE SUCCESS: Combine data report with SWOT analysis
|
| 1079 |
+
swot_section = f"## SWOT Analysis\n\n{response}"
|
| 1080 |
+
full_report = f"{data_report}\n{swot_section}"
|
| 1081 |
+
state["draft_report"] = full_report
|
| 1082 |
+
state["data_report"] = data_report # Store separately for frontend flexibility
|
| 1083 |
+
state["provider_used"] = provider
|
| 1084 |
+
_add_activity_log(workflow_id, progress_store, "analyzer",
|
| 1085 |
+
f"SWOT generated via {provider} ({elapsed:.1f}s)")
|
| 1086 |
+
|
| 1087 |
+
# Update progress with final revision count
|
| 1088 |
+
if workflow_id and progress_store:
|
| 1089 |
+
progress_store[workflow_id].update({
|
| 1090 |
+
"revision_count": state.get("revision_count", 0),
|
| 1091 |
+
"score": state.get("score", 0)
|
| 1092 |
+
})
|
| 1093 |
|
| 1094 |
return state
|
src/nodes/critic.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
from src.llm_client import get_llm_client
|
| 2 |
from langsmith import traceable
|
| 3 |
import json
|
| 4 |
-
import re
|
| 5 |
import time
|
| 6 |
|
| 7 |
|
|
@@ -13,251 +12,207 @@ def _add_activity_log(workflow_id, progress_store, step, message):
|
|
| 13 |
|
| 14 |
|
| 15 |
# ============================================================
|
| 16 |
-
#
|
| 17 |
# ============================================================
|
| 18 |
|
| 19 |
-
|
| 20 |
-
"""
|
| 21 |
-
Check if all 4 SWOT sections are present.
|
| 22 |
-
Returns dict with section presence and score (0-2 points).
|
| 23 |
-
"""
|
| 24 |
-
report_lower = report.lower()
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
|
| 33 |
-
|
| 34 |
-
score = 2 if present_count == 4 else (1 if present_count >= 2 else 0)
|
| 35 |
|
| 36 |
-
|
| 37 |
-
"sections": sections,
|
| 38 |
-
"present_count": present_count,
|
| 39 |
-
"score": score,
|
| 40 |
-
"max_score": 2
|
| 41 |
-
}
|
| 42 |
|
|
|
|
| 43 |
|
| 44 |
-
|
| 45 |
-
"""
|
| 46 |
-
Count specific facts/numbers cited in the report.
|
| 47 |
-
Returns dict with count and score (0-3 points).
|
| 48 |
-
"""
|
| 49 |
-
# Patterns for numeric citations
|
| 50 |
-
patterns = [
|
| 51 |
-
r'\$[\d,]+\.?\d*[BMK]?', # Dollar amounts: $3.6B, $100M
|
| 52 |
-
r'\d+\.?\d*\s*%', # Percentages: 7.26%, 42.59%
|
| 53 |
-
r'\d+\.?\d*x', # Multiples: 0.13x, 2.35x
|
| 54 |
-
r'P/E[:\s]+\d+', # P/E ratios
|
| 55 |
-
r'P/S[:\s]+\d+', # P/S ratios
|
| 56 |
-
r'P/B[:\s]+\d+', # P/B ratios
|
| 57 |
-
r'EV/EBITDA[:\s]+\d+', # EV/EBITDA
|
| 58 |
-
r'PEG[:\s]+\d+', # PEG ratio
|
| 59 |
-
r'VIX[:\s]+\d+', # VIX
|
| 60 |
-
r'Beta[:\s]+\d+', # Beta
|
| 61 |
-
r'\d+/100', # Scores: 67.38/100
|
| 62 |
-
r'CAGR[:\s]*\d+', # CAGR
|
| 63 |
-
r'\d{4}', # Years: 2024, 2025
|
| 64 |
-
]
|
| 65 |
|
| 66 |
-
|
| 67 |
-
for pattern in patterns:
|
| 68 |
-
matches = re.findall(pattern, report, re.IGNORECASE)
|
| 69 |
-
citations.extend(matches)
|
| 70 |
-
|
| 71 |
-
# Deduplicate
|
| 72 |
-
unique_citations = list(set(citations))
|
| 73 |
-
count = len(unique_citations)
|
| 74 |
-
|
| 75 |
-
# Score: 0-2 citations = 0pts, 3-5 = 1pt, 6-10 = 2pts, 10+ = 3pts
|
| 76 |
-
if count >= 10:
|
| 77 |
-
score = 3
|
| 78 |
-
elif count >= 6:
|
| 79 |
-
score = 2
|
| 80 |
-
elif count >= 3:
|
| 81 |
-
score = 1
|
| 82 |
-
else:
|
| 83 |
-
score = 0
|
| 84 |
|
| 85 |
-
|
| 86 |
-
"count": count,
|
| 87 |
-
"examples": unique_citations[:10], # Show first 10
|
| 88 |
-
"score": score,
|
| 89 |
-
"max_score": 3
|
| 90 |
-
}
|
| 91 |
|
|
|
|
| 92 |
|
| 93 |
-
|
| 94 |
-
"""
|
| 95 |
-
Check if report references data from available MCP sources.
|
| 96 |
-
Returns dict with coverage and score (0-2 points).
|
| 97 |
-
"""
|
| 98 |
-
report_lower = report.lower()
|
| 99 |
-
|
| 100 |
-
source_keywords = {
|
| 101 |
-
"fundamentals": ["revenue", "net margin", "debt", "cash flow", "eps", "earnings"],
|
| 102 |
-
"volatility": ["beta", "volatility", "vix", "price swing"],
|
| 103 |
-
"macro": ["gdp", "interest rate", "inflation", "unemployment", "fed"],
|
| 104 |
-
"valuation": ["p/e", "p/s", "p/b", "ev/ebitda", "peg", "valuation", "market cap"],
|
| 105 |
-
"news": ["news", "analyst", "article", "report"],
|
| 106 |
-
"sentiment": ["sentiment", "bullish", "bearish", "reddit", "finnhub"]
|
| 107 |
-
}
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
|
|
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
else:
|
| 124 |
-
score = 0
|
| 125 |
-
|
| 126 |
-
return {
|
| 127 |
-
"sources_referenced": sources_referenced,
|
| 128 |
-
"referenced_count": referenced_count,
|
| 129 |
-
"total_available": len(sources_available),
|
| 130 |
-
"coverage_pct": round(coverage_pct, 1),
|
| 131 |
-
"score": score,
|
| 132 |
-
"max_score": 2
|
| 133 |
-
}
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
"""
|
| 177 |
-
|
| 178 |
-
|
| 179 |
"""
|
| 180 |
-
|
| 181 |
-
citations_check = count_numeric_citations(report)
|
| 182 |
-
sources_check = check_data_sources(report, sources_available)
|
| 183 |
-
balance_check = check_section_balance(report)
|
| 184 |
-
|
| 185 |
-
total_score = (
|
| 186 |
-
sections_check["score"] +
|
| 187 |
-
citations_check["score"] +
|
| 188 |
-
sources_check["score"] +
|
| 189 |
-
balance_check["score"]
|
| 190 |
-
)
|
| 191 |
-
max_score = 8
|
| 192 |
-
|
| 193 |
-
# Convert to 1-10 scale (deterministic portion = 40% weight)
|
| 194 |
-
normalized_score = (total_score / max_score) * 4 # 0-4 points
|
| 195 |
-
|
| 196 |
-
return {
|
| 197 |
-
"sections": sections_check,
|
| 198 |
-
"citations": citations_check,
|
| 199 |
-
"sources": sources_check,
|
| 200 |
-
"balance": balance_check,
|
| 201 |
-
"total_score": total_score,
|
| 202 |
-
"max_score": max_score,
|
| 203 |
-
"normalized_score": round(normalized_score, 2)
|
| 204 |
-
}
|
| 205 |
|
|
|
|
|
|
|
|
|
|
| 206 |
|
| 207 |
-
#
|
| 208 |
-
|
| 209 |
-
|
|
|
|
|
|
|
| 210 |
|
| 211 |
-
|
| 212 |
-
|
|
|
|
|
|
|
| 213 |
|
| 214 |
-
|
| 215 |
-
1. Strategic Alignment (0-2 pts): Does the analysis align with the given strategic focus?
|
| 216 |
-
2. Data Grounding (0-2 pts): Does EVERY claim cite specific numbers from the source data? Penalize any invented facts not in the data.
|
| 217 |
-
3. Logical Consistency (0-2 pts): Are S/O clearly positive and W/T clearly negative? No contradictions?
|
| 218 |
|
| 219 |
-
IMPORTANT: If the analysis mentions facts/numbers NOT present in the source data, score Data Grounding as 0.
|
| 220 |
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
"strategic_alignment": <0-2>,
|
| 225 |
-
"data_grounding": <0-2>,
|
| 226 |
-
"logical_consistency": <0-2>,
|
| 227 |
-
"reasoning": "<string>"
|
| 228 |
-
}
|
| 229 |
-
"""
|
| 230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
-
|
|
|
|
| 233 |
"""
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
SWOT Draft:
|
| 239 |
-
{report}
|
| 240 |
|
| 241 |
-
|
| 242 |
|
| 243 |
-
|
| 244 |
-
{source_data if source_data else "No source data provided"}
|
| 245 |
|
| 246 |
-
{
|
| 247 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
|
| 249 |
response, provider, error, providers_failed = llm.query(prompt, temperature=0)
|
| 250 |
|
| 251 |
if error:
|
|
|
|
| 252 |
return {
|
| 253 |
-
"
|
| 254 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
"provider": provider,
|
| 256 |
"providers_failed": providers_failed,
|
| 257 |
"error": True
|
| 258 |
}
|
| 259 |
|
| 260 |
try:
|
|
|
|
| 261 |
content = response.strip()
|
| 262 |
if "{" in content:
|
| 263 |
json_start = content.index("{")
|
|
@@ -265,40 +220,74 @@ SOURCE DATA (the analysis should be based ONLY on this):
|
|
| 265 |
content = content[json_start:json_end]
|
| 266 |
|
| 267 |
parsed = json.loads(content)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
return {
|
| 269 |
-
"
|
| 270 |
-
"
|
| 271 |
-
"
|
| 272 |
-
"
|
| 273 |
-
"
|
|
|
|
|
|
|
|
|
|
| 274 |
"provider": provider,
|
| 275 |
"providers_failed": providers_failed,
|
| 276 |
"error": False
|
| 277 |
}
|
|
|
|
| 278 |
except (json.JSONDecodeError, ValueError) as e:
|
| 279 |
return {
|
| 280 |
-
"
|
| 281 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
"provider": provider,
|
| 283 |
"providers_failed": providers_failed,
|
| 284 |
"error": True
|
| 285 |
}
|
| 286 |
|
| 287 |
|
| 288 |
-
# ============================================================
|
| 289 |
-
# HYBRID SCORING
|
| 290 |
-
# ============================================================
|
| 291 |
-
|
| 292 |
@traceable(name="Critic")
|
| 293 |
def critic_node(state, workflow_id=None, progress_store=None):
|
| 294 |
"""
|
| 295 |
-
Critic node with
|
| 296 |
-
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
|
| 299 |
-
|
| 300 |
"""
|
| 301 |
-
# Extract workflow_id and progress_store from state
|
| 302 |
if workflow_id is None:
|
| 303 |
workflow_id = state.get("workflow_id")
|
| 304 |
if progress_store is None:
|
|
@@ -307,7 +296,6 @@ def critic_node(state, workflow_id=None, progress_store=None):
|
|
| 307 |
# Skip evaluation if workflow has an error (abort mode)
|
| 308 |
if state.get("error"):
|
| 309 |
_add_activity_log(workflow_id, progress_store, "critic", "Skipping evaluation - workflow aborted")
|
| 310 |
-
# Simplify error message for user display
|
| 311 |
error_msg = state.get("error", "")
|
| 312 |
if "429" in error_msg or "Too Many Requests" in error_msg:
|
| 313 |
user_friendly_msg = "All AI providers are temporarily unavailable due to rate limits. Please wait a moment and try again."
|
|
@@ -320,50 +308,27 @@ def critic_node(state, workflow_id=None, progress_store=None):
|
|
| 320 |
return state
|
| 321 |
|
| 322 |
report = state.get("draft_report", "")
|
| 323 |
-
strategy_focus = state.get("strategy_focus", "Cost Leadership")
|
| 324 |
revision_count = state.get("revision_count", 0)
|
|
|
|
| 325 |
|
| 326 |
# Log evaluation start
|
| 327 |
-
_add_activity_log(workflow_id, progress_store, "critic", f"Evaluating SWOT quality (
|
| 328 |
|
| 329 |
-
#
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
except:
|
| 335 |
-
sources_available = ["fundamentals", "volatility", "macro", "valuation", "news", "sentiment"]
|
| 336 |
-
|
| 337 |
-
# Run deterministic checks
|
| 338 |
-
print("Running deterministic checks...")
|
| 339 |
-
det_results = run_deterministic_checks(report, sources_available)
|
| 340 |
-
det_score = det_results["normalized_score"] # 0-4
|
| 341 |
-
|
| 342 |
-
print(f" Sections: {det_results['sections']['present_count']}/4 ({det_results['sections']['score']}/{det_results['sections']['max_score']} pts)")
|
| 343 |
-
print(f" Citations: {det_results['citations']['count']} found ({det_results['citations']['score']}/{det_results['citations']['max_score']} pts)")
|
| 344 |
-
print(f" Source Coverage: {det_results['sources']['coverage_pct']}% ({det_results['sources']['score']}/{det_results['sources']['max_score']} pts)")
|
| 345 |
-
print(f" Balance: {'Yes' if det_results['balance']['balanced'] else 'No'} ({det_results['balance']['score']}/{det_results['balance']['max_score']} pts)")
|
| 346 |
-
print(f" Deterministic Score: {det_score:.1f}/4")
|
| 347 |
-
|
| 348 |
-
# Run LLM evaluation with source data for grounding check
|
| 349 |
-
print("Running LLM evaluation...")
|
| 350 |
llm = get_llm_client()
|
| 351 |
-
_add_activity_log(workflow_id, progress_store, "critic",
|
| 352 |
start_time = time.time()
|
| 353 |
|
| 354 |
-
|
| 355 |
-
source_data = state.get("raw_data", "")
|
| 356 |
-
# Truncate if too long to avoid token limits
|
| 357 |
-
if len(source_data) > 4000:
|
| 358 |
-
source_data = source_data[:4000] + "\n... [truncated]"
|
| 359 |
-
|
| 360 |
-
llm_results = run_llm_evaluation(report, strategy_focus, llm, source_data)
|
| 361 |
-
llm_score = llm_results["score"] # 1-6
|
| 362 |
elapsed = time.time() - start_time
|
| 363 |
-
provider =
|
| 364 |
|
| 365 |
# Log failed providers
|
| 366 |
-
providers_failed =
|
| 367 |
for pf in providers_failed:
|
| 368 |
_add_activity_log(workflow_id, progress_store, "critic", f"LLM {pf['name']} failed: {pf['error']}")
|
| 369 |
|
|
@@ -372,49 +337,98 @@ def critic_node(state, workflow_id=None, progress_store=None):
|
|
| 372 |
state["llm_providers_failed"] = []
|
| 373 |
state["llm_providers_failed"].extend([pf["name"] for pf in providers_failed])
|
| 374 |
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
else:
|
| 389 |
-
score_msg
|
| 390 |
_add_activity_log(workflow_id, progress_store, "critic", score_msg)
|
| 391 |
|
| 392 |
-
# Build
|
| 393 |
-
|
| 394 |
-
f"
|
| 395 |
-
f"
|
| 396 |
-
f" - Numeric Citations: {det_results['citations']['count']} found",
|
| 397 |
-
f" - Data Source Coverage: {det_results['sources']['coverage_pct']}%",
|
| 398 |
-
f" - Section Balance: {'Balanced' if det_results['balance']['balanced'] else 'Unbalanced'}",
|
| 399 |
"",
|
| 400 |
-
|
| 401 |
-
f" {llm_results['reasoning']}"
|
| 402 |
]
|
| 403 |
|
| 404 |
-
|
| 405 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 406 |
state["critique_details"] = {
|
| 407 |
-
"
|
| 408 |
-
"
|
| 409 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
}
|
| 411 |
|
| 412 |
# Update progress
|
| 413 |
if workflow_id and progress_store:
|
| 414 |
progress_store[workflow_id].update({
|
| 415 |
"current_step": "critic",
|
| 416 |
-
"revision_count":
|
| 417 |
-
"score":
|
| 418 |
})
|
| 419 |
|
| 420 |
return state
|
|
|
|
| 1 |
from src.llm_client import get_llm_client
|
| 2 |
from langsmith import traceable
|
| 3 |
import json
|
|
|
|
| 4 |
import time
|
| 5 |
|
| 6 |
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
# ============================================================
|
| 15 |
+
# LLM-ONLY WEIGHTED RUBRIC EVALUATION
|
| 16 |
# ============================================================
|
| 17 |
|
| 18 |
+
CRITIC_SYSTEM_PROMPT = """You are a SWOT Output Critic and Quality Gatekeeper.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
+
## ROLE
|
| 21 |
+
Act as an independent, impartial evaluator that reviews SWOT analyses. Your function is to:
|
| 22 |
+
1. Verify factual accuracy against provided input data
|
| 23 |
+
2. Assess quality against a weighted rubric
|
| 24 |
+
3. Decide whether the output PASSES or FAILS
|
| 25 |
+
4. Provide actionable feedback if rejected
|
| 26 |
|
| 27 |
+
You are a quality gate, not a collaborator. Be strict.
|
|
|
|
| 28 |
|
| 29 |
+
## VALID METRICS SCHEMA
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
+
**Fundamentals:** revenue, net_income, net_margin_pct, total_assets, total_liabilities, stockholders_equity, operating_margin_pct, total_debt, operating_cash_flow, free_cash_flow
|
| 32 |
|
| 33 |
+
**Valuation:** current_price, market_cap, enterprise_value, trailing_pe, forward_pe, ps_ratio, pb_ratio, trailing_peg, forward_peg, earnings_growth, revenue_growth
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
+
**Volatility:** vix, vxn, beta, historical_volatility, implied_volatility
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
+
**Macro:** gdp_growth, interest_rate, cpi_inflation, unemployment
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
+
**Qualitative:** News (title, date, source, url), Sentiment (title, date, source, url)
|
| 40 |
|
| 41 |
+
## EVALUATION RUBRIC (Weighted)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
+
### 1. Evidence Grounding (25%) — HARD FLOOR: >=7
|
| 44 |
+
- All claims cite specific metrics from input data
|
| 45 |
+
- No fabricated metrics (hallucination check)
|
| 46 |
+
- Field names match schema
|
| 47 |
+
- 9-10: Every claim traceable; 7-8: Nearly all grounded; 5-6: Most grounded, 2-3 unverifiable; 3-4: Multiple unsupported; 1-2: Clear hallucinations
|
| 48 |
+
- **If ANY fabricated metric detected, cap at 4**
|
| 49 |
|
| 50 |
+
### 2. Constraint Compliance (20%) — HARD FLOOR: >=6
|
| 51 |
+
- No buy/sell/hold recommendations
|
| 52 |
+
- Temporal labels accurate (TTM, FY, forward)
|
| 53 |
+
- "DATA NOT PROVIDED" used for missing metrics
|
| 54 |
+
- 9-10: All constraints respected; 7-8: Minor issues; 5-6: One moderate violation; 3-4: Multiple violations; 1-2: Systematic violations
|
| 55 |
|
| 56 |
+
### 3. Specificity & Actionability (20%)
|
| 57 |
+
- Company-specific, not generic templates
|
| 58 |
+
- Quantified findings (not "strong margins" but "31% operating margin")
|
| 59 |
+
- Avoids business cliches
|
| 60 |
+
- 9-10: Every point specific and quantified; 7-8: Mostly specific; 5-6: Mix of specific/generic; 3-4: Mostly generic; 1-2: Template-like
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
+
### 4. Strategic Insight (15%)
|
| 63 |
+
- Synthesis across multiple data sources
|
| 64 |
+
- Prioritization by materiality
|
| 65 |
+
- Goes beyond restating metrics to interpreting implications
|
| 66 |
+
- 9-10: Identifies causal relationships; 7-8: Good synthesis; 5-6: Surface-level; 3-4: Restates metrics; 1-2: No value-add
|
| 67 |
|
| 68 |
+
### 5. Completeness & Balance (10%)
|
| 69 |
+
Required sections:
|
| 70 |
+
- Strengths (Finding, Strategic Implication, Durability)
|
| 71 |
+
- Weaknesses (Finding, Severity, Trend, Remediation Levers)
|
| 72 |
+
- Opportunities (Catalyst, Timing, Execution Requirements)
|
| 73 |
+
- Threats (Risk Factor, Probability, Impact, Mitigation Options)
|
| 74 |
+
- Data Quality Notes
|
| 75 |
+
- 9-10: All present and substantive; 7-8: All present, minor gaps; 5-6: Missing 1 section; 3-4: Multiple missing; 1-2: Major gaps
|
| 76 |
+
|
| 77 |
+
### 6. Clarity & Structure (10%)
|
| 78 |
+
- Clean formatting, logical grouping
|
| 79 |
+
- Easy to scan (not walls of text)
|
| 80 |
+
- No contradictions
|
| 81 |
+
- 9-10: Impeccable; 7-8: Well-structured; 5-6: Readable but dense; 3-4: Hard to follow; 1-2: Poorly organized
|
| 82 |
+
|
| 83 |
+
## PASS CONDITIONS (ALL must be met)
|
| 84 |
+
1. Weighted average >= 7.0
|
| 85 |
+
2. Evidence Grounding >= 7
|
| 86 |
+
3. Constraint Compliance >= 6
|
| 87 |
+
4. No individual criterion below 5
|
| 88 |
+
|
| 89 |
+
## OUTPUT FORMAT (JSON only, no other text)
|
| 90 |
+
|
| 91 |
+
{
|
| 92 |
+
"status": "APPROVED" or "REJECTED",
|
| 93 |
+
"weighted_score": <float>,
|
| 94 |
+
"scores": {
|
| 95 |
+
"evidence_grounding": <1-10>,
|
| 96 |
+
"constraint_compliance": <1-10>,
|
| 97 |
+
"specificity_actionability": <1-10>,
|
| 98 |
+
"strategic_insight": <1-10>,
|
| 99 |
+
"completeness_balance": <1-10>,
|
| 100 |
+
"clarity_structure": <1-10>
|
| 101 |
+
},
|
| 102 |
+
"hard_floor_violations": ["list of violated floors or empty array"],
|
| 103 |
+
"hallucinations_detected": ["list of fabricated metrics or empty array"],
|
| 104 |
+
"key_deficiencies": ["prioritized list, max 5"],
|
| 105 |
+
"strengths_to_preserve": ["elements done well"],
|
| 106 |
+
"actionable_feedback": ["specific rewrite instructions, max 5"]
|
| 107 |
+
}
|
| 108 |
+
"""
|
| 109 |
|
| 110 |
+
# Weights for each criterion
|
| 111 |
+
CRITERION_WEIGHTS = {
|
| 112 |
+
"evidence_grounding": 0.25,
|
| 113 |
+
"constraint_compliance": 0.20,
|
| 114 |
+
"specificity_actionability": 0.20,
|
| 115 |
+
"strategic_insight": 0.15,
|
| 116 |
+
"completeness_balance": 0.10,
|
| 117 |
+
"clarity_structure": 0.10,
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
# Hard floor requirements
|
| 121 |
+
HARD_FLOORS = {
|
| 122 |
+
"evidence_grounding": 7,
|
| 123 |
+
"constraint_compliance": 6,
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
# Minimum score for any criterion
|
| 127 |
+
MIN_INDIVIDUAL_SCORE = 5
|
| 128 |
|
| 129 |
+
|
| 130 |
+
def calculate_weighted_score(scores: dict) -> float:
|
| 131 |
+
"""Calculate weighted average from individual criterion scores."""
|
| 132 |
+
total = 0.0
|
| 133 |
+
for criterion, weight in CRITERION_WEIGHTS.items():
|
| 134 |
+
score = scores.get(criterion, 5) # Default to 5 if missing
|
| 135 |
+
total += score * weight
|
| 136 |
+
return round(total, 2)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def check_pass_conditions(scores: dict, weighted_score: float) -> tuple:
|
| 140 |
"""
|
| 141 |
+
Check if all pass conditions are met.
|
| 142 |
+
Returns (passed: bool, violations: list)
|
| 143 |
"""
|
| 144 |
+
violations = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
+
# Check weighted average threshold
|
| 147 |
+
if weighted_score < 7.0:
|
| 148 |
+
violations.append(f"Weighted score {weighted_score:.1f} < 7.0 threshold")
|
| 149 |
|
| 150 |
+
# Check hard floors
|
| 151 |
+
for criterion, floor in HARD_FLOORS.items():
|
| 152 |
+
score = scores.get(criterion, 0)
|
| 153 |
+
if score < floor:
|
| 154 |
+
violations.append(f"{criterion}: {score} < {floor} (hard floor)")
|
| 155 |
|
| 156 |
+
# Check minimum individual scores
|
| 157 |
+
for criterion, score in scores.items():
|
| 158 |
+
if score < MIN_INDIVIDUAL_SCORE:
|
| 159 |
+
violations.append(f"{criterion}: {score} < {MIN_INDIVIDUAL_SCORE} (minimum)")
|
| 160 |
|
| 161 |
+
return (len(violations) == 0, violations)
|
|
|
|
|
|
|
|
|
|
| 162 |
|
|
|
|
| 163 |
|
| 164 |
+
def run_llm_evaluation(report: str, source_data: str, iteration: int, llm) -> dict:
|
| 165 |
+
"""
|
| 166 |
+
Run LLM-based evaluation with weighted rubric.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
+
Args:
|
| 169 |
+
report: The SWOT output to evaluate
|
| 170 |
+
source_data: The source data the SWOT should be based on
|
| 171 |
+
iteration: Current revision number (1, 2, or 3)
|
| 172 |
+
llm: LLM client instance
|
| 173 |
|
| 174 |
+
Returns:
|
| 175 |
+
Evaluation result dict with scores, status, and feedback
|
| 176 |
"""
|
| 177 |
+
# Truncate source data if too long
|
| 178 |
+
max_source_len = 8000
|
| 179 |
+
if len(source_data) > max_source_len:
|
| 180 |
+
source_data = source_data[:max_source_len] + "\n... [truncated]"
|
|
|
|
|
|
|
| 181 |
|
| 182 |
+
prompt = f"""{CRITIC_SYSTEM_PROMPT}
|
| 183 |
|
| 184 |
+
## INPUTS
|
|
|
|
| 185 |
|
| 186 |
+
**Iteration:** {iteration} of 3
|
| 187 |
+
|
| 188 |
+
**Source Data (the SWOT should be based ONLY on this):**
|
| 189 |
+
{source_data}
|
| 190 |
+
|
| 191 |
+
**SWOT Output to Evaluate:**
|
| 192 |
+
{report}
|
| 193 |
+
|
| 194 |
+
Evaluate strictly and respond with JSON only."""
|
| 195 |
|
| 196 |
response, provider, error, providers_failed = llm.query(prompt, temperature=0)
|
| 197 |
|
| 198 |
if error:
|
| 199 |
+
# Return default middle scores on error
|
| 200 |
return {
|
| 201 |
+
"status": "REJECTED",
|
| 202 |
+
"weighted_score": 5.0,
|
| 203 |
+
"scores": {k: 5 for k in CRITERION_WEIGHTS.keys()},
|
| 204 |
+
"hard_floor_violations": [],
|
| 205 |
+
"hallucinations_detected": [],
|
| 206 |
+
"key_deficiencies": [f"LLM evaluation failed: {error}"],
|
| 207 |
+
"strengths_to_preserve": [],
|
| 208 |
+
"actionable_feedback": ["Unable to evaluate - please retry"],
|
| 209 |
"provider": provider,
|
| 210 |
"providers_failed": providers_failed,
|
| 211 |
"error": True
|
| 212 |
}
|
| 213 |
|
| 214 |
try:
|
| 215 |
+
# Parse JSON from response
|
| 216 |
content = response.strip()
|
| 217 |
if "{" in content:
|
| 218 |
json_start = content.index("{")
|
|
|
|
| 220 |
content = content[json_start:json_end]
|
| 221 |
|
| 222 |
parsed = json.loads(content)
|
| 223 |
+
|
| 224 |
+
# Extract and validate scores
|
| 225 |
+
scores = parsed.get("scores", {})
|
| 226 |
+
for criterion in CRITERION_WEIGHTS.keys():
|
| 227 |
+
if criterion not in scores:
|
| 228 |
+
scores[criterion] = 5 # Default
|
| 229 |
+
else:
|
| 230 |
+
scores[criterion] = min(max(int(scores[criterion]), 1), 10) # Clamp 1-10
|
| 231 |
+
|
| 232 |
+
# Calculate weighted score
|
| 233 |
+
weighted_score = calculate_weighted_score(scores)
|
| 234 |
+
|
| 235 |
+
# Check pass conditions
|
| 236 |
+
passed, violations = check_pass_conditions(scores, weighted_score)
|
| 237 |
+
|
| 238 |
+
# Determine status
|
| 239 |
+
status = "APPROVED" if passed else "REJECTED"
|
| 240 |
+
|
| 241 |
+
# Override status if LLM said APPROVED but conditions not met
|
| 242 |
+
if parsed.get("status") == "APPROVED" and not passed:
|
| 243 |
+
status = "REJECTED"
|
| 244 |
+
|
| 245 |
return {
|
| 246 |
+
"status": status,
|
| 247 |
+
"weighted_score": weighted_score,
|
| 248 |
+
"scores": scores,
|
| 249 |
+
"hard_floor_violations": parsed.get("hard_floor_violations", violations),
|
| 250 |
+
"hallucinations_detected": parsed.get("hallucinations_detected", []),
|
| 251 |
+
"key_deficiencies": parsed.get("key_deficiencies", [])[:5],
|
| 252 |
+
"strengths_to_preserve": parsed.get("strengths_to_preserve", []),
|
| 253 |
+
"actionable_feedback": parsed.get("actionable_feedback", [])[:5],
|
| 254 |
"provider": provider,
|
| 255 |
"providers_failed": providers_failed,
|
| 256 |
"error": False
|
| 257 |
}
|
| 258 |
+
|
| 259 |
except (json.JSONDecodeError, ValueError) as e:
|
| 260 |
return {
|
| 261 |
+
"status": "REJECTED",
|
| 262 |
+
"weighted_score": 5.0,
|
| 263 |
+
"scores": {k: 5 for k in CRITERION_WEIGHTS.keys()},
|
| 264 |
+
"hard_floor_violations": [],
|
| 265 |
+
"hallucinations_detected": [],
|
| 266 |
+
"key_deficiencies": [f"JSON parsing failed: {str(e)[:100]}"],
|
| 267 |
+
"strengths_to_preserve": [],
|
| 268 |
+
"actionable_feedback": ["Evaluation response was malformed - please retry"],
|
| 269 |
"provider": provider,
|
| 270 |
"providers_failed": providers_failed,
|
| 271 |
"error": True
|
| 272 |
}
|
| 273 |
|
| 274 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
@traceable(name="Critic")
|
| 276 |
def critic_node(state, workflow_id=None, progress_store=None):
|
| 277 |
"""
|
| 278 |
+
Critic node with LLM-only weighted rubric evaluation.
|
| 279 |
+
|
| 280 |
+
Evaluates SWOT output on 6 criteria with weighted scoring:
|
| 281 |
+
- Evidence Grounding (25%) - hard floor >= 7
|
| 282 |
+
- Constraint Compliance (20%) - hard floor >= 6
|
| 283 |
+
- Specificity & Actionability (20%)
|
| 284 |
+
- Strategic Insight (15%)
|
| 285 |
+
- Completeness & Balance (10%)
|
| 286 |
+
- Clarity & Structure (10%)
|
| 287 |
|
| 288 |
+
Pass requires: weighted avg >= 7.0, hard floors met, no score < 5
|
| 289 |
"""
|
| 290 |
+
# Extract workflow_id and progress_store from state
|
| 291 |
if workflow_id is None:
|
| 292 |
workflow_id = state.get("workflow_id")
|
| 293 |
if progress_store is None:
|
|
|
|
| 296 |
# Skip evaluation if workflow has an error (abort mode)
|
| 297 |
if state.get("error"):
|
| 298 |
_add_activity_log(workflow_id, progress_store, "critic", "Skipping evaluation - workflow aborted")
|
|
|
|
| 299 |
error_msg = state.get("error", "")
|
| 300 |
if "429" in error_msg or "Too Many Requests" in error_msg:
|
| 301 |
user_friendly_msg = "All AI providers are temporarily unavailable due to rate limits. Please wait a moment and try again."
|
|
|
|
| 308 |
return state
|
| 309 |
|
| 310 |
report = state.get("draft_report", "")
|
|
|
|
| 311 |
revision_count = state.get("revision_count", 0)
|
| 312 |
+
iteration = revision_count + 1 # 1-indexed for display
|
| 313 |
|
| 314 |
# Log evaluation start
|
| 315 |
+
_add_activity_log(workflow_id, progress_store, "critic", f"Evaluating SWOT quality (iteration {iteration}/3)...")
|
| 316 |
|
| 317 |
+
# Get source data for grounding verification
|
| 318 |
+
source_data = state.get("raw_data", "")
|
| 319 |
+
|
| 320 |
+
# Run LLM evaluation
|
| 321 |
+
print(f"Running LLM evaluation (iteration {iteration})...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
llm = get_llm_client()
|
| 323 |
+
_add_activity_log(workflow_id, progress_store, "critic", "Calling LLM for quality evaluation...")
|
| 324 |
start_time = time.time()
|
| 325 |
|
| 326 |
+
result = run_llm_evaluation(report, source_data, iteration, llm)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
elapsed = time.time() - start_time
|
| 328 |
+
provider = result.get('provider', 'unknown')
|
| 329 |
|
| 330 |
# Log failed providers
|
| 331 |
+
providers_failed = result.get('providers_failed', [])
|
| 332 |
for pf in providers_failed:
|
| 333 |
_add_activity_log(workflow_id, progress_store, "critic", f"LLM {pf['name']} failed: {pf['error']}")
|
| 334 |
|
|
|
|
| 337 |
state["llm_providers_failed"] = []
|
| 338 |
state["llm_providers_failed"].extend([pf["name"] for pf in providers_failed])
|
| 339 |
|
| 340 |
+
# Extract results
|
| 341 |
+
status = result["status"]
|
| 342 |
+
weighted_score = result["weighted_score"]
|
| 343 |
+
scores = result["scores"]
|
| 344 |
+
|
| 345 |
+
# Handle ESCALATE if max iterations reached
|
| 346 |
+
if iteration > 3 and status == "REJECTED":
|
| 347 |
+
status = "ESCALATE"
|
| 348 |
+
_add_activity_log(workflow_id, progress_store, "critic", "Max iterations reached - escalating for human review")
|
| 349 |
+
|
| 350 |
+
# Log scores
|
| 351 |
+
print(f" Status: {status}")
|
| 352 |
+
print(f" Weighted Score: {weighted_score:.1f}/10")
|
| 353 |
+
for criterion, score in scores.items():
|
| 354 |
+
floor = HARD_FLOORS.get(criterion, "-")
|
| 355 |
+
print(f" {criterion}: {score}/10 (floor: {floor})")
|
| 356 |
+
|
| 357 |
+
_add_activity_log(workflow_id, progress_store, "critic", f"Evaluation via {provider} ({elapsed:.1f}s)")
|
| 358 |
+
|
| 359 |
+
# Log status and score
|
| 360 |
+
if status == "APPROVED":
|
| 361 |
+
score_msg = f"APPROVED - Score: {weighted_score:.1f}/10"
|
| 362 |
+
elif status == "ESCALATE":
|
| 363 |
+
score_msg = f"ESCALATE - Score: {weighted_score:.1f}/10 (max iterations)"
|
| 364 |
else:
|
| 365 |
+
score_msg = f"REJECTED - Score: {weighted_score:.1f}/10 - needs revision"
|
| 366 |
_add_activity_log(workflow_id, progress_store, "critic", score_msg)
|
| 367 |
|
| 368 |
+
# Build critique message
|
| 369 |
+
critique_lines = [
|
| 370 |
+
f"Status: {status}",
|
| 371 |
+
f"Weighted Score: {weighted_score:.1f}/10",
|
|
|
|
|
|
|
|
|
|
| 372 |
"",
|
| 373 |
+
"Criterion Scores:",
|
|
|
|
| 374 |
]
|
| 375 |
|
| 376 |
+
for criterion, score in scores.items():
|
| 377 |
+
weight = int(CRITERION_WEIGHTS[criterion] * 100)
|
| 378 |
+
floor = HARD_FLOORS.get(criterion)
|
| 379 |
+
floor_str = f" (floor: {floor})" if floor else ""
|
| 380 |
+
passed = "PASS" if score >= (floor or MIN_INDIVIDUAL_SCORE) else "FAIL"
|
| 381 |
+
critique_lines.append(f" {criterion}: {score}/10 [{weight}%] {floor_str} - {passed}")
|
| 382 |
+
|
| 383 |
+
if result.get("hard_floor_violations"):
|
| 384 |
+
critique_lines.append("")
|
| 385 |
+
critique_lines.append("Hard Floor Violations:")
|
| 386 |
+
for v in result["hard_floor_violations"]:
|
| 387 |
+
critique_lines.append(f" - {v}")
|
| 388 |
+
|
| 389 |
+
if result.get("hallucinations_detected"):
|
| 390 |
+
critique_lines.append("")
|
| 391 |
+
critique_lines.append("Hallucinations Detected:")
|
| 392 |
+
for h in result["hallucinations_detected"]:
|
| 393 |
+
critique_lines.append(f" - {h}")
|
| 394 |
+
|
| 395 |
+
if result.get("key_deficiencies"):
|
| 396 |
+
critique_lines.append("")
|
| 397 |
+
critique_lines.append("Key Deficiencies:")
|
| 398 |
+
for i, d in enumerate(result["key_deficiencies"], 1):
|
| 399 |
+
critique_lines.append(f" {i}. {d}")
|
| 400 |
+
|
| 401 |
+
if result.get("actionable_feedback"):
|
| 402 |
+
critique_lines.append("")
|
| 403 |
+
critique_lines.append("Actionable Feedback:")
|
| 404 |
+
for i, f in enumerate(result["actionable_feedback"], 1):
|
| 405 |
+
critique_lines.append(f" {i}. {f}")
|
| 406 |
+
|
| 407 |
+
if result.get("strengths_to_preserve"):
|
| 408 |
+
critique_lines.append("")
|
| 409 |
+
critique_lines.append("Strengths to Preserve:")
|
| 410 |
+
for s in result["strengths_to_preserve"]:
|
| 411 |
+
critique_lines.append(f" - {s}")
|
| 412 |
+
|
| 413 |
+
state["critique"] = "\n".join(critique_lines)
|
| 414 |
+
state["score"] = weighted_score
|
| 415 |
state["critique_details"] = {
|
| 416 |
+
"status": status,
|
| 417 |
+
"weighted_score": weighted_score,
|
| 418 |
+
"scores": scores,
|
| 419 |
+
"hard_floor_violations": result.get("hard_floor_violations", []),
|
| 420 |
+
"hallucinations_detected": result.get("hallucinations_detected", []),
|
| 421 |
+
"key_deficiencies": result.get("key_deficiencies", []),
|
| 422 |
+
"strengths_to_preserve": result.get("strengths_to_preserve", []),
|
| 423 |
+
"actionable_feedback": result.get("actionable_feedback", []),
|
| 424 |
}
|
| 425 |
|
| 426 |
# Update progress
|
| 427 |
if workflow_id and progress_store:
|
| 428 |
progress_store[workflow_id].update({
|
| 429 |
"current_step": "critic",
|
| 430 |
+
"revision_count": revision_count,
|
| 431 |
+
"score": weighted_score
|
| 432 |
})
|
| 433 |
|
| 434 |
return state
|
src/nodes/editor.py
DELETED
|
@@ -1,138 +0,0 @@
|
|
| 1 |
-
from src.llm_client import get_llm_client
|
| 2 |
-
from langsmith import traceable
|
| 3 |
-
import time
|
| 4 |
-
import json
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
def _add_activity_log(workflow_id, progress_store, step, message):
|
| 8 |
-
"""Helper to add activity log entry."""
|
| 9 |
-
if workflow_id and progress_store:
|
| 10 |
-
from src.services.workflow_store import add_activity_log
|
| 11 |
-
add_activity_log(workflow_id, step, message)
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
@traceable(name="Editor")
def editor_node(state, workflow_id=None, progress_store=None):
    """
    Editor node that revises the SWOT draft based on critique feedback.
    Increments the revision count and returns the improved draft.

    Args:
        state: Mutable workflow state dict. Reads 'revision_count', 'error',
            'user_api_keys', 'strategy_focus', 'raw_data', 'draft_report',
            'critique', 'score'; writes 'draft_report', 'provider_used',
            'editor_skipped', 'revision_count', 'llm_providers_failed'.
        workflow_id: Optional tracking id; falls back to state['workflow_id'].
        progress_store: Optional progress dict keyed by workflow_id; falls
            back to state['progress_store'].

    Returns:
        The same state dict, mutated in place.
    """
    # Extract workflow_id and progress_store from state (graph invokes with state only)
    if workflow_id is None:
        workflow_id = state.get("workflow_id")
    if progress_store is None:
        progress_store = state.get("progress_store")

    # 1-indexed number of the revision this invocation will produce.
    current_revision = state.get("revision_count", 0) + 1

    # Update progress if tracking is enabled
    if workflow_id and progress_store:
        progress_store[workflow_id].update({
            "current_step": "editor",
            "revision_count": state.get("revision_count", 0),
            "score": state.get("score", 0)
        })

    # Skip if workflow already has an error (abort mode) — still bump the
    # revision count so downstream loop conditions terminate.
    if state.get("error"):
        _add_activity_log(workflow_id, progress_store, "editor", f"Skipping revision - workflow aborted")
        state["revision_count"] = current_revision
        return state

    # Log revision start
    _add_activity_log(workflow_id, progress_store, "editor", f"Revision #{current_revision} in progress...")

    # Use user-provided API keys if available
    user_keys = state.get("user_api_keys", {})
    llm = get_llm_client(user_keys) if user_keys else get_llm_client()
    strategy_name = state.get("strategy_focus", "Cost Leadership")

    # Get source data for grounding - editor must use ONLY this data
    source_data = state.get("raw_data", "")
    # Truncate if too long to avoid token limits
    if len(source_data) > 4000:
        source_data = source_data[:4000] + "\n... [truncated]"

    # Prepare the revision prompt with source data for grounding
    prompt = f"""
You are revising a SWOT analysis based on critique feedback. Keep it CONCISE.

CRITICAL GROUNDING RULES:
1. You may ONLY use facts and numbers from the SOURCE DATA provided below.
2. DO NOT invent, assume, or fabricate any information not in the source data.
3. Every claim must cite specific numbers from the source data.
4. If the critique asks for information not in the source data, state "Data not available".

SOURCE DATA (use ONLY this for facts and numbers):
{source_data}

CURRENT DRAFT:
{state['draft_report']}

CRITIQUE:
{state['critique']}

Strategic Focus: {strategy_name}

REVISION INSTRUCTIONS:
1. Address the critique points using ONLY data from SOURCE DATA above
2. Ensure all 4 SWOT sections are present and complete
3. Every bullet point must cite specific metrics from the source data
4. Make sure strengths/opportunities are positive, weaknesses/threats are negative
5. Align analysis with {strategy_name} strategic focus
6. If data is missing for a point, remove that point rather than inventing data
7. Keep each bullet point under 25 words - single sentence only
8. Maximum 5 bullet points per category
9. Remove any verbose explanations or context paragraphs

Return only the improved SWOT analysis. Keep it brief and impactful.
"""

    # Get the revised draft from LLM (temperature=0 for deterministic revision)
    start_time = time.time()
    response, provider, error, providers_failed = llm.query(prompt, temperature=0)
    elapsed = time.time() - start_time

    # Log failed providers
    for pf in providers_failed:
        _add_activity_log(workflow_id, progress_store, "editor", f"LLM {pf['name']} failed: {pf['error']}")

    # Track failed providers in state for frontend
    if "llm_providers_failed" not in state:
        state["llm_providers_failed"] = []
    state["llm_providers_failed"].extend([pf["name"] for pf in providers_failed])

    if error:
        print(f"Editor LLM error: {error}")
        _add_activity_log(workflow_id, progress_store, "editor", f"Revision failed: {error}")

        # Graceful degradation based on revision count
        if current_revision == 1:
            # First revision failed - use Analyzer's original draft
            _add_activity_log(workflow_id, progress_store, "editor", "Using initial draft from Analyzer (revision unavailable)")
            # Don't set error - allow workflow to continue with original draft
            # draft_report already contains Analyzer's output
            state["editor_skipped"] = True
        else:
            # Revision > 1 failed - use the last successful revision
            _add_activity_log(workflow_id, progress_store, "editor", f"Using revision #{current_revision - 1} draft (further revision unavailable)")
            # Don't set error - allow workflow to complete with previous draft
            state["editor_skipped"] = True
    else:
        state["draft_report"] = response
        state["provider_used"] = provider
        state["editor_skipped"] = False
        _add_activity_log(workflow_id, progress_store, "editor", f"Revision #{current_revision} completed via {provider} ({elapsed:.1f}s)")

    # Increment revision count
    state["revision_count"] = current_revision

    # Update progress with new revision count
    if workflow_id and progress_store:
        progress_store[workflow_id].update({
            "current_step": "editor",
            "revision_count": state["revision_count"],
            "score": state.get("score", 0)
        })

    return state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/utils/analysis_cache.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
"""
|
| 2 |
Analysis Cache - Supabase PostgreSQL caching for final SWOT analysis results.
|
| 3 |
|
| 4 |
-
Caches
|
| 5 |
Uses schema: asa.analysis_cache
|
| 6 |
"""
|
| 7 |
|
|
|
|
| 1 |
"""
|
| 2 |
Analysis Cache - Supabase PostgreSQL caching for final SWOT analysis results.
|
| 3 |
|
| 4 |
+
Caches final SWOT analysis output with 24h TTL to avoid re-running the full pipeline.
|
| 5 |
Uses schema: asa.analysis_cache
|
| 6 |
"""
|
| 7 |
|
src/utils/conditions.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from typing import Literal
|
| 2 |
|
|
|
|
| 3 |
def should_continue(state) -> Literal["exit", "retry"]:
|
| 4 |
"""
|
| 5 |
Conditional routing function that determines whether to continue
|
|
@@ -7,19 +8,19 @@ def should_continue(state) -> Literal["exit", "retry"]:
|
|
| 7 |
|
| 8 |
Exit conditions:
|
| 9 |
- Error set (LLM providers failed - abort immediately)
|
| 10 |
-
-
|
| 11 |
- Score >= 7 (good quality)
|
| 12 |
- Revision count > 3 (max attempts reached)
|
| 13 |
|
| 14 |
Continue conditions:
|
| 15 |
-
- No error AND No
|
| 16 |
"""
|
| 17 |
# Abort immediately if error is set (critical failure)
|
| 18 |
if state.get("error"):
|
| 19 |
return "exit"
|
| 20 |
|
| 21 |
-
# Exit gracefully if
|
| 22 |
-
if state.get("
|
| 23 |
return "exit"
|
| 24 |
|
| 25 |
current_score = state.get("score", 0)
|
|
|
|
| 1 |
from typing import Literal
|
| 2 |
|
| 3 |
+
|
| 4 |
def should_continue(state) -> Literal["exit", "retry"]:
|
| 5 |
"""
|
| 6 |
Conditional routing function that determines whether to continue
|
|
|
|
| 8 |
|
| 9 |
Exit conditions:
|
| 10 |
- Error set (LLM providers failed - abort immediately)
|
| 11 |
+
- Analyzer revision skipped (LLM failed but using fallback draft - exit gracefully)
|
| 12 |
- Score >= 7 (good quality)
|
| 13 |
- Revision count > 3 (max attempts reached)
|
| 14 |
|
| 15 |
Continue conditions:
|
| 16 |
+
- No error AND No revision skip AND Score < 7 AND Revisions <= 3
|
| 17 |
"""
|
| 18 |
# Abort immediately if error is set (critical failure)
|
| 19 |
if state.get("error"):
|
| 20 |
return "exit"
|
| 21 |
|
| 22 |
+
# Exit gracefully if analyzer revision was skipped (using fallback draft)
|
| 23 |
+
if state.get("analyzer_revision_skipped"):
|
| 24 |
return "exit"
|
| 25 |
|
| 26 |
current_score = state.get("score", 0)
|
src/workflow/graph.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""
|
| 2 |
LangGraph workflow definition for self-correcting SWOT analysis.
|
| 3 |
-
Defines the cyclic workflow: Researcher -> Analyzer -> Critic ->
|
| 4 |
"""
|
| 5 |
|
| 6 |
from langgraph.graph import StateGraph
|
|
@@ -10,17 +10,15 @@ from src.state import AgentState
|
|
| 10 |
from src.nodes.researcher import researcher_node
|
| 11 |
from src.nodes.analyzer import analyzer_node
|
| 12 |
from src.nodes.critic import critic_node
|
| 13 |
-
from src.nodes.editor import editor_node
|
| 14 |
from src.utils.conditions import should_continue
|
| 15 |
|
| 16 |
# Create the cyclic workflow
|
| 17 |
workflow = StateGraph(AgentState)
|
| 18 |
|
| 19 |
-
# Add
|
| 20 |
workflow.add_node("Researcher", RunnableLambda(researcher_node))
|
| 21 |
workflow.add_node("Analyzer", RunnableLambda(analyzer_node))
|
| 22 |
workflow.add_node("Critic", RunnableLambda(critic_node))
|
| 23 |
-
workflow.add_node("Editor", RunnableLambda(editor_node))
|
| 24 |
|
| 25 |
# Define the workflow edges
|
| 26 |
workflow.set_entry_point("Researcher")
|
|
@@ -28,18 +26,16 @@ workflow.add_edge("Researcher", "Analyzer")
|
|
| 28 |
workflow.add_edge("Analyzer", "Critic")
|
| 29 |
|
| 30 |
# Add conditional edges for the self-correcting loop
|
|
|
|
| 31 |
workflow.add_conditional_edges(
|
| 32 |
"Critic",
|
| 33 |
should_continue,
|
| 34 |
{
|
| 35 |
"exit": "__end__",
|
| 36 |
-
"retry": "
|
| 37 |
}
|
| 38 |
)
|
| 39 |
|
| 40 |
-
# Complete the loop: Editor -> Critic
|
| 41 |
-
workflow.add_edge("Editor", "Critic")
|
| 42 |
-
|
| 43 |
# Set the finish point
|
| 44 |
workflow.set_finish_point("Critic")
|
| 45 |
|
|
@@ -48,9 +44,9 @@ workflow.config = {
|
|
| 48 |
"project_name": "AI-strategy-agent-cyclic",
|
| 49 |
"tags": ["self-correcting", "quality-loop", "swot-analysis"],
|
| 50 |
"metadata": {
|
| 51 |
-
"version": "
|
| 52 |
"environment": "development",
|
| 53 |
-
"workflow_type": "researcher-analyzer-critic
|
| 54 |
}
|
| 55 |
}
|
| 56 |
|
|
|
|
| 1 |
"""
|
| 2 |
LangGraph workflow definition for self-correcting SWOT analysis.
|
| 3 |
+
Defines the cyclic workflow: Researcher -> Analyzer -> Critic -> Analyzer (revision loop)
|
| 4 |
"""
|
| 5 |
|
| 6 |
from langgraph.graph import StateGraph
|
|
|
|
| 10 |
from src.nodes.researcher import researcher_node
|
| 11 |
from src.nodes.analyzer import analyzer_node
|
| 12 |
from src.nodes.critic import critic_node
|
|
|
|
| 13 |
from src.utils.conditions import should_continue
|
| 14 |
|
| 15 |
# Create the cyclic workflow
|
| 16 |
workflow = StateGraph(AgentState)
|
| 17 |
|
| 18 |
+
# Add nodes to the workflow (Analyzer handles both initial generation and revisions)
|
| 19 |
workflow.add_node("Researcher", RunnableLambda(researcher_node))
|
| 20 |
workflow.add_node("Analyzer", RunnableLambda(analyzer_node))
|
| 21 |
workflow.add_node("Critic", RunnableLambda(critic_node))
|
|
|
|
| 22 |
|
| 23 |
# Define the workflow edges
|
| 24 |
workflow.set_entry_point("Researcher")
|
|
|
|
| 26 |
workflow.add_edge("Analyzer", "Critic")
|
| 27 |
|
| 28 |
# Add conditional edges for the self-correcting loop
|
| 29 |
+
# Analyzer now handles revisions directly (no separate Editor node)
|
| 30 |
workflow.add_conditional_edges(
|
| 31 |
"Critic",
|
| 32 |
should_continue,
|
| 33 |
{
|
| 34 |
"exit": "__end__",
|
| 35 |
+
"retry": "Analyzer" # Route back to Analyzer for revisions
|
| 36 |
}
|
| 37 |
)
|
| 38 |
|
|
|
|
|
|
|
|
|
|
| 39 |
# Set the finish point
|
| 40 |
workflow.set_finish_point("Critic")
|
| 41 |
|
|
|
|
| 44 |
"project_name": "AI-strategy-agent-cyclic",
|
| 45 |
"tags": ["self-correcting", "quality-loop", "swot-analysis"],
|
| 46 |
"metadata": {
|
| 47 |
+
"version": "2.0",
|
| 48 |
"environment": "development",
|
| 49 |
+
"workflow_type": "researcher-analyzer-critic"
|
| 50 |
}
|
| 51 |
}
|
| 52 |
|
tests/test_self_correcting_loop.py
CHANGED
|
@@ -16,7 +16,7 @@ def test_analyzer_failure():
|
|
| 16 |
|
| 17 |
# Monkey patch the analyzer node to force poor quality
|
| 18 |
def force_poor_analyzer(state):
|
| 19 |
-
"""Force a poor quality draft to trigger
|
| 20 |
state["draft_report"] = "Bad analysis. No details. Incomplete."
|
| 21 |
print("⚠️ FORCED POOR QUALITY: Overriding with very weak content")
|
| 22 |
return state
|
|
@@ -40,10 +40,10 @@ def test_critic_failure():
|
|
| 40 |
|
| 41 |
# Monkey patch the critic to force a low score
|
| 42 |
def force_low_score_critic(state):
|
| 43 |
-
"""Force a low score to trigger
|
| 44 |
state["score"] = 3 # Low score to force revision
|
| 45 |
state["critique"] = "Forced low score for testing self-correction loop"
|
| 46 |
-
print("⚠️ FORCED LOW SCORE: 3/10 to trigger
|
| 47 |
return state
|
| 48 |
|
| 49 |
# Temporarily replace critic in the workflow
|
|
|
|
| 16 |
|
| 17 |
# Monkey patch the analyzer node to force poor quality
|
| 18 |
def force_poor_analyzer(state):
    """Force a poor quality draft to trigger revision loop"""
    # Replace whatever the real analyzer produced with a deliberately weak draft.
    weak_draft = "Bad analysis. No details. Incomplete."
    state.update(draft_report=weak_draft)
    print("⚠️ FORCED POOR QUALITY: Overriding with very weak content")
    return state
|
|
|
|
| 40 |
|
| 41 |
# Monkey patch the critic to force a low score
|
| 42 |
def force_low_score_critic(state):
    """Force a low score to trigger revision loop"""
    # Pin the score well below the pass threshold so the loop must retry.
    state.update({
        "score": 3,
        "critique": "Forced low score for testing self-correction loop",
    })
    print("⚠️ FORCED LOW SCORE: 3/10 to trigger revision loop")
    return state
|
| 48 |
|
| 49 |
# Temporarily replace critic in the workflow
|