vn6295337 Claude Opus 4.5 committed on
Commit
53fe655
·
1 Parent(s): e6152f5

Refactor: Analyzer handles revisions directly, remove Editor node

Browse files

Architecture change: Researcher → Analyzer → Critic → Analyzer (revision loop)

- Add conditional focus area blocks to Analyzer based on failed rubric criteria
- Add _build_revision_prompt() for structured Critic feedback integration
- Analyzer detects revision mode via revision_count > 0 and critique_details
- Route "retry" from Critic back to Analyzer instead of Editor
- Rename editor_skipped → analyzer_revision_skipped in conditions.py
- Delete redundant src/nodes/editor.py
- Update Critic to LLM-only weighted rubric evaluation
- Update workflow version to 2.0

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

src/api/schemas.py CHANGED
@@ -30,7 +30,7 @@ class WorkflowStartResponse(BaseModel):
30
  class WorkflowStatus(BaseModel):
31
  """Workflow status model."""
32
  status: str # 'running', 'completed', 'error'
33
- current_step: str # 'starting', 'Researcher', 'Analyzer', 'Critic', 'Editor'
34
  revision_count: int
35
  score: int
36
 
 
30
  class WorkflowStatus(BaseModel):
31
  """Workflow status model."""
32
  status: str # 'running', 'completed', 'error'
33
+ current_step: str # 'starting', 'Researcher', 'Analyzer', 'Critic'
34
  revision_count: int
35
  score: int
36
 
src/graph_cyclic.py CHANGED
@@ -4,18 +4,16 @@ from src.state import AgentState
4
  from src.nodes.researcher import researcher_node
5
  from src.nodes.analyzer import analyzer_node
6
  from src.nodes.critic import critic_node
7
- from src.nodes.editor import editor_node
8
  from src.utils.conditions import should_continue
9
  from langsmith import traceable
10
 
11
  # Create the cyclic workflow
12
  workflow = StateGraph(AgentState)
13
 
14
- # Add all nodes to the workflow
15
  workflow.add_node("Researcher", RunnableLambda(researcher_node))
16
  workflow.add_node("Analyzer", RunnableLambda(analyzer_node))
17
  workflow.add_node("Critic", RunnableLambda(critic_node))
18
- workflow.add_node("Editor", RunnableLambda(editor_node))
19
 
20
  # Define the workflow edges
21
  workflow.set_entry_point("Researcher")
@@ -23,18 +21,16 @@ workflow.add_edge("Researcher", "Analyzer")
23
  workflow.add_edge("Analyzer", "Critic")
24
 
25
  # Add conditional edges for the self-correcting loop
 
26
  workflow.add_conditional_edges(
27
- "Critic",
28
- should_continue,
29
  {
30
  "exit": "__end__",
31
- "retry": "Editor"
32
  }
33
  )
34
 
35
- # Complete the loop: Editor → Critic
36
- workflow.add_edge("Editor", "Critic")
37
-
38
  # Set the finish point
39
  workflow.set_finish_point("Critic")
40
 
@@ -43,9 +39,9 @@ workflow.config = {
43
  "project_name": "AI-strategy-agent-cyclic",
44
  "tags": ["self-correcting", "quality-loop", "swot-analysis"],
45
  "metadata": {
46
- "version": "1.0",
47
  "environment": "development",
48
- "workflow_type": "researcher-analyzer-critic-editor"
49
  }
50
  }
51
 
@@ -91,7 +87,7 @@ if __name__ == "__main__":
91
  target_company = "Tesla"
92
 
93
  print(f"🔍 Running Self-Correcting SWOT Analysis for {target_company}...")
94
- print("📝 This workflow includes: Researcher → Analyzer → Critic → Editor (loop)")
95
  print("🎯 Loop continues until score ≥ 7 or 3 revisions attempted\n")
96
 
97
  # Execute the workflow
@@ -115,7 +111,7 @@ if __name__ == "__main__":
115
  print(f" - Initial Quality: Improved from unknown to {final_score}/10")
116
  print(f" - Revisions Made: {final_revision_count}")
117
  print(f" - Final Report Length: {len(result['draft_report'])} characters")
118
- print(f" - Workflow: Researcher → Analyzer → Critic → Editor (loop)")
119
  print(f" - Tracing: Enhanced LangSmith traces available")
120
 
121
  # Quality assessment
 
4
  from src.nodes.researcher import researcher_node
5
  from src.nodes.analyzer import analyzer_node
6
  from src.nodes.critic import critic_node
 
7
  from src.utils.conditions import should_continue
8
  from langsmith import traceable
9
 
10
  # Create the cyclic workflow
11
  workflow = StateGraph(AgentState)
12
 
13
+ # Add nodes to the workflow (Analyzer handles both initial generation and revisions)
14
  workflow.add_node("Researcher", RunnableLambda(researcher_node))
15
  workflow.add_node("Analyzer", RunnableLambda(analyzer_node))
16
  workflow.add_node("Critic", RunnableLambda(critic_node))
 
17
 
18
  # Define the workflow edges
19
  workflow.set_entry_point("Researcher")
 
21
  workflow.add_edge("Analyzer", "Critic")
22
 
23
  # Add conditional edges for the self-correcting loop
24
+ # Analyzer now handles revisions directly (no separate Editor node)
25
  workflow.add_conditional_edges(
26
+ "Critic",
27
+ should_continue,
28
  {
29
  "exit": "__end__",
30
+ "retry": "Analyzer" # Route back to Analyzer for revisions
31
  }
32
  )
33
 
 
 
 
34
  # Set the finish point
35
  workflow.set_finish_point("Critic")
36
 
 
39
  "project_name": "AI-strategy-agent-cyclic",
40
  "tags": ["self-correcting", "quality-loop", "swot-analysis"],
41
  "metadata": {
42
+ "version": "2.0",
43
  "environment": "development",
44
+ "workflow_type": "researcher-analyzer-critic"
45
  }
46
  }
47
 
 
87
  target_company = "Tesla"
88
 
89
  print(f"🔍 Running Self-Correcting SWOT Analysis for {target_company}...")
90
+ print("📝 This workflow includes: Researcher → Analyzer → Critic → Analyzer (revision loop)")
91
  print("🎯 Loop continues until score ≥ 7 or 3 revisions attempted\n")
92
 
93
  # Execute the workflow
 
111
  print(f" - Initial Quality: Improved from unknown to {final_score}/10")
112
  print(f" - Revisions Made: {final_revision_count}")
113
  print(f" - Final Report Length: {len(result['draft_report'])} characters")
114
+ print(f" - Workflow: Researcher → Analyzer → Critic → Analyzer (revision loop)")
115
  print(f" - Tracing: Enhanced LangSmith traces available")
116
 
117
  # Quality assessment
src/nodes/analyzer.py CHANGED
@@ -1,10 +1,123 @@
1
- from src.tools import get_strategy_context
2
  from src.llm_client import get_llm_client
3
  from langsmith import traceable
4
  import time
5
  import json
6
 
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def _add_activity_log(workflow_id, progress_store, step, message):
9
  """Helper to add activity log entry."""
10
  if workflow_id and progress_store:
@@ -118,8 +231,13 @@ def _get_value(metric_data) -> any:
118
  return metric_data
119
 
120
 
121
- def _generate_data_report(raw_data: str) -> str:
122
- """Generate complete multi-source data report with simple tables."""
 
 
 
 
 
123
  try:
124
  data = json.loads(raw_data)
125
  except json.JSONDecodeError:
@@ -195,14 +313,17 @@ def _generate_data_report(raw_data: str) -> str:
195
  ("P/E Forward", "forward_pe", lambda v: _format_number(v, "x")),
196
  ("P/B Ratio", "pb_ratio", lambda v: _format_number(v, "x")),
197
  ("P/S Ratio", "ps_ratio", lambda v: _format_number(v, "x")),
198
- ("EV/EBITDA", "ev_ebitda", lambda v: _format_number(v, "x")),
199
- ("EV/Revenue", "ev_revenue", lambda v: _format_number(v, "x")),
200
  ("PEG Ratio", "trailing_peg", lambda v: _format_number(v, "x")),
201
  ("Price/FCF", "price_to_fcf", lambda v: _format_number(v, "x")),
202
  ("Revenue Growth", "revenue_growth", lambda v: _format_number(v * 100 if v and abs(v) < 10 else v, "%") if v else "N/A"),
203
  ("Earnings Growth", "earnings_growth", lambda v: _format_number(v * 100 if v and abs(v) < 10 else v, "%") if v else "N/A"),
204
  ]
205
 
 
 
 
 
 
206
  for name, key, fmt in val_metrics:
207
  y = yf_val.get(key)
208
  a = av_val.get(key)
@@ -470,8 +591,13 @@ def _extract_key_metrics(raw_data: str) -> dict:
470
  return extracted
471
 
472
 
473
- def _format_metrics_for_prompt(extracted: dict) -> str:
474
- """Format extracted metrics into a clear text for the LLM."""
 
 
 
 
 
475
  lines = []
476
  lines.append(f"Company: {extracted['company']} ({extracted['ticker']})")
477
  lines.append("")
@@ -542,7 +668,7 @@ def _format_metrics_for_prompt(extracted: dict) -> str:
542
  lines.append(f"- P/B Ratio: {val['pb_ratio']:.2f}")
543
  if val.get("ps_ratio"):
544
  lines.append(f"- P/S Ratio: {val['ps_ratio']:.2f}")
545
- if val.get("ev_ebitda"):
546
  lines.append(f"- EV/EBITDA: {val['ev_ebitda']:.1f}")
547
  if val.get("valuation_signal"):
548
  lines.append(f"- Overall Signal: {val['valuation_signal']}")
@@ -608,6 +734,221 @@ def _format_metrics_for_prompt(extracted: dict) -> str:
608
  return "\n".join(lines)
609
 
610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
  @traceable(name="Analyzer")
612
  def analyzer_node(state, workflow_id=None, progress_store=None):
613
  # Extract workflow_id and progress_store from state (graph invokes with state only)
@@ -628,56 +969,56 @@ def analyzer_node(state, workflow_id=None, progress_store=None):
628
  user_keys = state.get("user_api_keys", {})
629
  llm = get_llm_client(user_keys) if user_keys else get_llm_client()
630
  raw = state["raw_data"]
631
- strategy_name = state.get("strategy_focus", "Cost Leadership")
632
- strategy_context = get_strategy_context(strategy_name)
633
  company = state["company_name"]
634
  ticker = state.get("ticker", "")
635
 
 
 
 
 
 
 
 
 
 
 
636
  # Extract and format metrics for better LLM understanding
637
  extracted = _extract_key_metrics(raw)
638
- formatted_data = _format_metrics_for_prompt(extracted)
639
 
640
  # Generate detailed data report (shown before SWOT)
641
- data_report = _generate_data_report(raw)
642
-
643
- # Log LLM call start
644
- _add_activity_log(workflow_id, progress_store, "analyzer", f"Calling LLM to generate SWOT analysis...")
645
-
646
- prompt = f"""You are a financial analyst creating a CONCISE SWOT analysis for {company} ({ticker}).
647
-
648
- CRITICAL INSTRUCTIONS:
649
- 1. ONLY use the data provided below. DO NOT invent or assume any information.
650
- 2. Every point MUST cite specific numbers from the data (e.g., "P/E of 21.3", "Beta of 0.88").
651
- 3. If data is missing for a category, say "Insufficient data" - do NOT make up information.
652
- 4. Focus on what the numbers actually mean for this specific company.
653
-
654
- FORMAT REQUIREMENTS - BE CONCISE:
655
- - Each bullet point: 1 sentence MAX (under 25 words)
656
- - 3-5 bullet points per SWOT category
657
- - Focus on the most impactful insights only
658
- - NO lengthy explanations or context paragraphs
659
-
660
- Strategic Focus: {strategy_name}
661
- Context: {strategy_context}
662
-
663
- === ACTUAL DATA FROM FINANCIAL SOURCES ===
664
- {formatted_data}
665
-
666
- Based ONLY on the data above, provide a SWOT analysis in this format:
667
-
668
- Strengths:
669
- - [Single sentence with metric, under 25 words]
670
-
671
- Weaknesses:
672
- - [Single sentence with metric, under 25 words]
673
-
674
- Opportunities:
675
- - [Single sentence citing macro/market data, under 25 words]
676
-
677
- Threats:
678
- - [Single sentence citing risks, under 25 words]
679
 
680
- Remember: Every bullet must cite actual data. Keep each point brief and impactful."""
681
  start_time = time.time()
682
  response, provider, error, providers_failed = llm.query(prompt, temperature=0)
683
  elapsed = time.time() - start_time
@@ -704,18 +1045,50 @@ Remember: Every bullet must cite actual data. Keep each point brief and impactfu
704
  llm_status[provider_name] = "completed"
705
 
706
  if error:
707
- state["draft_report"] = f"Error generating analysis: {error}"
708
- state["provider_used"] = None
709
- state["error"] = error # Signal workflow to abort
710
- _add_activity_log(workflow_id, progress_store, "analyzer", f"LLM error: {error}")
711
- _add_activity_log(workflow_id, progress_store, "analyzer", "Workflow aborted - all LLM providers unavailable")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
712
  else:
713
- # Combine data report (Part 1) with SWOT analysis (Part 2)
714
- swot_section = f"## SWOT Analysis\n\n{response}"
715
- full_report = f"{data_report}\n{swot_section}"
716
- state["draft_report"] = full_report
717
- state["data_report"] = data_report # Store separately for frontend flexibility
718
- state["provider_used"] = provider
719
- _add_activity_log(workflow_id, progress_store, "analyzer", f"SWOT generated via {provider} ({elapsed:.1f}s)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
720
 
721
  return state
 
 
1
  from src.llm_client import get_llm_client
2
  from langsmith import traceable
3
  import time
4
  import json
5
 
6
 
7
# Financial institution detection for EV/EBITDA exclusion.
# NOTE: _is_financial_institution() matches these as *substrings* of the
# lowercased sector/industry strings, so broad terms like "financial" also
# match e.g. "diversified financial services". All entries must be lowercase.
FINANCIAL_SECTORS = {
    "financial services", "financial", "banking", "banks",
    "insurance", "real estate investment trust", "reit",
    "investment management", "capital markets", "diversified financial services",
    "consumer finance", "asset management", "mortgage finance",
}

# Industry-level terms (lowercase), checked independently of the sector match.
FINANCIAL_INDUSTRIES = {
    "banks", "regional banks", "diversified banks", "money center banks",
    "insurance", "life insurance", "property insurance", "reinsurance",
    "real estate", "reit", "mortgage reits", "equity reits",
    "asset management", "investment banking", "capital markets",
    "consumer finance", "specialty finance",
}

# Fallback: known financial tickers when sector data unavailable.
# Compared against ticker.upper() in _is_financial_institution(), so
# entries must stay uppercase.
FINANCIAL_TICKERS = {
    "JPM", "BAC", "WFC", "GS", "MS", "C", "USB", "PNC", "TFC", "COF",
    "AXP", "BLK", "SCHW", "CME", "ICE", "SPGI", "MCO",
    "BRK.A", "BRK.B", "MET", "PRU", "AIG", "ALL", "TRV", "PGR", "CB",
    "AMT", "PLD", "CCI", "EQIX", "PSA", "O", "WELL", "AVB", "EQR",
}
30
+
31
# =============================================================================
# REVISION MODE: Conditional Focus Area Blocks
# These are included in revision prompts based on which rubric criteria failed
# (score thresholds live in _build_revision_prompt).
# =============================================================================

# Included when the Critic's evidence_grounding score is below 7.
EVIDENCE_GROUNDING_BLOCK = """
**EVIDENCE GROUNDING (Critical)**
- Every claim must cite a specific metric from the input data
- Use exact field names: `revenue`, `net_margin_pct`, `trailing_pe`, etc.
- Format citations as: "[Metric]: [Value] ([Source], [Period])"
- If a metric was flagged as fabricated, remove it entirely or replace with actual data
"""

# Included when constraint_compliance is below 6 (deliberately a lower bar
# than the other criteria).
CONSTRAINT_COMPLIANCE_BLOCK = """
**CONSTRAINT COMPLIANCE (Critical)**
- Remove any language that sounds like investment advice
- Check all temporal labels — TTM vs FY vs Q must match the source
- Add confidence levels to key conclusions: (High/Medium/Low)
- Do not use EV/EBITDA for financial institutions
- For missing data, state "DATA NOT PROVIDED" — do not estimate
"""

# Included when specificity_actionability is below 7.
SPECIFICITY_BLOCK = """
**SPECIFICITY & ACTIONABILITY**
- Replace generic statements with company-specific observations
- Quantify every claim possible: not "strong margins" but "31.0% operating margin"
- Remove business clichés: "leveraging," "best-in-class," "synergies"
"""

# Included when strategic_insight is below 7.
INSIGHT_BLOCK = """
**STRATEGIC INSIGHT**
- Connect observations across data baskets (e.g., link margin trends to macro rates)
- Go beyond restating metrics — explain WHY they matter
- Identify non-obvious relationships in the data
"""

# Included when completeness_balance is below 7.
COMPLETENESS_BLOCK = """
**COMPLETENESS & BALANCE**
- Ensure ALL required sections are present (Strengths, Weaknesses, Opportunities, Threats, Data Quality Notes)
- Balance quadrants — no section should be filler or disproportionately thin
"""

# Included when clarity_structure is below 7.
CLARITY_BLOCK = """
**CLARITY & STRUCTURE**
- Use consistent formatting throughout
- Ensure no contradictions across sections
- Make output scannable — executives should grasp key points in 30 seconds
"""
79
+
80
+
81
def _is_financial_institution(sector: str, industry: str, ticker: str) -> bool:
    """Return True when the company looks like a financial institution.

    EV/EBITDA is not a meaningful valuation metric for banks, insurers,
    and REITs, so callers use this flag to drop it from reports/prompts.
    Sector and industry are matched by substring against the lowercase
    term sets; the ticker is a last-resort exact match.
    """
    normalized_sector = (sector or "").lower().strip()
    normalized_industry = (industry or "").lower().strip()

    sector_match = any(term in normalized_sector for term in FINANCIAL_SECTORS)
    industry_match = any(term in normalized_industry for term in FINANCIAL_INDUSTRIES)
    ticker_match = bool(ticker) and ticker.upper() in FINANCIAL_TICKERS

    return sector_match or industry_match or ticker_match
93
+
94
+
95
+ def _extract_company_profile(raw_data: str) -> dict:
96
+ """Extract sector/industry from Yahoo Finance data if available."""
97
+ try:
98
+ data = json.loads(raw_data)
99
+ except json.JSONDecodeError:
100
+ return {}
101
+
102
+ multi_source = data.get("multi_source", {})
103
+
104
+ # Try valuation Yahoo Finance data first
105
+ yf_val = multi_source.get("valuation_all", {}).get("yahoo_finance", {}).get("data", {})
106
+ profile = yf_val.get("profile", {})
107
+
108
+ if profile.get("sector"):
109
+ return {"sector": profile.get("sector"), "industry": profile.get("industry")}
110
+
111
+ # Fallback to fundamentals Yahoo Finance
112
+ yf_fund = multi_source.get("fundamentals_all", {}).get("yahoo_finance", {}).get("data", {})
113
+ fund_profile = yf_fund.get("profile", {})
114
+
115
+ return {
116
+ "sector": fund_profile.get("sector", ""),
117
+ "industry": fund_profile.get("industry", "")
118
+ }
119
+
120
+
121
  def _add_activity_log(workflow_id, progress_store, step, message):
122
  """Helper to add activity log entry."""
123
  if workflow_id and progress_store:
 
231
  return metric_data
232
 
233
 
234
+ def _generate_data_report(raw_data: str, is_financial: bool = False) -> str:
235
+ """Generate complete multi-source data report with simple tables.
236
+
237
+ Args:
238
+ raw_data: JSON string of research data
239
+ is_financial: If True, exclude EV/EBITDA for financial institutions
240
+ """
241
  try:
242
  data = json.loads(raw_data)
243
  except json.JSONDecodeError:
 
313
  ("P/E Forward", "forward_pe", lambda v: _format_number(v, "x")),
314
  ("P/B Ratio", "pb_ratio", lambda v: _format_number(v, "x")),
315
  ("P/S Ratio", "ps_ratio", lambda v: _format_number(v, "x")),
 
 
316
  ("PEG Ratio", "trailing_peg", lambda v: _format_number(v, "x")),
317
  ("Price/FCF", "price_to_fcf", lambda v: _format_number(v, "x")),
318
  ("Revenue Growth", "revenue_growth", lambda v: _format_number(v * 100 if v and abs(v) < 10 else v, "%") if v else "N/A"),
319
  ("Earnings Growth", "earnings_growth", lambda v: _format_number(v * 100 if v and abs(v) < 10 else v, "%") if v else "N/A"),
320
  ]
321
 
322
+ # Only include EV/EBITDA for non-financial companies
323
+ if not is_financial:
324
+ val_metrics.insert(6, ("EV/EBITDA", "ev_ebitda", lambda v: _format_number(v, "x")))
325
+ val_metrics.insert(7, ("EV/Revenue", "ev_revenue", lambda v: _format_number(v, "x")))
326
+
327
  for name, key, fmt in val_metrics:
328
  y = yf_val.get(key)
329
  a = av_val.get(key)
 
591
  return extracted
592
 
593
 
594
+ def _format_metrics_for_prompt(extracted: dict, is_financial: bool = False) -> str:
595
+ """Format extracted metrics into a clear text for the LLM.
596
+
597
+ Args:
598
+ extracted: Extracted metrics dictionary
599
+ is_financial: If True, exclude EV/EBITDA from valuation metrics
600
+ """
601
  lines = []
602
  lines.append(f"Company: {extracted['company']} ({extracted['ticker']})")
603
  lines.append("")
 
668
  lines.append(f"- P/B Ratio: {val['pb_ratio']:.2f}")
669
  if val.get("ps_ratio"):
670
  lines.append(f"- P/S Ratio: {val['ps_ratio']:.2f}")
671
+ if val.get("ev_ebitda") and not is_financial:
672
  lines.append(f"- EV/EBITDA: {val['ev_ebitda']:.1f}")
673
  if val.get("valuation_signal"):
674
  lines.append(f"- Overall Signal: {val['valuation_signal']}")
 
734
  return "\n".join(lines)
735
 
736
 
737
# System prompt for the Analyzer's initial (non-revision) SWOT generation.
# Contains exactly one str.format() placeholder, {ev_ebitda_note}, filled by
# _build_analyzer_prompt(); any new literal braces added to this template
# must be escaped ({{ }}) or .format() will treat them as fields.
ANALYZER_SYSTEM_PROMPT = """You are a senior financial analyst producing institutional-grade SWOT analyses.

## DATA GROUNDING RULES (CRITICAL)
1. USE ONLY the provided data. Never invent or assume metrics not given.
2. CITE specific numbers for every finding (e.g., "Net margin: 24.3%", "P/E: 21.3x").
3. If data is missing, state "Insufficient data" - do NOT fabricate.
4. Distinguish trailing (historical) vs forward (projected) metrics.

## AVAILABLE DATA BASKETS

### Fundamentals (SEC EDGAR + Yahoo Finance)
revenue, net_income, net_margin_pct, gross_margin_pct, operating_margin_pct,
total_assets, total_liabilities, stockholders_equity, free_cash_flow,
operating_cash_flow, long_term_debt, debt_to_equity, eps

### Valuation (Yahoo Finance)
market_cap, enterprise_value, trailing_pe, forward_pe, pb_ratio, ps_ratio,
trailing_peg, price_to_fcf, revenue_growth, earnings_growth
{ev_ebitda_note}

### Volatility (FRED + Yahoo)
vix, vxn, beta, historical_volatility, implied_volatility

### Macro (BEA/BLS/FRED)
gdp_growth, interest_rate, cpi_inflation, unemployment

### News & Sentiment
News articles with title, source, url
Sentiment scores from Finnhub and Reddit

## WHAT YOU DO NOT DO
- Provide buy/sell/hold recommendations
- Compare to sector/peer benchmarks (data not provided)
- Speculate beyond provided data
- Use vague hedge words without quantification"""
773
+
774
+
775
+ def _build_revision_prompt(
776
+ critique_details: dict,
777
+ company_data: str,
778
+ current_draft: str,
779
+ is_financial: bool
780
+ ) -> str:
781
+ """Build revision prompt with conditional focus areas based on failed criteria.
782
+
783
+ Args:
784
+ critique_details: Structured dict from Critic with scores and feedback
785
+ company_data: Formatted metrics string for reference
786
+ current_draft: The current SWOT draft to be revised
787
+ is_financial: Whether the company is a financial institution
788
+
789
+ Returns:
790
+ Complete revision prompt string
791
+ """
792
+ scores = critique_details.get("scores", {})
793
+
794
+ # Determine which focus areas to include based on failed criteria
795
+ focus_areas = []
796
+ if scores.get("evidence_grounding", 10) < 7:
797
+ focus_areas.append(EVIDENCE_GROUNDING_BLOCK)
798
+ if scores.get("constraint_compliance", 10) < 6:
799
+ focus_areas.append(CONSTRAINT_COMPLIANCE_BLOCK)
800
+ if scores.get("specificity_actionability", 10) < 7:
801
+ focus_areas.append(SPECIFICITY_BLOCK)
802
+ if scores.get("strategic_insight", 10) < 7:
803
+ focus_areas.append(INSIGHT_BLOCK)
804
+ if scores.get("completeness_balance", 10) < 7:
805
+ focus_areas.append(COMPLETENESS_BLOCK)
806
+ if scores.get("clarity_structure", 10) < 7:
807
+ focus_areas.append(CLARITY_BLOCK)
808
+
809
+ # Format critic feedback components
810
+ deficiencies = critique_details.get("key_deficiencies", [])
811
+ strengths = critique_details.get("strengths_to_preserve", [])
812
+ feedback = critique_details.get("actionable_feedback", [])
813
+
814
+ # Build deficiencies section
815
+ deficiencies_text = "\n".join(f"- {d}" for d in deficiencies) if deficiencies else "- None specified"
816
+
817
+ # Build strengths section
818
+ strengths_text = "\n".join(f"- {s}" for s in strengths) if strengths else "- None specified"
819
+
820
+ # Build feedback section
821
+ feedback_text = "\n".join(f"{i+1}. {f}" for i, f in enumerate(feedback)) if feedback else "- None specified"
822
+
823
+ # Build focus areas section
824
+ focus_areas_text = "\n".join(focus_areas) if focus_areas else "Address all deficiencies listed above."
825
+
826
+ # Add EV/EBITDA note for financial institutions
827
+ ev_note = ""
828
+ if is_financial:
829
+ ev_note = "\n**Note:** This is a financial institution - EV/EBITDA is excluded from analysis."
830
+
831
+ prompt = f"""## REVISION MODE ACTIVATED
832
+
833
+ You previously generated a SWOT analysis that did not meet quality standards. You are now in revision mode.
834
+
835
+ ### YOUR TASK
836
+
837
+ 1. **Review the Critic's feedback** carefully
838
+ 2. **Address each deficiency** listed in priority order
839
+ 3. **Preserve strengths** explicitly called out — do not regress on what worked
840
+ 4. **Regenerate the complete SWOT** — not a partial patch
841
+
842
+ ### CRITIC FEEDBACK
843
+
844
+ Status: {critique_details.get('status', 'REJECTED')}
845
+ Weighted Score: {critique_details.get('weighted_score', 0):.1f} / 10
846
+
847
+ **Key Deficiencies:**
848
+ {deficiencies_text}
849
+
850
+ **Strengths to Preserve:**
851
+ {strengths_text}
852
+
853
+ **Actionable Feedback:**
854
+ {feedback_text}
855
+
856
+ ### FOCUS AREAS FOR THIS REVISION
857
+
858
+ {focus_areas_text}
859
+
860
+ ### REVISION RULES
861
+
862
+ **DO:**
863
+ - Fix every item in "Key Deficiencies" — these are blocking issues
864
+ - Apply each point in "Actionable Feedback" — these are specific instructions
865
+ - Keep everything listed under "Strengths to Preserve" — do not modify these sections
866
+ - Re-verify all metric citations against the original input data
867
+ - Ensure temporal labels (TTM, FY, Q) are accurate for each metric
868
+ {ev_note}
869
+
870
+ **DO NOT:**
871
+ - Ignore lower-priority feedback items — address all of them
872
+ - Introduce new metrics not in the original input data
873
+ - Remove content that was working well
874
+ - Add defensive caveats or apologies about the revision
875
+ - Reference the revision process in your output — produce a clean SWOT as if first attempt
876
+
877
+ ### REFERENCE DATA
878
+
879
+ {company_data}
880
+
881
+ ### CURRENT DRAFT (to revise)
882
+
883
+ {current_draft}
884
+
885
+ ### OUTPUT INSTRUCTIONS
886
+
887
+ Produce a complete, revised SWOT analysis following the original template structure.
888
+
889
+ Do not:
890
+ - Include any preamble about revisions
891
+ - Apologize or explain what you changed
892
+ - Reference the Critic's feedback in your output
893
+
894
+ Simply output the improved SWOT as a clean, final deliverable."""
895
+
896
+ return prompt
897
+
898
+
899
def _build_analyzer_prompt(company: str, ticker: str, formatted_data: str, is_financial: bool) -> str:
    """Build analyzer prompt with conditional EV/EBITDA handling.

    Fills the system template's {ev_ebitda_note} placeholder: financial
    institutions get an exclusion note, all other companies get the extra
    enterprise-value metric names appended to the valuation basket list.
    """
    ebitda_note = (
        "\nNote: EV/EBITDA excluded - not meaningful for financial institutions."
        if is_financial
        else ", ev_ebitda, ev_revenue"
    )
    system_prompt = ANALYZER_SYSTEM_PROMPT.format(ev_ebitda_note=ebitda_note)

    return f"""{system_prompt}

=== DATA FOR {company} ({ticker}) ===
{formatted_data}

=== OUTPUT FORMAT ===

Produce a SWOT analysis with this exact structure:

## Strengths
For each (3-5 points):
- **Finding:** [One sentence with specific metric]
- **Strategic Implication:** [Why this matters]
- **Durability:** [High/Medium/Low]

## Weaknesses
For each (3-5 points):
- **Finding:** [One sentence with specific metric]
- **Severity:** [Critical/Moderate/Minor]
- **Trend:** [Improving/Stable/Deteriorating]
- **Remediation Levers:** [What could improve this]

## Opportunities
For each (3-5 points):
- **Catalyst:** [Description with supporting data]
- **Timing:** [Near-term/Medium-term/Long-term]
- **Execution Requirements:** [What must happen]

## Threats
For each (3-5 points):
- **Risk Factor:** [Description with supporting data]
- **Probability:** [High/Medium/Low]
- **Impact:** [Potential magnitude]
- **Mitigation Options:** [Possible responses]

## Data Quality Notes
- **Metrics Used:** [List key metrics analyzed]
- **Data Gaps:** [Any unavailable metrics]
- **Confidence Level:** [High/Medium/Low]

Every finding MUST cite a specific number from the data."""
950
+
951
+
952
  @traceable(name="Analyzer")
953
  def analyzer_node(state, workflow_id=None, progress_store=None):
954
  # Extract workflow_id and progress_store from state (graph invokes with state only)
 
969
  user_keys = state.get("user_api_keys", {})
970
  llm = get_llm_client(user_keys) if user_keys else get_llm_client()
971
  raw = state["raw_data"]
 
 
972
  company = state["company_name"]
973
  ticker = state.get("ticker", "")
974
 
975
+ # Extract company profile and detect financial institution
976
+ company_profile = _extract_company_profile(raw)
977
+ sector = company_profile.get("sector", "")
978
+ industry = company_profile.get("industry", "")
979
+ is_financial = _is_financial_institution(sector, industry, ticker)
980
+
981
+ if is_financial:
982
+ _add_activity_log(workflow_id, progress_store, "analyzer",
983
+ f"Financial institution detected - excluding EV/EBITDA")
984
+
985
  # Extract and format metrics for better LLM understanding
986
  extracted = _extract_key_metrics(raw)
987
+ formatted_data = _format_metrics_for_prompt(extracted, is_financial=is_financial)
988
 
989
  # Generate detailed data report (shown before SWOT)
990
+ data_report = _generate_data_report(raw, is_financial=is_financial)
991
+
992
+ # Detect revision mode: if revision_count > 0 and critique_details exist
993
+ is_revision = state.get("revision_count", 0) > 0
994
+ critique_details = state.get("critique_details", {})
995
+
996
+ if is_revision and critique_details:
997
+ # REVISION MODE: Use enhanced revision prompt with Critic feedback
998
+ current_revision = state.get("revision_count", 0) + 1
999
+ _add_activity_log(workflow_id, progress_store, "analyzer",
1000
+ f"Revision #{current_revision} in progress...")
1001
+
1002
+ prompt = _build_revision_prompt(
1003
+ critique_details=critique_details,
1004
+ company_data=formatted_data,
1005
+ current_draft=state.get("draft_report", ""),
1006
+ is_financial=is_financial
1007
+ )
1008
+
1009
+ # Update progress with revision info
1010
+ if workflow_id and progress_store:
1011
+ progress_store[workflow_id].update({
1012
+ "current_step": "analyzer",
1013
+ "revision_count": current_revision,
1014
+ })
1015
+ else:
1016
+ # INITIAL MODE: Use standard analyzer prompt
1017
+ _add_activity_log(workflow_id, progress_store, "analyzer",
1018
+ f"Calling LLM to generate SWOT analysis...")
1019
+ prompt = _build_analyzer_prompt(company, ticker, formatted_data, is_financial)
1020
+ current_revision = 0
 
 
 
 
 
 
 
1021
 
 
1022
  start_time = time.time()
1023
  response, provider, error, providers_failed = llm.query(prompt, temperature=0)
1024
  elapsed = time.time() - start_time
 
1045
  llm_status[provider_name] = "completed"
1046
 
1047
  if error:
1048
+ if is_revision:
1049
+ # REVISION MODE ERROR: Graceful degradation - keep previous draft
1050
+ _add_activity_log(workflow_id, progress_store, "analyzer", f"Revision failed: {error}")
1051
+ if current_revision == 1:
1052
+ _add_activity_log(workflow_id, progress_store, "analyzer",
1053
+ "Using initial draft (revision unavailable)")
1054
+ else:
1055
+ _add_activity_log(workflow_id, progress_store, "analyzer",
1056
+ f"Using revision #{current_revision - 1} draft (further revision unavailable)")
1057
+ # Don't set error - allow workflow to complete with previous draft
1058
+ state["analyzer_revision_skipped"] = True
1059
+ state["revision_count"] = current_revision
1060
+ else:
1061
+ # INITIAL MODE ERROR: Abort workflow
1062
+ state["draft_report"] = f"Error generating analysis: {error}"
1063
+ state["provider_used"] = None
1064
+ state["error"] = error # Signal workflow to abort
1065
+ _add_activity_log(workflow_id, progress_store, "analyzer", f"LLM error: {error}")
1066
+ _add_activity_log(workflow_id, progress_store, "analyzer",
1067
+ "Workflow aborted - all LLM providers unavailable")
1068
  else:
1069
+ if is_revision:
1070
+ # REVISION MODE SUCCESS: Update draft with revision
1071
+ state["draft_report"] = response
1072
+ state["provider_used"] = provider
1073
+ state["analyzer_revision_skipped"] = False
1074
+ state["revision_count"] = current_revision
1075
+ _add_activity_log(workflow_id, progress_store, "analyzer",
1076
+ f"Revision #{current_revision} completed via {provider} ({elapsed:.1f}s)")
1077
+ else:
1078
+ # INITIAL MODE SUCCESS: Combine data report with SWOT analysis
1079
+ swot_section = f"## SWOT Analysis\n\n{response}"
1080
+ full_report = f"{data_report}\n{swot_section}"
1081
+ state["draft_report"] = full_report
1082
+ state["data_report"] = data_report # Store separately for frontend flexibility
1083
+ state["provider_used"] = provider
1084
+ _add_activity_log(workflow_id, progress_store, "analyzer",
1085
+ f"SWOT generated via {provider} ({elapsed:.1f}s)")
1086
+
1087
+ # Update progress with final revision count
1088
+ if workflow_id and progress_store:
1089
+ progress_store[workflow_id].update({
1090
+ "revision_count": state.get("revision_count", 0),
1091
+ "score": state.get("score", 0)
1092
+ })
1093
 
1094
  return state
src/nodes/critic.py CHANGED
@@ -1,7 +1,6 @@
1
  from src.llm_client import get_llm_client
2
  from langsmith import traceable
3
  import json
4
- import re
5
  import time
6
 
7
 
@@ -13,251 +12,207 @@ def _add_activity_log(workflow_id, progress_store, step, message):
13
 
14
 
15
  # ============================================================
16
- # DETERMINISTIC SCORING FUNCTIONS
17
  # ============================================================
18
 
19
- def check_swot_sections(report: str) -> dict:
20
- """
21
- Check if all 4 SWOT sections are present.
22
- Returns dict with section presence and score (0-2 points).
23
- """
24
- report_lower = report.lower()
25
 
26
- sections = {
27
- "strengths": bool(re.search(r'\bstrengths?\b', report_lower)),
28
- "weaknesses": bool(re.search(r'\bweaknesses?\b', report_lower)),
29
- "opportunities": bool(re.search(r'\bopportunit(y|ies)\b', report_lower)),
30
- "threats": bool(re.search(r'\bthreats?\b', report_lower))
31
- }
32
 
33
- present_count = sum(sections.values())
34
- score = 2 if present_count == 4 else (1 if present_count >= 2 else 0)
35
 
36
- return {
37
- "sections": sections,
38
- "present_count": present_count,
39
- "score": score,
40
- "max_score": 2
41
- }
42
 
 
43
 
44
- def count_numeric_citations(report: str) -> dict:
45
- """
46
- Count specific facts/numbers cited in the report.
47
- Returns dict with count and score (0-3 points).
48
- """
49
- # Patterns for numeric citations
50
- patterns = [
51
- r'\$[\d,]+\.?\d*[BMK]?', # Dollar amounts: $3.6B, $100M
52
- r'\d+\.?\d*\s*%', # Percentages: 7.26%, 42.59%
53
- r'\d+\.?\d*x', # Multiples: 0.13x, 2.35x
54
- r'P/E[:\s]+\d+', # P/E ratios
55
- r'P/S[:\s]+\d+', # P/S ratios
56
- r'P/B[:\s]+\d+', # P/B ratios
57
- r'EV/EBITDA[:\s]+\d+', # EV/EBITDA
58
- r'PEG[:\s]+\d+', # PEG ratio
59
- r'VIX[:\s]+\d+', # VIX
60
- r'Beta[:\s]+\d+', # Beta
61
- r'\d+/100', # Scores: 67.38/100
62
- r'CAGR[:\s]*\d+', # CAGR
63
- r'\d{4}', # Years: 2024, 2025
64
- ]
65
 
66
- citations = []
67
- for pattern in patterns:
68
- matches = re.findall(pattern, report, re.IGNORECASE)
69
- citations.extend(matches)
70
-
71
- # Deduplicate
72
- unique_citations = list(set(citations))
73
- count = len(unique_citations)
74
-
75
- # Score: 0-2 citations = 0pts, 3-5 = 1pt, 6-10 = 2pts, 10+ = 3pts
76
- if count >= 10:
77
- score = 3
78
- elif count >= 6:
79
- score = 2
80
- elif count >= 3:
81
- score = 1
82
- else:
83
- score = 0
84
 
85
- return {
86
- "count": count,
87
- "examples": unique_citations[:10], # Show first 10
88
- "score": score,
89
- "max_score": 3
90
- }
91
 
 
92
 
93
- def check_data_sources(report: str, sources_available: list) -> dict:
94
- """
95
- Check if report references data from available MCP sources.
96
- Returns dict with coverage and score (0-2 points).
97
- """
98
- report_lower = report.lower()
99
-
100
- source_keywords = {
101
- "fundamentals": ["revenue", "net margin", "debt", "cash flow", "eps", "earnings"],
102
- "volatility": ["beta", "volatility", "vix", "price swing"],
103
- "macro": ["gdp", "interest rate", "inflation", "unemployment", "fed"],
104
- "valuation": ["p/e", "p/s", "p/b", "ev/ebitda", "peg", "valuation", "market cap"],
105
- "news": ["news", "analyst", "article", "report"],
106
- "sentiment": ["sentiment", "bullish", "bearish", "reddit", "finnhub"]
107
- }
108
 
109
- sources_referenced = {}
110
- for source in sources_available:
111
- keywords = source_keywords.get(source, [])
112
- found = any(kw in report_lower for kw in keywords)
113
- sources_referenced[source] = found
 
114
 
115
- referenced_count = sum(sources_referenced.values())
116
- coverage_pct = (referenced_count / len(sources_available) * 100) if sources_available else 0
 
 
 
117
 
118
- # Score: <50% = 0pts, 50-75% = 1pt, >75% = 2pts
119
- if coverage_pct >= 75:
120
- score = 2
121
- elif coverage_pct >= 50:
122
- score = 1
123
- else:
124
- score = 0
125
-
126
- return {
127
- "sources_referenced": sources_referenced,
128
- "referenced_count": referenced_count,
129
- "total_available": len(sources_available),
130
- "coverage_pct": round(coverage_pct, 1),
131
- "score": score,
132
- "max_score": 2
133
- }
134
 
 
 
 
 
 
135
 
136
- def check_section_balance(report: str) -> dict:
137
- """
138
- Check if SWOT sections are reasonably balanced (not all items in one section).
139
- Returns dict with balance info and score (0-1 point).
140
- """
141
- # Count bullet points or list items per section
142
- sections = ["strength", "weakness", "opportunit", "threat"]
143
-
144
- # Split report by sections and count items
145
- report_lower = report.lower()
146
- item_counts = {}
147
-
148
- for section in sections:
149
- # Find section and count bullet points after it
150
- pattern = rf'{section}.*?(?=(?:weakness|opportunit|threat|$))'
151
- match = re.search(pattern, report_lower, re.DOTALL)
152
- if match:
153
- section_text = match.group()
154
- # Count bullet points (-, *, •) or numbered items
155
- items = len(re.findall(r'[\-\*\•]\s+\w|^\d+\.\s+\w', section_text, re.MULTILINE))
156
- item_counts[section] = max(items, 1) # At least 1 if section exists
157
-
158
- if not item_counts:
159
- return {"balanced": False, "score": 0, "max_score": 1}
160
-
161
- counts = list(item_counts.values())
162
- avg = sum(counts) / len(counts)
163
-
164
- # Check if any section has less than 25% of average (unbalanced)
165
- balanced = all(c >= avg * 0.25 for c in counts) if avg > 0 else False
166
-
167
- return {
168
- "item_counts": item_counts,
169
- "balanced": balanced,
170
- "score": 1 if balanced else 0,
171
- "max_score": 1
172
- }
 
 
 
 
173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- def run_deterministic_checks(report: str, sources_available: list) -> dict:
 
 
 
 
 
 
 
 
 
 
176
  """
177
- Run all deterministic checks and return combined results.
178
- Total possible: 8 points
179
  """
180
- sections_check = check_swot_sections(report)
181
- citations_check = count_numeric_citations(report)
182
- sources_check = check_data_sources(report, sources_available)
183
- balance_check = check_section_balance(report)
184
-
185
- total_score = (
186
- sections_check["score"] +
187
- citations_check["score"] +
188
- sources_check["score"] +
189
- balance_check["score"]
190
- )
191
- max_score = 8
192
-
193
- # Convert to 1-10 scale (deterministic portion = 40% weight)
194
- normalized_score = (total_score / max_score) * 4 # 0-4 points
195
-
196
- return {
197
- "sections": sections_check,
198
- "citations": citations_check,
199
- "sources": sources_check,
200
- "balance": balance_check,
201
- "total_score": total_score,
202
- "max_score": max_score,
203
- "normalized_score": round(normalized_score, 2)
204
- }
205
 
 
 
 
206
 
207
- # ============================================================
208
- # LLM SCORING
209
- # ============================================================
 
 
210
 
211
- LLM_RUBRIC = """
212
- You are a strategy evaluator. Given a SWOT analysis and the SOURCE DATA it should be based on, score it on a scale of 1 to 6.
 
 
213
 
214
- Scoring Criteria:
215
- 1. Strategic Alignment (0-2 pts): Does the analysis align with the given strategic focus?
216
- 2. Data Grounding (0-2 pts): Does EVERY claim cite specific numbers from the source data? Penalize any invented facts not in the data.
217
- 3. Logical Consistency (0-2 pts): Are S/O clearly positive and W/T clearly negative? No contradictions?
218
 
219
- IMPORTANT: If the analysis mentions facts/numbers NOT present in the source data, score Data Grounding as 0.
220
 
221
- Respond in this JSON format only, no other text:
222
- {
223
- "score": <int 1-6>,
224
- "strategic_alignment": <0-2>,
225
- "data_grounding": <0-2>,
226
- "logical_consistency": <0-2>,
227
- "reasoning": "<string>"
228
- }
229
- """
230
 
 
 
 
 
 
231
 
232
- def run_llm_evaluation(report: str, strategy_focus: str, llm, source_data: str = "") -> dict:
 
233
  """
234
- Run LLM-based qualitative evaluation.
235
- Returns score (1-6) and reasoning.
236
- """
237
- prompt = f"""
238
- SWOT Draft:
239
- {report}
240
 
241
- Strategic Focus: {strategy_focus}
242
 
243
- SOURCE DATA (the analysis should be based ONLY on this):
244
- {source_data if source_data else "No source data provided"}
245
 
246
- {LLM_RUBRIC}
247
- """
 
 
 
 
 
 
 
248
 
249
  response, provider, error, providers_failed = llm.query(prompt, temperature=0)
250
 
251
  if error:
 
252
  return {
253
- "score": 3, # Default middle score
254
- "reasoning": f"LLM evaluation failed: {error}",
 
 
 
 
 
 
255
  "provider": provider,
256
  "providers_failed": providers_failed,
257
  "error": True
258
  }
259
 
260
  try:
 
261
  content = response.strip()
262
  if "{" in content:
263
  json_start = content.index("{")
@@ -265,40 +220,74 @@ SOURCE DATA (the analysis should be based ONLY on this):
265
  content = content[json_start:json_end]
266
 
267
  parsed = json.loads(content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  return {
269
- "score": min(max(parsed.get("score", 3), 1), 6), # Clamp 1-6
270
- "strategic_alignment": parsed.get("strategic_alignment", 0),
271
- "data_grounding": parsed.get("data_grounding", 0),
272
- "logical_consistency": parsed.get("logical_consistency", 0),
273
- "reasoning": parsed.get("reasoning", "No reasoning provided"),
 
 
 
274
  "provider": provider,
275
  "providers_failed": providers_failed,
276
  "error": False
277
  }
 
278
  except (json.JSONDecodeError, ValueError) as e:
279
  return {
280
- "score": 3,
281
- "reasoning": f"JSON parsing failed: {str(e)[:100]}",
 
 
 
 
 
 
282
  "provider": provider,
283
  "providers_failed": providers_failed,
284
  "error": True
285
  }
286
 
287
 
288
- # ============================================================
289
- # HYBRID SCORING
290
- # ============================================================
291
-
292
  @traceable(name="Critic")
293
  def critic_node(state, workflow_id=None, progress_store=None):
294
  """
295
- Critic node with hybrid scoring:
296
- - Deterministic checks (40%): sections, citations, source coverage, balance
297
- - LLM evaluation (60%): strategic alignment, insight quality, consistency
 
 
 
 
 
 
298
 
299
- Final score = deterministic (0-4) + LLM (0-6) = 1-10 scale
300
  """
301
- # Extract workflow_id and progress_store from state (graph invokes with state only)
302
  if workflow_id is None:
303
  workflow_id = state.get("workflow_id")
304
  if progress_store is None:
@@ -307,7 +296,6 @@ def critic_node(state, workflow_id=None, progress_store=None):
307
  # Skip evaluation if workflow has an error (abort mode)
308
  if state.get("error"):
309
  _add_activity_log(workflow_id, progress_store, "critic", "Skipping evaluation - workflow aborted")
310
- # Simplify error message for user display
311
  error_msg = state.get("error", "")
312
  if "429" in error_msg or "Too Many Requests" in error_msg:
313
  user_friendly_msg = "All AI providers are temporarily unavailable due to rate limits. Please wait a moment and try again."
@@ -320,50 +308,27 @@ def critic_node(state, workflow_id=None, progress_store=None):
320
  return state
321
 
322
  report = state.get("draft_report", "")
323
- strategy_focus = state.get("strategy_focus", "Cost Leadership")
324
  revision_count = state.get("revision_count", 0)
 
325
 
326
  # Log evaluation start
327
- _add_activity_log(workflow_id, progress_store, "critic", f"Evaluating SWOT quality (revision #{revision_count})...")
328
 
329
- # Parse sources_available from raw_data
330
- sources_available = []
331
- try:
332
- raw_data = json.loads(state.get("raw_data", "{}"))
333
- sources_available = raw_data.get("sources_available", [])
334
- except:
335
- sources_available = ["fundamentals", "volatility", "macro", "valuation", "news", "sentiment"]
336
-
337
- # Run deterministic checks
338
- print("Running deterministic checks...")
339
- det_results = run_deterministic_checks(report, sources_available)
340
- det_score = det_results["normalized_score"] # 0-4
341
-
342
- print(f" Sections: {det_results['sections']['present_count']}/4 ({det_results['sections']['score']}/{det_results['sections']['max_score']} pts)")
343
- print(f" Citations: {det_results['citations']['count']} found ({det_results['citations']['score']}/{det_results['citations']['max_score']} pts)")
344
- print(f" Source Coverage: {det_results['sources']['coverage_pct']}% ({det_results['sources']['score']}/{det_results['sources']['max_score']} pts)")
345
- print(f" Balance: {'Yes' if det_results['balance']['balanced'] else 'No'} ({det_results['balance']['score']}/{det_results['balance']['max_score']} pts)")
346
- print(f" Deterministic Score: {det_score:.1f}/4")
347
-
348
- # Run LLM evaluation with source data for grounding check
349
- print("Running LLM evaluation...")
350
  llm = get_llm_client()
351
- _add_activity_log(workflow_id, progress_store, "critic", f"Calling LLM for quality evaluation...")
352
  start_time = time.time()
353
 
354
- # Get formatted source data for grounding verification
355
- source_data = state.get("raw_data", "")
356
- # Truncate if too long to avoid token limits
357
- if len(source_data) > 4000:
358
- source_data = source_data[:4000] + "\n... [truncated]"
359
-
360
- llm_results = run_llm_evaluation(report, strategy_focus, llm, source_data)
361
- llm_score = llm_results["score"] # 1-6
362
  elapsed = time.time() - start_time
363
- provider = llm_results.get('provider', 'unknown')
364
 
365
  # Log failed providers
366
- providers_failed = llm_results.get('providers_failed', [])
367
  for pf in providers_failed:
368
  _add_activity_log(workflow_id, progress_store, "critic", f"LLM {pf['name']} failed: {pf['error']}")
369
 
@@ -372,49 +337,98 @@ def critic_node(state, workflow_id=None, progress_store=None):
372
  state["llm_providers_failed"] = []
373
  state["llm_providers_failed"].extend([pf["name"] for pf in providers_failed])
374
 
375
- print(f" LLM Score: {llm_score}/6 ({provider})")
376
- _add_activity_log(workflow_id, progress_store, "critic", f"LLM evaluation via {provider} ({elapsed:.1f}s)")
377
-
378
- # Combine scores: deterministic (0-4) + LLM (1-6) = 1-10
379
- final_score = det_score + llm_score
380
- final_score = min(max(final_score, 1), 10) # Clamp 1-10
381
-
382
- print(f"Critic scored: {final_score:.1f}/10 (det:{det_score:.1f} + llm:{llm_score})")
383
-
384
- # Log score result with revision decision hint
385
- score_msg = f"Score: {final_score:.0f}/10"
386
- if final_score < 7:
387
- score_msg += " - needs revision"
 
 
 
 
 
 
 
 
 
 
 
388
  else:
389
- score_msg += " - quality passed"
390
  _add_activity_log(workflow_id, progress_store, "critic", score_msg)
391
 
392
- # Build detailed critique
393
- critique_parts = [
394
- f"Deterministic Analysis ({det_results['total_score']}/{det_results['max_score']} pts):",
395
- f" - SWOT Sections: {det_results['sections']['present_count']}/4 present",
396
- f" - Numeric Citations: {det_results['citations']['count']} found",
397
- f" - Data Source Coverage: {det_results['sources']['coverage_pct']}%",
398
- f" - Section Balance: {'Balanced' if det_results['balance']['balanced'] else 'Unbalanced'}",
399
  "",
400
- f"LLM Evaluation ({llm_score}/6 pts):",
401
- f" {llm_results['reasoning']}"
402
  ]
403
 
404
- state["critique"] = "\n".join(critique_parts)
405
- state["score"] = final_score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
  state["critique_details"] = {
407
- "deterministic": det_results,
408
- "llm": llm_results,
409
- "final_score": final_score
 
 
 
 
 
410
  }
411
 
412
  # Update progress
413
  if workflow_id and progress_store:
414
  progress_store[workflow_id].update({
415
  "current_step": "critic",
416
- "revision_count": state.get("revision_count", 0),
417
- "score": final_score
418
  })
419
 
420
  return state
 
1
  from src.llm_client import get_llm_client
2
  from langsmith import traceable
3
  import json
 
4
  import time
5
 
6
 
 
12
 
13
 
14
  # ============================================================
15
+ # LLM-ONLY WEIGHTED RUBRIC EVALUATION
16
  # ============================================================
17
 
18
+ CRITIC_SYSTEM_PROMPT = """You are a SWOT Output Critic and Quality Gatekeeper.
 
 
 
 
 
19
 
20
+ ## ROLE
21
+ Act as an independent, impartial evaluator that reviews SWOT analyses. Your function is to:
22
+ 1. Verify factual accuracy against provided input data
23
+ 2. Assess quality against a weighted rubric
24
+ 3. Decide whether the output PASSES or FAILS
25
+ 4. Provide actionable feedback if rejected
26
 
27
+ You are a quality gate, not a collaborator. Be strict.
 
28
 
29
+ ## VALID METRICS SCHEMA
 
 
 
 
 
30
 
31
+ **Fundamentals:** revenue, net_income, net_margin_pct, total_assets, total_liabilities, stockholders_equity, operating_margin_pct, total_debt, operating_cash_flow, free_cash_flow
32
 
33
+ **Valuation:** current_price, market_cap, enterprise_value, trailing_pe, forward_pe, ps_ratio, pb_ratio, trailing_peg, forward_peg, earnings_growth, revenue_growth
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
+ **Volatility:** vix, vxn, beta, historical_volatility, implied_volatility
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ **Macro:** gdp_growth, interest_rate, cpi_inflation, unemployment
 
 
 
 
 
38
 
39
+ **Qualitative:** News (title, date, source, url), Sentiment (title, date, source, url)
40
 
41
+ ## EVALUATION RUBRIC (Weighted)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ ### 1. Evidence Grounding (25%) — HARD FLOOR: >=7
44
+ - All claims cite specific metrics from input data
45
+ - No fabricated metrics (hallucination check)
46
+ - Field names match schema
47
+ - 9-10: Every claim traceable; 7-8: Nearly all grounded; 5-6: Most grounded, 2-3 unverifiable; 3-4: Multiple unsupported; 1-2: Clear hallucinations
48
+ - **If ANY fabricated metric detected, cap at 4**
49
 
50
+ ### 2. Constraint Compliance (20%) — HARD FLOOR: >=6
51
+ - No buy/sell/hold recommendations
52
+ - Temporal labels accurate (TTM, FY, forward)
53
+ - "DATA NOT PROVIDED" used for missing metrics
54
+ - 9-10: All constraints respected; 7-8: Minor issues; 5-6: One moderate violation; 3-4: Multiple violations; 1-2: Systematic violations
55
 
56
+ ### 3. Specificity & Actionability (20%)
57
+ - Company-specific, not generic templates
58
+ - Quantified findings (not "strong margins" but "31% operating margin")
59
+ - Avoids business cliches
60
+ - 9-10: Every point specific and quantified; 7-8: Mostly specific; 5-6: Mix of specific/generic; 3-4: Mostly generic; 1-2: Template-like
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ ### 4. Strategic Insight (15%)
63
+ - Synthesis across multiple data sources
64
+ - Prioritization by materiality
65
+ - Goes beyond restating metrics to interpreting implications
66
+ - 9-10: Identifies causal relationships; 7-8: Good synthesis; 5-6: Surface-level; 3-4: Restates metrics; 1-2: No value-add
67
 
68
+ ### 5. Completeness & Balance (10%)
69
+ Required sections:
70
+ - Strengths (Finding, Strategic Implication, Durability)
71
+ - Weaknesses (Finding, Severity, Trend, Remediation Levers)
72
+ - Opportunities (Catalyst, Timing, Execution Requirements)
73
+ - Threats (Risk Factor, Probability, Impact, Mitigation Options)
74
+ - Data Quality Notes
75
+ - 9-10: All present and substantive; 7-8: All present, minor gaps; 5-6: Missing 1 section; 3-4: Multiple missing; 1-2: Major gaps
76
+
77
+ ### 6. Clarity & Structure (10%)
78
+ - Clean formatting, logical grouping
79
+ - Easy to scan (not walls of text)
80
+ - No contradictions
81
+ - 9-10: Impeccable; 7-8: Well-structured; 5-6: Readable but dense; 3-4: Hard to follow; 1-2: Poorly organized
82
+
83
+ ## PASS CONDITIONS (ALL must be met)
84
+ 1. Weighted average >= 7.0
85
+ 2. Evidence Grounding >= 7
86
+ 3. Constraint Compliance >= 6
87
+ 4. No individual criterion below 5
88
+
89
+ ## OUTPUT FORMAT (JSON only, no other text)
90
+
91
+ {
92
+ "status": "APPROVED" or "REJECTED",
93
+ "weighted_score": <float>,
94
+ "scores": {
95
+ "evidence_grounding": <1-10>,
96
+ "constraint_compliance": <1-10>,
97
+ "specificity_actionability": <1-10>,
98
+ "strategic_insight": <1-10>,
99
+ "completeness_balance": <1-10>,
100
+ "clarity_structure": <1-10>
101
+ },
102
+ "hard_floor_violations": ["list of violated floors or empty array"],
103
+ "hallucinations_detected": ["list of fabricated metrics or empty array"],
104
+ "key_deficiencies": ["prioritized list, max 5"],
105
+ "strengths_to_preserve": ["elements done well"],
106
+ "actionable_feedback": ["specific rewrite instructions, max 5"]
107
+ }
108
+ """
109
 
110
+ # Weights for each criterion
111
+ CRITERION_WEIGHTS = {
112
+ "evidence_grounding": 0.25,
113
+ "constraint_compliance": 0.20,
114
+ "specificity_actionability": 0.20,
115
+ "strategic_insight": 0.15,
116
+ "completeness_balance": 0.10,
117
+ "clarity_structure": 0.10,
118
+ }
119
+
120
+ # Hard floor requirements
121
+ HARD_FLOORS = {
122
+ "evidence_grounding": 7,
123
+ "constraint_compliance": 6,
124
+ }
125
+
126
+ # Minimum score for any criterion
127
+ MIN_INDIVIDUAL_SCORE = 5
128
 
129
+
130
+ def calculate_weighted_score(scores: dict) -> float:
131
+ """Calculate weighted average from individual criterion scores."""
132
+ total = 0.0
133
+ for criterion, weight in CRITERION_WEIGHTS.items():
134
+ score = scores.get(criterion, 5) # Default to 5 if missing
135
+ total += score * weight
136
+ return round(total, 2)
137
+
138
+
139
+ def check_pass_conditions(scores: dict, weighted_score: float) -> tuple:
140
  """
141
+ Check if all pass conditions are met.
142
+ Returns (passed: bool, violations: list)
143
  """
144
+ violations = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
+ # Check weighted average threshold
147
+ if weighted_score < 7.0:
148
+ violations.append(f"Weighted score {weighted_score:.1f} < 7.0 threshold")
149
 
150
+ # Check hard floors
151
+ for criterion, floor in HARD_FLOORS.items():
152
+ score = scores.get(criterion, 0)
153
+ if score < floor:
154
+ violations.append(f"{criterion}: {score} < {floor} (hard floor)")
155
 
156
+ # Check minimum individual scores
157
+ for criterion, score in scores.items():
158
+ if score < MIN_INDIVIDUAL_SCORE:
159
+ violations.append(f"{criterion}: {score} < {MIN_INDIVIDUAL_SCORE} (minimum)")
160
 
161
+ return (len(violations) == 0, violations)
 
 
 
162
 
 
163
 
164
+ def run_llm_evaluation(report: str, source_data: str, iteration: int, llm) -> dict:
165
+ """
166
+ Run LLM-based evaluation with weighted rubric.
 
 
 
 
 
 
167
 
168
+ Args:
169
+ report: The SWOT output to evaluate
170
+ source_data: The source data the SWOT should be based on
171
+ iteration: Current revision number (1, 2, or 3)
172
+ llm: LLM client instance
173
 
174
+ Returns:
175
+ Evaluation result dict with scores, status, and feedback
176
  """
177
+ # Truncate source data if too long
178
+ max_source_len = 8000
179
+ if len(source_data) > max_source_len:
180
+ source_data = source_data[:max_source_len] + "\n... [truncated]"
 
 
181
 
182
+ prompt = f"""{CRITIC_SYSTEM_PROMPT}
183
 
184
+ ## INPUTS
 
185
 
186
+ **Iteration:** {iteration} of 3
187
+
188
+ **Source Data (the SWOT should be based ONLY on this):**
189
+ {source_data}
190
+
191
+ **SWOT Output to Evaluate:**
192
+ {report}
193
+
194
+ Evaluate strictly and respond with JSON only."""
195
 
196
  response, provider, error, providers_failed = llm.query(prompt, temperature=0)
197
 
198
  if error:
199
+ # Return default middle scores on error
200
  return {
201
+ "status": "REJECTED",
202
+ "weighted_score": 5.0,
203
+ "scores": {k: 5 for k in CRITERION_WEIGHTS.keys()},
204
+ "hard_floor_violations": [],
205
+ "hallucinations_detected": [],
206
+ "key_deficiencies": [f"LLM evaluation failed: {error}"],
207
+ "strengths_to_preserve": [],
208
+ "actionable_feedback": ["Unable to evaluate - please retry"],
209
  "provider": provider,
210
  "providers_failed": providers_failed,
211
  "error": True
212
  }
213
 
214
  try:
215
+ # Parse JSON from response
216
  content = response.strip()
217
  if "{" in content:
218
  json_start = content.index("{")
 
220
  content = content[json_start:json_end]
221
 
222
  parsed = json.loads(content)
223
+
224
+ # Extract and validate scores
225
+ scores = parsed.get("scores", {})
226
+ for criterion in CRITERION_WEIGHTS.keys():
227
+ if criterion not in scores:
228
+ scores[criterion] = 5 # Default
229
+ else:
230
+ scores[criterion] = min(max(int(scores[criterion]), 1), 10) # Clamp 1-10
231
+
232
+ # Calculate weighted score
233
+ weighted_score = calculate_weighted_score(scores)
234
+
235
+ # Check pass conditions
236
+ passed, violations = check_pass_conditions(scores, weighted_score)
237
+
238
+ # Determine status
239
+ status = "APPROVED" if passed else "REJECTED"
240
+
241
+ # Override status if LLM said APPROVED but conditions not met
242
+ if parsed.get("status") == "APPROVED" and not passed:
243
+ status = "REJECTED"
244
+
245
  return {
246
+ "status": status,
247
+ "weighted_score": weighted_score,
248
+ "scores": scores,
249
+ "hard_floor_violations": parsed.get("hard_floor_violations", violations),
250
+ "hallucinations_detected": parsed.get("hallucinations_detected", []),
251
+ "key_deficiencies": parsed.get("key_deficiencies", [])[:5],
252
+ "strengths_to_preserve": parsed.get("strengths_to_preserve", []),
253
+ "actionable_feedback": parsed.get("actionable_feedback", [])[:5],
254
  "provider": provider,
255
  "providers_failed": providers_failed,
256
  "error": False
257
  }
258
+
259
  except (json.JSONDecodeError, ValueError) as e:
260
  return {
261
+ "status": "REJECTED",
262
+ "weighted_score": 5.0,
263
+ "scores": {k: 5 for k in CRITERION_WEIGHTS.keys()},
264
+ "hard_floor_violations": [],
265
+ "hallucinations_detected": [],
266
+ "key_deficiencies": [f"JSON parsing failed: {str(e)[:100]}"],
267
+ "strengths_to_preserve": [],
268
+ "actionable_feedback": ["Evaluation response was malformed - please retry"],
269
  "provider": provider,
270
  "providers_failed": providers_failed,
271
  "error": True
272
  }
273
 
274
 
 
 
 
 
275
  @traceable(name="Critic")
276
  def critic_node(state, workflow_id=None, progress_store=None):
277
  """
278
+ Critic node with LLM-only weighted rubric evaluation.
279
+
280
+ Evaluates SWOT output on 6 criteria with weighted scoring:
281
+ - Evidence Grounding (25%) - hard floor >= 7
282
+ - Constraint Compliance (20%) - hard floor >= 6
283
+ - Specificity & Actionability (20%)
284
+ - Strategic Insight (15%)
285
+ - Completeness & Balance (10%)
286
+ - Clarity & Structure (10%)
287
 
288
+ Pass requires: weighted avg >= 7.0, hard floors met, no score < 5
289
  """
290
+ # Extract workflow_id and progress_store from state
291
  if workflow_id is None:
292
  workflow_id = state.get("workflow_id")
293
  if progress_store is None:
 
296
  # Skip evaluation if workflow has an error (abort mode)
297
  if state.get("error"):
298
  _add_activity_log(workflow_id, progress_store, "critic", "Skipping evaluation - workflow aborted")
 
299
  error_msg = state.get("error", "")
300
  if "429" in error_msg or "Too Many Requests" in error_msg:
301
  user_friendly_msg = "All AI providers are temporarily unavailable due to rate limits. Please wait a moment and try again."
 
308
  return state
309
 
310
  report = state.get("draft_report", "")
 
311
  revision_count = state.get("revision_count", 0)
312
+ iteration = revision_count + 1 # 1-indexed for display
313
 
314
  # Log evaluation start
315
+ _add_activity_log(workflow_id, progress_store, "critic", f"Evaluating SWOT quality (iteration {iteration}/3)...")
316
 
317
+ # Get source data for grounding verification
318
+ source_data = state.get("raw_data", "")
319
+
320
+ # Run LLM evaluation
321
+ print(f"Running LLM evaluation (iteration {iteration})...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  llm = get_llm_client()
323
+ _add_activity_log(workflow_id, progress_store, "critic", "Calling LLM for quality evaluation...")
324
  start_time = time.time()
325
 
326
+ result = run_llm_evaluation(report, source_data, iteration, llm)
 
 
 
 
 
 
 
327
  elapsed = time.time() - start_time
328
+ provider = result.get('provider', 'unknown')
329
 
330
  # Log failed providers
331
+ providers_failed = result.get('providers_failed', [])
332
  for pf in providers_failed:
333
  _add_activity_log(workflow_id, progress_store, "critic", f"LLM {pf['name']} failed: {pf['error']}")
334
 
 
337
  state["llm_providers_failed"] = []
338
  state["llm_providers_failed"].extend([pf["name"] for pf in providers_failed])
339
 
340
+ # Extract results
341
+ status = result["status"]
342
+ weighted_score = result["weighted_score"]
343
+ scores = result["scores"]
344
+
345
+ # Handle ESCALATE if max iterations reached
346
+ if iteration > 3 and status == "REJECTED":
347
+ status = "ESCALATE"
348
+ _add_activity_log(workflow_id, progress_store, "critic", "Max iterations reached - escalating for human review")
349
+
350
+ # Log scores
351
+ print(f" Status: {status}")
352
+ print(f" Weighted Score: {weighted_score:.1f}/10")
353
+ for criterion, score in scores.items():
354
+ floor = HARD_FLOORS.get(criterion, "-")
355
+ print(f" {criterion}: {score}/10 (floor: {floor})")
356
+
357
+ _add_activity_log(workflow_id, progress_store, "critic", f"Evaluation via {provider} ({elapsed:.1f}s)")
358
+
359
+ # Log status and score
360
+ if status == "APPROVED":
361
+ score_msg = f"APPROVED - Score: {weighted_score:.1f}/10"
362
+ elif status == "ESCALATE":
363
+ score_msg = f"ESCALATE - Score: {weighted_score:.1f}/10 (max iterations)"
364
  else:
365
+ score_msg = f"REJECTED - Score: {weighted_score:.1f}/10 - needs revision"
366
  _add_activity_log(workflow_id, progress_store, "critic", score_msg)
367
 
368
+ # Build critique message
369
+ critique_lines = [
370
+ f"Status: {status}",
371
+ f"Weighted Score: {weighted_score:.1f}/10",
 
 
 
372
  "",
373
+ "Criterion Scores:",
 
374
  ]
375
 
376
+ for criterion, score in scores.items():
377
+ weight = int(CRITERION_WEIGHTS[criterion] * 100)
378
+ floor = HARD_FLOORS.get(criterion)
379
+ floor_str = f" (floor: {floor})" if floor else ""
380
+ passed = "PASS" if score >= (floor or MIN_INDIVIDUAL_SCORE) else "FAIL"
381
+ critique_lines.append(f" {criterion}: {score}/10 [{weight}%] {floor_str} - {passed}")
382
+
383
+ if result.get("hard_floor_violations"):
384
+ critique_lines.append("")
385
+ critique_lines.append("Hard Floor Violations:")
386
+ for v in result["hard_floor_violations"]:
387
+ critique_lines.append(f" - {v}")
388
+
389
+ if result.get("hallucinations_detected"):
390
+ critique_lines.append("")
391
+ critique_lines.append("Hallucinations Detected:")
392
+ for h in result["hallucinations_detected"]:
393
+ critique_lines.append(f" - {h}")
394
+
395
+ if result.get("key_deficiencies"):
396
+ critique_lines.append("")
397
+ critique_lines.append("Key Deficiencies:")
398
+ for i, d in enumerate(result["key_deficiencies"], 1):
399
+ critique_lines.append(f" {i}. {d}")
400
+
401
+ if result.get("actionable_feedback"):
402
+ critique_lines.append("")
403
+ critique_lines.append("Actionable Feedback:")
404
+ for i, f in enumerate(result["actionable_feedback"], 1):
405
+ critique_lines.append(f" {i}. {f}")
406
+
407
+ if result.get("strengths_to_preserve"):
408
+ critique_lines.append("")
409
+ critique_lines.append("Strengths to Preserve:")
410
+ for s in result["strengths_to_preserve"]:
411
+ critique_lines.append(f" - {s}")
412
+
413
+ state["critique"] = "\n".join(critique_lines)
414
+ state["score"] = weighted_score
415
  state["critique_details"] = {
416
+ "status": status,
417
+ "weighted_score": weighted_score,
418
+ "scores": scores,
419
+ "hard_floor_violations": result.get("hard_floor_violations", []),
420
+ "hallucinations_detected": result.get("hallucinations_detected", []),
421
+ "key_deficiencies": result.get("key_deficiencies", []),
422
+ "strengths_to_preserve": result.get("strengths_to_preserve", []),
423
+ "actionable_feedback": result.get("actionable_feedback", []),
424
  }
425
 
426
  # Update progress
427
  if workflow_id and progress_store:
428
  progress_store[workflow_id].update({
429
  "current_step": "critic",
430
+ "revision_count": revision_count,
431
+ "score": weighted_score
432
  })
433
 
434
  return state
src/nodes/editor.py DELETED
@@ -1,138 +0,0 @@
1
- from src.llm_client import get_llm_client
2
- from langsmith import traceable
3
- import time
4
- import json
5
-
6
-
7
- def _add_activity_log(workflow_id, progress_store, step, message):
8
- """Helper to add activity log entry."""
9
- if workflow_id and progress_store:
10
- from src.services.workflow_store import add_activity_log
11
- add_activity_log(workflow_id, step, message)
12
-
13
-
14
- @traceable(name="Editor")
15
- def editor_node(state, workflow_id=None, progress_store=None):
16
- """
17
- Editor node that revises the SWOT draft based on critique feedback.
18
- Increments the revision count and returns the improved draft.
19
- """
20
- # Extract workflow_id and progress_store from state (graph invokes with state only)
21
- if workflow_id is None:
22
- workflow_id = state.get("workflow_id")
23
- if progress_store is None:
24
- progress_store = state.get("progress_store")
25
-
26
- current_revision = state.get("revision_count", 0) + 1
27
-
28
- # Update progress if tracking is enabled
29
- if workflow_id and progress_store:
30
- progress_store[workflow_id].update({
31
- "current_step": "editor",
32
- "revision_count": state.get("revision_count", 0),
33
- "score": state.get("score", 0)
34
- })
35
-
36
- # Skip if workflow already has an error (abort mode)
37
- if state.get("error"):
38
- _add_activity_log(workflow_id, progress_store, "editor", f"Skipping revision - workflow aborted")
39
- state["revision_count"] = current_revision
40
- return state
41
-
42
- # Log revision start
43
- _add_activity_log(workflow_id, progress_store, "editor", f"Revision #{current_revision} in progress...")
44
-
45
- # Use user-provided API keys if available
46
- user_keys = state.get("user_api_keys", {})
47
- llm = get_llm_client(user_keys) if user_keys else get_llm_client()
48
- strategy_name = state.get("strategy_focus", "Cost Leadership")
49
-
50
- # Get source data for grounding - editor must use ONLY this data
51
- source_data = state.get("raw_data", "")
52
- # Truncate if too long to avoid token limits
53
- if len(source_data) > 4000:
54
- source_data = source_data[:4000] + "\n... [truncated]"
55
-
56
- # Prepare the revision prompt with source data for grounding
57
- prompt = f"""
58
- You are revising a SWOT analysis based on critique feedback. Keep it CONCISE.
59
-
60
- CRITICAL GROUNDING RULES:
61
- 1. You may ONLY use facts and numbers from the SOURCE DATA provided below.
62
- 2. DO NOT invent, assume, or fabricate any information not in the source data.
63
- 3. Every claim must cite specific numbers from the source data.
64
- 4. If the critique asks for information not in the source data, state "Data not available".
65
-
66
- SOURCE DATA (use ONLY this for facts and numbers):
67
- {source_data}
68
-
69
- CURRENT DRAFT:
70
- {state['draft_report']}
71
-
72
- CRITIQUE:
73
- {state['critique']}
74
-
75
- Strategic Focus: {strategy_name}
76
-
77
- REVISION INSTRUCTIONS:
78
- 1. Address the critique points using ONLY data from SOURCE DATA above
79
- 2. Ensure all 4 SWOT sections are present and complete
80
- 3. Every bullet point must cite specific metrics from the source data
81
- 4. Make sure strengths/opportunities are positive, weaknesses/threats are negative
82
- 5. Align analysis with {strategy_name} strategic focus
83
- 6. If data is missing for a point, remove that point rather than inventing data
84
- 7. Keep each bullet point under 25 words - single sentence only
85
- 8. Maximum 5 bullet points per category
86
- 9. Remove any verbose explanations or context paragraphs
87
-
88
- Return only the improved SWOT analysis. Keep it brief and impactful.
89
- """
90
-
91
- # Get the revised draft from LLM
92
- start_time = time.time()
93
- response, provider, error, providers_failed = llm.query(prompt, temperature=0)
94
- elapsed = time.time() - start_time
95
-
96
- # Log failed providers
97
- for pf in providers_failed:
98
- _add_activity_log(workflow_id, progress_store, "editor", f"LLM {pf['name']} failed: {pf['error']}")
99
-
100
- # Track failed providers in state for frontend
101
- if "llm_providers_failed" not in state:
102
- state["llm_providers_failed"] = []
103
- state["llm_providers_failed"].extend([pf["name"] for pf in providers_failed])
104
-
105
- if error:
106
- print(f"Editor LLM error: {error}")
107
- _add_activity_log(workflow_id, progress_store, "editor", f"Revision failed: {error}")
108
-
109
- # Graceful degradation based on revision count
110
- if current_revision == 1:
111
- # First revision failed - use Analyzer's original draft
112
- _add_activity_log(workflow_id, progress_store, "editor", "Using initial draft from Analyzer (revision unavailable)")
113
- # Don't set error - allow workflow to continue with original draft
114
- # draft_report already contains Analyzer's output
115
- state["editor_skipped"] = True
116
- else:
117
- # Revision > 1 failed - use the last successful revision
118
- _add_activity_log(workflow_id, progress_store, "editor", f"Using revision #{current_revision - 1} draft (further revision unavailable)")
119
- # Don't set error - allow workflow to complete with previous draft
120
- state["editor_skipped"] = True
121
- else:
122
- state["draft_report"] = response
123
- state["provider_used"] = provider
124
- state["editor_skipped"] = False
125
- _add_activity_log(workflow_id, progress_store, "editor", f"Revision #{current_revision} completed via {provider} ({elapsed:.1f}s)")
126
-
127
- # Increment revision count
128
- state["revision_count"] = current_revision
129
-
130
- # Update progress with new revision count
131
- if workflow_id and progress_store:
132
- progress_store[workflow_id].update({
133
- "current_step": "editor",
134
- "revision_count": state["revision_count"],
135
- "score": state.get("score", 0)
136
- })
137
-
138
- return state
src/utils/analysis_cache.py CHANGED
@@ -1,7 +1,7 @@
1
  """
2
  Analysis Cache - Supabase PostgreSQL caching for final SWOT analysis results.
3
 
4
- Caches Editor agent output with 24h TTL to avoid re-running the full pipeline.
5
  Uses schema: asa.analysis_cache
6
  """
7
 
 
1
  """
2
  Analysis Cache - Supabase PostgreSQL caching for final SWOT analysis results.
3
 
4
+ Caches final SWOT analysis output with 24h TTL to avoid re-running the full pipeline.
5
  Uses schema: asa.analysis_cache
6
  """
7
 
src/utils/conditions.py CHANGED
@@ -1,5 +1,6 @@
1
  from typing import Literal
2
 
 
3
  def should_continue(state) -> Literal["exit", "retry"]:
4
  """
5
  Conditional routing function that determines whether to continue
@@ -7,19 +8,19 @@ def should_continue(state) -> Literal["exit", "retry"]:
7
 
8
  Exit conditions:
9
  - Error set (LLM providers failed - abort immediately)
10
- - Editor skipped (LLM failed but using fallback draft - exit gracefully)
11
  - Score >= 7 (good quality)
12
  - Revision count > 3 (max attempts reached)
13
 
14
  Continue conditions:
15
- - No error AND No editor skip AND Score < 7 AND Revisions <= 3
16
  """
17
  # Abort immediately if error is set (critical failure)
18
  if state.get("error"):
19
  return "exit"
20
 
21
- # Exit gracefully if editor was skipped (using fallback draft)
22
- if state.get("editor_skipped"):
23
  return "exit"
24
 
25
  current_score = state.get("score", 0)
 
1
  from typing import Literal
2
 
3
+
4
  def should_continue(state) -> Literal["exit", "retry"]:
5
  """
6
  Conditional routing function that determines whether to continue
 
8
 
9
  Exit conditions:
10
  - Error set (LLM providers failed - abort immediately)
11
+ - Analyzer revision skipped (LLM failed but using fallback draft - exit gracefully)
12
  - Score >= 7 (good quality)
13
  - Revision count > 3 (max attempts reached)
14
 
15
  Continue conditions:
16
+ - No error AND No revision skip AND Score < 7 AND Revisions <= 3
17
  """
18
  # Abort immediately if error is set (critical failure)
19
  if state.get("error"):
20
  return "exit"
21
 
22
+ # Exit gracefully if analyzer revision was skipped (using fallback draft)
23
+ if state.get("analyzer_revision_skipped"):
24
  return "exit"
25
 
26
  current_score = state.get("score", 0)
src/workflow/graph.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
  LangGraph workflow definition for self-correcting SWOT analysis.
3
- Defines the cyclic workflow: Researcher -> Analyzer -> Critic -> Editor (loop)
4
  """
5
 
6
  from langgraph.graph import StateGraph
@@ -10,17 +10,15 @@ from src.state import AgentState
10
  from src.nodes.researcher import researcher_node
11
  from src.nodes.analyzer import analyzer_node
12
  from src.nodes.critic import critic_node
13
- from src.nodes.editor import editor_node
14
  from src.utils.conditions import should_continue
15
 
16
  # Create the cyclic workflow
17
  workflow = StateGraph(AgentState)
18
 
19
- # Add all nodes to the workflow
20
  workflow.add_node("Researcher", RunnableLambda(researcher_node))
21
  workflow.add_node("Analyzer", RunnableLambda(analyzer_node))
22
  workflow.add_node("Critic", RunnableLambda(critic_node))
23
- workflow.add_node("Editor", RunnableLambda(editor_node))
24
 
25
  # Define the workflow edges
26
  workflow.set_entry_point("Researcher")
@@ -28,18 +26,16 @@ workflow.add_edge("Researcher", "Analyzer")
28
  workflow.add_edge("Analyzer", "Critic")
29
 
30
  # Add conditional edges for the self-correcting loop
 
31
  workflow.add_conditional_edges(
32
  "Critic",
33
  should_continue,
34
  {
35
  "exit": "__end__",
36
- "retry": "Editor"
37
  }
38
  )
39
 
40
- # Complete the loop: Editor -> Critic
41
- workflow.add_edge("Editor", "Critic")
42
-
43
  # Set the finish point
44
  workflow.set_finish_point("Critic")
45
 
@@ -48,9 +44,9 @@ workflow.config = {
48
  "project_name": "AI-strategy-agent-cyclic",
49
  "tags": ["self-correcting", "quality-loop", "swot-analysis"],
50
  "metadata": {
51
- "version": "1.0",
52
  "environment": "development",
53
- "workflow_type": "researcher-analyzer-critic-editor"
54
  }
55
  }
56
 
 
1
  """
2
  LangGraph workflow definition for self-correcting SWOT analysis.
3
+ Defines the cyclic workflow: Researcher -> Analyzer -> Critic -> Analyzer (revision loop)
4
  """
5
 
6
  from langgraph.graph import StateGraph
 
10
  from src.nodes.researcher import researcher_node
11
  from src.nodes.analyzer import analyzer_node
12
  from src.nodes.critic import critic_node
 
13
  from src.utils.conditions import should_continue
14
 
15
  # Create the cyclic workflow
16
  workflow = StateGraph(AgentState)
17
 
18
+ # Add nodes to the workflow (Analyzer handles both initial generation and revisions)
19
  workflow.add_node("Researcher", RunnableLambda(researcher_node))
20
  workflow.add_node("Analyzer", RunnableLambda(analyzer_node))
21
  workflow.add_node("Critic", RunnableLambda(critic_node))
 
22
 
23
  # Define the workflow edges
24
  workflow.set_entry_point("Researcher")
 
26
  workflow.add_edge("Analyzer", "Critic")
27
 
28
  # Add conditional edges for the self-correcting loop
29
+ # Analyzer now handles revisions directly (no separate Editor node)
30
  workflow.add_conditional_edges(
31
  "Critic",
32
  should_continue,
33
  {
34
  "exit": "__end__",
35
+ "retry": "Analyzer" # Route back to Analyzer for revisions
36
  }
37
  )
38
 
 
 
 
39
  # Set the finish point
40
  workflow.set_finish_point("Critic")
41
 
 
44
  "project_name": "AI-strategy-agent-cyclic",
45
  "tags": ["self-correcting", "quality-loop", "swot-analysis"],
46
  "metadata": {
47
+ "version": "2.0",
48
  "environment": "development",
49
+ "workflow_type": "researcher-analyzer-critic"
50
  }
51
  }
52
 
tests/test_self_correcting_loop.py CHANGED
@@ -16,7 +16,7 @@ def test_analyzer_failure():
16
 
17
  # Monkey patch the analyzer node to force poor quality
18
  def force_poor_analyzer(state):
19
- """Force a poor quality draft to trigger Editor"""
20
  state["draft_report"] = "Bad analysis. No details. Incomplete."
21
  print("⚠️ FORCED POOR QUALITY: Overriding with very weak content")
22
  return state
@@ -40,10 +40,10 @@ def test_critic_failure():
40
 
41
  # Monkey patch the critic to force a low score
42
  def force_low_score_critic(state):
43
- """Force a low score to trigger Editor"""
44
  state["score"] = 3 # Low score to force revision
45
  state["critique"] = "Forced low score for testing self-correction loop"
46
- print("⚠️ FORCED LOW SCORE: 3/10 to trigger Editor")
47
  return state
48
 
49
  # Temporarily replace critic in the workflow
 
16
 
17
  # Monkey patch the analyzer node to force poor quality
18
  def force_poor_analyzer(state):
19
+ """Force a poor quality draft to trigger revision loop"""
20
  state["draft_report"] = "Bad analysis. No details. Incomplete."
21
  print("⚠️ FORCED POOR QUALITY: Overriding with very weak content")
22
  return state
 
40
 
41
  # Monkey patch the critic to force a low score
42
  def force_low_score_critic(state):
43
+ """Force a low score to trigger revision loop"""
44
  state["score"] = 3 # Low score to force revision
45
  state["critique"] = "Forced low score for testing self-correction loop"
46
+ print("⚠️ FORCED LOW SCORE: 3/10 to trigger revision loop")
47
  return state
48
 
49
  # Temporarily replace critic in the workflow