vn6295337 Claude Opus 4.5 committed on
Commit
53fe655
·
1 Parent(s): e6152f5

Refactor: Analyzer handles revisions directly, remove Editor node

Browse files

Architecture change: Researcher → Analyzer → Critic → Analyzer (revision loop)

- Add conditional focus area blocks to Analyzer based on failed rubric criteria
- Add _build_revision_prompt() for structured Critic feedback integration
- Analyzer detects revision mode via revision_count > 0 and critique_details
- Route "retry" from Critic back to Analyzer instead of Editor
- Rename editor_skipped → analyzer_revision_skipped in conditions.py
- Delete redundant src/nodes/editor.py
- Update Critic to LLM-only weighted rubric evaluation
- Update workflow version to 2.0

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

src/api/schemas.py CHANGED
@@ -30,7 +30,7 @@ class WorkflowStartResponse(BaseModel):
30
  class WorkflowStatus(BaseModel):
31
  """Workflow status model."""
32
  status: str # 'running', 'completed', 'error'
33
- current_step: str # 'starting', 'Researcher', 'Analyzer', 'Critic', 'Editor'
34
  revision_count: int
35
  score: int
36
 
 
30
  class WorkflowStatus(BaseModel):
31
  """Workflow status model."""
32
  status: str # 'running', 'completed', 'error'
33
+ current_step: str # 'starting', 'Researcher', 'Analyzer', 'Critic'
34
  revision_count: int
35
  score: int
36
 
src/graph_cyclic.py CHANGED
@@ -4,18 +4,16 @@ from src.state import AgentState
4
  from src.nodes.researcher import researcher_node
5
  from src.nodes.analyzer import analyzer_node
6
  from src.nodes.critic import critic_node
7
- from src.nodes.editor import editor_node
8
  from src.utils.conditions import should_continue
9
  from langsmith import traceable
10
 
11
  # Create the cyclic workflow
12
  workflow = StateGraph(AgentState)
13
 
14
- # Add all nodes to the workflow
15
  workflow.add_node("Researcher", RunnableLambda(researcher_node))
16
  workflow.add_node("Analyzer", RunnableLambda(analyzer_node))
17
  workflow.add_node("Critic", RunnableLambda(critic_node))
18
- workflow.add_node("Editor", RunnableLambda(editor_node))
19
 
20
  # Define the workflow edges
21
  workflow.set_entry_point("Researcher")
@@ -23,18 +21,16 @@ workflow.add_edge("Researcher", "Analyzer")
23
  workflow.add_edge("Analyzer", "Critic")
24
 
25
  # Add conditional edges for the self-correcting loop
 
26
  workflow.add_conditional_edges(
27
- "Critic",
28
- should_continue,
29
  {
30
  "exit": "__end__",
31
- "retry": "Editor"
32
  }
33
  )
34
 
35
- # Complete the loop: Editor → Critic
36
- workflow.add_edge("Editor", "Critic")
37
-
38
  # Set the finish point
39
  workflow.set_finish_point("Critic")
40
 
@@ -43,9 +39,9 @@ workflow.config = {
43
  "project_name": "AI-strategy-agent-cyclic",
44
  "tags": ["self-correcting", "quality-loop", "swot-analysis"],
45
  "metadata": {
46
- "version": "1.0",
47
  "environment": "development",
48
- "workflow_type": "researcher-analyzer-critic-editor"
49
  }
50
  }
51
 
@@ -91,7 +87,7 @@ if __name__ == "__main__":
91
  target_company = "Tesla"
92
 
93
  print(f"🔍 Running Self-Correcting SWOT Analysis for {target_company}...")
94
- print("📝 This workflow includes: Researcher → Analyzer → Critic → Editor (loop)")
95
  print("🎯 Loop continues until score ≥ 7 or 3 revisions attempted\n")
96
 
97
  # Execute the workflow
@@ -115,7 +111,7 @@ if __name__ == "__main__":
115
  print(f" - Initial Quality: Improved from unknown to {final_score}/10")
116
  print(f" - Revisions Made: {final_revision_count}")
117
  print(f" - Final Report Length: {len(result['draft_report'])} characters")
118
- print(f" - Workflow: Researcher → Analyzer → Critic → Editor (loop)")
119
  print(f" - Tracing: Enhanced LangSmith traces available")
120
 
121
  # Quality assessment
 
4
  from src.nodes.researcher import researcher_node
5
  from src.nodes.analyzer import analyzer_node
6
  from src.nodes.critic import critic_node
 
7
  from src.utils.conditions import should_continue
8
  from langsmith import traceable
9
 
10
  # Create the cyclic workflow
11
  workflow = StateGraph(AgentState)
12
 
13
+ # Add nodes to the workflow (Analyzer handles both initial generation and revisions)
14
  workflow.add_node("Researcher", RunnableLambda(researcher_node))
15
  workflow.add_node("Analyzer", RunnableLambda(analyzer_node))
16
  workflow.add_node("Critic", RunnableLambda(critic_node))
 
17
 
18
  # Define the workflow edges
19
  workflow.set_entry_point("Researcher")
 
21
  workflow.add_edge("Analyzer", "Critic")
22
 
23
  # Add conditional edges for the self-correcting loop
24
+ # Analyzer now handles revisions directly (no separate Editor node)
25
  workflow.add_conditional_edges(
26
+ "Critic",
27
+ should_continue,
28
  {
29
  "exit": "__end__",
30
+ "retry": "Analyzer" # Route back to Analyzer for revisions
31
  }
32
  )
33
 
 
 
 
34
  # Set the finish point
35
  workflow.set_finish_point("Critic")
36
 
 
39
  "project_name": "AI-strategy-agent-cyclic",
40
  "tags": ["self-correcting", "quality-loop", "swot-analysis"],
41
  "metadata": {
42
+ "version": "2.0",
43
  "environment": "development",
44
+ "workflow_type": "researcher-analyzer-critic"
45
  }
46
  }
47
 
 
87
  target_company = "Tesla"
88
 
89
  print(f"🔍 Running Self-Correcting SWOT Analysis for {target_company}...")
90
+ print("📝 This workflow includes: Researcher → Analyzer → Critic → Analyzer (revision loop)")
91
  print("🎯 Loop continues until score ≥ 7 or 3 revisions attempted\n")
92
 
93
  # Execute the workflow
 
111
  print(f" - Initial Quality: Improved from unknown to {final_score}/10")
112
  print(f" - Revisions Made: {final_revision_count}")
113
  print(f" - Final Report Length: {len(result['draft_report'])} characters")
114
+ print(f" - Workflow: Researcher → Analyzer → Critic → Analyzer (revision loop)")
115
  print(f" - Tracing: Enhanced LangSmith traces available")
116
 
117
  # Quality assessment
src/nodes/analyzer.py CHANGED
@@ -1,10 +1,123 @@
1
- from src.tools import get_strategy_context
2
  from src.llm_client import get_llm_client
3
  from langsmith import traceable
4
  import time
5
  import json
6
 
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def _add_activity_log(workflow_id, progress_store, step, message):
9
  """Helper to add activity log entry."""
10
  if workflow_id and progress_store:
@@ -118,8 +231,13 @@ def _get_value(metric_data) -> any:
118
  return metric_data
119
 
120
 
121
- def _generate_data_report(raw_data: str) -> str:
122
- """Generate complete multi-source data report with simple tables."""
 
 
 
 
 
123
  try:
124
  data = json.loads(raw_data)
125
  except json.JSONDecodeError:
@@ -195,14 +313,17 @@ def _generate_data_report(raw_data: str) -> str:
195
  ("P/E Forward", "forward_pe", lambda v: _format_number(v, "x")),
196
  ("P/B Ratio", "pb_ratio", lambda v: _format_number(v, "x")),
197
  ("P/S Ratio", "ps_ratio", lambda v: _format_number(v, "x")),
198
- ("EV/EBITDA", "ev_ebitda", lambda v: _format_number(v, "x")),
199
- ("EV/Revenue", "ev_revenue", lambda v: _format_number(v, "x")),
200
  ("PEG Ratio", "trailing_peg", lambda v: _format_number(v, "x")),
201
  ("Price/FCF", "price_to_fcf", lambda v: _format_number(v, "x")),
202
  ("Revenue Growth", "revenue_growth", lambda v: _format_number(v * 100 if v and abs(v) < 10 else v, "%") if v else "N/A"),
203
  ("Earnings Growth", "earnings_growth", lambda v: _format_number(v * 100 if v and abs(v) < 10 else v, "%") if v else "N/A"),
204
  ]
205
 
 
 
 
 
 
206
  for name, key, fmt in val_metrics:
207
  y = yf_val.get(key)
208
  a = av_val.get(key)
@@ -470,8 +591,13 @@ def _extract_key_metrics(raw_data: str) -> dict:
470
  return extracted
471
 
472
 
473
- def _format_metrics_for_prompt(extracted: dict) -> str:
474
- """Format extracted metrics into a clear text for the LLM."""
 
 
 
 
 
475
  lines = []
476
  lines.append(f"Company: {extracted['company']} ({extracted['ticker']})")
477
  lines.append("")
@@ -542,7 +668,7 @@ def _format_metrics_for_prompt(extracted: dict) -> str:
542
  lines.append(f"- P/B Ratio: {val['pb_ratio']:.2f}")
543
  if val.get("ps_ratio"):
544
  lines.append(f"- P/S Ratio: {val['ps_ratio']:.2f}")
545
- if val.get("ev_ebitda"):
546
  lines.append(f"- EV/EBITDA: {val['ev_ebitda']:.1f}")
547
  if val.get("valuation_signal"):
548
  lines.append(f"- Overall Signal: {val['valuation_signal']}")
@@ -608,6 +734,221 @@ def _format_metrics_for_prompt(extracted: dict) -> str:
608
  return "\n".join(lines)
609
 
610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
  @traceable(name="Analyzer")
612
  def analyzer_node(state, workflow_id=None, progress_store=None):
613
  # Extract workflow_id and progress_store from state (graph invokes with state only)
@@ -628,56 +969,56 @@ def analyzer_node(state, workflow_id=None, progress_store=None):
628
  user_keys = state.get("user_api_keys", {})
629
  llm = get_llm_client(user_keys) if user_keys else get_llm_client()
630
  raw = state["raw_data"]
631
- strategy_name = state.get("strategy_focus", "Cost Leadership")
632
- strategy_context = get_strategy_context(strategy_name)
633
  company = state["company_name"]
634
  ticker = state.get("ticker", "")
635
 
 
 
 
 
 
 
 
 
 
 
636
  # Extract and format metrics for better LLM understanding
637
  extracted = _extract_key_metrics(raw)
638
- formatted_data = _format_metrics_for_prompt(extracted)
639
 
640
  # Generate detailed data report (shown before SWOT)
641
- data_report = _generate_data_report(raw)
642
-
643
- # Log LLM call start
644
- _add_activity_log(workflow_id, progress_store, "analyzer", f"Calling LLM to generate SWOT analysis...")
645
-
646
- prompt = f"""You are a financial analyst creating a CONCISE SWOT analysis for {company} ({ticker}).
647
-
648
- CRITICAL INSTRUCTIONS:
649
- 1. ONLY use the data provided below. DO NOT invent or assume any information.
650
- 2. Every point MUST cite specific numbers from the data (e.g., "P/E of 21.3", "Beta of 0.88").
651
- 3. If data is missing for a category, say "Insufficient data" - do NOT make up information.
652
- 4. Focus on what the numbers actually mean for this specific company.
653
-
654
- FORMAT REQUIREMENTS - BE CONCISE:
655
- - Each bullet point: 1 sentence MAX (under 25 words)
656
- - 3-5 bullet points per SWOT category
657
- - Focus on the most impactful insights only
658
- - NO lengthy explanations or context paragraphs
659
-
660
- Strategic Focus: {strategy_name}
661
- Context: {strategy_context}
662
-
663
- === ACTUAL DATA FROM FINANCIAL SOURCES ===
664
- {formatted_data}
665
-
666
- Based ONLY on the data above, provide a SWOT analysis in this format:
667
-
668
- Strengths:
669
- - [Single sentence with metric, under 25 words]
670
-
671
- Weaknesses:
672
- - [Single sentence with metric, under 25 words]
673
-
674
- Opportunities:
675
- - [Single sentence citing macro/market data, under 25 words]
676
-
677
- Threats:
678
- - [Single sentence citing risks, under 25 words]
679
 
680
- Remember: Every bullet must cite actual data. Keep each point brief and impactful."""
681
  start_time = time.time()
682
  response, provider, error, providers_failed = llm.query(prompt, temperature=0)
683
  elapsed = time.time() - start_time
@@ -704,18 +1045,50 @@ Remember: Every bullet must cite actual data. Keep each point brief and impactfu
704
  llm_status[provider_name] = "completed"
705
 
706
  if error:
707
- state["draft_report"] = f"Error generating analysis: {error}"
708
- state["provider_used"] = None
709
- state["error"] = error # Signal workflow to abort
710
- _add_activity_log(workflow_id, progress_store, "analyzer", f"LLM error: {error}")
711
- _add_activity_log(workflow_id, progress_store, "analyzer", "Workflow aborted - all LLM providers unavailable")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
712
  else:
713
- # Combine data report (Part 1) with SWOT analysis (Part 2)
714
- swot_section = f"## SWOT Analysis\n\n{response}"
715
- full_report = f"{data_report}\n{swot_section}"
716
- state["draft_report"] = full_report
717
- state["data_report"] = data_report # Store separately for frontend flexibility
718
- state["provider_used"] = provider
719
- _add_activity_log(workflow_id, progress_store, "analyzer", f"SWOT generated via {provider} ({elapsed:.1f}s)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
720
 
721
  return state
 
 
1
  from src.llm_client import get_llm_client
2
  from langsmith import traceable
3
  import time
4
  import json
5
 
6
 
7
# Financial institution detection for EV/EBITDA exclusion.
# NOTE: _is_financial_institution() matches these as *substrings* of the
# lowercased sector/industry strings, so broad terms like "financial" also
# match e.g. "diversified financial services". All entries must be lowercase.
FINANCIAL_SECTORS = {
    "financial services", "financial", "banking", "banks",
    "insurance", "real estate investment trust", "reit",
    "investment management", "capital markets", "diversified financial services",
    "consumer finance", "asset management", "mortgage finance",
}

# Industry-level terms (lowercase), checked independently of the sector match.
FINANCIAL_INDUSTRIES = {
    "banks", "regional banks", "diversified banks", "money center banks",
    "insurance", "life insurance", "property insurance", "reinsurance",
    "real estate", "reit", "mortgage reits", "equity reits",
    "asset management", "investment banking", "capital markets",
    "consumer finance", "specialty finance",
}

# Fallback: known financial tickers when sector data unavailable.
# Compared against ticker.upper() in _is_financial_institution(), so
# entries must stay uppercase.
FINANCIAL_TICKERS = {
    "JPM", "BAC", "WFC", "GS", "MS", "C", "USB", "PNC", "TFC", "COF",
    "AXP", "BLK", "SCHW", "CME", "ICE", "SPGI", "MCO",
    "BRK.A", "BRK.B", "MET", "PRU", "AIG", "ALL", "TRV", "PGR", "CB",
    "AMT", "PLD", "CCI", "EQIX", "PSA", "O", "WELL", "AVB", "EQR",
}
30
+
31
# =============================================================================
# REVISION MODE: Conditional Focus Area Blocks
# These are included in revision prompts based on which rubric criteria failed
# (score thresholds live in _build_revision_prompt).
# =============================================================================

# Included when the Critic's evidence_grounding score is below 7.
EVIDENCE_GROUNDING_BLOCK = """
**EVIDENCE GROUNDING (Critical)**
- Every claim must cite a specific metric from the input data
- Use exact field names: `revenue`, `net_margin_pct`, `trailing_pe`, etc.
- Format citations as: "[Metric]: [Value] ([Source], [Period])"
- If a metric was flagged as fabricated, remove it entirely or replace with actual data
"""

# Included when constraint_compliance is below 6 (deliberately a lower bar
# than the other criteria).
CONSTRAINT_COMPLIANCE_BLOCK = """
**CONSTRAINT COMPLIANCE (Critical)**
- Remove any language that sounds like investment advice
- Check all temporal labels — TTM vs FY vs Q must match the source
- Add confidence levels to key conclusions: (High/Medium/Low)
- Do not use EV/EBITDA for financial institutions
- For missing data, state "DATA NOT PROVIDED" — do not estimate
"""

# Included when specificity_actionability is below 7.
SPECIFICITY_BLOCK = """
**SPECIFICITY & ACTIONABILITY**
- Replace generic statements with company-specific observations
- Quantify every claim possible: not "strong margins" but "31.0% operating margin"
- Remove business clichés: "leveraging," "best-in-class," "synergies"
"""

# Included when strategic_insight is below 7.
INSIGHT_BLOCK = """
**STRATEGIC INSIGHT**
- Connect observations across data baskets (e.g., link margin trends to macro rates)
- Go beyond restating metrics — explain WHY they matter
- Identify non-obvious relationships in the data
"""

# Included when completeness_balance is below 7.
COMPLETENESS_BLOCK = """
**COMPLETENESS & BALANCE**
- Ensure ALL required sections are present (Strengths, Weaknesses, Opportunities, Threats, Data Quality Notes)
- Balance quadrants — no section should be filler or disproportionately thin
"""

# Included when clarity_structure is below 7.
CLARITY_BLOCK = """
**CLARITY & STRUCTURE**
- Use consistent formatting throughout
- Ensure no contradictions across sections
- Make output scannable — executives should grasp key points in 30 seconds
"""
79
+
80
+
81
def _is_financial_institution(sector: str, industry: str, ticker: str) -> bool:
    """Return True when the company looks like a financial institution.

    EV/EBITDA is not a meaningful valuation metric for banks, insurers,
    and REITs, so callers use this flag to drop it from reports/prompts.
    Sector and industry are matched by substring against the lowercase
    term sets; the ticker is a last-resort exact match.
    """
    normalized_sector = (sector or "").lower().strip()
    normalized_industry = (industry or "").lower().strip()

    sector_match = any(term in normalized_sector for term in FINANCIAL_SECTORS)
    industry_match = any(term in normalized_industry for term in FINANCIAL_INDUSTRIES)
    ticker_match = bool(ticker) and ticker.upper() in FINANCIAL_TICKERS

    return sector_match or industry_match or ticker_match
93
+
94
+
95
+ def _extract_company_profile(raw_data: str) -> dict:
96
+ """Extract sector/industry from Yahoo Finance data if available."""
97
+ try:
98
+ data = json.loads(raw_data)
99
+ except json.JSONDecodeError:
100
+ return {}
101
+
102
+ multi_source = data.get("multi_source", {})
103
+
104
+ # Try valuation Yahoo Finance data first
105
+ yf_val = multi_source.get("valuation_all", {}).get("yahoo_finance", {}).get("data", {})
106
+ profile = yf_val.get("profile", {})
107
+
108
+ if profile.get("sector"):
109
+ return {"sector": profile.get("sector"), "industry": profile.get("industry")}
110
+
111
+ # Fallback to fundamentals Yahoo Finance
112
+ yf_fund = multi_source.get("fundamentals_all", {}).get("yahoo_finance", {}).get("data", {})
113
+ fund_profile = yf_fund.get("profile", {})
114
+
115
+ return {
116
+ "sector": fund_profile.get("sector", ""),
117
+ "industry": fund_profile.get("industry", "")
118
+ }
119
+
120
+
121
  def _add_activity_log(workflow_id, progress_store, step, message):
122
  """Helper to add activity log entry."""
123
  if workflow_id and progress_store:
 
231
  return metric_data
232
 
233
 
234
+ def _generate_data_report(raw_data: str, is_financial: bool = False) -> str:
235
+ """Generate complete multi-source data report with simple tables.
236
+
237
+ Args:
238
+ raw_data: JSON string of research data
239
+ is_financial: If True, exclude EV/EBITDA for financial institutions
240
+ """
241
  try:
242
  data = json.loads(raw_data)
243
  except json.JSONDecodeError:
 
313
  ("P/E Forward", "forward_pe", lambda v: _format_number(v, "x")),
314
  ("P/B Ratio", "pb_ratio", lambda v: _format_number(v, "x")),
315
  ("P/S Ratio", "ps_ratio", lambda v: _format_number(v, "x")),
 
 
316
  ("PEG Ratio", "trailing_peg", lambda v: _format_number(v, "x")),
317
  ("Price/FCF", "price_to_fcf", lambda v: _format_number(v, "x")),
318
  ("Revenue Growth", "revenue_growth", lambda v: _format_number(v * 100 if v and abs(v) < 10 else v, "%") if v else "N/A"),
319
  ("Earnings Growth", "earnings_growth", lambda v: _format_number(v * 100 if v and abs(v) < 10 else v, "%") if v else "N/A"),
320
  ]
321
 
322
+ # Only include EV/EBITDA for non-financial companies
323
+ if not is_financial:
324
+ val_metrics.insert(6, ("EV/EBITDA", "ev_ebitda", lambda v: _format_number(v, "x")))
325
+ val_metrics.insert(7, ("EV/Revenue", "ev_revenue", lambda v: _format_number(v, "x")))
326
+
327
  for name, key, fmt in val_metrics:
328
  y = yf_val.get(key)
329
  a = av_val.get(key)
 
591
  return extracted
592
 
593
 
594
+ def _format_metrics_for_prompt(extracted: dict, is_financial: bool = False) -> str:
595
+ """Format extracted metrics into a clear text for the LLM.
596
+
597
+ Args:
598
+ extracted: Extracted metrics dictionary
599
+ is_financial: If True, exclude EV/EBITDA from valuation metrics
600
+ """
601
  lines = []
602
  lines.append(f"Company: {extracted['company']} ({extracted['ticker']})")
603
  lines.append("")
 
668
  lines.append(f"- P/B Ratio: {val['pb_ratio']:.2f}")
669
  if val.get("ps_ratio"):
670
  lines.append(f"- P/S Ratio: {val['ps_ratio']:.2f}")
671
+ if val.get("ev_ebitda") and not is_financial:
672
  lines.append(f"- EV/EBITDA: {val['ev_ebitda']:.1f}")
673
  if val.get("valuation_signal"):
674
  lines.append(f"- Overall Signal: {val['valuation_signal']}")
 
734
  return "\n".join(lines)
735
 
736
 
737
# System prompt for the Analyzer's initial (non-revision) SWOT generation.
# Contains exactly one str.format() placeholder, {ev_ebitda_note}, filled by
# _build_analyzer_prompt(); any new literal braces added to this template
# must be escaped ({{ }}) or .format() will treat them as fields.
ANALYZER_SYSTEM_PROMPT = """You are a senior financial analyst producing institutional-grade SWOT analyses.

## DATA GROUNDING RULES (CRITICAL)
1. USE ONLY the provided data. Never invent or assume metrics not given.
2. CITE specific numbers for every finding (e.g., "Net margin: 24.3%", "P/E: 21.3x").
3. If data is missing, state "Insufficient data" - do NOT fabricate.
4. Distinguish trailing (historical) vs forward (projected) metrics.

## AVAILABLE DATA BASKETS

### Fundamentals (SEC EDGAR + Yahoo Finance)
revenue, net_income, net_margin_pct, gross_margin_pct, operating_margin_pct,
total_assets, total_liabilities, stockholders_equity, free_cash_flow,
operating_cash_flow, long_term_debt, debt_to_equity, eps

### Valuation (Yahoo Finance)
market_cap, enterprise_value, trailing_pe, forward_pe, pb_ratio, ps_ratio,
trailing_peg, price_to_fcf, revenue_growth, earnings_growth
{ev_ebitda_note}

### Volatility (FRED + Yahoo)
vix, vxn, beta, historical_volatility, implied_volatility

### Macro (BEA/BLS/FRED)
gdp_growth, interest_rate, cpi_inflation, unemployment

### News & Sentiment
News articles with title, source, url
Sentiment scores from Finnhub and Reddit

## WHAT YOU DO NOT DO
- Provide buy/sell/hold recommendations
- Compare to sector/peer benchmarks (data not provided)
- Speculate beyond provided data
- Use vague hedge words without quantification"""
773
+
774
+
775
+ def _build_revision_prompt(
776
+ critique_details: dict,
777
+ company_data: str,
778
+ current_draft: str,
779
+ is_financial: bool
780
+ ) -> str:
781
+ """Build revision prompt with conditional focus areas based on failed criteria.
782
+
783
+ Args:
784
+ critique_details: Structured dict from Critic with scores and feedback
785
+ company_data: Formatted metrics string for reference
786
+ current_draft: The current SWOT draft to be revised
787
+ is_financial: Whether the company is a financial institution
788
+
789
+ Returns:
790
+ Complete revision prompt string
791
+ """
792
+ scores = critique_details.get("scores", {})
793
+
794
+ # Determine which focus areas to include based on failed criteria
795
+ focus_areas = []
796
+ if scores.get("evidence_grounding", 10) < 7:
797
+ focus_areas.append(EVIDENCE_GROUNDING_BLOCK)
798
+ if scores.get("constraint_compliance", 10) < 6:
799
+ focus_areas.append(CONSTRAINT_COMPLIANCE_BLOCK)
800
+ if scores.get("specificity_actionability", 10) < 7:
801
+ focus_areas.append(SPECIFICITY_BLOCK)
802
+ if scores.get("strategic_insight", 10) < 7:
803
+ focus_areas.append(INSIGHT_BLOCK)
804
+ if scores.get("completeness_balance", 10) < 7:
805
+ focus_areas.append(COMPLETENESS_BLOCK)
806
+ if scores.get("clarity_structure", 10) < 7:
807
+ focus_areas.append(CLARITY_BLOCK)
808
+
809
+ # Format critic feedback components
810
+ deficiencies = critique_details.get("key_deficiencies", [])
811
+ strengths = critique_details.get("strengths_to_preserve", [])
812
+ feedback = critique_details.get("actionable_feedback", [])
813
+
814
+ # Build deficiencies section
815
+ deficiencies_text = "\n".join(f"- {d}" for d in deficiencies) if deficiencies else "- None specified"
816
+
817
+ # Build strengths section
818
+ strengths_text = "\n".join(f"- {s}" for s in strengths) if strengths else "- None specified"
819
+
820
+ # Build feedback section
821
+ feedback_text = "\n".join(f"{i+1}. {f}" for i, f in enumerate(feedback)) if feedback else "- None specified"
822
+
823
+ # Build focus areas section
824
+ focus_areas_text = "\n".join(focus_areas) if focus_areas else "Address all deficiencies listed above."
825
+
826
+ # Add EV/EBITDA note for financial institutions
827
+ ev_note = ""
828
+ if is_financial:
829
+ ev_note = "\n**Note:** This is a financial institution - EV/EBITDA is excluded from analysis."
830
+
831
+ prompt = f"""## REVISION MODE ACTIVATED
832
+
833
+ You previously generated a SWOT analysis that did not meet quality standards. You are now in revision mode.
834
+
835
+ ### YOUR TASK
836
+
837
+ 1. **Review the Critic's feedback** carefully
838
+ 2. **Address each deficiency** listed in priority order
839
+ 3. **Preserve strengths** explicitly called out — do not regress on what worked
840
+ 4. **Regenerate the complete SWOT** — not a partial patch
841
+
842
+ ### CRITIC FEEDBACK
843
+
844
+ Status: {critique_details.get('status', 'REJECTED')}
845
+ Weighted Score: {critique_details.get('weighted_score', 0):.1f} / 10
846
+
847
+ **Key Deficiencies:**
848
+ {deficiencies_text}
849
+
850
+ **Strengths to Preserve:**
851
+ {strengths_text}
852
+
853
+ **Actionable Feedback:**
854
+ {feedback_text}
855
+
856
+ ### FOCUS AREAS FOR THIS REVISION
857
+
858
+ {focus_areas_text}
859
+
860
+ ### REVISION RULES
861
+
862
+ **DO:**
863
+ - Fix every item in "Key Deficiencies" — these are blocking issues
864
+ - Apply each point in "Actionable Feedback" — these are specific instructions
865
+ - Keep everything listed under "Strengths to Preserve" — do not modify these sections
866
+ - Re-verify all metric citations against the original input data
867
+ - Ensure temporal labels (TTM, FY, Q) are accurate for each metric
868
+ {ev_note}
869
+
870
+ **DO NOT:**
871
+ - Ignore lower-priority feedback items — address all of them
872
+ - Introduce new metrics not in the original input data
873
+ - Remove content that was working well
874
+ - Add defensive caveats or apologies about the revision
875
+ - Reference the revision process in your output — produce a clean SWOT as if first attempt
876
+
877
+ ### REFERENCE DATA
878
+
879
+ {company_data}
880
+
881
+ ### CURRENT DRAFT (to revise)
882
+
883
+ {current_draft}
884
+
885
+ ### OUTPUT INSTRUCTIONS
886
+
887
+ Produce a complete, revised SWOT analysis following the original template structure.
888
+
889
+ Do not:
890
+ - Include any preamble about revisions
891
+ - Apologize or explain what you changed
892
+ - Reference the Critic's feedback in your output
893
+
894
+ Simply output the improved SWOT as a clean, final deliverable."""
895
+
896
+ return prompt
897
+
898
+
899
def _build_analyzer_prompt(company: str, ticker: str, formatted_data: str, is_financial: bool) -> str:
    """Build analyzer prompt with conditional EV/EBITDA handling.

    Fills the system template's {ev_ebitda_note} placeholder: financial
    institutions get an exclusion note, all other companies get the extra
    enterprise-value metric names appended to the valuation basket list.
    """
    ebitda_note = (
        "\nNote: EV/EBITDA excluded - not meaningful for financial institutions."
        if is_financial
        else ", ev_ebitda, ev_revenue"
    )
    system_prompt = ANALYZER_SYSTEM_PROMPT.format(ev_ebitda_note=ebitda_note)

    return f"""{system_prompt}

=== DATA FOR {company} ({ticker}) ===
{formatted_data}

=== OUTPUT FORMAT ===

Produce a SWOT analysis with this exact structure:

## Strengths
For each (3-5 points):
- **Finding:** [One sentence with specific metric]
- **Strategic Implication:** [Why this matters]
- **Durability:** [High/Medium/Low]

## Weaknesses
For each (3-5 points):
- **Finding:** [One sentence with specific metric]
- **Severity:** [Critical/Moderate/Minor]
- **Trend:** [Improving/Stable/Deteriorating]
- **Remediation Levers:** [What could improve this]

## Opportunities
For each (3-5 points):
- **Catalyst:** [Description with supporting data]
- **Timing:** [Near-term/Medium-term/Long-term]
- **Execution Requirements:** [What must happen]

## Threats
For each (3-5 points):
- **Risk Factor:** [Description with supporting data]
- **Probability:** [High/Medium/Low]
- **Impact:** [Potential magnitude]
- **Mitigation Options:** [Possible responses]

## Data Quality Notes
- **Metrics Used:** [List key metrics analyzed]
- **Data Gaps:** [Any unavailable metrics]
- **Confidence Level:** [High/Medium/Low]

Every finding MUST cite a specific number from the data."""
950
+
951
+
952
  @traceable(name="Analyzer")
953
  def analyzer_node(state, workflow_id=None, progress_store=None):
954
  # Extract workflow_id and progress_store from state (graph invokes with state only)
 
969
  user_keys = state.get("user_api_keys", {})
970
  llm = get_llm_client(user_keys) if user_keys else get_llm_client()
971
  raw = state["raw_data"]
 
 
972
  company = state["company_name"]
973
  ticker = state.get("ticker", "")
974
 
975
+ # Extract company profile and detect financial institution
976
+ company_profile = _extract_company_profile(raw)
977
+ sector = company_profile.get("sector", "")
978
+ industry = company_profile.get("industry", "")
979
+ is_financial = _is_financial_institution(sector, industry, ticker)
980
+
981
+ if is_financial:
982
+ _add_activity_log(workflow_id, progress_store, "analyzer",
983
+ f"Financial institution detected - excluding EV/EBITDA")
984
+
985
  # Extract and format metrics for better LLM understanding
986
  extracted = _extract_key_metrics(raw)
987
+ formatted_data = _format_metrics_for_prompt(extracted, is_financial=is_financial)
988
 
989
  # Generate detailed data report (shown before SWOT)
990
+ data_report = _generate_data_report(raw, is_financial=is_financial)
991
+
992
+ # Detect revision mode: if revision_count > 0 and critique_details exist
993
+ is_revision = state.get("revision_count", 0) > 0
994
+ critique_details = state.get("critique_details", {})
995
+
996
+ if is_revision and critique_details:
997
+ # REVISION MODE: Use enhanced revision prompt with Critic feedback
998
+ current_revision = state.get("revision_count", 0) + 1
999
+ _add_activity_log(workflow_id, progress_store, "analyzer",
1000
+ f"Revision #{current_revision} in progress...")
1001
+
1002
+ prompt = _build_revision_prompt(
1003
+ critique_details=critique_details,
1004
+ company_data=formatted_data,
1005
+ current_draft=state.get("draft_report", ""),
1006
+ is_financial=is_financial
1007
+ )
1008
+
1009
+ # Update progress with revision info
1010
+ if workflow_id and progress_store:
1011
+ progress_store[workflow_id].update({
1012
+ "current_step": "analyzer",
1013
+ "revision_count": current_revision,
1014
+ })
1015
+ else:
1016
+ # INITIAL MODE: Use standard analyzer prompt
1017
+ _add_activity_log(workflow_id, progress_store, "analyzer",
1018
+ f"Calling LLM to generate SWOT analysis...")
1019
+ prompt = _build_analyzer_prompt(company, ticker, formatted_data, is_financial)
1020
+ current_revision = 0
 
 
 
 
 
 
 
1021
 
 
1022
  start_time = time.time()
1023
  response, provider, error, providers_failed = llm.query(prompt, temperature=0)
1024
  elapsed = time.time() - start_time
 
1045
  llm_status[provider_name] = "completed"
1046
 
1047
  if error:
1048
+ if is_revision:
1049
+ # REVISION MODE ERROR: Graceful degradation - keep previous draft
1050
+ _add_activity_log(workflow_id, progress_store, "analyzer", f"Revision failed: {error}")
1051
+ if current_revision == 1:
1052
+ _add_activity_log(workflow_id, progress_store, "analyzer",
1053
+ "Using initial draft (revision unavailable)")
1054
+ else:
1055
+ _add_activity_log(workflow_id, progress_store, "analyzer",
1056
+ f"Using revision #{current_revision - 1} draft (further revision unavailable)")
1057
+ # Don't set error - allow workflow to complete with previous draft
1058
+ state["analyzer_revision_skipped"] = True
1059
+ state["revision_count"] = current_revision
1060
+ else:
1061
+ # INITIAL MODE ERROR: Abort workflow
1062
+ state["draft_report"] = f"Error generating analysis: {error}"
1063
+ state["provider_used"] = None
1064
+ state["error"] = error # Signal workflow to abort
1065
+ _add_activity_log(workflow_id, progress_store, "analyzer", f"LLM error: {error}")
1066
+ _add_activity_log(workflow_id, progress_store, "analyzer",
1067
+ "Workflow aborted - all LLM providers unavailable")
1068
  else:
1069
+ if is_revision:
1070
+ # REVISION MODE SUCCESS: Update draft with revision
1071
+ state["draft_report"] = response
1072
+ state["provider_used"] = provider
1073
+ state["analyzer_revision_skipped"] = False
1074
+ state["revision_count"] = current_revision
1075
+ _add_activity_log(workflow_id, progress_store, "analyzer",
1076
+ f"Revision #{current_revision} completed via {provider} ({elapsed:.1f}s)")
1077
+ else:
1078
+ # INITIAL MODE SUCCESS: Combine data report with SWOT analysis
1079
+ swot_section = f"## SWOT Analysis\n\n{response}"
1080
+ full_report = f"{data_report}\n{swot_section}"
1081
+ state["draft_report"] = full_report
1082
+ state["data_report"] = data_report # Store separately for frontend flexibility
1083
+ state["provider_used"] = provider
1084
+ _add_activity_log(workflow_id, progress_store, "analyzer",
1085
+ f"SWOT generated via {provider} ({elapsed:.1f}s)")
1086
+
1087
+ # Update progress with final revision count
1088
+ if workflow_id and progress_store:
1089
+ progress_store[workflow_id].update({
1090
+ "revision_count": state.get("revision_count", 0),
1091
+ "score": state.get("score", 0)
1092
+ })
1093
 
1094
  return state
src/nodes/critic.py CHANGED
@@ -1,7 +1,6 @@
1
  from src.llm_client import get_llm_client
2
  from langsmith import traceable
3
  import json
4
- import re
5
  import time
6
 
7
 
@@ -13,251 +12,207 @@ def _add_activity_log(workflow_id, progress_store, step, message):
13
 
14
 
15
  # ============================================================
16
- # DETERMINISTIC SCORING FUNCTIONS
17
  # ============================================================
18
 
19
- def check_swot_sections(report: str) -> dict:
20
- """
21
- Check if all 4 SWOT sections are present.
22
- Returns dict with section presence and score (0-2 points).
23
- """
24
- report_lower = report.lower()
25
 
26
- sections = {
27
- "strengths": bool(re.search(r'\bstrengths?\b', report_lower)),
28
- "weaknesses": bool(re.search(r'\bweaknesses?\b', report_lower)),
29
- "opportunities": bool(re.search(r'\bopportunit(y|ies)\b', report_lower)),
30
- "threats": bool(re.search(r'\bthreats?\b', report_lower))
31
- }
32
 
33
- present_count = sum(sections.values())
34
- score = 2 if present_count == 4 else (1 if present_count >= 2 else 0)
35
 
36
- return {
37
- "sections": sections,
38
- "present_count": present_count,
39
- "score": score,
40
- "max_score": 2
41
- }
42
 
 
43
 
44
- def count_numeric_citations(report: str) -> dict:
45
- """
46
- Count specific facts/numbers cited in the report.
47
- Returns dict with count and score (0-3 points).
48
- """
49
- # Patterns for numeric citations
50
- patterns = [
51
- r'\$[\d,]+\.?\d*[BMK]?', # Dollar amounts: $3.6B, $100M
52
- r'\d+\.?\d*\s*%', # Percentages: 7.26%, 42.59%
53
- r'\d+\.?\d*x', # Multiples: 0.13x, 2.35x
54
- r'P/E[:\s]+\d+', # P/E ratios
55
- r'P/S[:\s]+\d+', # P/S ratios
56
- r'P/B[:\s]+\d+', # P/B ratios
57
- r'EV/EBITDA[:\s]+\d+', # EV/EBITDA
58
- r'PEG[:\s]+\d+', # PEG ratio
59
- r'VIX[:\s]+\d+', # VIX
60
- r'Beta[:\s]+\d+', # Beta
61
- r'\d+/100', # Scores: 67.38/100
62
- r'CAGR[:\s]*\d+', # CAGR
63
- r'\d{4}', # Years: 2024, 2025
64
- ]
65
 
66
- citations = []
67
- for pattern in patterns:
68
- matches = re.findall(pattern, report, re.IGNORECASE)
69
- citations.extend(matches)
70
-
71
- # Deduplicate
72
- unique_citations = list(set(citations))
73
- count = len(unique_citations)
74
-
75
- # Score: 0-2 citations = 0pts, 3-5 = 1pt, 6-10 = 2pts, 10+ = 3pts
76
- if count >= 10:
77
- score = 3
78
- elif count >= 6:
79
- score = 2
80
- elif count >= 3:
81
- score = 1
82
- else:
83
- score = 0
84
 
85
- return {
86
- "count": count,
87
- "examples": unique_citations[:10], # Show first 10
88
- "score": score,
89
- "max_score": 3
90
- }
91
 
 
92
 
93
- def check_data_sources(report: str, sources_available: list) -> dict:
94
- """
95
- Check if report references data from available MCP sources.
96
- Returns dict with coverage and score (0-2 points).
97
- """
98
- report_lower = report.lower()
99
-
100
- source_keywords = {
101
- "fundamentals": ["revenue", "net margin", "debt", "cash flow", "eps", "earnings"],
102
- "volatility": ["beta", "volatility", "vix", "price swing"],
103
- "macro": ["gdp", "interest rate", "inflation", "unemployment", "fed"],
104
- "valuation": ["p/e", "p/s", "p/b", "ev/ebitda", "peg", "valuation", "market cap"],
105
- "news": ["news", "analyst", "article", "report"],
106
- "sentiment": ["sentiment", "bullish", "bearish", "reddit", "finnhub"]
107
- }
108
 
109
- sources_referenced = {}
110
- for source in sources_available:
111
- keywords = source_keywords.get(source, [])
112
- found = any(kw in report_lower for kw in keywords)
113
- sources_referenced[source] = found
 
114
 
115
- referenced_count = sum(sources_referenced.values())
116
- coverage_pct = (referenced_count / len(sources_available) * 100) if sources_available else 0
 
 
 
117
 
118
- # Score: <50% = 0pts, 50-75% = 1pt, >75% = 2pts
119
- if coverage_pct >= 75:
120
- score = 2
121
- elif coverage_pct >= 50:
122
- score = 1
123
- else:
124
- score = 0
125
-
126
- return {
127
- "sources_referenced": sources_referenced,
128
- "referenced_count": referenced_count,
129
- "total_available": len(sources_available),
130
- "coverage_pct": round(coverage_pct, 1),
131
- "score": score,
132
- "max_score": 2
133
- }
134
 
 
 
 
 
 
135
 
136
- def check_section_balance(report: str) -> dict:
137
- """
138
- Check if SWOT sections are reasonably balanced (not all items in one section).
139
- Returns dict with balance info and score (0-1 point).
140
- """
141
- # Count bullet points or list items per section
142
- sections = ["strength", "weakness", "opportunit", "threat"]
143
-
144
- # Split report by sections and count items
145
- report_lower = report.lower()
146
- item_counts = {}
147
-
148
- for section in sections:
149
- # Find section and count bullet points after it
150
- pattern = rf'{section}.*?(?=(?:weakness|opportunit|threat|$))'
151
- match = re.search(pattern, report_lower, re.DOTALL)
152
- if match:
153
- section_text = match.group()
154
- # Count bullet points (-, *, •) or numbered items
155
- items = len(re.findall(r'[\-\*\•]\s+\w|^\d+\.\s+\w', section_text, re.MULTILINE))
156
- item_counts[section] = max(items, 1) # At least 1 if section exists
157
-
158
- if not item_counts:
159
- return {"balanced": False, "score": 0, "max_score": 1}
160
-
161
- counts = list(item_counts.values())
162
- avg = sum(counts) / len(counts)
163
-
164
- # Check if any section has less than 25% of average (unbalanced)
165
- balanced = all(c >= avg * 0.25 for c in counts) if avg > 0 else False
166
-
167
- return {
168
- "item_counts": item_counts,
169
- "balanced": balanced,
170
- "score": 1 if balanced else 0,
171
- "max_score": 1
172
- }
 
 
 
 
173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- def run_deterministic_checks(report: str, sources_available: list) -> dict:
 
 
 
 
 
 
 
 
 
 
176
  """
177
- Run all deterministic checks and return combined results.
178
- Total possible: 8 points
179
  """
180
- sections_check = check_swot_sections(report)
181
- citations_check = count_numeric_citations(report)
182
- sources_check = check_data_sources(report, sources_available)
183
- balance_check = check_section_balance(report)
184
-
185
- total_score = (
186
- sections_check["score"] +
187
- citations_check["score"] +
188
- sources_check["score"] +
189
- balance_check["score"]
190
- )
191
- max_score = 8
192
-
193
- # Convert to 1-10 scale (deterministic portion = 40% weight)
194
- normalized_score = (total_score / max_score) * 4 # 0-4 points
195
-
196
- return {
197
- "sections": sections_check,
198
- "citations": citations_check,
199
- "sources": sources_check,
200
- "balance": balance_check,
201
- "total_score": total_score,
202
- "max_score": max_score,
203
- "normalized_score": round(normalized_score, 2)
204
- }
205
 
 
 
 
206
 
207
- # ============================================================
208
- # LLM SCORING
209
- # ============================================================
 
 
210
 
211
- LLM_RUBRIC = """
212
- You are a strategy evaluator. Given a SWOT analysis and the SOURCE DATA it should be based on, score it on a scale of 1 to 6.
 
 
213
 
214
- Scoring Criteria:
215
- 1. Strategic Alignment (0-2 pts): Does the analysis align with the given strategic focus?
216
- 2. Data Grounding (0-2 pts): Does EVERY claim cite specific numbers from the source data? Penalize any invented facts not in the data.
217
- 3. Logical Consistency (0-2 pts): Are S/O clearly positive and W/T clearly negative? No contradictions?
218
 
219
- IMPORTANT: If the analysis mentions facts/numbers NOT present in the source data, score Data Grounding as 0.
220
 
221
- Respond in this JSON format only, no other text:
222
- {
223
- "score": <int 1-6>,
224
- "strategic_alignment": <0-2>,
225
- "data_grounding": <0-2>,
226
- "logical_consistency": <0-2>,
227
- "reasoning": "<string>"
228
- }
229
- """
230
 
 
 
 
 
 
231
 
232
- def run_llm_evaluation(report: str, strategy_focus: str, llm, source_data: str = "") -> dict:
 
233
  """
234
- Run LLM-based qualitative evaluation.
235
- Returns score (1-6) and reasoning.
236
- """
237
- prompt = f"""
238
- SWOT Draft:
239
- {report}
240
 
241
- Strategic Focus: {strategy_focus}
242
 
243
- SOURCE DATA (the analysis should be based ONLY on this):
244
- {source_data if source_data else "No source data provided"}
245
 
246
- {LLM_RUBRIC}
247
- """
 
 
 
 
 
 
 
248
 
249
  response, provider, error, providers_failed = llm.query(prompt, temperature=0)
250
 
251
  if error:
 
252
  return {
253
- "score": 3, # Default middle score
254
- "reasoning": f"LLM evaluation failed: {error}",
 
 
 
 
 
 
255
  "provider": provider,
256
  "providers_failed": providers_failed,
257
  "error": True
258
  }
259
 
260
  try:
 
261
  content = response.strip()
262
  if "{" in content:
263
  json_start = content.index("{")
@@ -265,40 +220,74 @@ SOURCE DATA (the analysis should be based ONLY on this):
265
  content = content[json_start:json_end]
266
 
267
  parsed = json.loads(content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  return {
269
- "score": min(max(parsed.get("score", 3), 1), 6), # Clamp 1-6
270
- "strategic_alignment": parsed.get("strategic_alignment", 0),
271
- "data_grounding": parsed.get("data_grounding", 0),
272
- "logical_consistency": parsed.get("logical_consistency", 0),
273
- "reasoning": parsed.get("reasoning", "No reasoning provided"),
 
 
 
274
  "provider": provider,
275
  "providers_failed": providers_failed,
276
  "error": False
277
  }
 
278
  except (json.JSONDecodeError, ValueError) as e:
279
  return {
280
- "score": 3,
281
- "reasoning": f"JSON parsing failed: {str(e)[:100]}",
 
 
 
 
 
 
282
  "provider": provider,
283
  "providers_failed": providers_failed,
284
  "error": True
285
  }
286
 
287
 
288
- # ============================================================
289
- # HYBRID SCORING
290
- # ============================================================
291
-
292
  @traceable(name="Critic")
293
  def critic_node(state, workflow_id=None, progress_store=None):
294
  """
295
- Critic node with hybrid scoring:
296
- - Deterministic checks (40%): sections, citations, source coverage, balance
297
- - LLM evaluation (60%): strategic alignment, insight quality, consistency
 
 
 
 
 
 
298
 
299
- Final score = deterministic (0-4) + LLM (0-6) = 1-10 scale
300
  """
301
- # Extract workflow_id and progress_store from state (graph invokes with state only)
302
  if workflow_id is None:
303
  workflow_id = state.get("workflow_id")
304
  if progress_store is None:
@@ -307,7 +296,6 @@ def critic_node(state, workflow_id=None, progress_store=None):
307
  # Skip evaluation if workflow has an error (abort mode)
308
  if state.get("error"):
309
  _add_activity_log(workflow_id, progress_store, "critic", "Skipping evaluation - workflow aborted")
310
- # Simplify error message for user display
311
  error_msg = state.get("error", "")
312
  if "429" in error_msg or "Too Many Requests" in error_msg:
313
  user_friendly_msg = "All AI providers are temporarily unavailable due to rate limits. Please wait a moment and try again."
@@ -320,50 +308,27 @@ def critic_node(state, workflow_id=None, progress_store=None):
320
  return state
321
 
322
  report = state.get("draft_report", "")
323
- strategy_focus = state.get("strategy_focus", "Cost Leadership")
324
  revision_count = state.get("revision_count", 0)
 
325
 
326
  # Log evaluation start
327
- _add_activity_log(workflow_id, progress_store, "critic", f"Evaluating SWOT quality (revision #{revision_count})...")
328
 
329
- # Parse sources_available from raw_data
330
- sources_available = []
331
- try:
332
- raw_data = json.loads(state.get("raw_data", "{}"))
333
- sources_available = raw_data.get("sources_available", [])
334
- except:
335
- sources_available = ["fundamentals", "volatility", "macro", "valuation", "news", "sentiment"]
336
-
337
- # Run deterministic checks
338
- print("Running deterministic checks...")
339
- det_results = run_deterministic_checks(report, sources_available)
340
- det_score = det_results["normalized_score"] # 0-4
341
-
342
- print(f" Sections: {det_results['sections']['present_count']}/4 ({det_results['sections']['score']}/{det_results['sections']['max_score']} pts)")
343
- print(f" Citations: {det_results['citations']['count']} found ({det_results['citations']['score']}/{det_results['citations']['max_score']} pts)")
344
- print(f" Source Coverage: {det_results['sources']['coverage_pct']}% ({det_results['sources']['score']}/{det_results['sources']['max_score']} pts)")
345
- print(f" Balance: {'Yes' if det_results['balance']['balanced'] else 'No'} ({det_results['balance']['score']}/{det_results['balance']['max_score']} pts)")
346
- print(f" Deterministic Score: {det_score:.1f}/4")
347
-
348
- # Run LLM evaluation with source data for grounding check
349
- print("Running LLM evaluation...")
350
  llm = get_llm_client()
351
- _add_activity_log(workflow_id, progress_store, "critic", f"Calling LLM for quality evaluation...")
352
  start_time = time.time()
353
 
354
- # Get formatted source data for grounding verification
355
- source_data = state.get("raw_data", "")
356
- # Truncate if too long to avoid token limits
357
- if len(source_data) > 4000:
358
- source_data = source_data[:4000] + "\n... [truncated]"
359
-
360
- llm_results = run_llm_evaluation(report, strategy_focus, llm, source_data)
361
- llm_score = llm_results["score"] # 1-6
362
  elapsed = time.time() - start_time
363
- provider = llm_results.get('provider', 'unknown')
364
 
365
  # Log failed providers
366
- providers_failed = llm_results.get('providers_failed', [])
367
  for pf in providers_failed:
368
  _add_activity_log(workflow_id, progress_store, "critic", f"LLM {pf['name']} failed: {pf['error']}")
369
 
@@ -372,49 +337,98 @@ def critic_node(state, workflow_id=None, progress_store=None):
372
  state["llm_providers_failed"] = []
373
  state["llm_providers_failed"].extend([pf["name"] for pf in providers_failed])
374
 
375
- print(f" LLM Score: {llm_score}/6 ({provider})")
376
- _add_activity_log(workflow_id, progress_store, "critic", f"LLM evaluation via {provider} ({elapsed:.1f}s)")
377
-
378
- # Combine scores: deterministic (0-4) + LLM (1-6) = 1-10
379
- final_score = det_score + llm_score
380
- final_score = min(max(final_score, 1), 10) # Clamp 1-10
381
-
382
- print(f"Critic scored: {final_score:.1f}/10 (det:{det_score:.1f} + llm:{llm_score})")
383
-
384
- # Log score result with revision decision hint
385
- score_msg = f"Score: {final_score:.0f}/10"
386
- if final_score < 7:
387
- score_msg += " - needs revision"
 
 
 
 
 
 
 
 
 
 
 
388
  else:
389
- score_msg += " - quality passed"
390
  _add_activity_log(workflow_id, progress_store, "critic", score_msg)
391
 
392
- # Build detailed critique
393
- critique_parts = [
394
- f"Deterministic Analysis ({det_results['total_score']}/{det_results['max_score']} pts):",
395
- f" - SWOT Sections: {det_results['sections']['present_count']}/4 present",
396
- f" - Numeric Citations: {det_results['citations']['count']} found",
397
- f" - Data Source Coverage: {det_results['sources']['coverage_pct']}%",
398
- f" - Section Balance: {'Balanced' if det_results['balance']['balanced'] else 'Unbalanced'}",
399
  "",
400
- f"LLM Evaluation ({llm_score}/6 pts):",
401
- f" {llm_results['reasoning']}"
402
  ]
403
 
404
- state["critique"] = "\n".join(critique_parts)
405
- state["score"] = final_score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
  state["critique_details"] = {
407
- "deterministic": det_results,
408
- "llm": llm_results,
409
- "final_score": final_score
 
 
 
 
 
410
  }
411
 
412
  # Update progress
413
  if workflow_id and progress_store:
414
  progress_store[workflow_id].update({
415
  "current_step": "critic",
416
- "revision_count": state.get("revision_count", 0),
417
- "score": final_score
418
  })
419
 
420
  return state
 
1
  from src.llm_client import get_llm_client
2
  from langsmith import traceable
3
  import json
 
4
  import time
5
 
6
 
 
12
 
13
 
14
  # ============================================================
15
+ # LLM-ONLY WEIGHTED RUBRIC EVALUATION
16
  # ============================================================
17
 
18
+ CRITIC_SYSTEM_PROMPT = """You are a SWOT Output Critic and Quality Gatekeeper.
 
 
 
 
 
19
 
20
+ ## ROLE
21
+ Act as an independent, impartial evaluator that reviews SWOT analyses. Your function is to:
22
+ 1. Verify factual accuracy against provided input data
23
+ 2. Assess quality against a weighted rubric
24
+ 3. Decide whether the output PASSES or FAILS
25
+ 4. Provide actionable feedback if rejected
26
 
27
+ You are a quality gate, not a collaborator. Be strict.
 
28
 
29
+ ## VALID METRICS SCHEMA
 
 
 
 
 
30
 
31
+ **Fundamentals:** revenue, net_income, net_margin_pct, total_assets, total_liabilities, stockholders_equity, operating_margin_pct, total_debt, operating_cash_flow, free_cash_flow
32
 
33
+ **Valuation:** current_price, market_cap, enterprise_value, trailing_pe, forward_pe, ps_ratio, pb_ratio, trailing_peg, forward_peg, earnings_growth, revenue_growth
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
+ **Volatility:** vix, vxn, beta, historical_volatility, implied_volatility
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ **Macro:** gdp_growth, interest_rate, cpi_inflation, unemployment
 
 
 
 
 
38
 
39
+ **Qualitative:** News (title, date, source, url), Sentiment (title, date, source, url)
40
 
41
+ ## EVALUATION RUBRIC (Weighted)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ ### 1. Evidence Grounding (25%) — HARD FLOOR: >=7
44
+ - All claims cite specific metrics from input data
45
+ - No fabricated metrics (hallucination check)
46
+ - Field names match schema
47
+ - 9-10: Every claim traceable; 7-8: Nearly all grounded; 5-6: Most grounded, 2-3 unverifiable; 3-4: Multiple unsupported; 1-2: Clear hallucinations
48
+ - **If ANY fabricated metric detected, cap at 4**
49
 
50
+ ### 2. Constraint Compliance (20%) — HARD FLOOR: >=6
51
+ - No buy/sell/hold recommendations
52
+ - Temporal labels accurate (TTM, FY, forward)
53
+ - "DATA NOT PROVIDED" used for missing metrics
54
+ - 9-10: All constraints respected; 7-8: Minor issues; 5-6: One moderate violation; 3-4: Multiple violations; 1-2: Systematic violations
55
 
56
+ ### 3. Specificity & Actionability (20%)
57
+ - Company-specific, not generic templates
58
+ - Quantified findings (not "strong margins" but "31% operating margin")
59
+ - Avoids business cliches
60
+ - 9-10: Every point specific and quantified; 7-8: Mostly specific; 5-6: Mix of specific/generic; 3-4: Mostly generic; 1-2: Template-like
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ ### 4. Strategic Insight (15%)
63
+ - Synthesis across multiple data sources
64
+ - Prioritization by materiality
65
+ - Goes beyond restating metrics to interpreting implications
66
+ - 9-10: Identifies causal relationships; 7-8: Good synthesis; 5-6: Surface-level; 3-4: Restates metrics; 1-2: No value-add
67
 
68
+ ### 5. Completeness & Balance (10%)
69
+ Required sections:
70
+ - Strengths (Finding, Strategic Implication, Durability)
71
+ - Weaknesses (Finding, Severity, Trend, Remediation Levers)
72
+ - Opportunities (Catalyst, Timing, Execution Requirements)
73
+ - Threats (Risk Factor, Probability, Impact, Mitigation Options)
74
+ - Data Quality Notes
75
+ - 9-10: All present and substantive; 7-8: All present, minor gaps; 5-6: Missing 1 section; 3-4: Multiple missing; 1-2: Major gaps
76
+
77
+ ### 6. Clarity & Structure (10%)
78
+ - Clean formatting, logical grouping
79
+ - Easy to scan (not walls of text)
80
+ - No contradictions
81
+ - 9-10: Impeccable; 7-8: Well-structured; 5-6: Readable but dense; 3-4: Hard to follow; 1-2: Poorly organized
82
+
83
+ ## PASS CONDITIONS (ALL must be met)
84
+ 1. Weighted average >= 7.0
85
+ 2. Evidence Grounding >= 7
86
+ 3. Constraint Compliance >= 6
87
+ 4. No individual criterion below 5
88
+
89
+ ## OUTPUT FORMAT (JSON only, no other text)
90
+
91
+ {
92
+ "status": "APPROVED" or "REJECTED",
93
+ "weighted_score": <float>,
94
+ "scores": {
95
+ "evidence_grounding": <1-10>,
96
+ "constraint_compliance": <1-10>,
97
+ "specificity_actionability": <1-10>,
98
+ "strategic_insight": <1-10>,
99
+ "completeness_balance": <1-10>,
100
+ "clarity_structure": <1-10>
101
+ },
102
+ "hard_floor_violations": ["list of violated floors or empty array"],
103
+ "hallucinations_detected": ["list of fabricated metrics or empty array"],
104
+ "key_deficiencies": ["prioritized list, max 5"],
105
+ "strengths_to_preserve": ["elements done well"],
106
+ "actionable_feedback": ["specific rewrite instructions, max 5"]
107
+ }
108
+ """
109
 
110
+ # Weights for each criterion
111
+ CRITERION_WEIGHTS = {
112
+ "evidence_grounding": 0.25,
113
+ "constraint_compliance": 0.20,
114
+ "specificity_actionability": 0.20,
115
+ "strategic_insight": 0.15,
116
+ "completeness_balance": 0.10,
117
+ "clarity_structure": 0.10,
118
+ }
119
+
120
+ # Hard floor requirements
121
+ HARD_FLOORS = {
122
+ "evidence_grounding": 7,
123
+ "constraint_compliance": 6,
124
+ }
125
+
126
+ # Minimum score for any criterion
127
+ MIN_INDIVIDUAL_SCORE = 5
128
 
129
+
130
+ def calculate_weighted_score(scores: dict) -> float:
131
+ """Calculate weighted average from individual criterion scores."""
132
+ total = 0.0
133
+ for criterion, weight in CRITERION_WEIGHTS.items():
134
+ score = scores.get(criterion, 5) # Default to 5 if missing
135
+ total += score * weight
136
+ return round(total, 2)
137
+
138
+
139
+ def check_pass_conditions(scores: dict, weighted_score: float) -> tuple:
140
  """
141
+ Check if all pass conditions are met.
142
+ Returns (passed: bool, violations: list)
143
  """
144
+ violations = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
+ # Check weighted average threshold
147
+ if weighted_score < 7.0:
148
+ violations.append(f"Weighted score {weighted_score:.1f} < 7.0 threshold")
149
 
150
+ # Check hard floors
151
+ for criterion, floor in HARD_FLOORS.items():
152
+ score = scores.get(criterion, 0)
153
+ if score < floor:
154
+ violations.append(f"{criterion}: {score} < {floor} (hard floor)")
155
 
156
+ # Check minimum individual scores
157
+ for criterion, score in scores.items():
158
+ if score < MIN_INDIVIDUAL_SCORE:
159
+ violations.append(f"{criterion}: {score} < {MIN_INDIVIDUAL_SCORE} (minimum)")
160
 
161
+ return (len(violations) == 0, violations)
 
 
 
162
 
 
163
 
164
+ def run_llm_evaluation(report: str, source_data: str, iteration: int, llm) -> dict:
165
+ """
166
+ Run LLM-based evaluation with weighted rubric.
 
 
 
 
 
 
167
 
168
+ Args:
169
+ report: The SWOT output to evaluate
170
+ source_data: The source data the SWOT should be based on
171
+ iteration: Current revision number (1, 2, or 3)
172
+ llm: LLM client instance
173
 
174
+ Returns:
175
+ Evaluation result dict with scores, status, and feedback
176
  """
177
+ # Truncate source data if too long
178
+ max_source_len = 8000
179
+ if len(source_data) > max_source_len:
180
+ source_data = source_data[:max_source_len] + "\n... [truncated]"
 
 
181
 
182
+ prompt = f"""{CRITIC_SYSTEM_PROMPT}
183
 
184
+ ## INPUTS
 
185
 
186
+ **Iteration:** {iteration} of 3
187
+
188
+ **Source Data (the SWOT should be based ONLY on this):**
189
+ {source_data}
190
+
191
+ **SWOT Output to Evaluate:**
192
+ {report}
193
+
194
+ Evaluate strictly and respond with JSON only."""
195
 
196
  response, provider, error, providers_failed = llm.query(prompt, temperature=0)
197
 
198
  if error:
199
+ # Return default middle scores on error
200
  return {
201
+ "status": "REJECTED",
202
+ "weighted_score": 5.0,
203
+ "scores": {k: 5 for k in CRITERION_WEIGHTS.keys()},
204
+ "hard_floor_violations": [],
205
+ "hallucinations_detected": [],
206
+ "key_deficiencies": [f"LLM evaluation failed: {error}"],
207
+ "strengths_to_preserve": [],
208
+ "actionable_feedback": ["Unable to evaluate - please retry"],
209
  "provider": provider,
210
  "providers_failed": providers_failed,
211
  "error": True
212
  }
213
 
214
  try:
215
+ # Parse JSON from response
216
  content = response.strip()
217
  if "{" in content:
218
  json_start = content.index("{")
 
220
  content = content[json_start:json_end]
221
 
222
  parsed = json.loads(content)
223
+
224
+ # Extract and validate scores
225
+ scores = parsed.get("scores", {})
226
+ for criterion in CRITERION_WEIGHTS.keys():
227
+ if criterion not in scores:
228
+ scores[criterion] = 5 # Default
229
+ else:
230
+ scores[criterion] = min(max(int(scores[criterion]), 1), 10) # Clamp 1-10
231
+
232
+ # Calculate weighted score
233
+ weighted_score = calculate_weighted_score(scores)
234
+
235
+ # Check pass conditions
236
+ passed, violations = check_pass_conditions(scores, weighted_score)
237
+
238
+ # Determine status
239
+ status = "APPROVED" if passed else "REJECTED"
240
+
241
+ # Override status if LLM said APPROVED but conditions not met
242
+ if parsed.get("status") == "APPROVED" and not passed:
243
+ status = "REJECTED"
244
+
245
  return {
246
+ "status": status,
247
+ "weighted_score": weighted_score,
248
+ "scores": scores,
249
+ "hard_floor_violations": parsed.get("hard_floor_violations", violations),
250
+ "hallucinations_detected": parsed.get("hallucinations_detected", []),
251
+ "key_deficiencies": parsed.get("key_deficiencies", [])[:5],
252
+ "strengths_to_preserve": parsed.get("strengths_to_preserve", []),
253
+ "actionable_feedback": parsed.get("actionable_feedback", [])[:5],
254
  "provider": provider,
255
  "providers_failed": providers_failed,
256
  "error": False
257
  }
258
+
259
  except (json.JSONDecodeError, ValueError) as e:
260
  return {
261
+ "status": "REJECTED",
262
+ "weighted_score": 5.0,
263
+ "scores": {k: 5 for k in CRITERION_WEIGHTS.keys()},
264
+ "hard_floor_violations": [],
265
+ "hallucinations_detected": [],
266
+ "key_deficiencies": [f"JSON parsing failed: {str(e)[:100]}"],
267
+ "strengths_to_preserve": [],
268
+ "actionable_feedback": ["Evaluation response was malformed - please retry"],
269
  "provider": provider,
270
  "providers_failed": providers_failed,
271
  "error": True
272
  }
273
 
274
 
 
 
 
 
275
  @traceable(name="Critic")
276
  def critic_node(state, workflow_id=None, progress_store=None):
277
  """
278
+ Critic node with LLM-only weighted rubric evaluation.
279
+
280
+ Evaluates SWOT output on 6 criteria with weighted scoring:
281
+ - Evidence Grounding (25%) - hard floor >= 7
282
+ - Constraint Compliance (20%) - hard floor >= 6
283
+ - Specificity & Actionability (20%)
284
+ - Strategic Insight (15%)
285
+ - Completeness & Balance (10%)
286
+ - Clarity & Structure (10%)
287
 
288
+ Pass requires: weighted avg >= 7.0, hard floors met, no score < 5
289
  """
290
+ # Extract workflow_id and progress_store from state
291
  if workflow_id is None:
292
  workflow_id = state.get("workflow_id")
293
  if progress_store is None:
 
296
  # Skip evaluation if workflow has an error (abort mode)
297
  if state.get("error"):
298
  _add_activity_log(workflow_id, progress_store, "critic", "Skipping evaluation - workflow aborted")
 
299
  error_msg = state.get("error", "")
300
  if "429" in error_msg or "Too Many Requests" in error_msg:
301
  user_friendly_msg = "All AI providers are temporarily unavailable due to rate limits. Please wait a moment and try again."
 
308
  return state
309
 
310
  report = state.get("draft_report", "")
 
311
  revision_count = state.get("revision_count", 0)
312
+ iteration = revision_count + 1 # 1-indexed for display
313
 
314
  # Log evaluation start
315
+ _add_activity_log(workflow_id, progress_store, "critic", f"Evaluating SWOT quality (iteration {iteration}/3)...")
316
 
317
+ # Get source data for grounding verification
318
+ source_data = state.get("raw_data", "")
319
+
320
+ # Run LLM evaluation
321
+ print(f"Running LLM evaluation (iteration {iteration})...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  llm = get_llm_client()
323
+ _add_activity_log(workflow_id, progress_store, "critic", "Calling LLM for quality evaluation...")
324
  start_time = time.time()
325
 
326
+ result = run_llm_evaluation(report, source_data, iteration, llm)
 
 
 
 
 
 
 
327
  elapsed = time.time() - start_time
328
+ provider = result.get('provider', 'unknown')
329
 
330
  # Log failed providers
331
+ providers_failed = result.get('providers_failed', [])
332
  for pf in providers_failed:
333
  _add_activity_log(workflow_id, progress_store, "critic", f"LLM {pf['name']} failed: {pf['error']}")
334
 
 
337
  state["llm_providers_failed"] = []
338
  state["llm_providers_failed"].extend([pf["name"] for pf in providers_failed])
339
 
340
+ # Extract results
341
+ status = result["status"]
342
+ weighted_score = result["weighted_score"]
343
+ scores = result["scores"]
344
+
345
+ # Handle ESCALATE if max iterations reached
346
+ if iteration > 3 and status == "REJECTED":
347
+ status = "ESCALATE"
348
+ _add_activity_log(workflow_id, progress_store, "critic", "Max iterations reached - escalating for human review")
349
+
350
+ # Log scores
351
+ print(f" Status: {status}")
352
+ print(f" Weighted Score: {weighted_score:.1f}/10")
353
+ for criterion, score in scores.items():
354
+ floor = HARD_FLOORS.get(criterion, "-")
355
+ print(f" {criterion}: {score}/10 (floor: {floor})")
356
+
357
+ _add_activity_log(workflow_id, progress_store, "critic", f"Evaluation via {provider} ({elapsed:.1f}s)")
358
+
359
+ # Log status and score
360
+ if status == "APPROVED":
361
+ score_msg = f"APPROVED - Score: {weighted_score:.1f}/10"
362
+ elif status == "ESCALATE":
363
+ score_msg = f"ESCALATE - Score: {weighted_score:.1f}/10 (max iterations)"
364
  else:
365
+ score_msg = f"REJECTED - Score: {weighted_score:.1f}/10 - needs revision"
366
  _add_activity_log(workflow_id, progress_store, "critic", score_msg)
367
 
368
+ # Build critique message
369
+ critique_lines = [
370
+ f"Status: {status}",
371
+ f"Weighted Score: {weighted_score:.1f}/10",
 
 
 
372
  "",
373
+ "Criterion Scores:",
 
374
  ]
375
 
376
+ for criterion, score in scores.items():
377
+ weight = int(CRITERION_WEIGHTS[criterion] * 100)
378
+ floor = HARD_FLOORS.get(criterion)
379
+ floor_str = f" (floor: {floor})" if floor else ""
380
+ passed = "PASS" if score >= (floor or MIN_INDIVIDUAL_SCORE) else "FAIL"
381
+ critique_lines.append(f" {criterion}: {score}/10 [{weight}%] {floor_str} - {passed}")
382
+
383
+ if result.get("hard_floor_violations"):
384
+ critique_lines.append("")
385
+ critique_lines.append("Hard Floor Violations:")
386
+ for v in result["hard_floor_violations"]:
387
+ critique_lines.append(f" - {v}")
388
+
389
+ if result.get("hallucinations_detected"):
390
+ critique_lines.append("")
391
+ critique_lines.append("Hallucinations Detected:")
392
+ for h in result["hallucinations_detected"]:
393
+ critique_lines.append(f" - {h}")
394
+
395
+ if result.get("key_deficiencies"):
396
+ critique_lines.append("")
397
+ critique_lines.append("Key Deficiencies:")
398
+ for i, d in enumerate(result["key_deficiencies"], 1):
399
+ critique_lines.append(f" {i}. {d}")
400
+
401
+ if result.get("actionable_feedback"):
402
+ critique_lines.append("")
403
+ critique_lines.append("Actionable Feedback:")
404
+ for i, f in enumerate(result["actionable_feedback"], 1):
405
+ critique_lines.append(f" {i}. {f}")
406
+
407
+ if result.get("strengths_to_preserve"):
408
+ critique_lines.append("")
409
+ critique_lines.append("Strengths to Preserve:")
410
+ for s in result["strengths_to_preserve"]:
411
+ critique_lines.append(f" - {s}")
412
+
413
+ state["critique"] = "\n".join(critique_lines)
414
+ state["score"] = weighted_score
415
  state["critique_details"] = {
416
+ "status": status,
417
+ "weighted_score": weighted_score,
418
+ "scores": scores,
419
+ "hard_floor_violations": result.get("hard_floor_violations", []),
420
+ "hallucinations_detected": result.get("hallucinations_detected", []),
421
+ "key_deficiencies": result.get("key_deficiencies", []),
422
+ "strengths_to_preserve": result.get("strengths_to_preserve", []),
423
+ "actionable_feedback": result.get("actionable_feedback", []),
424
  }
425
 
426
  # Update progress
427
  if workflow_id and progress_store:
428
  progress_store[workflow_id].update({
429
  "current_step": "critic",
430
+ "revision_count": revision_count,
431
+ "score": weighted_score
432
  })
433
 
434
  return state
src/nodes/editor.py DELETED
@@ -1,138 +0,0 @@
1
- from src.llm_client import get_llm_client
2
- from langsmith import traceable
3
- import time
4
- import json
5
-
6
-
7
- def _add_activity_log(workflow_id, progress_store, step, message):
8
- """Helper to add activity log entry."""
9
- if workflow_id and progress_store:
10
- from src.services.workflow_store import add_activity_log
11
- add_activity_log(workflow_id, step, message)
12
-
13
-
14
- @traceable(name="Editor")
15
- def editor_node(state, workflow_id=None, progress_store=None):
16
- """
17
- Editor node that revises the SWOT draft based on critique feedback.
18
- Increments the revision count and returns the improved draft.
19
- """
20
- # Extract workflow_id and progress_store from state (graph invokes with state only)
21
- if workflow_id is None:
22
- workflow_id = state.get("workflow_id")
23
- if progress_store is None:
24
- progress_store = state.get("progress_store")
25
-
26
- current_revision = state.get("revision_count", 0) + 1
27
-
28
- # Update progress if tracking is enabled
29
- if workflow_id and progress_store:
30
- progress_store[workflow_id].update({
31
- "current_step": "editor",
32
- "revision_count": state.get("revision_count", 0),
33
- "score": state.get("score", 0)
34
- })
35
-
36
- # Skip if workflow already has an error (abort mode)
37
- if state.get("error"):
38
- _add_activity_log(workflow_id, progress_store, "editor", f"Skipping revision - workflow aborted")
39
- state["revision_count"] = current_revision
40
- return state
41
-
42
- # Log revision start
43
- _add_activity_log(workflow_id, progress_store, "editor", f"Revision #{current_revision} in progress...")
44
-
45
- # Use user-provided API keys if available
46
- user_keys = state.get("user_api_keys", {})
47
- llm = get_llm_client(user_keys) if user_keys else get_llm_client()
48
- strategy_name = state.get("strategy_focus", "Cost Leadership")
49
-
50
- # Get source data for grounding - editor must use ONLY this data
51
- source_data = state.get("raw_data", "")
52
- # Truncate if too long to avoid token limits
53
- if len(source_data) > 4000:
54
- source_data = source_data[:4000] + "\n... [truncated]"
55
-
56
- # Prepare the revision prompt with source data for grounding
57
- prompt = f"""
58
- You are revising a SWOT analysis based on critique feedback. Keep it CONCISE.
59
-
60
- CRITICAL GROUNDING RULES:
61
- 1. You may ONLY use facts and numbers from the SOURCE DATA provided below.
62
- 2. DO NOT invent, assume, or fabricate any information not in the source data.
63
- 3. Every claim must cite specific numbers from the source data.
64
- 4. If the critique asks for information not in the source data, state "Data not available".
65
-
66
- SOURCE DATA (use ONLY this for facts and numbers):
67
- {source_data}
68
-
69
- CURRENT DRAFT:
70
- {state['draft_report']}
71
-
72
- CRITIQUE:
73
- {state['critique']}
74
-
75
- Strategic Focus: {strategy_name}
76
-
77
- REVISION INSTRUCTIONS:
78
- 1. Address the critique points using ONLY data from SOURCE DATA above
79
- 2. Ensure all 4 SWOT sections are present and complete
80
- 3. Every bullet point must cite specific metrics from the source data
81
- 4. Make sure strengths/opportunities are positive, weaknesses/threats are negative
82
- 5. Align analysis with {strategy_name} strategic focus
83
- 6. If data is missing for a point, remove that point rather than inventing data
84
- 7. Keep each bullet point under 25 words - single sentence only
85
- 8. Maximum 5 bullet points per category
86
- 9. Remove any verbose explanations or context paragraphs
87
-
88
- Return only the improved SWOT analysis. Keep it brief and impactful.
89
- """
90
-
91
- # Get the revised draft from LLM
92
- start_time = time.time()
93
- response, provider, error, providers_failed = llm.query(prompt, temperature=0)
94
- elapsed = time.time() - start_time
95
-
96
- # Log failed providers
97
- for pf in providers_failed:
98
- _add_activity_log(workflow_id, progress_store, "editor", f"LLM {pf['name']} failed: {pf['error']}")
99
-
100
- # Track failed providers in state for frontend
101
- if "llm_providers_failed" not in state:
102
- state["llm_providers_failed"] = []
103
- state["llm_providers_failed"].extend([pf["name"] for pf in providers_failed])
104
-
105
- if error:
106
- print(f"Editor LLM error: {error}")
107
- _add_activity_log(workflow_id, progress_store, "editor", f"Revision failed: {error}")
108
-
109
- # Graceful degradation based on revision count
110
- if current_revision == 1:
111
- # First revision failed - use Analyzer's original draft
112
- _add_activity_log(workflow_id, progress_store, "editor", "Using initial draft from Analyzer (revision unavailable)")
113
- # Don't set error - allow workflow to continue with original draft
114
- # draft_report already contains Analyzer's output
115
- state["editor_skipped"] = True
116
- else:
117
- # Revision > 1 failed - use the last successful revision
118
- _add_activity_log(workflow_id, progress_store, "editor", f"Using revision #{current_revision - 1} draft (further revision unavailable)")
119
- # Don't set error - allow workflow to complete with previous draft
120
- state["editor_skipped"] = True
121
- else:
122
- state["draft_report"] = response
123
- state["provider_used"] = provider
124
- state["editor_skipped"] = False
125
- _add_activity_log(workflow_id, progress_store, "editor", f"Revision #{current_revision} completed via {provider} ({elapsed:.1f}s)")
126
-
127
- # Increment revision count
128
- state["revision_count"] = current_revision
129
-
130
- # Update progress with new revision count
131
- if workflow_id and progress_store:
132
- progress_store[workflow_id].update({
133
- "current_step": "editor",
134
- "revision_count": state["revision_count"],
135
- "score": state.get("score", 0)
136
- })
137
-
138
- return state
src/utils/analysis_cache.py CHANGED
@@ -1,7 +1,7 @@
1
  """
2
  Analysis Cache - Supabase PostgreSQL caching for final SWOT analysis results.
3
 
4
- Caches Editor agent output with 24h TTL to avoid re-running the full pipeline.
5
  Uses schema: asa.analysis_cache
6
  """
7
 
 
1
  """
2
  Analysis Cache - Supabase PostgreSQL caching for final SWOT analysis results.
3
 
4
+ Caches final SWOT analysis output with 24h TTL to avoid re-running the full pipeline.
5
  Uses schema: asa.analysis_cache
6
  """
7
 
src/utils/conditions.py CHANGED
@@ -1,5 +1,6 @@
1
  from typing import Literal
2
 
 
3
  def should_continue(state) -> Literal["exit", "retry"]:
4
  """
5
  Conditional routing function that determines whether to continue
@@ -7,19 +8,19 @@ def should_continue(state) -> Literal["exit", "retry"]:
7
 
8
  Exit conditions:
9
  - Error set (LLM providers failed - abort immediately)
10
- - Editor skipped (LLM failed but using fallback draft - exit gracefully)
11
  - Score >= 7 (good quality)
12
  - Revision count > 3 (max attempts reached)
13
 
14
  Continue conditions:
15
- - No error AND No editor skip AND Score < 7 AND Revisions <= 3
16
  """
17
  # Abort immediately if error is set (critical failure)
18
  if state.get("error"):
19
  return "exit"
20
 
21
- # Exit gracefully if editor was skipped (using fallback draft)
22
- if state.get("editor_skipped"):
23
  return "exit"
24
 
25
  current_score = state.get("score", 0)
 
1
  from typing import Literal
2
 
3
+
4
  def should_continue(state) -> Literal["exit", "retry"]:
5
  """
6
  Conditional routing function that determines whether to continue
 
8
 
9
  Exit conditions:
10
  - Error set (LLM providers failed - abort immediately)
11
+ - Analyzer revision skipped (LLM failed but using fallback draft - exit gracefully)
12
  - Score >= 7 (good quality)
13
  - Revision count > 3 (max attempts reached)
14
 
15
  Continue conditions:
16
+ - No error AND No revision skip AND Score < 7 AND Revisions <= 3
17
  """
18
  # Abort immediately if error is set (critical failure)
19
  if state.get("error"):
20
  return "exit"
21
 
22
+ # Exit gracefully if analyzer revision was skipped (using fallback draft)
23
+ if state.get("analyzer_revision_skipped"):
24
  return "exit"
25
 
26
  current_score = state.get("score", 0)
src/workflow/graph.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
  LangGraph workflow definition for self-correcting SWOT analysis.
3
- Defines the cyclic workflow: Researcher -> Analyzer -> Critic -> Editor (loop)
4
  """
5
 
6
  from langgraph.graph import StateGraph
@@ -10,17 +10,15 @@ from src.state import AgentState
10
  from src.nodes.researcher import researcher_node
11
  from src.nodes.analyzer import analyzer_node
12
  from src.nodes.critic import critic_node
13
- from src.nodes.editor import editor_node
14
  from src.utils.conditions import should_continue
15
 
16
  # Create the cyclic workflow
17
  workflow = StateGraph(AgentState)
18
 
19
- # Add all nodes to the workflow
20
  workflow.add_node("Researcher", RunnableLambda(researcher_node))
21
  workflow.add_node("Analyzer", RunnableLambda(analyzer_node))
22
  workflow.add_node("Critic", RunnableLambda(critic_node))
23
- workflow.add_node("Editor", RunnableLambda(editor_node))
24
 
25
  # Define the workflow edges
26
  workflow.set_entry_point("Researcher")
@@ -28,18 +26,16 @@ workflow.add_edge("Researcher", "Analyzer")
28
  workflow.add_edge("Analyzer", "Critic")
29
 
30
  # Add conditional edges for the self-correcting loop
 
31
  workflow.add_conditional_edges(
32
  "Critic",
33
  should_continue,
34
  {
35
  "exit": "__end__",
36
- "retry": "Editor"
37
  }
38
  )
39
 
40
- # Complete the loop: Editor -> Critic
41
- workflow.add_edge("Editor", "Critic")
42
-
43
  # Set the finish point
44
  workflow.set_finish_point("Critic")
45
 
@@ -48,9 +44,9 @@ workflow.config = {
48
  "project_name": "AI-strategy-agent-cyclic",
49
  "tags": ["self-correcting", "quality-loop", "swot-analysis"],
50
  "metadata": {
51
- "version": "1.0",
52
  "environment": "development",
53
- "workflow_type": "researcher-analyzer-critic-editor"
54
  }
55
  }
56
 
 
1
  """
2
  LangGraph workflow definition for self-correcting SWOT analysis.
3
+ Defines the cyclic workflow: Researcher -> Analyzer -> Critic -> Analyzer (revision loop)
4
  """
5
 
6
  from langgraph.graph import StateGraph
 
10
  from src.nodes.researcher import researcher_node
11
  from src.nodes.analyzer import analyzer_node
12
  from src.nodes.critic import critic_node
 
13
  from src.utils.conditions import should_continue
14
 
15
  # Create the cyclic workflow
16
  workflow = StateGraph(AgentState)
17
 
18
+ # Add nodes to the workflow (Analyzer handles both initial generation and revisions)
19
  workflow.add_node("Researcher", RunnableLambda(researcher_node))
20
  workflow.add_node("Analyzer", RunnableLambda(analyzer_node))
21
  workflow.add_node("Critic", RunnableLambda(critic_node))
 
22
 
23
  # Define the workflow edges
24
  workflow.set_entry_point("Researcher")
 
26
  workflow.add_edge("Analyzer", "Critic")
27
 
28
  # Add conditional edges for the self-correcting loop
29
+ # Analyzer now handles revisions directly (no separate Editor node)
30
  workflow.add_conditional_edges(
31
  "Critic",
32
  should_continue,
33
  {
34
  "exit": "__end__",
35
+ "retry": "Analyzer" # Route back to Analyzer for revisions
36
  }
37
  )
38
 
 
 
 
39
  # Set the finish point
40
  workflow.set_finish_point("Critic")
41
 
 
44
  "project_name": "AI-strategy-agent-cyclic",
45
  "tags": ["self-correcting", "quality-loop", "swot-analysis"],
46
  "metadata": {
47
+ "version": "2.0",
48
  "environment": "development",
49
+ "workflow_type": "researcher-analyzer-critic"
50
  }
51
  }
52
 
tests/test_self_correcting_loop.py CHANGED
@@ -16,7 +16,7 @@ def test_analyzer_failure():
16
 
17
  # Monkey patch the analyzer node to force poor quality
18
  def force_poor_analyzer(state):
19
- """Force a poor quality draft to trigger Editor"""
20
  state["draft_report"] = "Bad analysis. No details. Incomplete."
21
  print("⚠️ FORCED POOR QUALITY: Overriding with very weak content")
22
  return state
@@ -40,10 +40,10 @@ def test_critic_failure():
40
 
41
  # Monkey patch the critic to force a low score
42
  def force_low_score_critic(state):
43
- """Force a low score to trigger Editor"""
44
  state["score"] = 3 # Low score to force revision
45
  state["critique"] = "Forced low score for testing self-correction loop"
46
- print("⚠️ FORCED LOW SCORE: 3/10 to trigger Editor")
47
  return state
48
 
49
  # Temporarily replace critic in the workflow
 
16
 
17
  # Monkey patch the analyzer node to force poor quality
18
  def force_poor_analyzer(state):
19
+ """Force a poor quality draft to trigger revision loop"""
20
  state["draft_report"] = "Bad analysis. No details. Incomplete."
21
  print("⚠️ FORCED POOR QUALITY: Overriding with very weak content")
22
  return state
 
40
 
41
  # Monkey patch the critic to force a low score
42
  def force_low_score_critic(state):
43
+ """Force a low score to trigger revision loop"""
44
  state["score"] = 3 # Low score to force revision
45
  state["critique"] = "Forced low score for testing self-correction loop"
46
+ print("⚠️ FORCED LOW SCORE: 3/10 to trigger revision loop")
47
  return state
48
 
49
  # Temporarily replace critic in the workflow