# AlgoSensei/agent/nodes/analyze_node.py — commit e266561 ("init_code", uncertainrods)
"""
analyze_node.py — Evaluates the user's reasoning and identifies the primary gap.
Improvements over v1:
- Uses llm.with_structured_output() for guaranteed schema compliance
- Clamps gap_magnitude to [0, 10] as a safety guard
- Runs sandboxed code evaluation and blends result into hybrid gap score
- Loads and updates UserProfile in SQLite for persistent memory
- Populates explain-why-wrong fields (mistake, why_wrong, correct_thinking)
"""
import logging

from agent.models import AgentState, EvaluationOutput
from agent.llm_factory import get_llm
from agent.prompts import ANALYZE_PROMPT
from agent.knowledge import get_misconceptions
from agent.memory import load_profile, update_profile, persist_profile
from agent.sandbox import run_code_safely, get_test_cases_for_topic
_llm = get_llm()
_structured_llm = _llm.with_structured_output(EvaluationOutput, method="function_calling")
def evaluate_reasoning(state: AgentState) -> dict:
"""
Analyzes user's thought process and code.
Updates the UserProfile in the DB with the latest gap scores.
Returns identified_gap, gap_magnitude, explain-why-wrong fields, and test_pass_rate.
"""
topic = state.get("problem_topic", "Unknown")
code = state.get("code", "") or ""
session_id = state.get("session_id", "anonymous")
# ── 1. Run sandbox evaluation if code is provided ───────────────────────
test_results_summary = "No code submitted."
test_pass_rate = None
if code.strip():
test_cases = get_test_cases_for_topic(topic)
if test_cases:
run_result = run_code_safely(code, test_cases)
test_pass_rate = run_result["pass_rate"]
test_results_summary = (
f"Passed {run_result['passed']}/{run_result['total']} test cases. "
f"Errors: {run_result['errors'][:2]}"
)
else:
test_results_summary = "No built-in test cases for this topic β€” using LLM evaluation only."
# ── 2. Fetch misconceptions for topic context ────────────────────────────
misconceptions = "; ".join(get_misconceptions(topic))
# ── 3. LLM evaluation with structured output ────────────────────────────
try:
result: EvaluationOutput = _structured_llm.invoke(
ANALYZE_PROMPT.format_messages(
topic=topic,
problem=state["problem"],
thought=state["user_thought"],
code=code or "No code provided",
misconceptions=misconceptions,
test_results=test_results_summary,
)
)
gap = max(0, min(10, result.gap_magnitude)) # Clamp to [0, 10]
# ── 4. Hybrid scoring: blend LLM gap with code test pass rate ───────
if test_pass_rate is not None:
gap = int(round(0.6 * gap + 0.4 * (10 - test_pass_rate * 10)))
gap = max(0, min(10, gap))
except Exception as e:
print(f"[analyze_node] Structured output error: {e}")
gap = 5
result = EvaluationOutput(
problem_topic=topic,
identified_gap="Could not parse analysis",
gap_magnitude=5,
reasoning="Parse error fallback",
)
# ── 5. Update persistent UserProfile ────────────────────────────────────
try:
profile = load_profile(session_id)
profile = update_profile(profile, topic, gap, solved=(gap == 0))
persist_profile(profile)
except Exception as e:
print(f"[analyze_node] Memory update error: {e}")
return {
"identified_gap": result.identified_gap,
"gap_magnitude": gap,
"mistake": result.mistake,
"why_wrong": result.why_wrong,
"correct_thinking": result.correct_thinking,
"test_pass_rate": test_pass_rate,
}