Spaces:
Sleeping
Sleeping
"""
analyze_node.py — Evaluates the user's reasoning and identifies the primary gap.

Improvements over v1:
- Uses llm.with_structured_output() for guaranteed schema compliance
- Clamps gap_magnitude to [0, 10] as a safety guard
- Runs sandboxed code evaluation and blends result into hybrid gap score
- Loads and updates UserProfile in SQLite for persistent memory
- Populates explain-why-wrong fields (mistake, why_wrong, correct_thinking)
"""
| from agent.models import AgentState, EvaluationOutput | |
| from agent.llm_factory import get_llm | |
| from agent.prompts import ANALYZE_PROMPT | |
| from agent.knowledge import get_misconceptions | |
| from agent.memory import load_profile, update_profile, persist_profile | |
| from agent.sandbox import run_code_safely, get_test_cases_for_topic | |
# Module-level LLM client, created once at import time and shared by all calls.
_llm = get_llm()
# Wrapper that constrains responses to the EvaluationOutput schema using the
# provider's function-calling mode, so parsing the reply cannot silently drift.
_structured_llm = _llm.with_structured_output(EvaluationOutput, method="function_calling")
def evaluate_reasoning(state: AgentState) -> dict:
    """
    Analyze the user's thought process and code and identify the primary gap.

    Pipeline: (1) optionally run the submitted code in a sandbox against
    topic test cases, (2) ask the LLM for a structured gap evaluation,
    (3) blend the LLM score with the test pass rate into a hybrid gap score,
    (4) persist the result to the user's profile (best-effort).

    Args:
        state: Agent state. Reads "problem_topic", "code", "session_id",
            "problem", and "user_thought".

    Returns:
        dict with keys: identified_gap, gap_magnitude (int, 0-10), mistake,
        why_wrong, correct_thinking, and test_pass_rate (float or None).
    """
    topic = state.get("problem_topic", "Unknown")
    code = state.get("code", "") or ""
    session_id = state.get("session_id", "anonymous")

    # ── 1. Run sandbox evaluation if code is provided ───────────────────────
    test_results_summary = "No code submitted."
    test_pass_rate = None
    if code.strip():
        test_cases = get_test_cases_for_topic(topic)
        if test_cases:
            run_result = run_code_safely(code, test_cases)
            test_pass_rate = run_result["pass_rate"]
            test_results_summary = (
                f"Passed {run_result['passed']}/{run_result['total']} test cases. "
                f"Errors: {run_result['errors'][:2]}"
            )
        else:
            test_results_summary = "No built-in test cases for this topic — using LLM evaluation only."

    # ── 2. Fetch misconceptions for topic context ───────────────────────────
    misconceptions = "; ".join(get_misconceptions(topic))

    # ── 3. LLM evaluation with structured output ────────────────────────────
    try:
        result: EvaluationOutput = _structured_llm.invoke(
            ANALYZE_PROMPT.format_messages(
                topic=topic,
                problem=state["problem"],
                thought=state["user_thought"],
                code=code or "No code provided",
                misconceptions=misconceptions,
                test_results=test_results_summary,
            )
        )
        gap = max(0, min(10, result.gap_magnitude))  # Clamp to [0, 10]

        # ── 4. Hybrid scoring: blend LLM gap with code test pass rate ───────
        # 60% LLM judgement, 40% inverted pass rate (0% passed -> gap 10).
        if test_pass_rate is not None:
            gap = int(round(0.6 * gap + 0.4 * (10 - test_pass_rate * 10)))
            gap = max(0, min(10, gap))
    except Exception as e:
        print(f"[analyze_node] Structured output error: {e}")
        gap = 5
        result = EvaluationOutput(
            problem_topic=topic,
            identified_gap="Could not parse analysis",
            gap_magnitude=5,
            reasoning="Parse error fallback",
        )

    # ── 5. Update persistent UserProfile (best-effort; never fail the node) ─
    try:
        profile = load_profile(session_id)
        profile = update_profile(profile, topic, gap, solved=(gap == 0))
        persist_profile(profile)
    except Exception as e:
        print(f"[analyze_node] Memory update error: {e}")

    return {
        "identified_gap": result.identified_gap,
        "gap_magnitude": gap,
        # The fallback EvaluationOutput above is built without these three
        # fields, so read them defensively instead of crashing on the
        # error-recovery path with an AttributeError/ValidationError.
        "mistake": getattr(result, "mistake", None),
        "why_wrong": getattr(result, "why_wrong", None),
        "correct_thinking": getattr(result, "correct_thinking", None),
        "test_pass_rate": test_pass_rate,
    }