from __future__ import annotations import json from datetime import datetime from agents.state import MathMentorState from llm.client import get_llm PARSER_PROMPT = """\ You are a math problem parser. Clean and structure the following text into a well-defined math problem. The text may come from OCR or speech-to-text and may contain: - Typos or misrecognized characters - Math symbols written as words - Ambiguous notation Your job: 1. Clean the text into proper mathematical notation 2. Identify the topic and key variables 3. ONLY flag needs_clarification if the problem is truly impossible to understand or critically incomplete IMPORTANT: Set needs_clarification to false for normal math problems. Most problems do NOT need clarification. Only set true if the problem is genuinely unreadable or missing critical information that makes it unsolvable. Respond with JSON only (no markdown code fences): {{ "problem_text": "cleaned, clear problem statement", "topic": "algebra|probability|calculus|linear_algebra", "variables": ["list", "of", "variables"], "constraints": ["list of constraints if any"], "needs_clarification": false, "clarification_reason": "" }} Raw input: {raw_text} """ def _strip_json_fences(text: str) -> str: """Strip markdown code fences from LLM JSON responses.""" text = text.strip() if text.startswith("```"): text = text.split("\n", 1)[1] if "\n" in text else text[3:] if text.endswith("```"): text = text[:-3] return text.strip() def parser_node(state: MathMentorState) -> dict: text = state.get("human_edited_text") or state.get("extracted_text", "") llm = get_llm() response = llm.invoke(PARSER_PROMPT.format(raw_text=text)) try: content = _strip_json_fences(response.content) parsed = json.loads(content) except (json.JSONDecodeError, AttributeError): parsed = { "problem_text": text, "topic": "unknown", "variables": [], "constraints": [], "needs_clarification": False, "clarification_reason": "", } needs_review = parsed.get("needs_clarification", False) return { "parsed_problem": parsed, "needs_human_review": needs_review, "agent_trace": state.get("agent_trace", []) + [ { "agent": "parser", "action": "parsed", "summary": f"Topic: {parsed.get('topic', 'unknown')}, needs_clarification: {needs_review}", "timestamp": datetime.now().isoformat(), } ], }