File size: 2,619 Bytes
3c25c17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from __future__ import annotations

import json
from datetime import datetime

from agents.state import MathMentorState
from llm.client import get_llm

# Prompt template sent to the LLM by parser_node. It is filled in with
# ``str.format(raw_text=...)``, so ``{raw_text}`` is the only substitution
# field and the doubled braces around the JSON example are escapes that
# render as literal ``{`` / ``}`` in the final prompt.
PARSER_PROMPT = """\
You are a math problem parser. Clean and structure the following text into a well-defined math problem.

The text may come from OCR or speech-to-text and may contain:
- Typos or misrecognized characters
- Math symbols written as words
- Ambiguous notation

Your job:
1. Clean the text into proper mathematical notation
2. Identify the topic and key variables
3. ONLY flag needs_clarification if the problem is truly impossible to understand or critically incomplete

IMPORTANT: Set needs_clarification to false for normal math problems. Most problems do NOT need clarification. Only set true if the problem is genuinely unreadable or missing critical information that makes it unsolvable.

Respond with JSON only (no markdown code fences):
{{
  "problem_text": "cleaned, clear problem statement",
  "topic": "algebra|probability|calculus|linear_algebra",
  "variables": ["list", "of", "variables"],
  "constraints": ["list of constraints if any"],
  "needs_clarification": false,
  "clarification_reason": ""
}}

Raw input: {raw_text}
"""


def _strip_json_fences(text: str) -> str:
    """Strip markdown code fences from LLM JSON responses."""
    text = text.strip()
    if text.startswith("```"):
        text = text.split("\n", 1)[1] if "\n" in text else text[3:]
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def parser_node(state: MathMentorState) -> dict:
    """Turn the raw problem text in ``state`` into a structured problem.

    The input text is ``state["human_edited_text"]`` when that value is
    truthy, otherwise ``state["extracted_text"]`` (defaulting to ``""``).
    It is substituted into ``PARSER_PROMPT``, sent to the LLM returned by
    ``get_llm()``, and the reply is decoded as JSON after code fences are
    stripped.

    If the reply cannot be decoded, or decodes to anything other than a JSON
    object, a fallback problem is used instead: the raw text as
    ``problem_text``, topic ``"unknown"``, empty variables/constraints, and
    no clarification requested.

    Args:
        state: Current graph state; read through ``.get`` only.

    Returns:
        A dict with three keys:
        ``parsed_problem`` (the decoded object or the fallback),
        ``needs_human_review`` (the object's ``needs_clarification`` value,
        ``False`` when absent), and ``agent_trace`` (the existing trace from
        ``state`` plus one new entry describing this parser run).
    """
    text = state.get("human_edited_text") or state.get("extracted_text", "")

    llm = get_llm()
    response = llm.invoke(PARSER_PROMPT.format(raw_text=text))

    try:
        parsed = json.loads(_strip_json_fences(response.content))
    except (json.JSONDecodeError, AttributeError):
        parsed = None

    # json.loads also succeeds on arrays, strings, numbers and null. Those
    # have no .get(), so treat them like an undecodable reply rather than
    # letting the lookups below raise AttributeError.
    if not isinstance(parsed, dict):
        parsed = {
            "problem_text": text,
            "topic": "unknown",
            "variables": [],
            "constraints": [],
            "needs_clarification": False,
            "clarification_reason": "",
        }

    needs_review = parsed.get("needs_clarification", False)

    return {
        "parsed_problem": parsed,
        "needs_human_review": needs_review,
        # Build a new list rather than appending in place, so the trace
        # object held by the incoming state is not mutated.
        "agent_trace": state.get("agent_trace", [])
        + [
            {
                "agent": "parser",
                "action": "parsed",
                "summary": f"Topic: {parsed.get('topic', 'unknown')}, needs_clarification: {needs_review}",
                "timestamp": datetime.now().isoformat(),
            }
        ],
    }