Spaces:
Running
Running
| import re | |
| from typing import Optional, Dict, Any | |
| from dataclasses import dataclass | |
| import logging | |
| logger = logging.getLogger(__name__) | |
@dataclass
class MathIntent:
    """Structured description of a math request extracted from free text.

    Instances are built with keyword arguments by
    ``MathQueryNormalizer.normalize``; the ``@dataclass`` decorator supplies
    the ``__init__`` (plus ``__repr__`` / ``__eq__``) that this requires.
    """

    # Kind of operation requested. normalize() emits "derivative",
    # "integral", "equation" or "arithmetic".
    intent: str
    # Math expression with the surrounding prose words removed.
    expression: str
    # Variable the operation is taken with respect to; defaults to 'x'.
    variable: Optional[str] = 'x'
    # The user's query exactly as it was passed in.
    original_query: str = ""
class MathQueryNormalizer:
    """
    Normalizes natural language math queries into structured intents
    resolvable by symbolic solvers.

    ``normalize`` is the entry point. It lower-cases the query, decides
    which kind of operation is being asked for and returns a ``MathIntent``
    whose ``expression`` has had the English prose stripped out.
    """

    # English filler removed from expressions wherever it appears.
    _PROSE_WORDS = (
        "what is", "what are", "the value of", "the result of",
        "please", "calculate", "compute", "evaluate", "find",
        "solve", "simplify", "determine", "the", "of", "for",
        "result", "value", "answer", "how do i", "how to", "i", "can you",
        "find x", "how do we", "we", "do", "how",
    )

    # One whole-word pattern per phrase, longest phrase first so that
    # "the value of" is removed before its component words "the" / "of".
    # Compiled once here instead of on every _clean_expression() call.
    _PROSE_PATTERNS = tuple(
        re.compile(rf'\b{re.escape(phrase)}\b', re.IGNORECASE)
        for phrase in sorted(_PROSE_WORDS, key=len, reverse=True)
    )

    # Digits, operators, grouping and whitespace, plus a handful of
    # variable letters so that simple algebra such as "2x + 5" qualifies.
    _ARITHMETIC_CHARS = frozenset("0123456789+-*/^().= \t" "xyzabc")

    # Leading phrases that mark a query as a plain calculation request.
    _ARITHMETIC_PREFIXES = ("calculate", "what is", "evaluate")

    def __init__(self):
        # Regexes per intent; group 1 captures the expression.
        # NOTE: normalize() only consults the "derivative" and "integral"
        # lists. Equations are detected by the presence of "=", arithmetic
        # by _is_arithmetic() / a leading keyword, and "limit" queries are
        # not handled by normalize() at all.
        self.intent_patterns = {
            "derivative": [
                r"derivative of\s+(.+)",
                r"derive\s+(.+)",
                r"differentiate\s+(.+)",
                # Bracketed operand: capture what is inside the brackets.
                # (A single greedy r"\[?(.+)\]?" swallows the closing "]".)
                r"d/dx\s*\[([^\[\]]+)\]\s*$",
                # Unbracketed operand, or anything the pattern above rejects.
                r"d/dx\s*(.+)"
            ],
            "integral": [
                r"integral of\s+(.+)",
                r"integrate\s+(.+)",
                r"antiderivative of\s+(.+)"
            ],
            "limit": [
                r"limit of\s+(.+)\s+as\s+(\w+)\s+approaches\s+(.+)",
                r"lim\s+(.+)"
            ],
            "equation": [
                r"solve\s+(.+)",
                r"find x for\s+(.+)",  # basic
                r"(.+)=(.+)"  # implicit equation if contains =
            ],
            "arithmetic": [
                r"calculate\s+(.+)",
                r"what is\s+(.+)",
                r"evaluate\s+(.+)"
            ]
        }
        # Stop words to clean from expression.
        # Not read by any method of this class; kept for external users.
        self.stop_words = ["what is", "calculate", "the", "please", "solve", "evaluate"]

    def normalize(self, text: str) -> Optional[MathIntent]:
        """
        Parses text to identify math intent and extract the core expression.

        Intents are tried in this order: derivative, integral, equation
        (the query contains "="), arithmetic (the query consists only of
        arithmetic characters, or starts with "calculate" / "what is" /
        "evaluate").

        Args:
            text: The raw user query.

        Returns:
            A ``MathIntent`` carrying the cleaned expression and the
            original query, or ``None`` if no clear math intent is found.
            That includes the case where nothing is left of the expression
            once the prose words have been removed.
        """
        if not text:
            return None

        clean_text = text.lower().strip().rstrip("?")

        # 1. Pattern-driven intents; the variable is always assumed to be x.
        for intent in ("derivative", "integral"):
            expr = self._extract_expression(intent, clean_text)
            if expr is not None:
                return MathIntent(
                    intent=intent,
                    expression=expr,
                    variable='x',
                    original_query=text
                )

        # 2. Equation solving: any query containing "=". The "=" itself
        #    survives cleaning, so the expression is never empty here.
        if "=" in clean_text:
            return MathIntent(
                intent="equation",
                expression=self._clean_expression(clean_text),
                variable='x',
                original_query=text
            )

        # 3. Arithmetic / simplification.
        if self._is_arithmetic(clean_text) or clean_text.startswith(self._ARITHMETIC_PREFIXES):
            expr = self._clean_expression(clean_text)
            if expr:  # e.g. a bare "calculate" leaves nothing to evaluate
                return MathIntent(
                    intent="arithmetic",
                    expression=expr,
                    original_query=text
                )

        return None

    def _extract_expression(self, intent: str, text: str) -> Optional[str]:
        """
        Returns the cleaned expression captured by the first pattern of
        ``intent`` that matches ``text`` and leaves a non-empty expression,
        or ``None`` if no pattern does.
        """
        for pattern in self.intent_patterns[intent]:
            match = re.search(pattern, text)
            if match:
                # Group 1 is the expression.
                expr = self._clean_expression(match.group(1))
                if expr:
                    return expr
        return None

    def _clean_expression(self, text: str) -> str:
        """
        Removes natural language words from an expression, leaving only
        the mathematical notation for the downstream solver.

        Prose words are stripped wherever they occur, not only at the start
        of the string. Otherwise "what is the value of 5*9" would keep
        "the value of", and a symbolic parser would read those letters as
        symbols multiplied together.

        Args:
            text: Expression text that may still contain English filler.

        Returns:
            The text with every known prose phrase removed (whole words
            only, case-insensitive), runs of spaces collapsed to a single
            space, and leading/trailing whitespace stripped.
        """
        text = text.strip()
        for prose_pattern in self._PROSE_PATTERNS:
            text = prose_pattern.sub(' ', text)
        # Collapse multiple spaces
        return re.sub(r' +', ' ', text).strip()

    def _is_arithmetic(self, text: str) -> bool:
        """
        Checks if text is primarily arithmetic (numbers, operators).

        Returns:
            True when ``text`` contains at least one digit and every
            character is a digit, one of ``+-*/^().=``, a space or tab, or
            one of the variable letters ``x y z a b c``; False otherwise.
        """
        # 1. Must contain at least one digit
        if not any(char.isdigit() for char in text):
            return False
        # 2. Must consist solely of allowed characters
        return all(char in self._ARITHMETIC_CHARS for char in text)