""" Advanced Reasoning Engine — Chain-of-Thought (CoT) reasoning layer. Sits between the intent router and the LLM / specialist engines. Provides structured, multi-step reasoning for every response: Stage 1 — Problem Analysis · Identify problem type, domain, and sub-questions · Detect ambiguity or missing information · Choose the optimal resolution strategy Stage 2 — Chain-of-Thought Planning · Decompose complex problems into ordered sub-steps · Identify dependencies between sub-steps · Estimate difficulty and required knowledge Stage 3 — Verification & Confidence · Cross-check reasoning against known constraints · Assign confidence level (HIGH / MEDIUM / LOW) · Flag any assumptions made Stage 4 — Response Synthesis · Compose final LLM prompt that enforces the reasoning trace · Force the LLM to follow the plan and not deviate For advanced math the engine adds a symbolic pre-analysis step: · Identify operation type and variable structure · Reason about the expected form of the answer · Verify the SymPy result makes mathematical sense Usage: from reasoning_engine import ReasoningEngine re = ReasoningEngine() plan = re.analyze("integrate x^2 sin(x)", intent="advanced_math") prompt = re.build_math_prompt(user_input, sympy_result, plan) prompt_gen = re.build_general_prompt(user_input, intent, context, plan) """ from __future__ import annotations import re from dataclasses import dataclass, field from typing import Optional # ───────────────────────────────────────────────────────────────────────────── # Data structures # ───────────────────────────────────────────────────────────────────────────── @dataclass class ReasoningPlan: """Structured reasoning plan produced by the engine.""" problem_type: str = "unknown" domain: str = "general" sub_problems: list[str] = field(default_factory=list) strategy: str = "" expected_form: str = "" assumptions: list[str] = field(default_factory=list) confidence: str = "MEDIUM" # HIGH / MEDIUM / LOW reasoning_steps: list[str] = field(default_factory=list) warnings: list[str] = field(default_factory=list) def summary(self) -> str: """One-line summary for console display.""" return ( f"[Reasoning] domain={self.domain} | strategy={self.strategy[:60]} " f"| confidence={self.confidence}" ) def full_trace(self) -> str: """Full reasoning trace as a numbered list.""" lines = [f"Problem type: {self.problem_type}", f"Domain: {self.domain}"] if self.sub_problems: lines.append("Sub-problems:") for i, sp in enumerate(self.sub_problems, 1): lines.append(f" {i}. {sp}") lines.append(f"Strategy: {self.strategy}") if self.expected_form: lines.append(f"Expected answer form: {self.expected_form}") if self.assumptions: lines.append("Assumptions: " + "; ".join(self.assumptions)) if self.warnings: lines.append("Warnings: " + "; ".join(self.warnings)) lines.append(f"Confidence: {self.confidence}") return "\n".join(lines) # ───────────────────────────────────────────────────────────────────────────── # Domain / problem-type classifiers # ───────────────────────────────────────────────────────────────────────────── _DOMAIN_HINTS: dict[str, list[str]] = { "calculus": [ "integrate", "integral", "derivative", "differentiate", "d/dx", "limit", "lim", "antiderivative", ], "algebra": [ "solve", "factor", "expand", "simplify", "roots", "zeros", "equation", "polynomial", "quadratic", ], "linear_algebra": [ "matrix", "eigenvalue", "eigenvector", "determinant", "inverse", "rank", "trace", "vector", "dot product", "cross product", ], "differential_equations": [ "differential equation", "ode", "dy/dx", "y''", "y'", "dsolve", ], "transforms": [ "laplace", "fourier", "z-transform", "inverse laplace", ], "series": [ "taylor", "maclaurin", "power series", "series expansion", ], "number_theory": [ "prime", "gcd", "lcm", "modular", "mod ", "divisible", "factorization", "congruence", ], "statistics": [ "mean", "median", "mode", "variance", "standard deviation", "average", "probability", "distribution", ], "combinatorics": [ "factorial", "binomial", "permutation", "combination", "choose", "nCr", "nPr", ], "complex_analysis": [ "complex", "imaginary", "real part", "modulus", "argument", "conjugate", "polar form", ], "physics": [ "velocity", "acceleration", "force", "energy", "momentum", "electric", "magnetic", "quantum", "wave", "frequency", ], "computer_science": [ "algorithm", "complexity", "big o", "sorting", "graph", "recursion", "dynamic programming", ], } _PROBLEM_TYPE_PATTERNS: list[tuple[str, str]] = [ # Highest-priority: detect before generic "solve" or "equation" (r'\bdifferential\s+equation\b|\bode\b|\bdsolve\b', "ode_solving"), (r'\bintegrate\b|\bintegral\b|\bantiderivative\b', "integration"), (r'\bderivative\b|\bdifferentiate\b|\bd/d[a-z]\b', "differentiation"), (r'\blimit\b|\blim\s', "limit_evaluation"), (r'\beigenvalue\b|\beigenvector\b', "matrix_operation"), (r'\bdeterminant\b|\bdet\b|\binverse\s+matrix\b|\bmatrix\s+rank\b|\bmatrix\s+trace\b', "matrix_operation"), (r'\btaylor\b|\bmaclaurin\b|\bseries\s+expansion\b|\bpower\s+series\b', "series_expansion"), (r'\blaplace\s+transform\b|\blaplace\s+of\b', "laplace_transform"), (r'\bfourier\s+transform\b|\bfourier\s+of\b', "fourier_transform"), (r'\bprime\s+factor|\bgcd\b|\blcm\b|\bmod\b|\bmodular\b', "number_theory"), (r'\bfactorial\b|\bbinomial\s+coeff|\bpermutation\b|\bnCr\b|\bnPr\b', "combinatorics"), (r'\bsum\s+of\b|\bsummation\b|\bsum\b.*\bfrom\b', "summation"), (r'\bmean\b|\bmedian\b|\bvariance\b|\bstandard\s+deviation\b|\baverage\b', "statistical_analysis"), (r'\bcomplex\s+number\b|\bimaginary\s+part\b|\breal\s+part\b|\bmodulus\s+of\b|\bargument\s+of\b', "complex_number"), (r'\bfactor\b|\bfactorise\b|\bfactorize\b', "factorization"), (r'\bexpand\b', "algebraic_expansion"), (r'\bsimplify\b', "simplification"), (r'\bsolve\b|\broots?\s+of\b|\bzeros?\s+of\b', "equation_solving"), (r'\bwhat\s+is\b|\bwho\s+is\b|\bexplain\b|\bdefine\b', "knowledge_retrieval"), (r'\bhow\s+to\b|\bhow\s+does\b', "procedural_knowledge"), (r'\bwhy\b', "explanatory_reasoning"), (r'\bcompare\b|\bdifference\s+between\b', "comparative_analysis"), ] _STRATEGIES: dict[str, str] = { "integration": "Apply integration rules (u-sub, IBP, trig-sub, or direct antiderivative)", "differentiation": "Apply chain rule, product rule, quotient rule, or basic derivative rules", "limit_evaluation": "Apply L'Hôpital's rule, algebraic manipulation, or squeeze theorem", "equation_solving": "Factor, use quadratic formula, or numerical methods as needed", "factorization": "Factor out GCF, then use special patterns (difference of squares, sum/difference of cubes)", "algebraic_expansion":"Apply binomial theorem or FOIL method for products", "simplification": "Cancel common factors, apply trig/logarithm identities, or algebraic reduction", "matrix_operation": "Apply matrix algorithms: cofactor expansion (det), row reduction (rank/inverse), characteristic polynomial (eigenvalues)", "ode_solving": "Classify ODE (linear/separable/exact/homogeneous) and apply corresponding solution method", "series_expansion": "Compute Taylor/Maclaurin coefficients using repeated differentiation", "laplace_transform": "Apply Laplace transform table entries and linearity", "fourier_transform": "Apply Fourier transform definition and standard pairs", "number_theory": "Apply Euclidean algorithm (GCD/LCM) or prime factorization via trial division", "statistical_analysis": "Compute descriptive statistics: mean, variance (E[(X-μ)²]), std deviation", "combinatorics": "Apply counting principles: factorial, binomial theorem, or permutation formulas", "complex_number": "Use Cartesian/polar form of complex numbers and their properties", "knowledge_retrieval": "Retrieve and synthesize relevant factual information", "procedural_knowledge": "Provide a clear step-by-step explanation of the procedure", "explanatory_reasoning": "Reason about causes, mechanisms, or justifications", "comparative_analysis": "Identify key dimensions of comparison and contrast each one", "unknown": "Analyze the problem and apply the most relevant reasoning approach", } _EXPECTED_FORMS: dict[str, str] = { "integration": "A symbolic function + constant of integration C (indefinite) or a real number (definite)", "differentiation": "A symbolic expression of the same or lower degree", "limit_evaluation": "A real number, ∞, -∞, or 'does not exist'", "equation_solving": "One or more numerical or symbolic values for the unknown", "factorization": "A product of irreducible factors", "series_expansion": "A polynomial in x up to the given order, plus O(x^n) remainder", "matrix_operation": "A scalar (det/rank/trace) or matrix (inverse/eigenvectors)", "ode_solving": "A function y(x) with integration constants C1, C2, …", "laplace_transform": "A rational or transcendental function of s", "fourier_transform": "A function of the frequency variable k", "statistical_analysis": "Real-valued descriptive statistics (mean, variance, std)", "combinatorics": "A non-negative integer", } # ───────────────────────────────────────────────────────────────────────────── # Multi-hop decomposer # ───────────────────────────────────────────────────────────────────────────── def _decompose(user_input: str, problem_type: str, intent: str) -> list[str]: """Break the problem into an ordered list of sub-problems / reasoning steps.""" lowered = user_input.lower() # Math decompositions if problem_type == "integration": steps = ["Identify the integrand and the variable of integration"] if "from" in lowered and "to" in lowered: steps.append("Confirm the integration limits (lower and upper bounds)") steps += [ "Check whether u-substitution, integration by parts, or a standard form applies", "Compute the antiderivative step by step", "Apply the Fundamental Theorem of Calculus if limits are given", "Simplify the result and add C for indefinite integrals", ] return steps if problem_type == "differentiation": steps = ["Identify the function and the differentiation variable"] if "second" in lowered or "third" in lowered or "2nd" in lowered or "3rd" in lowered: steps.append("Determine the required order of differentiation") steps += [ "Identify which rules apply (chain rule, product rule, quotient rule)", "Differentiate term by term", "Simplify the derivative expression", ] return steps if problem_type == "limit_evaluation": return [ "Identify the function and the point of approach", "Check for direct substitution; if it gives 0/0 or ∞/∞, apply L'Hôpital's rule", "Alternatively, try algebraic simplification or known limit results", "Evaluate the limit or determine if it diverges", ] if problem_type == "equation_solving": steps = ["Identify the type of equation (linear, quadratic, polynomial, transcendental)"] if "=" in user_input: steps.append("Rearrange so one side is 0 (or use direct substitution for LHS=RHS)") steps += [ "Choose the solution method: factoring, quadratic formula, or numeric methods", "Find all solutions in the required domain", "Verify each solution satisfies the original equation", ] return steps if problem_type == "ode_solving": return [ "Classify the ODE: order, linearity, and special form", "For 1st order: check separable, linear (integrating factor), exact", "For 2nd order: find the characteristic equation and its roots", "Write the general solution with arbitrary constants C1, C2, …", "Apply initial conditions if provided to find particular solution", ] if problem_type == "matrix_operation": steps = ["Identify the matrix dimensions and operation required"] if "eigenvalue" in lowered: steps += [ "Form the characteristic polynomial det(A - λI) = 0", "Solve for eigenvalues λ", "For each eigenvalue, solve (A - λI)v = 0 for eigenvectors", ] elif "determinant" in lowered or "det" in lowered: steps += [ "Choose expansion method: cofactor expansion or row reduction", "Compute the determinant", ] elif "inverse" in lowered: steps += [ "Augment the matrix with the identity: [A | I]", "Row-reduce to obtain [I | A⁻¹]", ] return steps if problem_type == "series_expansion": return [ "Identify the function and expansion point", "Compute successive derivatives at the expansion point", "Form the Taylor/Maclaurin series coefficients: f^(n)(a) / n!", "Write the series up to the required order", "Identify the pattern or general term if possible", ] if problem_type == "laplace_transform": return [ "Express the function in terms of known Laplace pairs", "Apply linearity of the Laplace transform", "Use standard table entries or direct computation", "Simplify the result in the s-domain", ] # Knowledge / reasoning decompositions if problem_type == "knowledge_retrieval": return [ "Identify the core concept being asked about", "Recall the definition and key properties", "Provide relevant examples or applications", "Connect to related concepts if helpful", ] if problem_type == "explanatory_reasoning": return [ "Identify the phenomenon / concept requiring explanation", "Determine the causal chain or mechanism", "State any assumptions or simplifications", "Synthesise a clear, evidence-based explanation", ] if problem_type == "comparative_analysis": return [ "Identify what is being compared", "List key dimensions of comparison", "Analyse each dimension for both subjects", "Summarise similarities and differences", ] # Default: general reasoning return [ "Understand the question and identify the core task", "Break the problem into smaller sub-problems", "Address each sub-problem in order", "Synthesise a complete and accurate answer", ] # ───────────────────────────────────────────────────────────────────────────── # Confidence estimator # ───────────────────────────────────────────────────────────────────────────── def _estimate_confidence( user_input: str, problem_type: str, intent: str, has_symbolic_result: bool = False, ) -> str: """Heuristically estimate confidence in the system's ability to answer.""" if has_symbolic_result: return "HIGH" # SymPy computed it — we're certain lowered = user_input.lower() # High confidence for well-defined math types high_confidence_types = { "integration", "differentiation", "limit_evaluation", "equation_solving", "factorization", "algebraic_expansion", "simplification", "series_expansion", "number_theory", "statistical_analysis", "combinatorics", } if problem_type in high_confidence_types: return "HIGH" # Medium confidence for conceptual / knowledge if intent in ("knowledge", "conversation"): # Simple factual questions tend to be higher confidence if any(kw in lowered for kw in ["what is", "define", "who is"]): return "MEDIUM" # Open-ended or opinion questions are lower if any(kw in lowered for kw in ["why", "opinion", "best", "should i"]): return "LOW" return "MEDIUM" return "MEDIUM" # ───────────────────────────────────────────────────────────────────────────── # Warnings detector # ───────────────────────────────────────────────────────────────────────────── def _detect_warnings(user_input: str, problem_type: str) -> list[str]: """Flag potential issues in the problem statement.""" warnings = [] lowered = user_input.lower() if len(user_input.strip()) < 5: warnings.append("Input is very short — may be ambiguous") if problem_type == "equation_solving" and "=" not in user_input and "solve" in lowered: warnings.append("No '=' found — treating expression as equal to 0") if problem_type == "integration" and "from" in lowered and "to" not in lowered: warnings.append("'from' found but 'to' is missing — treating as indefinite integral") if problem_type in ("differentiation", "integration") and not any( c in user_input for c in list("xyztnkabcmnpqrs") ): warnings.append("No variable detected — defaulting to x") if "undefined" in lowered or "infinity" in lowered: warnings.append("Expression may involve singularities or unbounded behaviour") return warnings # ───────────────────────────────────────────────────────────────────────────── # Public interface # ───────────────────────────────────────────────────────────────────────────── class ReasoningEngine: """ Chain-of-Thought reasoning engine. Usage: re = ReasoningEngine() plan = re.analyze(user_input, intent) prompt = re.build_math_prompt(user_input, sympy_result, plan) prompt = re.build_general_prompt(user_input, intent, context, plan) """ def analyze( self, user_input: str, intent: str, has_symbolic_result: bool = False, ) -> ReasoningPlan: """ Produce a structured ReasoningPlan for the given input. Args: user_input: Raw user query. intent: Classified intent (advanced_math / math / knowledge / conversation). has_symbolic_result: True when SymPy has already computed the answer. Returns: ReasoningPlan with problem type, strategy, sub-problems, confidence. """ lowered = user_input.lower() # 1. Detect domain domain = "general" for d, keywords in _DOMAIN_HINTS.items(): for kw in keywords: if kw in lowered: domain = d break if domain != "general": break # 2. Classify problem type (list preserves priority order) problem_type = "unknown" for pattern, ptype in _PROBLEM_TYPE_PATTERNS: if re.search(pattern, lowered): problem_type = ptype break # 3. Strategy & expected answer form strategy = _STRATEGIES.get(problem_type, _STRATEGIES["unknown"]) expected_form = _EXPECTED_FORMS.get(problem_type, "") # 4. Decompose into sub-problems sub_problems = _decompose(user_input, problem_type, intent) # 5. Detect assumptions assumptions = [] if domain in ("calculus", "algebra", "differential_equations"): if not any(kw in lowered for kw in ["complex", "imaginary", "i^2"]): assumptions.append("Working over the real numbers unless otherwise stated") if problem_type in ("integration", "differentiation"): assumptions.append("Function is sufficiently smooth (differentiable/integrable)") # 6. Warnings warnings = _detect_warnings(user_input, problem_type) # 7. Confidence confidence = _estimate_confidence( user_input, problem_type, intent, has_symbolic_result ) return ReasoningPlan( problem_type=problem_type, domain=domain, sub_problems=sub_problems, strategy=strategy, expected_form=expected_form, assumptions=assumptions, confidence=confidence, reasoning_steps=sub_problems, warnings=warnings, ) def build_math_prompt( self, user_input: str, sympy_result: str, plan: ReasoningPlan, ) -> str: """ Build an LLM prompt for advanced math where SymPy has the correct answer. The prompt embeds the full reasoning plan so the LLM follows the exact strategy. """ steps_str = "\n".join( f" Step {i}: {s}" for i, s in enumerate(plan.sub_problems, 1) ) assumptions_str = ( ("\nAssumptions: " + "; ".join(plan.assumptions)) if plan.assumptions else "" ) expected_str = ( f"\nExpected answer form: {plan.expected_form}" if plan.expected_form else "" ) return ( f"PROBLEM: {user_input}\n\n" f"VERIFIED ANSWER (computed by SymPy — 100% correct): {sympy_result}\n\n" "=== YOUR TASK ===\n" f"Explain, step by step, HOW a student arrives at: {sympy_result}\n" "Do NOT recompute the answer. Do NOT give a different answer. " "Your explanation must lead to exactly the verified answer above.\n\n" f"TECHNIQUE: {plan.strategy}\n" f"DOMAIN: {plan.domain}" f"{expected_str}" f"{assumptions_str}\n\n" f"STEPS TO WALK THROUGH:\n{steps_str}\n\n" "Write a numbered explanation. For each step:\n" " - State what you are doing and why.\n" " - Show the algebra or calculation clearly.\n" " - Connect it to the next step.\n\n" f"End with: 'Final answer: {sympy_result}' — use this exact wording." ) def build_general_prompt( self, user_input: str, intent: str, context: str, plan: ReasoningPlan, ) -> str: """ Build an LLM prompt for knowledge/conversation using chain-of-thought. The plan is embedded so the LLM follows the structured reasoning trace. """ steps_str = "\n".join( f" {i}. {s}" for i, s in enumerate(plan.sub_problems, 1) ) context_block = ( f"\nRELEVANT CONTEXT:\n{context}\n" if context else "" ) confidence_note = ( "\nNote: confidence in this answer is LOW — state clearly if uncertain." if plan.confidence == "LOW" else "" ) warnings_block = "" if plan.warnings: warnings_block = "\nFLAGS: " + "; ".join(plan.warnings) + "\n" return ( "You are AnveshAI, a helpful and honest AI assistant.\n" f"QUESTION: {user_input}\n" f"{context_block}" f"{warnings_block}\n" f"REASONING PLAN (follow this structure):\n{steps_str}\n\n" f"ANSWER STRATEGY: {plan.strategy}\n" f"{confidence_note}\n\n" "Write a clear, thorough, well-structured response that follows " "the reasoning plan above step by step. " "Be concise but complete. State any assumptions or caveats explicitly." ) def build_math_fallback_prompt( self, user_input: str, plan: ReasoningPlan, error_context: str = "", ) -> str: """ Prompt for when SymPy fails and the LLM must solve from scratch. Uses chain-of-thought to maximise correctness. """ steps_str = "\n".join( f" Step {i}: {s}" for i, s in enumerate(plan.sub_problems, 1) ) error_note = ( f"\nNote: automated computation failed ({error_context}). " "Solve manually with extra care.\n" if error_context else "" ) return ( "You are a mathematics expert.\n" f"PROBLEM: {user_input}\n" f"{error_note}\n" f"PROBLEM TYPE: {plan.problem_type}\n" f"STRATEGY: {plan.strategy}\n" f"{f'EXPECTED ANSWER FORM: {plan.expected_form}' if plan.expected_form else ''}\n\n" f"REASONING STEPS TO FOLLOW:\n{steps_str}\n\n" "Solve the problem completely by following each step above. " "Show all working in full — do not skip steps. " "State the final answer clearly at the end." )