import sympy as sp
import random
from typing import Dict, Any, Tuple, List, Optional


class TaskGenerationEngine:
    """
    Symbolic calculus task generator with scaffold hints and technique metadata.

    Tasks are built "backwards": a random antiderivative F(x) is generated
    first and then differentiated, so every emitted integrand f(x) is
    guaranteed to have a known closed-form solution.

    Improvements over v1:
    1. Stores which integration technique is needed (u-sub, by-parts, etc.)
    2. Generates scaffold hints (first step of solution) for Scaf-GRPO
    3. Better prompt formatting using LaTeX-style notation
    4. More diverse function compositions
    5. Technique-aware variant generation
    """

    def __init__(self):
        """Set up the integration variable, building blocks and detectors."""
        self.x = sp.Symbol('x')

        # Components for generating random functions F(x).
        # Each callable takes (argument, integer coefficient).
        self.basic_functions = [
            lambda x, c: x**c,
            lambda x, c: sp.sin(c*x),
            lambda x, c: sp.cos(c*x),
            lambda x, c: sp.exp(c*x),
            lambda x, c: sp.ln(sp.Abs(c*x + 1)),  # +1 avoids log(0)
        ]

        # Additional functions for higher difficulty
        self.advanced_functions = [
            lambda x, c: sp.tan(c*x),
            lambda x, c: sp.atan(c*x),
            lambda x, c: sp.sinh(c*x),
            lambda x, c: sp.cosh(c*x),
            lambda x, c: x**c * sp.exp(x),         # Requires integration by parts
            lambda x, c: sp.sin(x) * sp.cos(c*x),  # Product of trig
        ]

        # Technique detection patterns. Insertion order is the priority
        # order used by _detect_technique (first match wins).
        self._technique_detectors = {
            'power_rule': self._is_power_rule,
            'u_substitution': self._is_u_substitution,
            'by_parts': self._is_by_parts,
            'trigonometric': self._is_trig_integral,
            'exponential': self._is_exponential,
            'logarithmic': self._is_logarithmic,
        }

    def _score_difficulty(self, components: int, nesting: int) -> float:
        """D = num_components + degree_of_nesting * 2"""
        return float(components + nesting * 2.0)

    def _differentiate(self, F_expr):
        """
        Return dF/dx with every ``log(Abs(u))`` differentiated as ``log(u)``.

        ``self.x`` carries no ``real`` assumption, so differentiating
        ``Abs(...)`` directly leaves unevaluated ``Derivative(re(x), x)`` /
        ``Derivative(im(x), x)`` terms in the result, which makes the emitted
        integrand unreadable. Using d/dx log|u| = u'/u gives the intended
        integrand. ``F_expr`` itself is not modified, so solutions still
        show the absolute value.
        """
        def is_log_of_abs(node) -> bool:
            return isinstance(node, sp.log) and isinstance(node.args[0], sp.Abs)

        def strip_abs(node):
            return sp.log(node.args[0].args[0])

        expr = sp.sympify(F_expr)
        return sp.diff(expr.replace(is_log_of_abs, strip_abs), self.x)

    def _detect_technique(self, f_expr) -> str:
        """Detect which integration technique is most appropriate for f(x).

        Detectors are tried in the insertion order of
        ``self._technique_detectors``; the first one that matches wins.
        """
        for technique, detector in self._technique_detectors.items():
            if detector(f_expr):
                return technique
        return 'power_rule'  # Default fallback

    def _is_power_rule(self, expr) -> bool:
        """Check if expression is a simple polynomial."""
        return expr.is_polynomial(self.x)

    def _is_u_substitution(self, expr) -> bool:
        """
        Check if expression has the shape h(u(x)) * u'(x) (up to a constant).

        Only top-level products are considered. Candidate inner functions u
        are the arguments of function calls and the bases of powers; the bare
        variable ``x`` is excluded because substituting u = x is a no-op.
        For each candidate the integrand is divided by u' and u is replaced
        by a placeholder; if no ``x`` survives, the substitution works.
        This is a heuristic: it relies on SymPy's automatic cancellation and
        may miss products that need explicit factoring first.
        """
        if not isinstance(expr, sp.Mul):
            return False

        x = self.x
        inner_candidates = {
            arg for fn in expr.atoms(sp.Function) for arg in fn.args
        }
        inner_candidates.update(power.base for power in expr.atoms(sp.Pow))

        placeholder = sp.Dummy('u')
        for inner in inner_candidates:
            if not isinstance(inner, sp.Expr) or inner == x or not inner.has(x):
                continue
            d_inner = sp.diff(inner, x)
            if d_inner == 0:
                continue  # would divide by zero below
            quotient = expr / d_inner
            if not quotient.subs(inner, placeholder).has(x):
                return True
        return False

    def _is_by_parts(self, expr) -> bool:
        """
        Check if expression likely needs integration by parts.

        True for a top-level product with a polynomial factor that actually
        depends on x and a factor involving sin/cos/exp/log. Bare numeric
        coefficients do not count as the polynomial factor.
        """
        if not isinstance(expr, sp.Mul):
            return False
        x = self.x
        has_poly = any(a.has(x) and a.is_polynomial(x) for a in expr.args)
        has_transcendental = any(
            a.has(sp.sin, sp.cos, sp.exp, sp.log) for a in expr.args
        )
        return has_poly and has_transcendental

    def _is_trig_integral(self, expr) -> bool:
        """Check if expression is primarily trigonometric."""
        return expr.has(sp.sin, sp.cos, sp.tan) and not expr.has(sp.exp, sp.log)

    def _is_exponential(self, expr) -> bool:
        """Check if expression is primarily exponential."""
        return expr.has(sp.exp) and not expr.has(sp.sin, sp.cos)

    def _is_logarithmic(self, expr) -> bool:
        """Check if expression involves logarithms."""
        return expr.has(sp.log, sp.ln)

    def _generate_scaffold_hint(self, f_expr, F_expr, technique: str) -> Dict[str, str]:
        """
        Generate a scaffold hint for the problem.

        Args:
            f_expr: the integrand (currently unused by the hint text).
            F_expr: the known antiderivative; used for the level-3 hint.
            technique: technique key, e.g. ``'by_parts'``.

        Returns a dict with:
        - 'technique': which technique to use
        - 'hint_level_1': gentle nudge (technique name)
        - 'hint_level_2': first step of solution
        - 'hint_level_3': most of the solution
        """
        technique_descriptions = {
            'power_rule': "Try applying the power rule: ∫x^n dx = x^(n+1)/(n+1) + C",
            'u_substitution': "Try u-substitution. Look for a composite function and its derivative.",
            'by_parts': "Try integration by parts: ∫u dv = uv - ∫v du",
            'trigonometric': "Try using trigonometric identities to simplify first.",
            'exponential': "Remember that ∫e^(ax) dx = (1/a)e^(ax) + C",
            'logarithmic': "Remember that ∫(1/x) dx = ln|x| + C",
        }
        level_1 = technique_descriptions.get(
            technique, "Try identifying the integration technique needed."
        )

        # Level 2: Show the substitution or setup
        if technique == 'u_substitution':
            # level_1 already starts with "Try" and ends with a period, so it
            # is embedded as-is (previously rendered "Try Try ... derivative..").
            level_2 = f"Hint: {level_1} The integrand has a composite structure."
        elif technique == 'by_parts':
            level_2 = (
                f"Hint: {level_1}. Identify which part to differentiate (u) "
                "and which to integrate (dv)."
            )
        else:
            level_2 = f"Hint: {level_1}"

        # Level 3: Show the first term of the answer (best effort: simplify
        # can fail or misbehave on exotic expressions, so fall back to level 2).
        try:
            simplified = sp.simplify(F_expr)
            if isinstance(simplified, sp.Add):
                first_term = simplified.args[0]
                level_3 = f"The answer starts with: {sp.pretty(first_term)} + ..."
            else:
                level_3 = f"The answer has the form: {type(simplified).__name__} expression"
        except Exception:
            level_3 = level_2

        return {
            'technique': technique,
            'hint_level_1': level_1,
            'hint_level_2': level_2,
            'hint_level_3': level_3,
        }

    def generate_random_function(self, complexity: int) -> Tuple[Any, float]:
        """
        Generates a random F(x) with appropriate complexity.

        Returns:
            ``(F_expr, difficulty)`` where ``difficulty`` comes from
            ``_score_difficulty(num_components, nesting)``.
        """
        num_components = max(1, int(complexity / 2))
        nesting = max(0, int(complexity / 4))

        # Use advanced functions at higher complexity
        available_funcs = list(self.basic_functions)
        if complexity >= 4:
            available_funcs.extend(self.advanced_functions[:3])
        if complexity >= 6:
            available_funcs.extend(self.advanced_functions[3:])

        total = 0
        for _ in range(num_components):
            comp_func = random.choice(available_funcs)
            coeff = random.randint(1, 5)
            try:
                term = comp_func(self.x, coeff)
            except Exception:
                # Fallback to simple polynomial
                term = self.x ** coeff

            # Apply nesting: wrap the term in randomly chosen basic functions
            for _ in range(nesting):
                outer = random.choice(self.basic_functions)
                try:
                    term = outer(term, 1)
                except Exception:
                    break

            total += random.randint(1, 10) * term

        return total, self._score_difficulty(num_components, nesting)

    def generate_task(self, target_difficulty_band: float) -> Dict[str, Any]:
        """
        Provides an indefinite integral task with technique hints and scaffold support.

        Returns dict with:
        - problem: formatted problem text
        - solution: ground truth solution string
        - difficulty: computed difficulty score
        - type: 'integration'
        - sympy_F: SymPy expression for F(x) (antiderivative)
        - sympy_f: SymPy expression for f(x) (integrand)
        - technique: detected integration technique
        - scaffold_hints: dict of progressive hints
        """
        complexity = max(1, int(target_difficulty_band))

        # 1. Generate F(x)
        F_expr, diff = self.generate_random_function(complexity)

        # 2. Differentiate to get the problem f(x)
        f_expr = self._differentiate(F_expr)

        # 3. Detect technique and generate hints
        technique = self._detect_technique(f_expr)
        scaffold_hints = self._generate_scaffold_hint(f_expr, F_expr, technique)

        # 4. Format strings — use cleaner formatting for LLM consumption
        try:
            pretty_f = sp.pretty(f_expr, use_unicode=True)
        except Exception:
            pretty_f = str(f_expr)

        problem_text = f"Find the indefinite integral: ∫ ({pretty_f}) dx"
        solution_text = f"{sp.simplify(F_expr)} + C"

        return {
            "problem": problem_text,
            "difficulty": diff,
            "solution": solution_text,
            "type": "integration",
            "sympy_F": F_expr,
            "sympy_f": f_expr,
            "technique": technique,
            "scaffold_hints": scaffold_hints,
        }

    def generate_variants(self, task: Dict[str, Any], count: int = 2) -> List[Dict[str, Any]]:
        """
        LADDER Component: Recursive Decomposition for Integration.
        Breaks down sums or simplifies coefficients.

        Improved: preserves technique hints and scaffold data through decomposition.

        Args:
            task: a task dict; ``"sympy_F"`` and ``"difficulty"`` are read
                (difficulty defaults to 2 when absent).
            count: maximum number of variants taken from the summands of F.

        Returns:
            Up to ``count`` task dicts. If F is not a sum (or has no
            non-constant summand), a single freshly generated easier task
            is used instead. If the task has no ``"sympy_F"``, one easier
            task is returned regardless of ``count``.
        """
        variants = []
        F_expr = task.get("sympy_F")
        difficulty = task.get("difficulty", 2)

        if F_expr is None:
            # Fallback if task was not generated by us
            return [self.generate_task(max(1, difficulty - 2))]

        # Recursive Rule 1: Linearity (split sums)
        if isinstance(F_expr, sp.Add):
            for sub_F in F_expr.args:
                if len(variants) >= count:
                    break
                sub_f = self._differentiate(sub_F)
                if sub_f == 0:
                    # Constant summand: "∫ 0 dx" is not a useful sub-problem.
                    continue

                technique = self._detect_technique(sub_f)
                scaffold = self._generate_scaffold_hint(sub_f, sub_F, technique)

                try:
                    pretty_sub_f = sp.pretty(sub_f, use_unicode=True)
                except Exception:
                    pretty_sub_f = str(sub_f)

                variants.append({
                    "problem": f"Integrate step-variant: ∫ ({pretty_sub_f}) dx",
                    "solution": f"{sub_F} + C",
                    "difficulty": max(0.5, difficulty - 1.0),
                    "type": "integration",
                    "sympy_F": sub_F,
                    "sympy_f": sub_f,
                    "technique": technique,
                    "scaffold_hints": scaffold,
                })

        # Recursive Rule 2: Constant simplification
        if not variants:
            # Just return a simpler integral by reducing difficulty
            variants.append(self.generate_task(max(1.0, difficulty - 2.0)))

        return variants[:count]

    def generate_technique_focused_task(self, technique: str, difficulty: float = 2.0) -> Dict[str, Any]:
        """
        Generate a task that specifically targets a given integration technique.
        Useful for curriculum learning when the model struggles with a technique.

        Each generator below produces an *antiderivative* F(x); the integrand
        shown to the solver is its derivative. Unknown techniques, or any
        failure while building the task, fall back to ``generate_task``.
        """
        x = self.x

        technique_generators = {
            'power_rule': lambda: random.randint(1, 5) * x**random.randint(1, 6),
            # F = sin(k*x^2)  =>  f = 2k*x*cos(k*x^2), a textbook u = k*x^2 case.
            'u_substitution': lambda: sp.sin(random.randint(1, 3) * x**2),
            'by_parts': lambda: x * sp.exp(random.randint(1, 3) * x),
            'trigonometric': lambda: sp.sin(x)**random.randint(1, 3) * sp.cos(x),
            'exponential': lambda: random.randint(1, 5) * sp.exp(random.randint(1, 4) * x),
            'logarithmic': lambda: sp.ln(sp.Abs(x + 1)),
        }

        generator = technique_generators.get(technique)
        if generator is None:
            return self.generate_task(difficulty)

        try:
            F_expr = generator()
            f_expr = self._differentiate(F_expr)
            scaffold = self._generate_scaffold_hint(f_expr, F_expr, technique)

            try:
                pretty_f = sp.pretty(f_expr, use_unicode=True)
            except Exception:
                pretty_f = str(f_expr)

            return {
                "problem": f"Find the indefinite integral: ∫ ({pretty_f}) dx",
                "solution": f"{sp.simplify(F_expr)} + C",
                "difficulty": difficulty,
                "type": "integration",
                "sympy_F": F_expr,
                "sympy_f": f_expr,
                "technique": technique,
                "scaffold_hints": scaffold,
            }
        except Exception:
            return self.generate_task(difficulty)