# AutoMathReasoner/env/generator.py
# HarshitShri026's picture
# push
# 973cd6f
import sympy as sp
import random
from typing import Dict, Any, Tuple, List, Optional
class TaskGenerationEngine:
    """
    Symbolic calculus task generator with scaffold hints and technique metadata.

    Tasks are built "backwards": an antiderivative F(x) is assembled from a
    small library of functions and differentiated to obtain the integrand
    f(x), so every generated problem ships with a known closed-form answer.

    Improvements over v1:
    1. Stores which integration technique is needed (u-sub, by-parts, etc.)
    2. Generates scaffold hints (first step of solution) for Scaf-GRPO
    3. Better prompt formatting using LaTeX-style notation
    4. More diverse function compositions
    5. Technique-aware variant generation
    """

    def __init__(self):
        # NOTE(review): x carries no assumptions (e.g. real=True). Confirm that
        # derivatives of the Abs-wrapped log component print acceptably.
        self.x = sp.Symbol('x')

        # Components for generating random functions F(x).
        # Each entry maps (argument, integer coefficient) -> expression.
        self.basic_functions = [
            lambda x, c: x**c,
            lambda x, c: sp.sin(c*x),
            lambda x, c: sp.cos(c*x),
            lambda x, c: sp.exp(c*x),
            lambda x, c: sp.ln(sp.Abs(c*x + 1)),  # +1 avoids log(0)
        ]

        # Additional functions for higher difficulty
        self.advanced_functions = [
            lambda x, c: sp.tan(c*x),
            lambda x, c: sp.atan(c*x),
            lambda x, c: sp.sinh(c*x),
            lambda x, c: sp.cosh(c*x),
            lambda x, c: x**c * sp.exp(x),         # Requires integration by parts
            lambda x, c: sp.sin(x) * sp.cos(c*x),  # Product of trig
        ]

        # Technique detection patterns. Insertion order matters:
        # _detect_technique returns the first detector that matches.
        self._technique_detectors = {
            'power_rule': self._is_power_rule,
            'u_substitution': self._is_u_substitution,
            'by_parts': self._is_by_parts,
            'trigonometric': self._is_trig_integral,
            'exponential': self._is_exponential,
            'logarithmic': self._is_logarithmic,
        }

    def _score_difficulty(self, components: int, nesting: int) -> float:
        """D = num_components + degree_of_nesting * 2"""
        return float(components + nesting * 2.0)

    def _detect_technique(self, f_expr) -> str:
        """
        Detect which integration technique is most appropriate for f(x).

        Detectors are tried in registration order and the first match wins;
        'power_rule' is returned when none of them matches.
        """
        for technique, detector in self._technique_detectors.items():
            if detector(f_expr):
                return technique
        return 'power_rule'  # Default fallback

    def _is_power_rule(self, expr) -> bool:
        """Check if expression is a simple polynomial in x."""
        return expr.is_polynomial(self.x)

    def _substitution_candidates(self, factor) -> List[Any]:
        """Collect sub-expressions of *factor* that could play the role of u."""
        candidates = []
        for applied in factor.atoms(sp.Function):
            if not applied.has(self.x):
                continue
            # Both the applied function itself (u = sin(x) in sin(x)**2*cos(x))
            # and its argument (u = x**2 in x*sin(x**2)) are plausible choices.
            candidates.append(applied)
            candidates.extend(arg for arg in applied.args if arg.has(self.x))
        return candidates

    def _is_u_substitution(self, expr) -> bool:
        """
        Check if expression matches the f(g(x)) * g'(x) pattern.

        A product qualifies when, for some sin/cos/exp/log factor, the
        remaining factors are a constant multiple of the derivative of a
        candidate inner expression u. Merely containing such a factor is not
        enough, otherwise products like x*exp(x) would be claimed here and
        never reach the by-parts detector.
        """
        if not isinstance(expr, sp.Mul):
            return False

        x = self.x
        factors = expr.args
        for index, factor in enumerate(factors):
            if factor.is_polynomial(x) or not factor.has(sp.sin, sp.cos, sp.exp, sp.log):
                continue
            remainder = sp.Mul(*factors[:index], *factors[index + 1:])
            for u in self._substitution_candidates(factor):
                du = sp.diff(u, x)
                if du == 0:
                    continue
                ratio = remainder / du
                # Cheap structural check first; simplify only when needed.
                if x in ratio.free_symbols:
                    ratio = sp.simplify(ratio)
                if x not in ratio.free_symbols:
                    return True
        return False

    def _is_by_parts(self, expr) -> bool:
        """
        Check if expression likely needs integration by parts.

        Requires a product with a polynomial factor that depends on x and a
        sin/cos/exp/log factor that depends on x; a bare numeric coefficient
        does not count as the polynomial part.
        """
        if not isinstance(expr, sp.Mul):
            return False
        x = self.x
        has_poly = any(a.has(x) and a.is_polynomial(x) for a in expr.args)
        has_transcendental = any(
            a.has(x) and a.has(sp.sin, sp.cos, sp.exp, sp.log) for a in expr.args
        )
        return has_poly and has_transcendental

    def _is_trig_integral(self, expr) -> bool:
        """Check if expression is primarily trigonometric (no exp/log present)."""
        return expr.has(sp.sin, sp.cos, sp.tan) and not expr.has(sp.exp, sp.log)

    def _is_exponential(self, expr) -> bool:
        """Check if expression is primarily exponential (no sin/cos present)."""
        return expr.has(sp.exp) and not expr.has(sp.sin, sp.cos)

    def _is_logarithmic(self, expr) -> bool:
        """Check if expression involves logarithms."""
        # sp.ln is an alias of sp.log, so a single class suffices.
        return expr.has(sp.log)

    def _generate_scaffold_hint(self, f_expr, F_expr, technique: str) -> Dict[str, str]:
        """
        Generate a scaffold hint for the problem.

        Args:
            f_expr: the integrand (currently unused; kept for interface stability).
            F_expr: the antiderivative, used for the level-3 hint.
            technique: technique label selecting the hint wording.

        Returns a dict with:
        - 'technique': which technique to use
        - 'hint_level_1': gentle nudge (technique name)
        - 'hint_level_2': first step of solution
        - 'hint_level_3': most of the solution
        """
        technique_descriptions = {
            'power_rule': "Try applying the power rule: ∫x^n dx = x^(n+1)/(n+1) + C",
            'u_substitution': "Try u-substitution. Look for a composite function and its derivative.",
            'by_parts': "Try integration by parts: ∫u dv = uv - ∫v du",
            'trigonometric': "Try using trigonometric identities to simplify first.",
            'exponential': "Remember that ∫e^(ax) dx = (1/a)e^(ax) + C",
            'logarithmic': "Remember that ∫(1/x) dx = ln|x| + C",
        }
        nudge = technique_descriptions.get(
            technique, "Try identifying the integration technique needed."
        )

        # Level 2: Show the substitution or setup. The nudge already starts
        # with its own verb, and its trailing period is dropped before a
        # follow-up sentence is appended, so the text never reads "Try Try"
        # or ends a sentence with "..".
        if technique == 'u_substitution':
            level_2 = f"Hint: {nudge.rstrip('.')}. The integrand has a composite structure."
        elif technique == 'by_parts':
            level_2 = (
                f"Hint: {nudge.rstrip('.')}. Identify which part to differentiate (u) "
                "and which to integrate (dv)."
            )
        else:
            level_2 = f"Hint: {nudge}"

        hints = {
            'technique': technique,
            'hint_level_1': nudge,
            'hint_level_2': level_2,
            'hint_level_3': '',
        }

        # Level 3: Show the first term of the answer. Deliberately best-effort:
        # any failure while simplifying or printing degrades to the level-2 text.
        try:
            simplified = sp.simplify(F_expr)
            if isinstance(simplified, sp.Add):
                first_term = simplified.args[0]
                hints['hint_level_3'] = f"The answer starts with: {sp.pretty(first_term)} + ..."
            else:
                hints['hint_level_3'] = f"The answer has the form: {type(simplified).__name__} expression"
        except Exception:
            hints['hint_level_3'] = hints['hint_level_2']

        return hints

    def _format_integrand(self, f_expr) -> str:
        """Pretty-print an integrand, falling back to str() if printing fails."""
        try:
            return sp.pretty(f_expr, use_unicode=True)
        except Exception:
            return str(f_expr)

    def _build_task(
        self,
        prompt_prefix: str,
        F_expr,
        f_expr,
        solution_expr,
        difficulty: float,
        technique: str,
    ) -> Dict[str, Any]:
        """Assemble the task dict shared by every public generator method."""
        return {
            "problem": f"{prompt_prefix}: ∫ ({self._format_integrand(f_expr)}) dx",
            "difficulty": difficulty,
            "solution": f"{solution_expr} + C",
            "type": "integration",
            "sympy_F": F_expr,
            "sympy_f": f_expr,
            "technique": technique,
            "scaffold_hints": self._generate_scaffold_hint(f_expr, F_expr, technique),
        }

    def generate_random_function(self, complexity: int) -> Tuple[Any, float]:
        """
        Generates a random F(x) with appropriate complexity.

        The number of summed components is complexity/2 (at least 1) and the
        nesting depth is complexity/4, both truncated. Advanced building
        blocks unlock at complexity >= 4 and >= 6.

        Returns:
            (F_expr, difficulty) where difficulty comes from _score_difficulty.
        """
        num_components = max(1, int(complexity / 2))
        nesting = max(0, int(complexity / 4))

        # Use advanced functions at higher complexity
        available_funcs = list(self.basic_functions)
        if complexity >= 4:
            available_funcs.extend(self.advanced_functions[:3])
        if complexity >= 6:
            available_funcs.extend(self.advanced_functions[3:])

        f_expr = sp.Integer(0)
        for _ in range(num_components):
            comp_func = random.choice(available_funcs)
            coeff = random.randint(1, 5)
            try:
                term = comp_func(self.x, coeff)
            except Exception:
                # Fallback to simple polynomial
                term = self.x ** coeff

            # Apply nesting: wrap the term in randomly chosen basic functions.
            for _depth in range(nesting):
                outer = random.choice(self.basic_functions)
                try:
                    term = outer(term, 1)
                except Exception:
                    break

            f_expr += random.randint(1, 10) * term

        return f_expr, self._score_difficulty(num_components, nesting)

    def generate_task(self, target_difficulty_band: float) -> Dict[str, Any]:
        """
        Provides an indefinite integral task with technique hints and scaffold support.

        Returns dict with:
        - problem: formatted problem text
        - solution: ground truth solution string
        - difficulty: computed difficulty score
        - type: 'integration'
        - sympy_F: SymPy expression for F(x) (antiderivative)
        - sympy_f: SymPy expression for f(x) (integrand)
        - technique: detected integration technique
        - scaffold_hints: dict of progressive hints
        """
        complexity = max(1, int(target_difficulty_band))

        # 1. Generate F(x)
        F_expr, diff = self.generate_random_function(complexity)

        # 2. Differentiate to get the problem f(x)
        f_expr = sp.diff(F_expr, self.x)

        # 3. Detect technique; hints and formatting happen in _build_task
        technique = self._detect_technique(f_expr)

        return self._build_task(
            "Find the indefinite integral",
            F_expr,
            f_expr,
            sp.simplify(F_expr),
            diff,
            technique,
        )

    def generate_variants(self, task: Dict[str, Any], count: int = 2) -> List[Dict[str, Any]]:
        """
        LADDER Component: Recursive Decomposition for Integration.

        Breaks down sums or simplifies coefficients.
        Improved: preserves technique hints and scaffold data through decomposition.

        Args:
            task: a task dict; 'sympy_F' and 'difficulty' are read when present
                (difficulty defaults to 2 when missing).
            count: maximum number of variants to return; non-positive yields [].

        Returns:
            Up to *count* easier task dicts. When F(x) cannot be split into
            summands, a single freshly generated easier task is returned.
        """
        variants: List[Dict[str, Any]] = []
        if count <= 0:
            return variants

        # Read once with a default so a task lacking the key cannot raise KeyError.
        base_difficulty = task.get("difficulty", 2)

        F_expr = task.get("sympy_F")
        if F_expr is None:
            # Fallback if task was not generated by us
            return [self.generate_task(max(1, base_difficulty - 2))]

        # Recursive Rule 1: Linearity (split sums)
        if isinstance(F_expr, sp.Add):
            for sub_F in F_expr.args:
                if len(variants) >= count:
                    break
                sub_f = sp.diff(sub_F, self.x)
                if sub_f == 0:
                    # A constant summand would yield the degenerate "∫ (0) dx".
                    continue
                variants.append(
                    self._build_task(
                        "Integrate step-variant",
                        sub_F,
                        sub_f,
                        sub_F,
                        max(0.5, base_difficulty - 1.0),
                        self._detect_technique(sub_f),
                    )
                )

        # Recursive Rule 2: Constant simplification
        if not variants:
            # Just return a simpler integral by reducing difficulty
            variants.append(self.generate_task(max(1.0, base_difficulty - 2.0)))

        return variants

    def generate_technique_focused_task(self, technique: str, difficulty: float = 2.0) -> Dict[str, Any]:
        """
        Generate a task that specifically targets a given integration technique.

        Useful for curriculum learning when the model struggles with a technique.
        Unknown techniques, and any failure while building the task, fall back
        to generate_task(difficulty).

        NOTE(review): each generator below yields the antiderivative F(x), and
        the integrand shown to the solver is its derivative. For
        'u_substitution' and 'by_parts' the generated expression looks like a
        textbook integrand instead; confirm which role is intended.
        """
        x = self.x
        technique_generators = {
            'power_rule': lambda: random.randint(1, 5) * x**random.randint(1, 6),
            'u_substitution': lambda: sp.sin(random.randint(1, 3) * x**2) * x,
            'by_parts': lambda: x * sp.exp(random.randint(1, 3) * x),
            'trigonometric': lambda: sp.sin(x)**random.randint(1, 3) * sp.cos(x),
            'exponential': lambda: random.randint(1, 5) * sp.exp(random.randint(1, 4) * x),
            'logarithmic': lambda: sp.ln(sp.Abs(x + 1)),
        }

        generator = technique_generators.get(technique)
        if generator is None:
            return self.generate_task(difficulty)

        try:
            F_expr = generator()
            f_expr = sp.diff(F_expr, x)
            return self._build_task(
                "Find the indefinite integral",
                F_expr,
                f_expr,
                sp.simplify(F_expr),
                difficulty,
                technique,
            )
        except Exception:
            # Best-effort: a focused task that cannot be built degrades to a
            # generic one rather than aborting the caller's curriculum loop.
            return self.generate_task(difficulty)