# File: simpleLLM/math_expert/config.py
# Uploaded by hollywoodfrancis ("Upload 11 files", commit b8ab4a2, verified).
# Math Expert Configuration
# 1.1. Mathematical Domains and Specializations
# Topic lists per mathematical domain, keyed by the domain identifier.
# The insertion order here is the iteration order of MATH_DOMAINS.
# Some topics intentionally appear under more than one domain
# (e.g. "stochastic calculus", "category theory").
_DOMAIN_TOPICS = {
    "algebra": [
        "linear algebra",
        "abstract algebra",
        "polynomial equations",
        "matrix operations",
        "group theory",
        "ring theory",
        "field theory",
        "representation theory",
        "homological algebra",
        "category theory",
        "universal algebra",
        "non-associative algebras",
        "Lie algebras",
        "quantum groups",
        "Hopf algebras",
        "K-theory",
    ],
    "calculus": [
        "single variable calculus",
        "multivariable calculus",
        "differential equations",
        "partial differential equations",
        "vector calculus",
        "complex analysis",
        "functional analysis",
        "measure theory",
        "differential geometry",
        "geometric measure theory",
        "non-standard analysis",
        "stochastic calculus",
        "calculus of variations",
        "symplectic geometry",
    ],
    "proof_writing": [
        "induction",
        "contradiction",
        "direct proof",
        "proof by cases",
        "epsilon-delta proofs",
        "existence proofs",
        "uniqueness proofs",
        "category theory proofs",
        "homotopy type theory",
        "model theory",
        "proof theory",
        "set theory",
        "constructive mathematics",
        "proof complexity",
    ],
    "probability": [
        "probability theory",
        "random variables",
        "distributions",
        "stochastic processes",
        "Bayesian inference",
        "Markov chains",
        "measure-theoretic probability",
        "stochastic calculus",
        "martingales",
        "large deviations",
        "ergodic theory",
        "random matrix theory",
        "stochastic PDEs",
    ],
    "statistics": [
        "descriptive statistics",
        "inferential statistics",
        "hypothesis testing",
        "regression analysis",
        "time series analysis",
        "bayesian statistics",
        "non-parametric methods",
        "statistical learning theory",
        "high-dimensional statistics",
        "causal inference",
        "spatial statistics",
        "robust statistics",
        "computational statistics",
    ],
    "number_theory": [
        "prime numbers",
        "modular arithmetic",
        "diophantine equations",
        "cryptography",
        "analytic number theory",
        "algebraic number theory",
        "elliptic curves",
        "automorphic forms",
        "arithmetic geometry",
        "p-adic analysis",
        "analytic continuation",
        "modular forms",
        "zeta functions",
    ],
    "geometry": [
        "euclidean geometry",
        "non-euclidean geometry",
        "differential geometry",
        "topology",
        "algebraic geometry",
        "projective geometry",
        "symplectic geometry",
        "algebraic topology",
        "geometric analysis",
        "geometric group theory",
        "Riemannian geometry",
        "Kähler geometry",
        "hyperbolic geometry",
    ],
    "combinatorics": [
        "graph theory",
        "enumerative combinatorics",
        "combinatorial optimization",
        "matroid theory",
        "combinatorial designs",
        "extremal combinatorics",
        "probabilistic combinatorics",
        "algebraic combinatorics",
        "topological combinatorics",
        "combinatorial geometry",
        "Ramsey theory",
    ],
    "logic": [
        "first-order logic",
        "model theory",
        "proof theory",
        "set theory",
        "computability theory",
        "type theory",
        "category theory",
        "modal logic",
        "temporal logic",
        "constructive logic",
        "intuitionistic logic",
        "proof complexity",
    ],
    "theoretical_cs": [
        "computational complexity",
        "algorithms",
        "cryptography",
        "quantum computing",
        "machine learning theory",
        "formal verification",
        "type systems",
        "programming language theory",
        "distributed computing",
        "parallel algorithms",
        "computational geometry",
        "randomized algorithms",
    ],
    "applied_math": [
        "numerical analysis",
        "optimization",
        "control theory",
        "mathematical physics",
        "fluid dynamics",
        "quantum mechanics",
        "relativity",
        "mathematical biology",
        "financial mathematics",
        "signal processing",
        "data assimilation",
        "inverse problems",
    ],
}

# Public domain table: {domain: {"level": ..., "topics": [...]}}.
# Every domain is configured at the "expert" level, so the entries are derived
# from the topic table instead of repeating the level in each one. Each entry
# gets its own copy of the topic list so the public table does not alias the
# private one.
MATH_DOMAINS = {
    domain: {"level": "expert", "topics": list(topics)}
    for domain, topics in _DOMAIN_TOPICS.items()
}
# 1.2. Core Tasks
# One row per core task:
#   (task_type, description, example, option_key, option_values)
# ``option_key`` is the name of the task-specific list of variants; it differs
# from task to task (e.g. "difficulty_levels", "proof_types").
_TASK_ROWS = (
    (
        "problem_solving",
        "Solve complex mathematical problems",
        "Prove the Riemann Hypothesis",
        "difficulty_levels",
        ["basic", "intermediate", "advanced", "research_level", "open_problem"],
    ),
    (
        "proof_writing",
        "Prove mathematical statements with advanced techniques",
        "Prove Fermat's Last Theorem using elliptic curves",
        "proof_types",
        [
            "induction",
            "contradiction",
            "direct",
            "cases",
            "category_theory",
            "homotopy_type",
            "model_theory",
            "proof_complexity",
            "constructive",
        ],
    ),
    (
        "calculus_computation",
        "Perform advanced calculus operations",
        "Solve Navier-Stokes equations for turbulence",
        "operation_types",
        [
            "differentiation",
            "integration",
            "limits",
            "functional_analysis",
            "measure_theory",
            "stochastic_calculus",
            "geometric_measure_theory",
        ],
    ),
    (
        "symbolic_computation",
        "Manipulate complex mathematical expressions",
        "Simplify tensor equations in general relativity",
        "expression_types",
        [
            "polynomial",
            "rational",
            "trigonometric",
            "exponential",
            "tensor",
            "operator",
            "Lie_algebra",
            "Hopf_algebra",
        ],
    ),
    (
        "concept_explanation",
        "Explain advanced mathematical concepts",
        "Explain the Langlands program",
        "explanation_types",
        [
            "definition",
            "intuition",
            "application",
            "example",
            "formal",
            "geometric",
            "historical",
            "pedagogical",
        ],
    ),
    (
        "statistical_analysis",
        "Perform advanced statistical analysis",
        "Analyze high-dimensional genomic data",
        "statistical_methods",
        [
            "regression",
            "hypothesis_testing",
            "confidence_intervals",
            "bayesian_methods",
            "non_parametric",
            "causal_inference",
            "computational_methods",
        ],
    ),
    (
        "probability_calculation",
        "Calculate complex probabilities",
        "Calculate phase transitions in random matrix theory",
        "distributions",
        [
            "binomial",
            "normal",
            "poisson",
            "exponential",
            "multivariate",
            "stochastic_processes",
            "random_matrix",
            "levy_processes",
        ],
    ),
    (
        "number_theory_problem",
        "Solve advanced number theory problems",
        "Prove the Birch and Swinnerton-Dyer conjecture",
        "problem_types",
        [
            "prime",
            "modular",
            "diophantine",
            "analytic",
            "algebraic",
            "elliptic_curve",
            "modular_form",
        ],
    ),
    (
        "geometric_construction",
        "Construct and analyze complex geometric objects",
        "Construct a Calabi-Yau manifold",
        "construction_types",
        [
            "euclidean",
            "non_euclidean",
            "projective",
            "differential",
            "algebraic",
            "symplectic",
            "topological",
        ],
    ),
    (
        "mathematical_modeling",
        "Create advanced mathematical models",
        "Model quantum field theory",
        "model_types",
        [
            "continuous",
            "discrete",
            "stochastic",
            "partial_differential",
            "non_linear",
            "quantum",
            "statistical",
        ],
    ),
    (
        "proof_verification",
        "Verify complex mathematical proofs",
        "Verify the proof of the Four Color Theorem",
        "verification_methods",
        [
            "formal_verification",
            "model_checking",
            "proof_assistant",
            "automated_reasoning",
            "interactive_theorem_proving",
        ],
    ),
    (
        "algorithm_design",
        "Design and analyze mathematical algorithms",
        "Design a quantum algorithm for factorization",
        "algorithm_types",
        [
            "numerical",
            "combinatorial",
            "geometric",
            "algebraic",
            "probabilistic",
            "quantum",
            "parallel",
        ],
    ),
    (
        "research_paper_analysis",
        "Analyze and explain mathematical research papers",
        "Explain Wiles' proof of Fermat's Last Theorem",
        "analysis_types",
        ["technical", "historical", "pedagogical", "critical", "extensional"],
    ),
    (
        "open_problem_analysis",
        "Analyze and make progress on open mathematical problems",
        "Analyze the Collatz conjecture",
        "problem_classes",
        [
            "number_theory",
            "combinatorics",
            "analysis",
            "algebra",
            "geometry",
            "probability",
        ],
    ),
    (
        "mathematical_philosophy",
        "Analyze philosophical aspects of mathematics",
        "Explain the foundations of mathematics",
        "philosophical_topics",
        [
            "foundations",
            "philosophy_of_math",
            "logic",
            "set_theory",
            "constructivism",
            "intuitionism",
        ],
    ),
    (
        "mathematical_software_development",
        "Develop mathematical software and algorithms",
        "Implement a new numerical method",
        "software_types",
        [
            "numerical",
            "symbolic",
            "proof_assistant",
            "visualization",
            "simulation",
            "optimization",
        ],
    ),
)

# Public task list. Each entry is a dict with the three common keys
# ("task_type", "description", "example") followed by the single
# task-specific key carrying that task's list of variants.
CORE_TASKS = [
    {
        "task_type": task_type,
        "description": description,
        "example": example,
        option_key: list(option_values),
    }
    for task_type, description, example, option_key, option_values in _TASK_ROWS
]
# Dataset Configuration
# One row per dataset: (config key, dataset_name, split, fields to use).
# NOTE(review): whether these dataset names resolve on the Hugging Face Hub
# cannot be checked from this file — confirm against the loader.
# NOTE(review): "train-hard" looks like a directory name from the raw DeepMind
# mathematics archive rather than a Hub split name — confirm the loader
# accepts it.
_DATASET_ROWS = (
    ("proofnet", "proofnet", "train", ("problem", "solution", "proof_steps")),
    (
        "math_dataset",
        "deepmind/mathematics_dataset",
        "train-hard",
        ("question", "answer", "steps"),
    ),
    ("gsm8k", "gsm8k", "train", ("question", "answer")),
    ("mathlib", "mathlib", "train", ("theorem", "proof", "dependencies")),
    ("arxiv_math", "arxiv_math", "train", ("paper", "equations", "proofs")),
    (
        "clay_institute",
        "clay_institute_problems",
        "train",
        ("problem", "background", "current_status", "approaches"),
    ),
    (
        "open_problems",
        "open_math_problems",
        "train",
        ("problem", "category", "history", "attempts"),
    ),
    (
        "research_papers",
        "math_research_papers",
        "train",
        ("title", "abstract", "content", "proofs", "theorems"),
    ),
)

# Public dataset table, keyed by config key. Every entry has the same four
# keys; "source" is "huggingface" for all of them and "use_fields" is a list.
DATASETS = {
    key: {
        "source": "huggingface",
        "dataset_name": dataset_name,
        "split": split,
        "use_fields": list(fields),
    }
    for key, dataset_name, split, fields in _DATASET_ROWS
}
# Data Processing Configuration
# Content kind -> name of the representation used when normalizing it.
# NOTE(review): the values look like library/format names; how they are
# interpreted is decided by the consumer, which is not visible in this file.
_NORMALIZATION = {
    "equations": "sympy",
    "latex": "plaintext",
    "proof_steps": "yaml",
    "tensor_operations": "torch",
    "quantum_operations": "qiskit",
    "geometric_objects": "geometric_algebra",
    "category_theory": "category_theory",
}

# Lower/upper bounds applied during validation.
# NOTE(review): presumably step counts and text lengths; the unit of the
# length bounds (characters vs. tokens) is not stated here — confirm.
_VALIDATION_LIMITS = {
    "min_steps": 2,
    "max_steps": 200,
    "min_length": 10,
    "max_length": 100000,
}

# Public data-processing settings: output format plus the two tables above.
DATA_PROCESSING = {
    "format": "jsonl",
    "normalization": _NORMALIZATION,
    "validation": _VALIDATION_LIMITS,
}
if __name__ == "__main__":
    # Script entry point: print a banner followed by the number of entries in
    # each top-level configuration table. All four prints belong to the guard
    # so that importing this module stays silent.
    print("Math Expert Configuration Loaded")
    print(f"Number of domains: {len(MATH_DOMAINS)}")
    print(f"Number of tasks: {len(CORE_TASKS)}")
    print(f"Number of datasets: {len(DATASETS)}")