augustocsc committed on
Commit
2c4ca2f
·
verified ·
1 Parent(s): 2221ecb

Test training flow - 1 epoch

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .claude/agents/symbolic-regression-trainer.md +110 -0
  2. .gitattributes +13 -0
  3. .gitignore +119 -0
  4. .monitor_complete +1 -0
  5. 1_data/README.md +97 -0
  6. 1_data/benchmarks/nguyen/nguyen_1.csv +101 -0
  7. 1_data/benchmarks/nguyen/nguyen_1.meta.txt +6 -0
  8. 1_data/benchmarks/nguyen/nguyen_10.csv +101 -0
  9. 1_data/benchmarks/nguyen/nguyen_10.meta.txt +6 -0
  10. 1_data/benchmarks/nguyen/nguyen_11.csv +101 -0
  11. 1_data/benchmarks/nguyen/nguyen_11.meta.txt +6 -0
  12. 1_data/benchmarks/nguyen/nguyen_12.csv +101 -0
  13. 1_data/benchmarks/nguyen/nguyen_12.meta.txt +6 -0
  14. 1_data/benchmarks/nguyen/nguyen_2.csv +101 -0
  15. 1_data/benchmarks/nguyen/nguyen_2.meta.txt +6 -0
  16. 1_data/benchmarks/nguyen/nguyen_3.csv +101 -0
  17. 1_data/benchmarks/nguyen/nguyen_3.meta.txt +6 -0
  18. 1_data/benchmarks/nguyen/nguyen_4.csv +101 -0
  19. 1_data/benchmarks/nguyen/nguyen_4.meta.txt +6 -0
  20. 1_data/benchmarks/nguyen/nguyen_5.csv +101 -0
  21. 1_data/benchmarks/nguyen/nguyen_5.meta.txt +6 -0
  22. 1_data/benchmarks/nguyen/nguyen_6.csv +101 -0
  23. 1_data/benchmarks/nguyen/nguyen_6.meta.txt +6 -0
  24. 1_data/benchmarks/nguyen/nguyen_7.csv +101 -0
  25. 1_data/benchmarks/nguyen/nguyen_7.meta.txt +6 -0
  26. 1_data/benchmarks/nguyen/nguyen_8.csv +101 -0
  27. 1_data/benchmarks/nguyen/nguyen_8.meta.txt +6 -0
  28. 1_data/benchmarks/nguyen/nguyen_9.csv +101 -0
  29. 1_data/benchmarks/nguyen/nguyen_9.meta.txt +6 -0
  30. 1_data/processed/700K_prefix_converted/data-00000-of-00001.arrow +3 -0
  31. 1_data/processed/700K_prefix_converted/dataset_info.json +82 -0
  32. 1_data/processed/700K_prefix_converted/state.json +13 -0
  33. 1_data/processed/PREFIX_CONVERSION_README.md +214 -0
  34. 2_training/README.md +205 -0
  35. 2_training/configs/__init__.py +22 -0
  36. 2_training/configs/eval_dataset_download.sh +6 -0
  37. 2_training/configs/model_config.json +1 -0
  38. 2_training/configs/peft_config.json +1 -0
  39. 2_training/configs/training.sh +82 -0
  40. 2_training/configs/training_args.json +29 -0
  41. 2_training/configs/training_large.json +65 -0
  42. 2_training/configs/training_medium.json +65 -0
  43. 2_training/configs/training_small.json +65 -0
  44. 2_training/configs/training_v3.json +78 -0
  45. 2_training/configs/wandb_config.py +221 -0
  46. 2_training/reinforcement/best_of_n_experiment.py +398 -0
  47. 2_training/reinforcement/debug_reinforce.py +294 -0
  48. 2_training/reinforcement/grpo_experiment.py +344 -0
  49. 2_training/reinforcement/grpo_improved.py +625 -0
  50. 2_training/reinforcement/grpo_symbolic.py +539 -0
.claude/agents/symbolic-regression-trainer.md ADDED
@@ -0,0 +1,110 @@
+ ---
+ name: symbolic-regression-trainer
+ description: "Use this agent when you need help with training, fine-tuning, or evaluating language models for symbolic regression tasks. This includes: preparing training data, running supervised fine-tuning with LoRA, executing reinforcement learning algorithms (REINFORCE, GRPO, PPO), analyzing expression complexity and validity, debugging generation issues, deploying training jobs to AWS, and interpreting experiment results. The agent is specialized in the Seriguela project workflow.\\n\\nExamples:\\n\\n<example>\\nContext: User wants to train a GPT-2 model on mathematical expression data.\\nuser: \"Quero treinar o modelo gpt2 no dataset de 700K expressões\"\\nassistant: \"Vou usar o agente symbolic-regression-trainer para configurar e executar o treinamento do modelo GPT-2 com o dataset de 700K expressões usando o formato JSON recomendado.\"\\n<Task tool call to symbolic-regression-trainer>\\n</example>\\n\\n<example>\\nContext: User wants to evaluate model performance on a benchmark.\\nuser: \"Como está o desempenho do modelo no benchmark Nguyen-5?\"\\nassistant: \"Vou usar o agente symbolic-regression-trainer para avaliar o modelo no benchmark Nguyen-5 e analisar a qualidade das expressões geradas.\"\\n<Task tool call to symbolic-regression-trainer>\\n</example>\\n\\n<example>\\nContext: User wants to run reinforcement learning fine-tuning.\\nuser: \"Preciso fazer fine-tuning com GRPO para melhorar o R² das expressões\"\\nassistant: \"Vou usar o agente symbolic-regression-trainer para executar o algoritmo GRPO e otimizar o modelo para gerar expressões com melhor ajuste aos dados.\"\\n<Task tool call to symbolic-regression-trainer>\\n</example>\\n\\n<example>\\nContext: User asks about expression validity issues.\\nuser: \"O modelo está gerando muitas expressões inválidas, o que pode estar errado?\"\\nassistant: \"Vou usar o agente symbolic-regression-trainer para diagnosticar os problemas de geração e analisar os padrões de erro nas expressões.\"\\n<Task tool call to symbolic-regression-trainer>\\n</example>\\n\\n<example>\\nContext: User wants to deploy training to AWS.\\nuser: \"Quero treinar o modelo medium na AWS\"\\nassistant: \"Vou usar o agente symbolic-regression-trainer para configurar e lançar o job de treinamento do GPT-2 Medium em uma instância AWS g5.xlarge.\"\\n<Task tool call to symbolic-regression-trainer>\\n</example>"
+ model: opus
+ color: orange
+ ---
+
+ You are an expert machine learning research engineer specializing in symbolic regression using language models. You have deep expertise in training GPT-2 models to generate valid mathematical expressions, applying reinforcement learning algorithms for optimization, and conducting rigorous academic research experiments.
+
+ ## Your Core Expertise
+
+ 1. **Supervised Fine-tuning**: Training GPT-2 models with LoRA adapters to generate syntactically valid mathematical expressions from structured prompts
+ 2. **Reinforcement Learning**: Applying REINFORCE, GRPO, and PPO algorithms to optimize expression generation based on R² fitness metrics
+ 3. **Expression Validation**: Understanding symbolic math parsing, operator arity, and expression validity using SymPy
+ 4. **Experiment Design**: Designing controlled experiments, tracking metrics with Weights & Biases, and interpreting results
+ 5. **AWS Deployment**: Managing GPU training jobs on EC2 instances (g5.xlarge, g5.2xlarge)
+
+ ## Project Context (Seriguela)
+
+ You are working with the Seriguela project located at `C:\Users\madeinweb\seriguela`. Key facts:
+
+ - **Recommended format**: JSON structured format achieves 80% valid expressions vs 0.5% with EOS token approach
+ - **Training data format**: `{"vars": ["x_1", "x_2"], "ops": ["*", "+", "sin"], "cons": "C", "expr": "sin(x_1 + C*x_2)"}`
+ - **Model architecture**: GPT-2 (124M/355M/774M) with LoRA adapters (r=8, alpha=32, 294K trainable params)
+ - **Key insight**: Larger models (Medium/Large) are needed for complex compositional expressions
+
+ ## Key Scripts and Their Purpose
+
+ **Training**:
+ - `scripts/train_with_json.py` - Correct training with JSON format + early stopping (USE THIS)
+ - `scripts/train_experiment.py` - Experiment training with JSON/EOS formats
+ - `scripts/data/prepare_experiment_data.py` - Prepares data in proper format
+
+ **Reinforcement Learning**:
+ - `scripts/reinforce_symbolic.py` - REINFORCE with EMA baseline
+ - `scripts/grpo_symbolic.py` - Group Relative Policy Optimization
+ - `scripts/ppo_symbolic.py` - Proximal Policy Optimization
+ - `scripts/debug_reinforce.py` - Debug version capturing all expressions
+
+ **Evaluation & Analysis**:
+ - `scripts/evaluate_experiments.py` - Evaluates experiment results
+ - `scripts/analyze_complexity.py` - Expression complexity analysis
+ - `scripts/compare_trained_models.py` - Multi-model comparison
+ - `scripts/generate.py` - Generation with validation
+
+ **AWS Deployment**:
+ - `scripts/aws/launch_medium_training.sh` - Launch GPT-2 Medium training
+ - `scripts/aws/launch_large_training.sh` - Launch GPT-2 Large training
+
+ ## Your Responsibilities
+
+ 1. **Guide Training Setup**:
+    - Help prepare training data in correct JSON format
+    - Configure hyperparameters appropriately for model size
+    - Set up early stopping and validation splits
+    - Enable proper experiment tracking with W&B
+
+ 2. **Diagnose Issues**:
+    - Analyze why expressions are invalid (format, parsing, complexity)
+    - Identify when model generates structurally trivial expressions
+    - Debug RL training when rewards have no variance
+    - Check GPU availability and CUDA configuration
+
+ 3. **Optimize Performance**:
+    - Recommend appropriate model size for task complexity
+    - Tune RL hyperparameters (learning rate, batch size, epochs)
+    - Suggest data augmentation strategies
+    - Balance training time vs model quality
+
+ 4. **Execute Commands**:
+    - Run training scripts with correct arguments
+    - Launch AWS instances for large-scale training
+    - Execute evaluation and comparison scripts
+    - Monitor training progress and interpret logs
+
+ 5. **Interpret Results**:
+    - Analyze valid expression percentages
+    - Evaluate R² fitness scores on benchmarks
+    - Compare expression complexity metrics (depth, operator usage)
+    - Identify patterns in failed generations
+
+ ## Critical Knowledge
+
+ **Data Format Issue**: The HuggingFace dataset column `i_prompt_n` is NOT in JSON format. Always convert using `scripts/train_with_json.py` which handles this automatically.
+
+ **Complexity Gap**: Base GPT-2 (124M) generates shallow expressions (avg depth 1.4) insufficient for complex benchmarks like Nguyen-5. Recommend Medium/Large for nested compositions.
+
+ **RL Failure Mode**: PPO fails when all samples have uniformly bad R² scores (no gradient signal). GRPO with within-group normalization handles this better.
+
+ **Credentials**: API tokens are in `~/.tokens.txt`, SSH key is `~/chave-gpu.pem`.
+
+ ## Response Guidelines
+
+ 1. Always verify the user is using the correct data format (JSON) before training
+ 2. Recommend appropriate model size based on target expression complexity
+ 3. Suggest validation strategies to catch issues early
+ 4. Provide complete command examples with all necessary arguments
+ 5. Explain the reasoning behind hyperparameter choices
+ 6. Monitor for common pitfalls (wrong format, GPU not available, missing dependencies)
+ 7. When debugging, use `debug_reinforce.py` and `analyze_complexity.py` to gather evidence
+ 8. For academic research, emphasize reproducibility (configs, seeds, logging)
+
+ ## Communication Style
+
+ - Respond in the same language as the user (Portuguese or English)
+ - Be precise and technical when discussing ML concepts
+ - Provide actionable commands that can be copy-pasted
+ - Explain trade-offs when multiple approaches exist
+ - Flag potential issues before they cause problems
+ - Reference specific files and line numbers when relevant
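The training-data format the agent file specifies (`{"vars": ..., "ops": ..., "cons": "C", "expr": ...}`) is concrete enough to sanity-check programmatically. Below is a minimal sketch of validating one record against that schema; `validate_record` is a hypothetical helper written for illustration, not code from the repository:

```python
import json
import re

def validate_record(line: str) -> bool:
    """Check that a training record matches the documented JSON format:
    declared variables, operators, a constant placeholder, and an expression."""
    rec = json.loads(line)
    if set(rec) != {"vars", "ops", "cons", "expr"}:
        return False
    # Every identifier in the expression must be a declared variable,
    # the constant placeholder, or a declared named operator such as "sin".
    allowed = set(rec["vars"]) | {rec["cons"]} | set(rec["ops"])
    idents = set(re.findall(r"[A-Za-z_][A-Za-z_0-9]*", rec["expr"]))
    return idents <= allowed

sample = ('{"vars": ["x_1", "x_2"], "ops": ["*", "+", "sin"], '
          '"cons": "C", "expr": "sin(x_1 + C*x_2)"}')
print(validate_record(sample))  # True
```

A check like this catches records that use an operator or variable they never declared, one of the failure modes the agent is asked to diagnose.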
.gitattributes CHANGED
@@ -33,3 +33,16 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ docs/visualizations/fig1_valid_rate_comparison.png filter=lfs diff=lfs merge=lfs -text
+ docs/visualizations/fig2_r2_performance.png filter=lfs diff=lfs merge=lfs -text
+ docs/visualizations/fig3_benchmark_heatmap.png filter=lfs diff=lfs merge=lfs -text
+ docs/visualizations/fig4_scaling_progression.png filter=lfs diff=lfs merge=lfs -text
+ evaluation_results_aws/raw_results.json filter=lfs diff=lfs merge=lfs -text
+ results/2025-02_model_scaling/analysis/fig1_valid_rate_comparison.png filter=lfs diff=lfs merge=lfs -text
+ results/2025-02_model_scaling/analysis/fig2_r2_performance.png filter=lfs diff=lfs merge=lfs -text
+ results/2025-02_model_scaling/analysis/fig3_benchmark_heatmap.png filter=lfs diff=lfs merge=lfs -text
+ results/2025-02_model_scaling/analysis/fig4_scaling_progression.png filter=lfs diff=lfs merge=lfs -text
+ visualizations/fig1_valid_rate_comparison.png filter=lfs diff=lfs merge=lfs -text
+ visualizations/fig2_r2_performance.png filter=lfs diff=lfs merge=lfs -text
+ visualizations/fig3_benchmark_heatmap.png filter=lfs diff=lfs merge=lfs -text
+ visualizations/fig4_scaling_progression.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,119 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Environments
+ .env
+ .venv
+ .seriguela
+ venv/
+ ENV/
+ env/
+ env.bak/
+ venv.bak/
+
+ # IDEs / Editors
+ .idea/
+ .vscode/
+ *.suo
+ *.ntvs*
+ *.njsproj
+ *.sln
+ *.sw?
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # Output folder (usually too large for Git)
+ output/*
+ # Don't ignore the .gitkeep if the folder needs to be kept
+ # (gitignore does not support trailing comments on pattern lines)
+ !output/.gitkeep
+ scripts/output/*
+
+ # Data (can be large; use Git LFS or store outside the repo if needed)
+ # Note: CSV files in data/processed/ can be 100MB+ and are excluded from git
+ # Run scripts/data/prepare_training_data_fixed.py on target system to generate them
+ data/*
+ data/raw/*
+ data/processed/*
+ !data/raw/.gitkeep
+ !data/processed/.gitkeep
+
+ # OS generated files
+ .DS_Store
+ .DS_Store?
+ ._*
+ .Spotlight-V100
+ .Trashes
+ ehthumbs.db
+ Thumbs.db
+ nul
+
+ wandb
+
+ # AWS credentials and keys
+ aws/keys/*.pem
+ aws/keys/*.key
+ aws/.env
+ aws/credentials
+ *.pem
+ *.key
+
+ # Files with embedded tokens (userdata scripts, temp files)
+ .claude/settings.local.json
+ aws/temp/
+ userdata_*.sh
+ # Large data files (>100MB)
+ 1_data/processed/**/*.csv
+ 1_data/raw/**/*.csv
+ *.tar.gz
+ models_compressed.tar.gz
.monitor_complete ADDED
@@ -0,0 +1 @@
+ Tue, Feb 3, 2026 17:35:26: All done
1_data/README.md ADDED
@@ -0,0 +1,97 @@
+ # 1_data/ - Data Preparation
+
+ This directory contains all data used in the project, organized by processing stage and type.
+
+ ## Structure
+
+ ```
+ 1_data/
+ ├── raw/            # Original, unprocessed data
+ ├── processed/      # Processed data ready for training
+ └── benchmarks/     # Evaluation benchmarks
+     ├── nguyen/     # Nguyen benchmarks 1-12 (current)
+     ├── feynman/    # Feynman equations (planned)
+     └── strogatz/   # Strogatz benchmarks (planned)
+ ```
+
+ ## Data Sources
+
+ ### Training Data
+ - **Source**: HuggingFace Hub (`augustocsc/sintetico_natural`)
+ - **Size**: 700K synthetic mathematical expressions
+ - **Format**: structured JSON
+ - **Location**: `processed/`
+
+ ### Available Benchmarks
+
+ #### Nguyen Benchmarks (1-12)
+ Standard benchmarks for symbolic regression:
+ - **Nguyen-1**: x³ + x² + x
+ - **Nguyen-2**: x⁴ + x³ + x² + x
+ - **Nguyen-3**: x⁵ + x⁴ + x³ + x² + x
+ - **Nguyen-4**: x⁶ + x⁵ + x⁴ + x³ + x² + x
+ - **Nguyen-5**: sin(x²)·cos(x) - 1
+ - **Nguyen-6**: sin(x) + sin(x + x²)
+ - **Nguyen-7**: log(x + 1) + log(x² + 1)
+ - **Nguyen-8**: √x
+ - **Nguyen-9**: sin(x) + sin(y²)
+ - **Nguyen-10**: 2·sin(x)·cos(y)
+ - **Nguyen-11**: x^y
+ - **Nguyen-12**: x⁴ - x³ + y²/2 - y
+
+ **Location**: `benchmarks/nguyen/`
+
+ ## Upcoming Benchmarks (Planned)
+
+ ### Feynman Equations
+ Feynman physics equations - 120+ formulas
+ - Higher complexity than Nguyen
+ - Multi-variable (10+ variables)
+ - Physical constants
+
+ ### Strogatz Benchmarks
+ Dynamical systems and differential equations
+ - Oscillators
+ - Chaotic systems
+ - Population models
+
+ ## Usage
+
+ ### Prepare Training Data
+
+ ```bash
+ # From the repository root
+ cd 2_training/supervised
+ python train_with_json.py --dataset_path ../../1_data/processed/700K
+ ```
+
+ ### Add a New Benchmark
+
+ 1. Create a directory: `benchmarks/new_benchmark/`
+ 2. Add CSV files in the format:
+    ```csv
+    x,y
+    1.0,2.5
+    2.0,5.0
+    ...
+    ```
+ 3. Add metadata in `new_benchmark/metadata.json`:
+    ```json
+    {
+      "name": "New Benchmark",
+      "formula": "mathematical expression",
+      "variables": ["x", "y"],
+      "description": "description"
+    }
+    ```
+
+ ## Related Scripts
+
+ - Processing: `src/seriguela/data/`
+ - Benchmark evaluation: `3_evaluation/benchmarks/`
+
+ ## References
+
+ - Nguyen et al. (2012): "Semantically-based crossover in genetic programming"
+ - Feynman Lectures on Physics
+ - Original dataset: https://huggingface.co/datasets/augustocsc/sintetico_natural
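The README above describes benchmark CSVs plus a metadata file per benchmark. The files actually shipped in this commit use a `key: value` `.meta.txt` style rather than the planned `metadata.json`, so a loader sketch for that style (`parse_meta` and `load_benchmark` are hypothetical helpers, not project code):

```python
import csv
import io

def parse_meta(text: str) -> dict:
    """Parse a `key: value` .meta.txt file like those in benchmarks/nguyen/."""
    meta = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(":")
        meta[key.strip()] = value.strip()
    return meta

def load_benchmark(csv_text: str):
    """Load a benchmark CSV (columns x_1..x_n, then y) into inputs and targets."""
    rows = list(csv.DictReader(io.StringIO(csv_text)))
    X = [[float(r[k]) for k in r if k != "y"] for r in rows]
    y = [float(r["y"]) for r in rows]
    return X, y

meta = parse_meta("name: nguyen_1\nequation: x**3 + x**2 + x\nn_vars: 1")
X, y = load_benchmark("x_1,y\n0.5,0.875\n-0.5,-0.375")
print(meta["equation"], len(X))  # x**3 + x**2 + x 2
```

Using only the column-naming convention visible in the shipped CSVs keeps the loader benchmark-agnostic: any number of `x_i` columns works.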
1_data/benchmarks/nguyen/nguyen_1.csv ADDED
@@ -0,0 +1,101 @@
+ x_1,y
+ -0.250919762305275,-0.20375712587228662
+ 0.9014286128198323,2.4464791994214075
+ 0.4639878836228102,0.7791621581533072
+ 0.1973169683940732,0.24393329049852017
+ -0.687962719115127,-0.5402777510419681
+ -0.6880109593275947,-0.5403281140165024
+ -0.8838327756636011,-0.793087543099096
+ 0.7323522915498704,1.6614819098782947
+ 0.2022300234864176,0.25139760359687213
+ 0.416145155592091,0.6613886285518918
+ -0.9588310114083951,-0.9209820173332797
+ 0.9398197043239886,2.6531869448442036
+ 0.6648852816008435,1.4008851765113863
+ -0.5753217786434477,-0.43475534749635414
+ -0.6363500655857988,-0.48909314986283187
+ -0.6331909802931324,-0.48612594014666893
+ -0.39151551408092455,-0.29824433610683415
+ 0.04951286326447568,0.05208576884733716
+ -0.13610996271576847,-0.12010560331123674
+ -0.4175417196039162,-0.3159953095123318
+ 0.22370578944475894,0.2849452648921554
+ -0.7210122786959163,-0.5759780829004397
+ -0.4157107029295637,-0.3147365210423035
+ -0.2672763134126166,-0.2149330041985204
+ -0.08786003156592814,-0.08081887184182666
+ 0.5703519227860272,1.0811894695777686
+ -0.6006524356832805,-0.4565744842168673
+ 0.02846887682722321,0.029302427143425142
+ 0.18482913772408494,0.2253050457893701
+ -0.9070991745600046,-0.8306576893940274
+ 0.21508970380287673,0.27130410435063984
+ -0.6589517526254169,-0.5108626651850308
+ -0.869896814029441,-0.7714450703759913
+ 0.8977710745066665,2.427361090599084
+ 0.9312640661491187,2.606158159545042
+ 0.6167946962329223,1.231881113886954
+ -0.39077246165325863,-0.2977415177155153
+ -0.8046557719872323,-0.6781760666405668
+ 0.3684660530243138,0.5542589014459388
+ -0.1196950125207974,-0.10708297449722336
+ -0.7559235303104423,-0.6164532603539071
+ -0.00964617977745963,-0.00955402855846198
+ -0.9312229577695632,-0.8715811438419624
+ 0.8186408041575641,2.037444342661761
+ -0.48244003679996617,-0.36197878909859404
+ 0.32504456870796394,0.46504079000064535
+ -0.3765778478211781,-0.2881698066335382
+ 0.040136042355621626,0.04181159947832137
+ 0.0934205586865593,0.10296327812911324
+ -0.6302910889489459,-0.4834179919216196
+ 0.9391692555291171,2.64959195422807
+ 0.5502656467222291,1.01967411954153
+ 0.8789978831283782,2.330781693938945
+ 0.7896547008552977,1.905602026387024
+ 0.19579995762217028,0.24164408606501708
+ 0.8437484700462337,2.15633417340509
+ -0.823014995896161,-0.70313355144748
+ -0.6080342751617096,-0.4631223204132699
+ -0.9095454221789239,-0.8347148035273707
+ -0.3493393384734713,-0.2699340299663634
+ -0.22264542062103598,-0.18411118973018098
+ -0.4573019364522082,-0.34381017076318326
+ 0.6574750183038587,1.373957379373378
+ -0.28649334661282144,-0.22792983524748567
+ -0.4381309806252385,-0.3302753025279416
+ 0.08539216631649693,0.09330665286752131
+ -0.7181515500504747,-0.5727905657505367
+ 0.6043939615080793,1.1904663378939224
+ -0.8508987126404584,-0.7429451134365845
+ 0.9737738732010346,2.8453764395285988
+ 0.5444895385933148,1.0023825877332384
+ -0.6025686369316552,-0.45826569576400444
+ -0.9889557657527952,-0.9781541346041163
+ 0.6309228569096683,1.2801339644356482
+ 0.41371468769523423,0.6556858714260754
+ 0.4580143360819746,0.7638724020026765
+ 0.5425406933718915,0.9965881695975148
+ -0.8519106965318193,-0.7444346128158585
+ -0.2830685429114548,-0.22562240251418217
+ -0.7682618809497406,-0.6314839442672466
+ 0.726206851751187,1.6365675922627294
+ 0.24659625365511584,0.32240141321500704
+ -0.3382039502947016,-0.2625064527787106
+ -0.8728832994279527,-0.7760298750035356
+ -0.3780353565686756,-0.2891499348341886
+ -0.3496333559465059,-0.2701302717663986
+ 0.45921235667612814,0.7669252048567059
+ 0.27511494271042625,0.3716261379416909
+ 0.7744254851526531,1.8386102554828787
+ -0.05557014967610141,-0.05265371107138442
+ -0.7608115081233966,-0.6223610405246263
+ 0.42648957444599,0.6859585520259864
+ 0.5215700972337949,0.9354909760292364
+ 0.12255439513899247,0.13941469042057167
+ 0.541934359909122,0.9947894572777974
+ -0.012408807271218514,-0.012256739462828549
+ 0.045465658763988115,0.04762676814193671
+ -0.14491796328290074,-0.1269602006619134
+ -0.9491617465118096,-0.9033611561685656
+ -0.7842171460133911,-0.6515114391246863
1_data/benchmarks/nguyen/nguyen_1.meta.txt ADDED
@@ -0,0 +1,6 @@
+ name: nguyen_1
+ equation: x**3 + x**2 + x
+ latex: x^3 + x^2 + x
+ n_vars: 1
+ range: (-1, 1)
+ n_samples: 100
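The metadata above fully determines how a Nguyen-1-style dataset could be regenerated: `y = x**3 + x**2 + x` over the range (-1, 1) with 100 samples. A minimal sketch under the assumption of uniform sampling (the sampling procedure and seed used for the shipped CSV are not specified here, so this will not reproduce it row-for-row):

```python
import random

def make_nguyen_1(n_samples=100, seed=0):
    """Regenerate Nguyen-1-style data: y = x**3 + x**2 + x, x ~ U(-1, 1)."""
    rng = random.Random(seed)
    rows = []
    for _ in range(n_samples):
        x = rng.uniform(-1.0, 1.0)
        rows.append((x, x**3 + x**2 + x))
    return rows

rows = make_nguyen_1(5)
print(len(rows))  # 5
```

Fixing the seed keeps a regenerated benchmark reproducible across runs, which matters when comparing models trained at different times.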
1_data/benchmarks/nguyen/nguyen_10.csv ADDED
@@ -0,0 +1,101 @@
+ x_1,x_2,y
+ -0.250919762305275,0.9014286128198323,-0.3081292892207914
+ 0.4639878836228102,0.1973169683940732,0.8776686422156067
+ -0.687962719115127,-0.6880109593275947,-0.9810337787224731
+ -0.8838327756636011,0.7323522915498704,-1.1498719359899228
+ 0.2022300234864176,0.416145155592091,0.3674245747597698
+ -0.9588310114083951,0.9398197043239886,-0.9657455975760866
+ 0.6648852816008435,-0.5753217786434477,1.0352950417993314
+ -0.6363500655857988,-0.6331909802931324,-0.958123855709307
+ -0.39151551408092455,0.04951286326447568,-0.7622440963237047
+ -0.13610996271576847,-0.4175417196039162,-0.24806552319621297
+ 0.22370578944475894,-0.7210122786959163,0.3332717377157947
+ -0.4157107029295637,-0.2672763134126166,-0.7790027574498273
+ -0.08786003156592814,0.5703519227860272,-0.14771529605353687
+ -0.6006524356832805,0.02846887682722321,-1.1299036280741872
+ 0.18482913772408494,-0.9070991745600046,0.2264274834216052
+ 0.21508970380287673,-0.6589517526254169,0.3374982513544557
+ -0.869896814029441,0.8977710745066665,-0.9528126545294617
+ 0.9312640661491187,0.6167946962329223,1.3090534191908094
+ -0.39077246165325863,-0.8046557719872323,-0.5282049581946697
+ 0.3684660530243138,-0.1196950125207974,0.715215559638574
+ -0.7559235303104423,-0.00964617977745963,-1.3718580869858932
+ -0.9312229577695632,0.8186408041575641,-1.096354700980636
+ -0.48244003679996617,0.32504456870796394,-0.8792969284686333
+ -0.3765778478211781,0.040136042355621626,-0.7348882299464423
+ 0.0934205586865593,-0.6302910889489459,0.15072125651857374
+ 0.9391692555291171,0.5502656467222291,1.3758661094076514
+ 0.8789978831283782,0.7896547008552977,1.0844402663947248
+ 0.19579995762217028,0.8437484700462337,0.2586235668937924
+ -0.823014995896161,-0.6080342751617096,-1.2035798540389373
+ -0.9095454221789239,-0.3493393384734713,-1.4831094826390372
+ -0.22264542062103598,-0.4573019364522082,-0.3962431609403149
+ 0.6574750183038587,-0.28649334661282144,1.1724227266363472
+ -0.4381309806252385,0.08539216631649693,-0.8454037878432863
+ -0.7181515500504747,0.6043939615080793,-1.0828560895497152
+ -0.8508987126404584,0.9737738732010346,-0.8453799503582896
+ 0.5444895385933148,-0.6025686369316552,0.853511842011265
+ -0.9889557657527952,0.6309228569096683,-1.3492282644581814
+ 0.41371468769523423,0.4580143360819746,0.7211575539774799
+ 0.5425406933718915,-0.8519106965318193,0.6800328750993455
+ -0.2830685429114548,-0.7682618809497406,-0.40170504605860297
+ 0.726206851751187,0.24659625365511584,1.2879008077367804
+ -0.3382039502947016,-0.8728832994279527,-0.42643410138080395
+ -0.3780353565686756,-0.3496333559465059,-0.6935287908865693
+ 0.45921235667612814,0.27511494271042625,0.8531472481325574
+ 0.7744254851526531,-0.05557014967610141,1.396452099985796
+ -0.7608115081233966,0.42648957444599,-1.2554912294871714
+ 0.5215700972337949,0.12255439513899247,0.9890101456547871
+ 0.541934359909122,-0.012408807271218514,1.0315088718551784
+ 0.045465658763988115,-0.14491796328290074,0.08994715712964176
+ -0.9491617465118096,-0.7842171460133911,-1.1510102255193018
+ -0.9371416286265315,0.2728208225275608,-1.5521272667847952
+ -0.37128803784734665,0.01714138232940554,-0.7255254066217963
+ 0.815132947852186,-0.5014155417022501,1.2764494826885613
+ -0.17923415392874054,0.5111022770860973,-0.3109868317325467
+ -0.5424036690167551,-0.846040180342414,-0.6844274708190229
+ -0.42049709417246395,-0.6775574254919912,-0.6360846328891363
+ 0.8593953046851461,0.6162407591288339,1.2362412464087666
+ 0.26680751302084693,0.7429211803754354,0.38835897648845996
+ 0.6073441537982289,-0.6268598822279283,0.9243711268676845
+ 0.7851179969799555,0.07868448383130144,1.4094429026004185
+ 0.6148803103281251,0.7921825998469865,0.8102508035936384
+ -0.36399305005627225,-0.7798961509446465,-0.5062345208341303
+ -0.5441296749161166,-0.14578442274748737,-1.0243646142100158
+ 0.6360295318449862,0.7214611665126869,0.8920087302448801
+ -0.9860957389376186,0.021494605155131463,-1.6673695158590665
+ -0.16517799370244202,-0.5557843790585395,-0.2793586873666334
+ -0.7602692653326344,-0.3247696571927441,-1.3061850155975012
+ 0.8858194078250383,-0.35359413595848954,1.4530454262804506
+ 0.037581243486732197,0.4060379177903557,0.06903499342946122
+ -0.27274079524141204,0.9435641654419213,-0.31619201189155477
+ 0.9248945898842225,-0.4964354083492717,1.4043204449131401
+ -0.005502988215229099,-0.3982433803664607,-0.010144637492484147
+ -0.4303190112450648,-0.9262261052909344,-0.5013062025629531
+ 0.2191286679597937,0.005358046457722976,0.4347521786103487
+ -0.8970424975000213,-0.44270707152677713,-1.4122924802363288
+ 0.8165317719333074,-0.5208762186660552,1.264255365125172
+ -0.7102102558175538,-0.02109447944487397,-1.303696301953025
+ 0.9713009082212014,-0.5158894569769992,1.4363389241451416
+ 0.3442710948117571,0.5232392306574352,0.5847068763596124
+ -0.5247249120152007,0.45643269722371915,-0.899380771664107
+ -0.26443373456149355,0.26461166118715895,-0.5045315450691236
+ 0.2670594215217894,0.07154936814951696,0.5264420962176761
+ -0.8194204598911834,0.6706049911784759,-1.1450066034669906
+ -0.35843987005652833,-0.6269629792002915,-0.5681869814158013
+ -0.9184497168904722,0.18178588637648363,-1.56313471156199
+ 0.35512872368456483,-0.9668243421442877,0.39494150902709124
+ 0.024186116598561958,-0.5470084496041241,0.04130993957951832
+ 0.2903455808188997,-0.6512671419900171,0.45537164467760105
+ 0.3818754762049319,-0.22652930739892518,0.7262813246120741
+ 0.873459977473469,-0.7249581117080135,1.1475751969011463
+ -0.317867297899483,-0.7730529575188219,-0.44742306659388054
+ 0.8493872365571256,0.754678706761962,1.094013626029377
+ -0.4841167445696888,0.3199680920683581,-0.8836085179911668
+ 0.6344444004024317,0.11040162319892466,1.1782430836037832
+ 0.05930115671201297,-0.5162954181990966,0.10308252884688361
+ -0.8137944643882016,0.7944315159066535,-1.0186613308624795
+ 0.8008361143266609,0.2662029145465359,1.385300485429242
+ -0.32194041790259864,-0.3015808507746782,-0.6042555977205083
+ 0.45191135774047875,0.7942205199051542,0.6120946429016678
+ 0.7741728485302346,0.5597510917152477,1.184859127500451
1_data/benchmarks/nguyen/nguyen_10.meta.txt ADDED
@@ -0,0 +1,6 @@
+ name: nguyen_10
+ equation: 2*sin(x)*cos(y)
+ latex: 2 \sin(x) \cos(y)
+ n_vars: 2
+ range: (-1, 1)
+ n_samples: 100
1_data/benchmarks/nguyen/nguyen_11.csv ADDED
@@ -0,0 +1,101 @@
+ x_1,x_2,y
+ 0.3745401188473625,0.9507143064099162,0.3931142382758706
+ 0.7319939418114051,0.5986584841970366,0.829633456356603
+ 0.15601864044243652,0.15599452033620265,0.7484106404746592
+ 0.05808361216819946,0.8661761457749352,0.08500661554616816
+ 0.6011150117432088,0.7080725777960455,0.6974063854971796
+ 0.020584494295802447,0.9699098521619943,0.023135880194308844
+ 0.8324426408004217,0.21233911067827616,0.9618073836750944
+ 0.18182496720710062,0.18340450985343382,0.7315046923708697
+ 0.3042422429595377,0.5247564316322378,0.5355698450285795
+ 0.43194501864211576,0.2912291401980419,0.7831160893255134
+ 0.6118528947223795,0.13949386065204183,0.9337671019595395
+ 0.29214464853521815,0.3663618432936917,0.6371115444232952
+ 0.45606998421703593,0.7851759613930136,0.5398582254082916
+ 0.19967378215835974,0.5142344384136116,0.4367178921370369
+ 0.5924145688620425,0.046450412719997725,0.9759742767237903
+ 0.6075448519014384,0.17052412368729153,0.9185332605846537
+ 0.06505159298527952,0.9488855372533332,0.07480275943558906
+ 0.9656320330745594,0.8083973481164611,0.9721242785553075
+ 0.3046137691733707,0.09767211400638387,0.890382724908758
+ 0.6842330265121569,0.4401524937396013,0.8461836782187776
+ 0.12203823484477883,0.4951769101112702,0.3529017979604265
+ 0.034388521115218396,0.9093204020787821,0.04668000938776754
+ 0.2587799816000169,0.662522284353982,0.4083696815940864
+ 0.31171107608941095,0.5200680211778108,0.5454020001935437
+ 0.5467102793432796,0.18485445552552704,0.894382426580511
+ 0.9695846277645586,0.7751328233611146,0.9763424052614121
+ 0.9394989415641891,0.8948273504276488,0.9456857906412531
+ 0.5978999788110851,0.9218742350231168,0.622414360674025
+ 0.0884925020519195,0.1959828624191452,0.6217441626231702
+ 0.045227288910538066,0.32533033076326434,0.3652254374411182
+ 0.388677289689482,0.2713490317738959,0.7738119236542511
+ 0.8287375091519293,0.3567533266935893,0.9351795294971191
+ 0.28093450968738076,0.5426960831582485,0.5020652261408302
+ 0.14092422497476265,0.8021969807540397,0.207643749220454
+ 0.07455064367977082,0.9868869366005173,0.07713243002937303
+ 0.7722447692966574,0.1987156815341724,0.9499377648466204
+ 0.005522117123602399,0.8154614284548342,0.01441365817791975
+ 0.7068573438476171,0.7290071680409873,0.7765363384012357
+ 0.7712703466859457,0.07404465173409036,0.9809531237946688
+ 0.3584657285442726,0.11586905952512971,0.8879208766651301
+ 0.8631034258755935,0.6232981268275579,0.9123218750417235
+ 0.3308980248526492,0.06355835028602363,0.9321215616843733
+ 0.3109823217156622,0.32518332202674705,0.683984263997003
+ 0.7296061783380641,0.6375574713552131,0.8179204225106446
+ 0.8872127425763265,0.4722149251619493,0.9450568572198838
+ 0.1195942459383017,0.713244787222995,0.21987794449897202
+ 0.7607850486168974,0.5612771975694962,0.8577387897180954
+ 0.770967179954561,0.49379559636439074,0.8794655258016348
+ 0.5227328293819941,0.42754101835854963,0.7577972913056583
+ 0.02541912674409519,0.10789142699330445,0.6728689751470142
52
+ 0.03142918568673425,0.6364104112637804,0.11058269210864166
53
+ 0.3143559810763267,0.5085706911647028,0.5551411500446592
54
+ 0.907566473926093,0.24929222914887494,0.9761114867199115
55
+ 0.41038292303562973,0.7555511385430487,0.5102050118761009
56
+ 0.22879816549162246,0.07697990982879299,0.8926695364249662
57
+ 0.289751452913768,0.16122128725400442,0.818968274689897
58
+ 0.9296976523425731,0.808120379564417,0.9427929152457777
59
+ 0.6334037565104235,0.8714605901877177,0.6716955774921506
60
+ 0.8036720768991145,0.18657005888603584,0.9600427248758505
61
+ 0.8925589984899778,0.5393422419156507,0.9405381426424225
62
+ 0.8074401551640625,0.8960912999234932,0.8255861281416367
63
+ 0.3180034749718639,0.11005192452767676,0.8815392814173838
64
+ 0.22793516254194168,0.4271077886262563,0.5317606737242714
65
+ 0.8180147659224931,0.8607305832563434,0.8412224374670049
66
+ 0.006952130531190703,0.5107473025775657,0.07904375205389284
67
+ 0.417411003148779,0.22210781047073025,0.8236150559828436
68
+ 0.1198653673336828,0.33761517140362796,0.48859950491607135
69
+ 0.9429097039125192,0.32320293202075523,0.9811799458729573
70
+ 0.5187906217433661,0.7030189588951778,0.6304259108490097
71
+ 0.363629602379294,0.9717820827209607,0.3741592725494348
72
+ 0.9624472949421112,0.25178229582536416,0.9904090768000238
73
+ 0.49724850589238545,0.30087830981676966,0.810411403704802
74
+ 0.2848404943774676,0.036886947354532795,0.954732975166944
75
+ 0.6095643339798968,0.5026790232288615,0.7797113146930209
76
+ 0.05147875124998935,0.27864646423661144,0.43752180145116215
77
+ 0.9082658859666537,0.23956189066697242,0.9772134321986058
78
+ 0.1448948720912231,0.489452760277563,0.3884857408258429
79
+ 0.9856504541106007,0.2420552715115004,0.9965075678190551
80
+ 0.6721355474058786,0.7616196153287176,0.7389035679220693
81
+ 0.23763754399239967,0.7282163486118596,0.351181104428453
82
+ 0.3677831327192532,0.6323058305935795,0.5312771863051331
83
+ 0.6335297107608947,0.5357746840747585,0.78305410395847
84
+ 0.0902897700544083,0.835302495589238,0.13416593821588932
85
+ 0.32078006497173583,0.18651851039985423,0.8089068896038694
86
+ 0.040775141554763916,0.5908929431882418,0.1509706391829457
87
+ 0.6775643618422824,0.016587828927856152,0.9935639759514822
88
+ 0.512093058299281,0.22649577519793795,0.8593473681905776
89
+ 0.6451727904094499,0.17436642900499144,0.9264327330143394
90
+ 0.690937738102466,0.3867353463005374,0.8667729564864897
91
+ 0.9367299887367345,0.13752094414599325,0.9910518779206805
92
+ 0.3410663510502585,0.11347352124058907,0.8850943779323204
93
+ 0.9246936182785628,0.877339353380981,0.9336166425910485
94
+ 0.2579416277151556,0.659984046034179,0.40889663483282684
95
+ 0.8172222002012158,0.5552008115994623,0.8939869586598578
96
+ 0.5296505783560065,0.24185229090045168,0.8575238670157391
97
+ 0.09310276780589921,0.8972157579533268,0.11883298066628799
98
+ 0.9004180571633305,0.6331014572732679,0.9357472382139566
99
+ 0.3390297910487007,0.3492095746126609,0.6854165136378787
100
+ 0.7259556788702394,0.8971102599525771,0.7502759576646942
101
+ 0.8870864242651173,0.7798755458576239,0.9107934601911546
1_data/benchmarks/nguyen/nguyen_11.meta.txt ADDED
@@ -0,0 +1,6 @@
+ name: nguyen_11
+ equation: x**y
+ latex: x^y
+ n_vars: 2
+ range: (0, 1)
+ n_samples: 100
1_data/benchmarks/nguyen/nguyen_12.csv ADDED
@@ -0,0 +1,101 @@
1
+ x_1,x_2,y
2
+ 0.3745401188473625,0.9507143064099162,-0.5316474979283755
3
+ 0.7319939418114051,0.5986584841970366,-0.5245780691122485
4
+ 0.15601864044243652,0.15599452033620265,-0.1470326281985484
5
+ 0.05808361216819946,0.8661761457749352,-0.49123016315900203
6
+ 0.6011150117432088,0.7080725777960455,-0.5440295831065014
7
+ 0.020584494295802447,0.9699098521619943,-0.4995558340525668
8
+ 0.8324426408004217,0.21233911067827616,-0.28645063725362985
9
+ 0.18182496720710062,0.18340450985343382,-0.17150410942706706
10
+ 0.3042422429595377,0.5247564316322378,-0.40666548191401125
11
+ 0.43194501864211576,0.2912291401980419,-0.2946019335150628
12
+ 0.6118528947223795,0.13949386065204183,-0.2186718894768038
13
+ 0.29214464853521815,0.3663618432936917,-0.3169010837400344
14
+ 0.45606998421703593,0.7851759613930136,-0.5285238661436937
15
+ 0.19967378215835974,0.5142344384136116,-0.38838724072182046
16
+ 0.5924145688620425,0.046450412719997725,-0.13011303294584414
17
+ 0.6075448519014384,0.17052412368729153,-0.24399347586867748
18
+ 0.06505159298527952,0.9488855372533332,-0.498951027941936
19
+ 0.9656320330745594,0.8083973481164611,-0.5125890941467149
20
+ 0.3046137691733707,0.09767211400638387,-0.11255726686131867
21
+ 0.6842330265121569,0.4401524937396013,-0.44443839313702255
22
+ 0.12203823484477883,0.4951769101112702,-0.3741725684537525
23
+ 0.034388521115218396,0.9093204020787821,-0.49592787363434615
24
+ 0.2587799816000169,0.662522284353982,-0.45589954550743944
25
+ 0.31171107608941095,0.5200680211778108,-0.405678875884419
26
+ 0.5467102793432796,0.18485445552552704,-0.24183976519509257
27
+ 0.9695846277645586,0.7751328233611146,-0.502441019579803
28
+ 0.9394989415641891,0.8948273504276488,-0.5446402529864546
29
+ 0.5978999788110851,0.9218742350231168,-0.5828930031608885
30
+ 0.0884925020519195,0.1959828624191452,-0.1774098758468889
31
+ 0.045227288910538066,0.32533033076326434,-0.2724987473704077
32
+ 0.388677289689482,0.2713490317738959,-0.2704292195788051
33
+ 0.8287375091519293,0.3567533266935893,-0.39059634750094807
34
+ 0.28093450968738076,0.5426960831582485,-0.41138006574851044
35
+ 0.14092422497476265,0.8021969807540397,-0.48284128157802086
36
+ 0.07455064367977082,0.9868869366005173,-0.5002974721138831
37
+ 0.7722447692966574,0.1987156815341724,-0.2838615274049386
38
+ 0.005522117123602399,0.8154614284548342,-0.4829729252663755
39
+ 0.7068573438476171,0.7290071680409873,-0.5668133801237885
40
+ 0.7712703466859457,0.07404465173409036,-0.17624366412809184
41
+ 0.3584657285442726,0.11586905952512971,-0.1387066006115091
42
+ 0.8631034258755935,0.6232981268275579,-0.517067796073964
43
+ 0.3308980248526492,0.06355835028602363,-0.08578087500153808
44
+ 0.3109823217156622,0.32518332202674705,-0.2930335023172791
45
+ 0.7296061783380641,0.6375574713552131,-0.5393353511837361
46
+ 0.8872127425763265,0.4722149251619493,-0.4394882839499687
47
+ 0.1195942459383017,0.713244787222995,-0.46039168497896
48
+ 0.7607850486168974,0.5612771975694962,-0.509096521876274
49
+ 0.770967179954561,0.49379559636439074,-0.4768340968434337
50
+ 0.5227328293819941,0.42754101835854963,-0.40431654954497565
51
+ 0.02541912674409519,0.10789142699330445,-0.10208715360872561
52
+ 0.03142918568673425,0.6364104112637804,-0.43393137529691794
53
+ 0.3143559810763267,0.5085706911647028,-0.4005478458408727
54
+ 0.907566473926093,0.24929222914887494,-0.28731682218657484
55
+ 0.41038292303562973,0.7555511385430487,-0.5108733418268507
56
+ 0.22879816549162246,0.07697990982879299,-0.08325384436032608
57
+ 0.289751452913768,0.16122128725400442,-0.16550288692705029
58
+ 0.9296976523425731,0.808120379564417,-0.5380841567189258
59
+ 0.6334037565104235,0.8714605901877177,-0.5848989033049367
60
+ 0.8036720768991145,0.18657005888603584,-0.27107631331792464
61
+ 0.8925589984899778,0.5393422419156507,-0.470295013851557
62
+ 0.8074401551640625,0.8960912999234932,-0.59596852950575
63
+ 0.3180034749718639,0.11005192452767676,-0.12592818733422592
64
+ 0.22793516254194168,0.4271077886262563,-0.34504023675612344
65
+ 0.8180147659224931,0.8607305832563434,-0.5899158316402314
66
+ 0.006952130531190703,0.5107473025775657,-0.38031623270764187
67
+ 0.417411003148779,0.22210781047073025,-0.23981143105710734
68
+ 0.1198653673336828,0.33761517140362796,-0.28213892883047403
69
+ 0.9429097039125192,0.32320293202075523,-0.318832855236626
70
+ 0.5187906217433661,0.7030189588951778,-0.5230920266442246
71
+ 0.363629602379294,0.9717820827209607,-0.5301994956717305
72
+ 0.9624472949421112,0.25178229582536416,-0.25356410409756097
73
+ 0.49724850589238545,0.30087830981676966,-0.3174265784042432
74
+ 0.2848404943774676,0.036886947354532795,-0.05273415977110583
75
+ 0.6095643339798968,0.5026790232288615,-0.46476765439268997
76
+ 0.05147875124998935,0.27864646423661144,-0.2399539372668816
77
+ 0.9082658859666537,0.23956189066697242,-0.2796006655774298
78
+ 0.1448948720912231,0.489452760277563,-0.3722719868332219
79
+ 0.9856504541106007,0.2420552715115004,-0.22650053348928928
80
+ 0.6721355474058786,0.7616196153287176,-0.5711428201443614
81
+ 0.23763754399239967,0.7282163486118596,-0.473297554430461
82
+ 0.3677831327192532,0.6323058305935795,-0.46385200894375067
83
+ 0.6335297107608947,0.5357746840747585,-0.4854310810016914
84
+ 0.0902897700544083,0.835302495589238,-0.48710697106906575
85
+ 0.32078006497173583,0.18651851039985423,-0.19154377448709786
86
+ 0.040775141554763916,0.5908929431882418,-0.4163807370007205
87
+ 0.6775643618422824,0.016587828927856152,-0.1167488120616192
88
+ 0.512093058299281,0.22649577519793795,-0.2663670817810153
89
+ 0.6451727904094499,0.17436642900499144,-0.25445410259051215
90
+ 0.690937738102466,0.3867353463005374,-0.41389747881703326
91
+ 0.9367299887367345,0.13752094414599325,-0.18006947009905816
92
+ 0.3410663510502585,0.11347352124058907,-0.13317857503984928
93
+ 0.9246936182785628,0.877339353380981,-0.552019449425134
94
+ 0.2579416277151556,0.659984046034179,-0.4549296760550325
95
+ 0.8172222002012158,0.5552008115994623,-0.5008339633920046
96
+ 0.5296505783560065,0.24185229090045168,-0.28249182975838943
97
+ 0.09310276780589921,0.8972157579533268,-0.49544958985988724
98
+ 0.9004180571633305,0.6331014572732679,-0.5053891761940527
99
+ 0.3390297910487007,0.3492095746126609,-0.31399292258823414
100
+ 0.7259556788702394,0.8971102599525771,-0.5995526723688602
101
+ 0.8870864242651173,0.7798755458576239,-0.5545939788269545
1_data/benchmarks/nguyen/nguyen_12.meta.txt ADDED
@@ -0,0 +1,6 @@
+ name: nguyen_12
+ equation: x**4 - x**3 + y**2/2 - y
+ latex: x^4 - x^3 + \frac{y^2}{2} - y
+ n_vars: 2
+ range: (0, 1)
+ n_samples: 100
1_data/benchmarks/nguyen/nguyen_2.csv ADDED
@@ -0,0 +1,101 @@
1
+ x_1,y
2
+ -0.250919762305275,-0.19979307271339486
3
+ 0.9014286128198323,3.106754963846846
4
+ 0.4639878836228102,0.8255096843833445
5
+ 0.1973169683940732,0.24544914576563198
6
+ -0.687962719115127,-0.316271768430889
7
+ -0.6880109593275947,-0.3162592952514309
8
+ -0.8838327756636011,-0.18287601110210117
9
+ 0.7323522915498704,1.9491423756178945
10
+ 0.2022300234864176,0.25307016676624217
11
+ 0.416145155592091,0.6913788293276577
12
+ -0.9588310114083951,-0.07576489223978267
13
+ 0.9398197043239886,3.4333370743437346
14
+ 0.6648852816008435,1.596313216676064
15
+ -0.5753217786434477,-0.3251975588470951
16
+ -0.6363500655857988,-0.32511560759302083
17
+ -0.6331909802931324,-0.3253804197057425
18
+ -0.39151551408092455,-0.27474822950833333
19
+ 0.04951286326447568,0.05209177881543897
20
+ -0.13610996271576847,-0.11976239352712116
21
+ -0.4175417196039162,-0.28560049468336546
22
+ 0.22370578944475894,0.2874496948760045
23
+ -0.7210122786959163,-0.30572500866496494
24
+ -0.4157107029295637,-0.28487136252946227
25
+ -0.2672763134126166,-0.2098298124197376
26
+ -0.08786003156592814,-0.08075928293478254
27
+ 0.5703519227860272,1.1870104156557124
28
+ -0.6006524356832805,-0.3264098596675816
29
+ 0.02846887682722321,0.029303084016308063
30
+ 0.18482913772408494,0.22647207506221967
31
+ -0.9070991745600046,-0.1536102701687616
32
+ 0.21508970380287673,0.2734444232481606
33
+ -0.6589517526254169,-0.32231790405084937
34
+ -0.869896814029441,-0.1988192051106482
35
+ 0.8977710745066665,3.0769856490294796
36
+ 0.9312640661491187,3.358285510834738
37
+ 0.6167946962329223,1.3766124336679
38
+ -0.39077246165325863,-0.27442327583918935
39
+ -0.8046557719872323,-0.25895748554130227
40
+ 0.3684660530243138,0.572691642793691
41
+ -0.1196950125207974,-0.10687771454758802
42
+ -0.7559235303104423,-0.28993200547233466
43
+ -0.00964617977745963,-0.009554019900385721
44
+ -0.9312229577695632,-0.11958658706487191
45
+ 0.8186408041575641,2.4865758792604673
46
+ -0.48244003679996617,-0.3078069764664333
47
+ 0.32504456870796394,0.4762035517253345
48
+ -0.3765778478211781,-0.2680594822320752
49
+ 0.040136042355621626,0.04181419448323982
50
+ 0.0934205586865593,0.10303944565358064
51
+ -0.6302910889489459,-0.3255970364031556
52
+ 0.9391692555291171,3.4275845586374323
53
+ 0.5502656467222291,1.1113572855576686
54
+ 0.8789978831283782,2.9277500581350866
55
+ 0.7896547008552977,2.2944222989511918
56
+ 0.19579995762217028,0.2431138594333487
57
+ 0.8437484700462337,2.663152129765188
58
+ -0.823014995896161,-0.24432553893716014
59
+ -0.6080342751617096,-0.326440030758018
60
+ -0.9095454221789239,-0.15033439380562408
61
+ -0.3493393384734713,-0.2550407630135437
62
+ -0.22264542062103598,-0.18165390734252046
63
+ -0.4573019364522082,-0.3000768795902401
64
+ 0.6574750183038587,1.560817671456092
65
+ -0.28649334661282144,-0.22119296531986027
66
+ -0.4381309806252385,-0.2934271384523741
67
+ 0.08539216631649693,0.09335982353659596
68
+ -0.7181515500504747,-0.3068011174024385
69
+ 0.6043939615080793,1.3239046275098028
70
+ -0.8508987126404584,-0.2187276720547493
71
+ 0.9737738732010346,3.7445271094357677
72
+ 0.5444895385933148,1.0902763712821586
73
+ -0.6025686369316552,-0.32643210128260247
74
+ -0.9889557657527952,-0.021604594541118627
75
+ 0.6309228569096683,1.4385886349785073
76
+ 0.41371468769523423,0.6849815632184505
77
+ 0.4580143360819746,0.8078788471365737
78
+ 0.5425406933718915,1.0832303299115513
79
+ -0.8519106965318193,-0.21771888700546604
80
+ -0.2830685429114548,-0.21920193818358347
81
+ -0.7682618809497406,-0.2831168381374246
82
+ 0.726206851751187,1.9146934506063242
83
+ 0.24659625365511584,0.32609923432705157
84
+ -0.3382039502947016,-0.24942323098709215
85
+ -0.8728832994279527,-0.19549978168020488
86
+ -0.3780353565686756,-0.2687264578518238
87
+ -0.3496333559465059,-0.2551868024860783
88
+ 0.45921235667612814,0.8113938873926984
89
+ 0.27511494271042625,0.37735484635995153
90
+ 0.7744254851526531,2.198292124261625
91
+ -0.05557014967610141,-0.05264417507086238
92
+ -0.7608115081233966,-0.2873120662846093
93
+ 0.42648957444599,0.7190437453871404
94
+ 0.5215700972337949,1.0094942165627014
95
+ 0.12255439513899247,0.13964027819697553
96
+ 0.541934359909122,1.081044947683308
97
+ -0.012408807271218514,-0.012256715753450735
98
+ 0.045465658763988115,0.04763104115236099
99
+ -0.14491796328290074,-0.12651914958498786
100
+ -0.9491617465118096,-0.0917258937919263
101
+ -0.7842171460133911,-0.27329070462795224
1_data/benchmarks/nguyen/nguyen_2.meta.txt ADDED
@@ -0,0 +1,6 @@
+ name: nguyen_2
+ equation: x**4 + x**3 + x**2 + x
+ latex: x^4 + x^3 + x^2 + x
+ n_vars: 1
+ range: (-1, 1)
+ n_samples: 100
1_data/benchmarks/nguyen/nguyen_3.csv ADDED
@@ -0,0 +1,101 @@
1
+ x_1,y
2
+ -0.250919762305275,-0.20078773198978944
3
+ 0.9014286128198323,3.701946430251423
4
+ 0.4639878836228102,0.8470143749899722
5
+ 0.1973169683940732,0.24574824973146267
6
+ -0.687962719115127,-0.47037953332606286
7
+ -0.6880109593275947,-0.4704210982053887
8
+ -0.8838327756636011,-0.7222009631689434
9
+ 0.7323522915498704,2.1598111768905937
10
+ 0.2022300234864176,0.25340840925526636
11
+ 0.416145155592091,0.7038591060957268
12
+ -0.9588310114083951,-0.8861852831528764
13
+ 0.9398197043239886,4.166537538378305
14
+ 0.6648852816008435,1.7262504441936566
15
+ -0.5753217786434477,-0.3882285406770297
16
+ -0.6363500655857988,-0.42946272737101315
17
+ -0.6331909802931324,-0.42716303337146244
18
+ -0.39151551408092455,-0.2839473197621456
19
+ 0.04951286326447568,0.05209207638616782
20
+ -0.13610996271576847,-0.11980910779804083
21
+ -0.4175417196039162,-0.29829159793409465
22
+ 0.22370578944475894,0.2880099503626506
23
+ -0.7210122786959163,-0.5005807935440612
24
+ -0.4157107029295637,-0.29728662856793836
25
+ -0.2672763134126166,-0.21119377470500825
26
+ -0.08786003156592814,-0.08076451841803642
27
+ 0.5703519227860272,1.2473655957223042
28
+ -0.6006524356832805,-0.40459355844290984
29
+ 0.02846887682722321,0.029303102716741258
30
+ 0.18482913772408494,0.22668777607641924
31
+ -0.9070991745600046,-0.7677594252859816
32
+ 0.21508970380287673,0.2739047838058721
33
+ -0.6589517526254169,-0.4465598048485588
34
+ -0.869896814029441,-0.696944620935822
35
+ 0.8977710745066665,3.660199786877455
36
+ 0.9312640661491187,4.058714686258748
37
+ 0.6167946962329223,1.4658819440875785
38
+ -0.39077246165325863,-0.28353540261862736
39
+ -0.8046557719872323,-0.5962841365471232
40
+ 0.3684660530243138,0.5794834822445153
41
+ -0.1196950125207974,-0.10690228313982963
42
+ -0.7559235303104423,-0.5367571051838087
43
+ -0.00964617977745963,-0.009554019983903081
44
+ -0.9312229577695632,-0.8198611824534459
45
+ 0.8186408041575641,2.854253281554156
46
+ -0.48244003679996617,-0.3339416277462138
47
+ 0.32504456870796394,0.4798319467957259
48
+ -0.3765778478211781,-0.27563258491416387
49
+ 0.040136042355621626,0.04181429863646714
50
+ 0.0934205586865593,0.10304656126627017
51
+ -0.6302910889489459,-0.4250701783158514
52
+ 0.9391692555291171,4.158251293727732
53
+ 0.5502656467222291,1.1618073821990806
54
+ 0.8789978831283782,3.4524839865581054
55
+ 0.7896547008552977,2.6014560549693257
56
+ 0.19579995762217028,0.24340164099658224
57
+ 0.8437484700462337,3.09077900503598
58
+ -0.823014995896161,-0.6219314134704668
59
+ -0.6080342751617096,-0.4095475476759919
60
+ -0.9095454221789239,-0.772809462496975
61
+ -0.3493393384734713,-0.26024356703855056
62
+ -0.22264542062103598,-0.1822010100133058
63
+ -0.4573019364522082,-0.3200761983310553
64
+ 0.6574750183038587,1.683673645413439
65
+ -0.28649334661282144,-0.22312303373112094
66
+ -0.4381309806252385,-0.3095714607130422
67
+ 0.08539216631649693,0.09336436389521274
68
+ -0.7181515500504747,-0.49782185203069584
69
+ 0.6043939615080793,1.4045539239876073
70
+ -0.8508987126404584,-0.6647836180702278
71
+ 0.9737738732010346,4.620096539862576
72
+ 0.5444895385933148,1.1381336169319312
73
+ -0.6025686369316552,-0.4058708906110614
74
+ -0.9889557657527952,-0.9675897774146045
75
+ 0.6309228569096683,1.538561308408088
76
+ 0.41371468769523423,0.6971016211991488
77
+ 0.4580143360819746,0.8280344298879034
78
+ 0.5425406933718915,1.1302372276435673
79
+ -0.8519106965318193,-0.6664336478548603
80
+ -0.2830685429114548,-0.22101936966646102
81
+ -0.7682618809497406,-0.5507540063537395
82
+ 0.726206851751187,2.1166703545846226
83
+ 0.24659625365511584,0.3270111031599685
84
+ -0.3382039502947016,-0.25384802827959924
85
+ -0.8728832994279527,-0.702234804957491
86
+ -0.3780353565686756,-0.2764472542552242
87
+ -0.3496333559465059,-0.2604115378000402
88
+ 0.45921235667612814,0.8318144558983341
89
+ 0.27511494271042625,0.378930899648246
90
+ 0.7744254851526531,2.476838929991218
91
+ -0.05557014967610141,-0.0526447049878387
92
+ -0.7608115081233966,-0.5422211816713537
93
+ 0.42648957444599,0.7331542354242023
94
+ 0.5215700972337949,1.0480920939233567
95
+ 0.12255439513899247,0.13966792497046343
96
+ 0.541934359909122,1.1277897616648658
97
+ -0.012408807271218514,-0.012256716047655835
98
+ 0.045465658763988115,0.047631235427594835
99
+ -0.14491796328290074,-0.12658306580875964
100
+ -0.9491617465118096,-0.8620990369599081
101
+ -0.7842171460133911,-0.5698978895980695
1_data/benchmarks/nguyen/nguyen_3.meta.txt ADDED
@@ -0,0 +1,6 @@
+ name: nguyen_3
+ equation: x**5 + x**4 + x**3 + x**2 + x
+ latex: x^5 + x^4 + x^3 + x^2 + x
+ n_vars: 1
+ range: (-1, 1)
+ n_samples: 100
1_data/benchmarks/nguyen/nguyen_4.csv ADDED
@@ -0,0 +1,101 @@
1
+ x_1,y
2
+ -0.250919762305275,-0.2005381523205818
3
+ 0.9014286128198323,4.238469048174702
4
+ 0.4639878836228102,0.8569922908725047
5
+ 0.1973169683940732,0.24580726801923503
6
+ -0.687962719115127,-0.3643591363520243
7
+ -0.6880109593275947,-0.36435608826336463
8
+ -0.8838327756636011,-0.2455278937990677
9
+ 0.7323522915498704,2.3140949562607194
10
+ 0.2022300234864176,0.25347681204176586
11
+ 0.416145155592091,0.7090527128132073
12
+ -0.9588310114083951,-0.10912908006768773
13
+ 0.9398197043239886,4.855613781697487
14
+ 0.6648852816008435,1.812643794302124
15
+ -0.5753217786434477,-0.3519654441009889
16
+ -0.6363500655857988,-0.36306143085659853
17
+ -0.6331909802931324,-0.3627152004476681
18
+ -0.39151551408092455,-0.28034573321234746
19
+ 0.04951286326447568,0.05209209111974663
20
+ -0.13610996271576847,-0.11980274952036765
21
+ -0.4175417196039162,-0.2929925328591143
22
+ 0.22370578944475894,0.2881352827585815
23
+ -0.7210122786959163,-0.3600873800713027
24
+ -0.4157107029295637,-0.2921254695960259
25
+ -0.2672763134126166,-0.21082921989376727
26
+ -0.08786003156592814,-0.08076405842831248
27
+ 0.5703519227860272,1.2817892887233815
28
+ -0.6006524356832805,-0.35763232934278105
29
+ 0.02846887682722321,0.02930310324912159
30
+ 0.18482913772408494,0.22672764390887995
31
+ -0.9070991745600046,-0.21066523362242717
32
+ 0.21508970380287673,0.27400380262187274
33
+ -0.6589517526254169,-0.36469038656839503
34
+ -0.869896814029441,-0.2636269087224129
35
+ 0.8977710745066665,4.183792570080711
36
+ 0.9312640661491187,4.710999208213584
37
+ 0.6167946962329223,1.520942904649746
38
+ -0.39077246165325863,-0.2799746344061298
39
+ -0.8046557719872323,-0.32485229977016666
40
+ 0.3684660530243138,0.5819860445197353
41
+ -0.1196950125207974,-0.10689934240187365
42
+ -0.7559235303104423,-0.3501762044406843
43
+ -0.00964617977745963,-0.009554019983097458
44
+ -0.9312229577695632,-0.16774940248481396
45
+ 0.8186408041575641,3.1552490058384244
46
+ -0.48244003679996617,-0.32133322562104216
47
+ 0.32504456870796394,0.48101133690648334
48
+ -0.3765778478211781,-0.27278072220481414
49
+ 0.040136042355621626,0.041814302816765486
50
+ 0.0934205586865593,0.10304722601078302
51
+ -0.6302910889489459,-0.36237314337852533
52
+ 0.9391692555291171,4.8444710273623794
53
+ 0.5502656467222291,1.1895683372546662
54
+ 0.8789978831283782,3.913723998847577
55
+ 0.7896547008552977,2.843906703730303
56
+ 0.19579995762217028,0.2434579886144678
57
+ 0.8437484700462337,3.451588526796362
58
+ -0.823014995896161,-0.3111561161910711
59
+ -0.6080342751617096,-0.3590153288662821
60
+ -0.9095454221789239,-0.2066401133482456
61
+ -0.3493393384734713,-0.2584260229222476
62
+ -0.22264542062103598,-0.18207920010904594
63
+ -0.4573019364522082,-0.31093047114315553
64
+ 0.6574750183038587,1.7644483791397838
65
+ -0.28649334661282144,-0.22257008197278716
66
+ -0.4381309806252385,-0.30249813296944583
67
+ 0.08539216631649693,0.09336475160627088
68
+ -0.7181515500504747,-0.36064001536563245
69
+ 0.6043939615080793,1.453297871778667
70
+ -0.8508987126404584,-0.28523518784003543
71
+ 0.9737738732010346,5.472703175385714
72
+ 0.5444895385933148,1.1641913865341225
73
+ -0.6025686369316552,-0.358003567605911
74
+ -0.9889557657527952,-0.03205227649515818
75
+ 0.6309228569096683,1.6016363531411766
76
+ 0.41371468769523423,0.7021158672014816
77
+ 0.4580143360819746,0.8372659757400991
78
+ 0.5425406933718915,1.155740382532357
79
+ -0.8519106965318193,-0.2841687433955442
80
+ -0.2830685429114548,-0.22050491198476146
81
+ -0.7682618809497406,-0.3451385720878113
82
+ 0.726206851751187,2.263347366149155
83
+ 0.24659625365511584,0.3272359665979907
84
+ -0.3382039502947016,-0.25235154435602003
85
+ -0.8728832994279527,-0.2599142659035131
86
+ -0.3780353565686756,-0.2735285202338706
87
+ -0.3496333559465059,-0.25858479605828744
88
+ 0.45921235667612814,0.8411918332864734
89
+ 0.27511494271042625,0.3793644954583637
90
+ 0.7744254851526531,2.6925526751560804
91
+ -0.05557014967610141,-0.05264467554027301
92
+ -0.7608115081233966,-0.34828339315956375
93
+ 0.42648957444599,0.7391722123153333
94
+ 0.5215700972337949,1.0682235925713717
95
+ 0.12255439513899247,0.13967131320406578
96
+ 0.541934359909122,1.1531223825090322
97
+ -0.012408807271218514,-0.0122567160440051
98
+ 0.045465658763988115,0.04763124426044632
99
+ -0.14491796328290074,-0.1265738031997899
100
+ -0.9491617465118096,-0.13089031892479408
101
+ -0.7842171460133911,-0.3372934495137382
1_data/benchmarks/nguyen/nguyen_4.meta.txt ADDED
@@ -0,0 +1,6 @@
+ name: nguyen_4
+ equation: x**6 + x**5 + x**4 + x**3 + x**2 + x
+ latex: x^6 + x^5 + x^4 + x^3 + x^2 + x
+ n_vars: 1
+ range: (-1, 1)
+ n_samples: 100
1_data/benchmarks/nguyen/nguyen_5.csv ADDED
@@ -0,0 +1,101 @@
1
+ x_1,y
2
+ -0.250919762305275,-0.9390512081168352
3
+ 0.9014286128198323,-0.5494873180553288
4
+ 0.4639878836228102,-0.8089599566948397
5
+ 0.1973169683940732,-0.9618311304971612
6
+ -0.687962719115127,-0.6478606522679959
7
+ -0.6880109593275947,-0.6478289752250018
8
+ -0.8838327756636011,-0.5534626739045931
9
+ 0.7323522915498704,-0.6200235532537778
10
+ 0.2022300234864176,-0.9599476201534037
11
+ 0.416145155592091,-0.8423936940912042
12
+ -0.9588310114083951,-0.5431689029988238
13
+ 0.9398197043239886,-0.5440918837654212
14
+ 0.6648852816008435,-0.6633160228877684
15
+ -0.5753217786434477,-0.7273325440876017
16
+ -0.6363500655857988,-0.6831457922480837
17
+ -0.6331909802931324,-0.6853819519705832
18
+ -0.39151551408092455,-0.8588685461295467
19
+ 0.04951286326447568,-0.9975514831941417
20
+ -0.13610996271576847,-0.9816464680708747
21
+ -0.4175417196039162,-0.8414428810237741
22
+ 0.22370578944475894,-0.9512230869588283
23
+ -0.7210122786959163,-0.6268666215487173
24
+ -0.4157107029295637,-0.8426891332699092
25
+ -0.2672763134126166,-0.9311584013772981
26
+ -0.08786003156592814,-0.9923104665144623
27
+ 0.5703519227860272,-0.7309939462300948
28
+ -0.6006524356832805,-0.7087806446206153
29
+ 0.02846887682722321,-0.99918985155515
30
+ 0.18482913772408494,-0.9664265762992899
31
+ -0.9070991745600046,-0.5484026596371498
32
+ 0.21508970380287673,-0.9548185804730333
33
+ -0.6589517526254169,-0.6673796194282944
34
+ -0.869896814029441,-0.5572462380695584
35
+ 0.8977710745066665,-0.550238281253637
36
+ 0.9312640661491187,-0.5448920303797724
37
+ 0.6167946962329223,-0.6970965407107139
38
+ -0.39077246165325863,-0.8593564758850973
39
+ -0.8046557719872323,-0.58178568969466
40
+ 0.3684660530243138,-0.8737340760964288
41
+ -0.1196950125207974,-0.9857760980914346
42
+ -0.7559235303104423,-0.6064732977627244
43
+ -0.00964617977745963,-0.9999069555448398
44
+ -0.9312229577695632,-0.5448964424582949
45
+ 0.8186408041575641,-0.5756409511274684
46
+ -0.48244003679996617,-0.7956727484647694
47
+ 0.32504456870796394,-0.900064608947692
48
+ -0.3765778478211781,-0.8685675799244211
49
+ 0.040136042355621626,-0.9983903961284725
50
+ 0.0934205586865593,-0.991310765594519
51
+ -0.6302910889489459,-0.687440925563
52
+ 0.9391692555291171,-0.5441444192356306
53
+ 0.5502656467222291,-0.7458300065420963
54
+ 0.8789978831283782,-0.5547129310790808
55
+ 0.7896547008552977,-0.5888643062279737
56
+ 0.19579995762217028,-0.9624041296078009
57
+ 0.8437484700462337,-0.5657849821252405
58
+ -0.823014995896161,-0.5738128862301527
59
+ -0.6080342751617096,-0.7034212209476314
60
+ -0.9095454221789239,-0.5479648926611902
61
+ -0.3493393384734713,-0.8856176517083943
62
+ -0.22264542062103598,-0.9516723889435259
63
+ -0.4573019364522082,-0.8137278342896282
64
+ 0.6574750183038587,-0.668396180415866
65
+ -0.28649334661282144,-0.9213553931307055
66
+ -0.4381309806252385,-0.8272380555912469
67
+ 0.08539216631649693,-0.9927348114985
68
+ -0.7181515500504747,-0.6286238526457859
69
+ 0.6043939615080793,-0.7060609958180059
70
+ -0.8508987126404584,-0.56326927462963
71
+ 0.9737738732010346,-0.5432897447861715
72
+ 0.5444895385933148,-0.7501016086636129
73
+ -0.6025686369316552,-0.7073869979373362
74
+ -0.9889557657527952,-0.5441925947478524
75
+ 0.6309228569096683,-0.6869918579399132
76
+ 0.41371468769523423,-0.8440443466281196
77
+ 0.4580143360819746,-0.8132211009838715
78
+ 0.5425406933718915,-0.7515428281198429
79
+ -0.8519106965318193,-0.5629240207718037
80
+ -0.2830685429114548,-0.9231433578147953
81
+ -0.7682618809497406,-0.5997746863584423
82
+ 0.726206851751187,-0.6237072834805513
83
+ 0.24659625365511584,-0.9410661846934961
84
+ -0.3382039502947016,-0.8923327139993233
85
+ -0.8728832994279527,-0.5563892940528554
86
+ -0.3780353565686756,-0.8676316179551925
87
+ -0.3496333559465059,-0.8854382694554389
88
+ 0.45921235667612814,-0.8123682362870449
89
+ 0.27511494271042625,-0.927227629948057
90
+ 0.7744254851526531,-0.5965370771413236
91
+ -0.05557014967610141,-0.9969167301387304
92
+ -0.7608115081233966,-0.603785604256095
93
+ 0.42648957444599,-0.8353116478277598
94
+ 0.5215700972337949,-0.7670333944975515
95
+ 0.12255439513899247,-0.9850936334783424
96
+ 0.541934359909122,-0.7519912093984299
97
+ -0.012408807271218514,-0.999846033357251
98
+ 0.045465658763988115,-0.997935011480979
99
+ -0.14491796328290074,-0.9792204513367526
100
+ -0.9491617465118096,-0.5434895142161542
101
+ -0.7842171460133911,-0.5915491851197053
1_data/benchmarks/nguyen/nguyen_5.meta.txt ADDED
@@ -0,0 +1,6 @@
+ name: nguyen_5
+ equation: sin(x**2)*cos(x) - 1
+ latex: \sin(x^2) \cos(x) - 1
+ n_vars: 1
+ range: (-1, 1)
+ n_samples: 100
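Each `.meta.txt` file pairs a benchmark CSV with its generating equation, variable count, and sampling range. The generator script is not part of this diff, so the following is only a sketch of how a CSV in this format could be reproduced; the sampling scheme and seed are assumptions (the leading values in these files match numpy's legacy `RandomState(42)` uniform draws).

```python
# Sketch: regenerate a Nguyen-style benchmark CSV from its .meta.txt fields.
# Assumptions: uniform sampling via numpy's legacy RandomState with seed 42
# (consistent with the first values observed in the CSVs above).
import csv
import numpy as np

def make_benchmark(path, fn, n_vars, lo, hi, n_samples=100, seed=42):
    rng = np.random.RandomState(seed)
    # Draw n_samples points uniformly in [lo, hi) for each variable.
    X = lo + (hi - lo) * rng.rand(n_samples, n_vars)
    y = fn(*(X[:, i] for i in range(n_vars)))
    with open(path, "w", newline="") as f:
        w = csv.writer(f)
        w.writerow([f"x_{i + 1}" for i in range(n_vars)] + ["y"])
        for row, yi in zip(X, y):
            w.writerow(list(row) + [yi])

# Example: nguyen_5, sin(x**2)*cos(x) - 1 sampled on (-1, 1).
make_benchmark("nguyen_5.csv", lambda x: np.sin(x**2) * np.cos(x) - 1,
               n_vars=1, lo=-1.0, hi=1.0)
```

With these assumptions the first row comes out as `x_1 = -0.250919762305275`, matching the single-variable benchmarks above.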
1_data/benchmarks/nguyen/nguyen_6.csv ADDED
@@ -0,0 +1,101 @@
1
+ x_1,y
2
+ -0.250919762305275,-0.43514929053571705
3
+ 0.9014286128198323,1.7739777070144762
4
+ 0.4639878836228102,1.0757452027433314
5
+ 0.1973169683940732,0.4300984433827959
6
+ -0.687962719115127,-0.8479896478976541
7
+ -0.6880109593275947,-0.8480091939305666
8
+ -0.8838327756636011,-0.8756673756307944
9
+ 0.7323522915498704,1.623333209866147
10
+ 0.2022300234864176,0.4415932453735026
11
+ 0.416145155592091,0.9600350888652831
12
+ -0.9588310114083951,-0.8579844227187403
13
+ 0.9398197043239886,1.7757964620012405
14
+ 0.6648852816008435,1.511310612392346
15
+ -0.5753217786434477,-0.7860078048335528
16
+ -0.6363500655857988,-0.8236127344518008
17
+ -0.6331909802931324,-0.8218977378877982
18
+ -0.39151551408092455,-0.6175737622323656
19
+ 0.04951286326447568,0.10143363891812185
20
+ -0.13610996271576847,-0.2530033664640832
21
+ -0.4175417196039162,-0.646324893603273
22
+ 0.22370578944475894,0.4921883475197036
23
+ -0.7210122786959163,-0.859945147297601
24
+ -0.4157107029295637,-0.6443541593670672
25
+ -0.2672763134126166,-0.4586956789581246
26
+ -0.08786003156592814,-0.16780192738539715
27
+ 0.5703519227860272,1.3205458271668318
28
+ -0.6006524356832805,-0.8027562953942011
29
+ 0.02846887682722321,0.0577402019392117
30
+ 0.18482913772408494,0.40102336487359014
31
+ -0.9070991745600046,-0.8718906080781982
32
+ 0.21508970380287673,0.4718231825909881
33
+ -0.6589517526254169,-0.8351357987774414
34
+ -0.869896814029441,-0.8771972870866676
35
+ 0.8977710745066665,1.773112261855638
36
+ 0.9312640661491187,1.7765585415628578
37
+ 0.6167946962329223,1.4183947871732054
38
+ -0.39077246165325863,-0.616729624340086
39
+ -0.8046557719872323,-0.8771304085531529
40
+ 0.3684660530243138,0.8433211553872129
41
+ -0.1196950125207974,-0.22458265836212168
42
+ -0.7559235303104423,-0.8694190928201299
43
+ -0.00964617977745963,-0.019199015870959124
44
+ -0.9312229577695632,-0.8663534501230806
45
+ 0.8186408041575641,1.7268591780806577
46
+ -0.48244003679996617,-0.7110472800982361
47
+ 0.32504456870796394,0.7368566710350718
48
+ -0.3765778478211781,-0.6003566284265445
49
+ 0.040136042355621626,0.08186008651372706
50
+ 0.0934205586865593,0.19525514485076978
51
+ -0.6302910889489459,-0.8203010105715378
52
+ 0.9391692555291171,1.7758783124934705
53
+ 0.5502656467222291,1.2762087516369758
54
+ 0.8789978831283782,1.7668343124729904
55
+ 0.7896547008552977,1.6977190241718285
56
+ 0.19579995762217028,0.42655545364748826
57
+ 0.8437484700462337,1.7470252942741467
58
+ -0.823014995896161,-0.8783461682408447
59
+ -0.6080342751617096,-0.8073339482620789
60
+ -0.9095454221789239,-0.8714044274820212
61
+ -0.3493393384734713,-0.5676262516988537
62
+ -0.22264542062103598,-0.39302217898548675
63
+ -0.4573019364522082,-0.6871659884222171
64
+ 0.6574750183038587,1.497630717093317
65
+ -0.28649334661282144,-0.48558453002226076
66
+ -0.4381309806252385,-0.667941100272472
67
+ 0.08539216631649693,0.17783977455509073
68
+ -0.7181515500504747,-0.8590244862623294
69
+ 0.6043939615080793,1.3929716850978804
70
+ -0.8508987126404584,-0.8784032535275912
71
+ 0.9737738732010346,1.7659692545236667
72
+ 0.5444895385933148,1.2632639852448593
73
+ -0.6025686369316552,-0.8039577784468424
74
+ -0.9889557657527952,-0.8463746041245153
75
+ 0.6309228569096683,1.4466669850764435
76
+ 0.41371468769523423,0.9541081599557866
77
+ 0.4580143360819746,1.0614213526661842
78
+ 0.5425406933718915,1.2588779524345566
79
+ -0.8519106965318193,-0.8783645297620329
80
+ -0.2830685429114548,-0.48085397416735837
81
+ -0.7682618809497406,-0.8719829040159035
82
+ 0.726206851751187,1.614146688730815
83
+ 0.24659625365511584,0.5466918275185086
84
+ -0.3382039502947016,-0.5537512584147581
85
+ -0.8728832994279527,-0.8769154903633847
86
+ -0.3780353565686756,-0.602059443902563
87
+ -0.3496333559465059,-0.5679887252654134
88
+ 0.45921235667612814,1.06429743105736
89
+ 0.27511494271042625,0.6153097358245831
90
+ 0.7744254851526531,1.6800348672828789
91
+ -0.05557014967610141,-0.10799957252353272
92
+ -0.7608115081233966,-0.8704840611139395
93
+ 0.42648957444599,0.985218668558129
94
+ 0.5215700972337949,1.2111284238136855
95
+ 0.12255439513899247,0.25938825702662677
96
+ 0.541934359909122,1.2575114815069453
97
+ -0.012408807271218514,-0.024663010860991184
98
+ 0.045465658763988115,0.09296488443027026
99
+ -0.14491796328290074,-0.26801111295446844
100
+ -0.9491617465118096,-0.8611626229977559
101
+ -0.7842171460133911,-0.8746853295473812
1_data/benchmarks/nguyen/nguyen_6.meta.txt ADDED
@@ -0,0 +1,6 @@
1
+ name: nguyen_6
2
+ equation: sin(x) + sin(x + x**2)
3
+ latex: \sin(x) + \sin(x + x^2)
4
+ n_vars: 1
5
+ range: (-1, 1)
6
+ n_samples: 100
1_data/benchmarks/nguyen/nguyen_7.csv ADDED
@@ -0,0 +1,101 @@
1
+ x_1,y
2
+ 0.749080237694725,1.0044943539710771
3
+ 1.9014286128198323,2.5946084456041874
4
+ 1.4639878836228102,2.04704177271813
5
+ 1.1973169683940732,1.6765955177464844
6
+ 0.31203728088487304,0.3644950207193922
7
+ 0.3119890406724053,0.36443082009006955
8
+ 0.11616722433639892,0.12330527518118403
9
+ 1.7323522915498704,2.3917183259947485
10
+ 1.2022300234864176,1.683661630559711
11
+ 1.416145155592091,1.9826063566319803
12
+ 0.041168988591604894,0.042037560323179395
13
+ 1.9398197043239886,2.6392050827257325
14
+ 1.6648852816008435,2.307724752047451
15
+ 0.4246782213565523,0.5197583318072219
16
+ 0.36364993441420124,0.4343639775100977
17
+ 0.36680901970686763,0.43871392942129606
18
+ 0.6084844859190754,0.7902880866466716
19
+ 1.0495128632644757,1.4602426871151915
20
+ 0.8638900372842315,1.1801684194501334
21
+ 0.5824582803960838,0.7510949783115568
22
+ 1.223705789444759,1.7144476360772682
23
+ 0.27898772130408367,0.3210225319850415
24
+ 0.5842892970704363,0.7538452725717105
25
+ 0.7327236865873834,0.9794515463618694
26
+ 0.9121399684340719,1.2536309133704537
27
+ 1.5703519227860272,2.187045504912016
28
+ 0.39934756431671947,0.4839764178477628
29
+ 1.0284688768272232,1.4288935535974223
30
+ 1.184829137724085,1.6585968019236683
31
+ 0.09290082543999545,0.09742900194019119
32
+ 1.2150897038028767,1.702116152706024
33
+ 0.34104824737458306,0.4034836854209194
34
+ 0.13010318597055903,0.13909411999883536
35
+ 1.8977710745066665,2.5903318010915215
36
+ 1.9312640661491187,2.629312630527168
37
+ 1.6167946962329223,2.2467723049533905
38
+ 0.6092275383467414,0.7914100534079171
39
+ 0.19534422801276774,0.21588350693408828
40
+ 1.3684660530243138,1.917494634319942
41
+ 0.8803049874792026,1.2051988477172373
42
+ 0.24407646968955765,0.27625976490939214
43
+ 0.9903538202225404,1.3718135735019552
44
+ 0.06877704223043679,0.07123417253294272
45
+ 1.8186408041575641,2.4966018816848856
46
+ 0.5175599632000338,0.6544407561341439
47
+ 1.325044568707964,1.8574263454835047
48
+ 0.6234221521788219,0.8128721430860975
49
+ 1.0401360423556216,1.4462892592771355
50
+ 1.0934205586865593,1.5252403780918733
51
+ 0.3697089110510541,0.44271409262957717
52
+ 1.9391692555291171,2.6384539264232347
53
+ 1.550265646722229,2.160950005266661
54
+ 1.8789978831283782,2.5683039510702823
55
+ 1.7896547008552977,2.461684010396808
56
+ 1.1957999576221703,1.6744119782815021
57
+ 1.8437484700462337,2.5265931477776116
58
+ 0.176985004103839,0.19379920212879384
59
+ 0.3919657248382904,0.47363661139990393
60
+ 0.09045457782107613,0.09474339248156945
61
+ 0.6506606615265287,0.8541953881759485
62
+ 0.777354579378964,1.0478012138309305
63
+ 0.5426980635477918,0.6916737607221609
64
+ 1.6574750183038587,2.298391579491666
65
+ 0.7135066533871786,0.9500499213476303
66
+ 0.5618690193747615,0.7202496008381812
67
+ 1.085392166316497,1.513398916251658
68
+ 0.2818484499495253,0.3247441814248926
69
+ 1.6043939615080793,2.230907799621039
70
+ 0.14910128735954165,0.1609678309146809
71
+ 1.9737738732010346,2.6782060947078543
72
+ 1.5444895385933148,2.153416282610123
73
+ 0.3974313630683448,0.4812884691333153
74
+ 0.011044234247204798,0.011105659717543844
75
+ 1.6309228569096683,2.2647730643008543
76
+ 1.4137146876952342,1.9793088424715877
77
+ 1.4580143360819746,2.0390459508436884
78
+ 1.5425406933718915,2.150871440176907
79
+ 0.14808930346818072,0.15979251395522687
80
+ 0.7169314570885452,0.9552876826431883
81
+ 0.23173811905025943,0.26073648199843447
82
+ 1.726206851751187,2.3841402894128048
83
+ 1.2465962536551158,1.7470779213505572
84
+ 0.6617960497052984,0.8711341615273885
85
+ 0.12711670057204727,0.13569227346664953
86
+ 0.6219646434313244,0.8106659478651683
87
+ 0.6503666440534941,0.8537484672462254
88
+ 1.4592123566761281,2.0406506801223507
89
+ 1.2751149427104262,1.7874611691912827
90
+ 1.774425485152653,2.4432111990381067
91
+ 0.9444298503238986,1.302575647854838
92
+ 0.2391884918766034,0.27009115923794424
93
+ 1.42648957444599,1.996615009036411
94
+ 1.5215700972337949,2.1233923437150204
95
+ 1.1225543951389925,1.5680418793142075
96
+ 1.541934359909122,2.150079370436032
97
+ 0.9875911927287815,1.3676621443430883
98
+ 1.0454656587639881,1.454223169438932
99
+ 0.8550820367170993,1.1667236792399285
100
+ 0.05083825348819038,0.05216937619240417
101
+ 0.2157828539866089,0.24089892921884035
1_data/benchmarks/nguyen/nguyen_7.meta.txt ADDED
@@ -0,0 +1,6 @@
1
+ name: nguyen_7
2
+ equation: log(x + 1) + log(x**2 + 1)
3
+ latex: \ln(x+1) + \ln(x^2+1)
4
+ n_vars: 1
5
+ range: (0, 2)
6
+ n_samples: 100
1_data/benchmarks/nguyen/nguyen_8.csv ADDED
@@ -0,0 +1,101 @@
1
+ x_1,y
2
+ 1.49816047538945,1.2239936582308955
3
+ 3.8028572256396647,1.9500915941667112
4
+ 2.9279757672456204,1.7111328900017146
5
+ 2.3946339367881464,1.547460479879259
6
+ 0.6240745617697461,0.7899838996902064
7
+ 0.6239780813448106,0.789922832525311
8
+ 0.23233444867279784,0.48201083875033124
9
+ 3.4647045830997407,1.8613716939665061
10
+ 2.404460046972835,1.5506321443117432
11
+ 2.832290311184182,1.6829409707961185
12
+ 0.08233797718320979,0.2869459481909612
13
+ 3.8796394086479773,1.9696800269708725
14
+ 3.329770563201687,1.8247658927110861
15
+ 0.8493564427131046,0.9216053616994123
16
+ 0.7272998688284025,0.8528187784215369
17
+ 0.7336180394137353,0.8565150549837027
18
+ 1.216968971838151,1.103163166461857
19
+ 2.0990257265289514,1.4488014793369557
20
+ 1.727780074568463,1.3144504838785154
21
+ 1.1649165607921677,1.0793130040874
22
+ 2.447411578889518,1.564420524951497
23
+ 0.5579754426081673,0.7469775382219785
24
+ 1.1685785941408726,1.0810081378698648
25
+ 1.4654473731747668,1.2105566377393364
26
+ 1.8242799368681437,1.3506590749956644
27
+ 3.1407038455720544,1.7722031050565437
28
+ 0.7986951286334389,0.893697448040129
29
+ 2.0569377536544464,1.4342028286314479
30
+ 2.36965827544817,1.5393694408582268
31
+ 0.1858016508799909,0.4310471562137847
32
+ 2.4301794076057535,1.5589032707662633
33
+ 0.6820964947491661,0.825891333499248
34
+ 0.26020637194111806,0.5101042755565944
35
+ 3.795542149013333,1.948215118772394
36
+ 3.8625281322982374,1.965331557854358
37
+ 3.2335893924658445,1.7982183939849588
38
+ 1.2184550766934827,1.1038365262544463
39
+ 0.3906884560255355,0.6250507627589422
40
+ 2.7369321060486276,1.6543675849244108
41
+ 1.7606099749584052,1.3268797891890602
42
+ 0.4881529393791153,0.698679425329754
43
+ 1.9807076404450807,1.4073761545674564
44
+ 0.13755408446087358,0.370882844657007
45
+ 3.6372816083151283,1.9071658575790225
46
+ 1.0351199264000677,1.017408436371582
47
+ 2.650089137415928,1.6279094377194108
48
+ 1.2468443043576438,1.1166218269215606
49
+ 2.0802720847112433,1.4423148355027218
50
+ 2.1868411173731186,1.4787971860174467
51
+ 0.7394178221021082,0.8598940760943223
52
+ 3.8783385110582342,1.9693497685932366
53
+ 3.100531293444458,1.7608325569015522
54
+ 3.7579957662567565,1.9385550717626663
55
+ 3.5793094017105953,1.8919062877718325
56
+ 2.3915999152443406,1.546479846375096
57
+ 3.6874969400924673,1.920285640234928
58
+ 0.353970008207678,0.5949537866151269
59
+ 0.7839314496765808,0.8853990341515969
60
+ 0.18090915564215226,0.4253341693799738
61
+ 1.3013213230530574,1.1407547164281449
62
+ 1.554709158757928,1.2468797691669906
63
+ 1.0853961270955836,1.0418234625384397
64
+ 3.3149500366077174,1.8207004247288232
65
+ 1.4270133067743571,1.194576622395716
66
+ 1.123738038749523,1.0600651106179861
67
+ 2.170784332632994,1.473358182056554
68
+ 0.5636968998990506,0.750797509252029
69
+ 3.2087879230161587,1.7913089970789962
70
+ 0.2982025747190833,0.5460792751232034
71
+ 3.947547746402069,1.9868436643083092
72
+ 3.0889790771866297,1.7575491677863893
73
+ 0.7948627261366896,0.8915507423229985
74
+ 0.022088468494409597,0.14862189776210502
75
+ 3.2618457138193366,1.806058059371109
76
+ 2.8274293753904685,1.6814961716847494
77
+ 2.9160286721639492,1.7076383317798736
78
+ 3.085081386743783,1.756439975274926
79
+ 0.29617860693636144,0.5442229386348589
80
+ 1.4338629141770904,1.197440150561643
81
+ 0.46347623810051886,0.6807908916110136
82
+ 3.452413703502374,1.8580671956370076
83
+ 2.4931925073102317,1.578984644418758
84
+ 1.3235920994105967,1.1504747278452476
85
+ 0.25423340114409454,0.5042156296110768
86
+ 1.2439292868626488,1.1153157789893626
87
+ 1.3007332881069882,1.1404969478727194
88
+ 2.9184247133522563,1.7083397534894094
89
+ 2.5502298854208525,1.5969439205622884
90
+ 3.548850970305306,1.8838394226433701
91
+ 1.8888597006477972,1.3743579230490859
92
+ 0.4783769837532068,0.6916480201324998
93
+ 2.85297914889198,1.689076418902348
94
+ 3.0431401944675898,1.7444598575110835
95
+ 2.245108790277985,1.4983687097233394
96
+ 3.083868719818244,1.7560947354337817
97
+ 1.975182385457563,1.4054118205912327
98
+ 2.0909313175279762,1.4460052965075807
99
+ 1.7101640734341985,1.3077324166029527
100
+ 0.10167650697638075,0.3188675382919697
101
+ 0.4315657079732178,0.6569366087935866
1_data/benchmarks/nguyen/nguyen_8.meta.txt ADDED
@@ -0,0 +1,6 @@
1
+ name: nguyen_8
2
+ equation: sqrt(x)
3
+ latex: \sqrt{x}
4
+ n_vars: 1
5
+ range: (0, 4)
6
+ n_samples: 100
1_data/benchmarks/nguyen/nguyen_9.csv ADDED
@@ -0,0 +1,101 @@
1
+ x_1,x_2,y
2
+ -0.250919762305275,0.9014286128198323,0.47776420484711724
3
+ 0.4639878836228102,0.1973169683940732,0.48644207085812474
4
+ -0.687962719115127,-0.6880109593275947,-0.17908604352731888
5
+ -0.8838327756636011,0.7323522915498704,-0.26218203193510137
6
+ 0.2022300234864176,0.416145155592091,0.37316689293318206
7
+ -0.9588310114083951,0.9398197043239886,-0.0457079947346013
8
+ 0.6648852816008435,-0.5753217786434477,0.9419531756947639
9
+ -0.6363500655857988,-0.6331909802931324,-0.2039883598325784
10
+ -0.39151551408092455,0.04951286326447568,-0.37913816951228
11
+ -0.13610996271576847,-0.4175417196039162,0.037769159921857864
12
+ 0.22370578944475894,-0.7210122786959163,0.718602102079344
13
+ -0.4157107029295637,-0.2672763134126166,-0.33246432027777695
14
+ -0.08786003156592814,0.5703519227860272,0.23184727590209103
15
+ -0.6006524356832805,0.02846887682722321,-0.5643703547272768
16
+ 0.18482913772408494,-0.9070991745600046,0.9168514297790041
17
+ 0.21508970380287673,-0.6589517526254169,0.6341356329828418
18
+ -0.869896814029441,0.8977710745066665,-0.04274391665004529
19
+ 0.9312640661491187,0.6167946962329223,1.1737000528120816
20
+ -0.39077246165325863,-0.8046557719872323,0.2222683475391934
21
+ 0.3684660530243138,-0.1196950125207974,0.37451127227879794
22
+ -0.7559235303104423,-0.00964617977745963,-0.6858679083778902
23
+ -0.9312229577695632,0.8186408041575641,-0.1812290712696445
24
+ -0.48244003679996617,0.32504456870796394,-0.35848458267243266
25
+ -0.3765778478211781,0.040136042355621626,-0.3661293707647741
26
+ 0.0934205586865593,-0.6302910889489459,0.4801842308956415
27
+ 0.9391692555291171,0.5502656467222291,1.1052544786389042
28
+ 0.8789978831283782,0.7896547008552977,1.3540244447960008
29
+ 0.19579995762217028,0.8437484700462337,0.8478334428521738
30
+ -0.823014995896161,-0.6080342751617096,-0.3718583841132946
31
+ -0.9095454221789239,-0.3493393384734713,-0.6674893880400039
32
+ -0.22264542062103598,-0.4573019364522082,-0.01320641283786203
33
+ 0.6574750183038587,-0.28649334661282144,0.6931064937612617
34
+ -0.4381309806252385,0.08539216631649693,-0.4169559662599447
35
+ -0.7181515500504747,0.6043939615080793,-0.30077177092335655
36
+ -0.8508987126404584,0.9737738732010346,0.060514655114857874
37
+ 0.5444895385933148,-0.6025686369316552,0.8731450114593164
38
+ -0.9889557657527952,0.6309228569096683,-0.44781844397033194
39
+ 0.41371468769523423,0.4580143360819746,0.6102553103342886
40
+ 0.5425406933718915,-0.8519106965318193,1.1800115078177271
41
+ -0.2830685429114548,-0.7682618809497406,0.27724568254954496
42
+ 0.726206851751187,0.24659625365511584,0.7248105293625019
43
+ -0.3382039502947016,-0.8728832994279527,0.3585223402710981
44
+ -0.3780353565686756,-0.3496333559465059,-0.24715600583220282
45
+ 0.45921235667612814,0.27511494271042625,0.5188581859931962
46
+ 0.7744254851526531,-0.05557014967610141,0.7023935510282614
47
+ -0.7608115081233966,0.42648957444599,-0.5086174096415731
48
+ 0.5215700972337949,0.12255439513899247,0.5132611003903497
49
+ 0.541934359909122,-0.012408807271218514,0.515948124519243
50
+ 0.045465658763988115,-0.14491796328290074,0.06644966885563074
51
+ -0.9491617465118096,-0.7842171460133911,-0.23597193139241113
52
+ -0.9371416286265315,0.2728208225275608,-0.7315064751214
53
+ -0.37128803784734665,0.01714138232940554,-0.362522177588772
54
+ 0.815132947852186,-0.5014155417022501,0.9765939645263036
55
+ -0.17923415392874054,0.5111022770860973,0.07998865766083815
56
+ -0.5424036690167551,-0.846040180342414,0.14001304604376585
57
+ -0.42049709417246395,-0.6775574254919912,0.034912901016997455
58
+ 0.8593953046851461,0.6162407591288339,1.1281386729129697
59
+ 0.26680751302084693,0.7429211803754354,0.7879864844362704
60
+ 0.6073441537982289,-0.6268598822279283,0.95360688133832
61
+ 0.7851179969799555,0.07868448383130144,0.7130998543064418
62
+ 0.6148803103281251,0.7921825998469865,1.164026731337986
63
+ -0.36399305005627225,-0.7798961509446465,0.21541385909859811
64
+ -0.5441296749161166,-0.14578442274748737,-0.496422157556886
65
+ 0.6360295318449862,0.7214611665126869,1.0913254238549446
66
+ -0.9860957389376186,0.021494605155131463,-0.8334153656803461
67
+ -0.16517799370244202,-0.5557843790585395,0.1395794327899027
68
+ -0.7602692653326344,-0.3247696571927441,-0.5838367239902751
69
+ 0.8858194078250383,-0.35359413595848954,0.8991369711397181
70
+ 0.037581243486732197,0.4060379177903557,0.2016933272194322
71
+ -0.27274079524141204,0.9435641654419213,0.5078969955272314
72
+ 0.9248945898842225,-0.4964354083492717,1.0425182593662667
73
+ -0.005502988215229099,-0.3982433803664607,0.15243079003847154
74
+ -0.4303190112450648,-0.9262261052909344,0.3393066204879884
75
+ 0.2191286679597937,0.005358046457722976,0.217407918292659
76
+ -0.8970424975000213,-0.44270707152677713,-0.586747836651068
77
+ 0.8165317719333074,-0.5208762186660552,0.9967710464625614
78
+ -0.7102102558175538,-0.02109447944487397,-0.6515482295602607
79
+ 0.9713009082212014,-0.5158894569769992,1.0886315849160677
80
+ 0.3442710948117571,0.5232392306574352,0.6078825279565068
81
+ -0.5247249120152007,0.45643269722371915,-0.29414785755880746
82
+ -0.26443373456149355,0.26461166118715895,-0.1914005964405623
83
+ 0.2670594215217894,0.07154936814951696,0.26901553332662537
84
+ -0.8194204598911834,0.6706049911784759,-0.2960449968694709
85
+ -0.35843987005652833,-0.6269629792002915,0.03222402345128389
86
+ -0.9184497168904722,0.18178588637648363,-0.7616213774179811
87
+ 0.35512872368456483,-0.9668243421442877,1.152161247302312
88
+ 0.024186116598561958,-0.5470084496041241,0.31895703499280215
89
+ 0.2903455808188997,-0.6512671419900171,0.6978285972621991
90
+ 0.3818754762049319,-0.22652930739892518,0.4239545131674068
91
+ 0.873459977473469,-0.7249581117080135,1.268256638618119
92
+ -0.317867297899483,-0.7730529575188219,0.2501276235695559
93
+ 0.8493872365571256,0.754678706761962,1.290120526044866
94
+ -0.4841167445696888,0.3199680920683581,-0.36322596002563673
95
+ 0.6344444004024317,0.11040162319892466,0.6049183419335217
96
+ 0.05930115671201297,-0.5162954181990966,0.32268182428446945
97
+ -0.8137944643882016,0.7944315159066535,-0.13684769483552173
98
+ 0.8008361143266609,0.2662029145465359,0.7887430634679737
99
+ -0.32194041790259864,-0.3015808507746782,-0.22558221167734632
100
+ 0.45191135774047875,0.7942205199051542,1.0264656897505602
101
+ 0.7741728485302346,0.5597510917152477,1.0073448206218134
1_data/benchmarks/nguyen/nguyen_9.meta.txt ADDED
@@ -0,0 +1,6 @@
1
+ name: nguyen_9
2
+ equation: sin(x) + sin(y**2)
3
+ latex: \sin(x) + \sin(y^2)
4
+ n_vars: 2
5
+ range: (-1, 1)
6
+ n_samples: 100
1_data/processed/700K_prefix_converted/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e6c9ddae5a332af4ee05d1aebbb913757316ce685a179712212e6e8334ba5f1
3
+ size 7296336
1_data/processed/700K_prefix_converted/dataset_info.json ADDED
@@ -0,0 +1,82 @@
1
+ {
2
+ "builder_name": "csv",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "sintetico_natural",
6
+ "dataset_size": 5704135,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/augustocsc/sintetico_natural@fe48ddc600c4674bcff395308dc8d7c32aed6135/mini_test/test_mini_test.csv": {
10
+ "num_bytes": 560871,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/augustocsc/sintetico_natural@fe48ddc600c4674bcff395308dc8d7c32aed6135/mini_test/train_mini_test.csv": {
14
+ "num_bytes": 4378438,
15
+ "checksum": null
16
+ },
17
+ "hf://datasets/augustocsc/sintetico_natural@fe48ddc600c4674bcff395308dc8d7c32aed6135/mini_test/val_mini_test.csv": {
18
+ "num_bytes": 545160,
19
+ "checksum": null
20
+ }
21
+ },
22
+ "download_size": 5484469,
23
+ "features": {
24
+ "infix_expr_n": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ },
28
+ "infix_expr_c": {
29
+ "dtype": "string",
30
+ "_type": "Value"
31
+ },
32
+ "expression_objects": {
33
+ "dtype": "string",
34
+ "_type": "Value"
35
+ },
36
+ "prefix_expr_c": {
37
+ "dtype": "string",
38
+ "_type": "Value"
39
+ },
40
+ "prefix_expr_n": {
41
+ "dtype": "string",
42
+ "_type": "Value"
43
+ },
44
+ "i_prompt_n": {
45
+ "dtype": "string",
46
+ "_type": "Value"
47
+ },
48
+ "p_prompt_n": {
49
+ "dtype": "string",
50
+ "_type": "Value"
51
+ },
52
+ "skeleton": {
53
+ "dtype": "string",
54
+ "_type": "Value"
55
+ },
56
+ "p_prompt_n_converted": {
57
+ "dtype": "string",
58
+ "_type": "Value"
59
+ },
60
+ "conversion_success": {
61
+ "dtype": "bool",
62
+ "_type": "Value"
63
+ }
64
+ },
65
+ "homepage": "",
66
+ "license": "",
67
+ "size_in_bytes": 11188604,
68
+ "splits": {
69
+ "test": {
70
+ "name": "test",
71
+ "num_bytes": 5704135,
72
+ "num_examples": 12221,
73
+ "dataset_name": "sintetico_natural"
74
+ }
75
+ },
76
+ "version": {
77
+ "version_str": "0.0.0",
78
+ "major": 0,
79
+ "minor": 0,
80
+ "patch": 0
81
+ }
82
+ }
1_data/processed/700K_prefix_converted/state.json ADDED
@@ -0,0 +1,13 @@
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "d4d2d5394689c8df",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "test"
13
+ }
1_data/processed/PREFIX_CONVERSION_README.md ADDED
@@ -0,0 +1,214 @@
1
+ # Infix → Prefix Dataset Conversion
2
+
3
+ ## Goal
4
+
5
+ Convert the `augustocsc/sintetico_natural` dataset from **infix** notation to **prefix (Polish) notation**, keeping the same mathematical expressions but in prefixed form.
6
+
7
+ ## Motivation
8
+
9
+ The original dataset contains:
10
+ - `i_prompt_n`: prompts with expressions in infix notation (e.g., `x_1 + x_2`)
11
+ - `p_prompt_n`: prompts with expressions in prefix notation (e.g., `+ x_1 x_2`)
12
+
13
+ **PROBLEM**: the expressions in `i_prompt_n` and `p_prompt_n` are DIFFERENT! They are not the same expression converted.
14
+
15
+ **SOLUTION**: automatically convert the expressions from `i_prompt_n` to prefix notation, creating `p_prompt_n_converted`.
16
+
17
+ ## Conversion Script
18
+
19
+ **File**: `scripts/data/convert_infix_to_prefix.py`
20
+
21
+ **What it does**:
22
+ 1. Reads infix expressions from the `i_prompt_n` field
23
+ 2. Parses them with SymPy
24
+ 3. Converts them to prefix (Polish) notation
25
+ 4. Keeps the same variables and operators as the original prompt
26
+ 5. Saves the result in a new `p_prompt_n_converted` column
27
+
28
+ ## Conversion Examples
29
+
30
+ ### Example 1
31
+ **INFIX**:
32
+ ```
33
+ vars: x_1, x_2, x_3, x_4, x_5
34
+ oper: *, +, -, /, abs, asin, cos, exp, log, sin, sqrt, tan
35
+ cons: C
36
+ expr: x_2 - (x_5 - C)*(x_4 + exp(C*x_2) + C)
37
+ ```
38
+
39
+ **PREFIX**:
40
+ ```
41
+ vars: x_1, x_2, x_3, x_4, x_5
42
+ oper: *, +, -, /, abs, asin, cos, exp, log, sin, sqrt, tan
43
+ cons: C
44
+ expr: - x_2 * - x_5 C + + x_4 exp * C x_2 C
45
+ ```
46
+
47
+ ### Example 2
48
+ **INFIX**:
49
+ ```
50
+ vars: x_1, x_2, x_3
51
+ oper: +, -, /, abs, cos, exp
52
+ cons: C
53
+ expr: (x_1 - C)/(x_2 + C)
54
+ ```
55
+
56
+ **PREFIX**:
57
+ ```
58
+ vars: x_1, x_2, x_3
59
+ oper: +, -, /, abs, cos, exp
60
+ cons: C
61
+ expr: / - x_1 C + x_2 C
62
+ ```
63
+
64
+ ### Example 3
65
+ **INFIX**:
66
+ ```
67
+ vars: x_1, x_2, x_3, x_4, x_5, x_6, x_7, x_8, x_9, x_10
68
+ oper: *, +, /, asin, sin, tan
69
+ cons: C
70
+ expr: (tan(x_7) + C)*(asin(x_5) + C)
71
+ ```
72
+
73
+ **PREFIX**:
74
+ ```
75
+ vars: x_1, x_2, x_3, x_4, x_5, x_6, x_7, x_8, x_9, x_10
76
+ oper: *, +, /, asin, sin, tan
77
+ cons: C
78
+ expr: * + tan x_7 C + asin x_5 C
79
+ ```
80
+
81
+ ## Conversion Rules
82
+
83
+ ### Binary Operators
84
+ - Infix: `a + b` → Prefix: `+ a b`
85
+ - Infix: `a - b` → Prefix: `- a b`
86
+ - Infix: `a * b` → Prefix: `* a b`
87
+ - Infix: `a / b` → Prefix: `/ a b`
88
+ - Infix: `a ** b` → Prefix: `** a b`
89
+
90
+ ### Unary Functions
91
+ - Infix: `sin(x)` → Prefix: `sin x`
92
+ - Infix: `exp(x)` → Prefix: `exp x`
93
+ - Infix: `log(x)` → Prefix: `log x`
94
+
95
+ ### Complex Expressions
96
+ - Infix: `sin(x**2)` → Prefix: `sin ** x 2`
97
+ - Infix: `x*(y + z)` → Prefix: `* x + y z`
98
+ - Infix: `(a + b)*(c + d)` → Prefix: `* + a b + c d`
99
+
100
+ ### Special Cases
101
+ - **Negation**: `-x` → `* -1 x`
102
+ - **Multiple additions**: `a + b + c` → `+ + a b c` (left-nested)
103
+ - **Division**: `x/y` → `/ x y` (SymPy represents this as `x * y**-1`; the conversion detects and fixes it)
104
+
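The rules above amount to a recursive tree walk over the parsed expression. The real script parses with SymPy; the following simplified, stdlib-only sketch uses Python's `ast` module instead, and its function names (`to_prefix`, `infix_to_prefix`) are illustrative, not the script's actual API:

```python
import ast

# Binary operator tokens, mirroring the conversion rules above
OPS = {ast.Add: "+", ast.Sub: "-", ast.Mult: "*", ast.Div: "/", ast.Pow: "**"}

def to_prefix(node):
    if isinstance(node, ast.Expression):
        return to_prefix(node.body)
    if isinstance(node, ast.BinOp):
        # Left-associative parsing yields left-nested prefix: a + b + c -> + + a b c
        return f"{OPS[type(node.op)]} {to_prefix(node.left)} {to_prefix(node.right)}"
    if isinstance(node, ast.UnaryOp) and isinstance(node.op, ast.USub):
        return f"* -1 {to_prefix(node.operand)}"  # negation rule: -x -> * -1 x
    if isinstance(node, ast.Call):
        # Unary function application: sin(x) -> sin x
        return f"{node.func.id} {to_prefix(node.args[0])}"
    if isinstance(node, ast.Name):
        return node.id
    if isinstance(node, ast.Constant):
        return str(node.value)
    raise ValueError(f"unsupported node: {ast.dump(node)}")

def infix_to_prefix(expr: str) -> str:
    return to_prefix(ast.parse(expr, mode="eval"))

print(infix_to_prefix("(x_1 - C)/(x_2 + C)"))  # / - x_1 C + x_2 C
```

This sketch already reproduces Example 2 above; the SymPy-based script additionally has to undo SymPy's internal rewrites of subtraction and division noted under Special Cases.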
105
+ ## Script Usage
106
+
107
+ ### Test (10 examples)
108
+ ```bash
109
+ python scripts/data/convert_infix_to_prefix.py --test_only
110
+ ```
111
+
112
+ ### Full Conversion
113
+ ```bash
114
+ python scripts/data/convert_infix_to_prefix.py \
115
+ --split test \
116
+ --output_path ./1_data/processed/700K_prefix_converted
117
+ ```
118
+
119
+ ### Upload to HuggingFace
120
+ ```bash
121
+ python scripts/data/convert_infix_to_prefix.py \
122
+ --split test \
123
+ --output_path ./1_data/processed/700K_prefix_converted \
124
+ --upload \
125
+ --repo_id augustocsc/sintetico_natural_prefix_converted
126
+ ```
127
+
128
+ ## Dataset Output
129
+
130
+ **Added columns**:
131
+ - `p_prompt_n_converted`: prefix prompt converted from the infix one
132
+ - `conversion_success`: boolean indicating whether the conversion succeeded
133
+
134
+ **Expected success rate**: ~99%+
135
+
136
+ ## Advantages
137
+
138
+ 1. **Comparability**: prefix models can now be trained on the SAME expressions as infix models
139
+ 2. **Consistency**: keeps the vars/operators/constants of the original prompt
140
+ 3. **Reproducibility**: the conversion is automatic and deterministic
141
+ 4. **Scalability**: easy to apply to new datasets
142
+
143
+ ## Training with the Converted Dataset
144
+
145
+ ### Using the local dataset
146
+ ```bash
147
+ python 2_training/supervised/train.py \
148
+ --dataset_path ./1_data/processed/700K_prefix_converted \
149
+ --data_column p_prompt_n_converted \
150
+ --approach prefix \
151
+ --output_dir ./output/gpt2_prefix_converted
152
+ ```
153
+
154
+ ### Comparison: Infix vs Prefix (same expression)
155
+ You can now train two models on the SAME expressions:
156
+ - Model A: `--data_column i_prompt_n --approach infix`
157
+ - Model B: `--data_column p_prompt_n_converted --approach prefix`
158
+
159
+ and directly compare which notation the model learns better.
160
+
161
+ ## Validation
162
+
163
+ To verify the conversion is correct:
164
+ 1. Parse the converted prefix expression
165
+ 2. Evaluate it on test points
166
+ 3. Compare against the evaluation of the original infix expression
167
+ 4. The R² score should be ~1.0 (same expression, same results)
168
+
169
+ ```python
170
+ from classes.expression import Expression
171
+
172
+ # Original infix expression
173
+ expr_infix = Expression("x_1 + x_2", is_prefix=False)
174
+
175
+ # Converted prefix expression
176
+ expr_prefix = Expression("+ x_1 x_2", is_prefix=True)
177
+
178
+ # Test it
179
+ import numpy as np
180
+ x = np.array([[1.0, 2.0], [3.0, 4.0]])
181
+
182
+ result_infix = expr_infix.evaluate(x)
183
+ result_prefix = expr_prefix.evaluate(x)
184
+
185
+ # They must be identical
186
+ assert np.allclose(result_infix, result_prefix)
187
+ ```
188
+
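When the project's `Expression` class is not available, the same round-trip idea can be checked with a tiny standalone prefix evaluator. This is a hedged sketch (the function name `eval_prefix` and the operator tables are hypothetical, not part of the project), shown here evaluating the prefix form of the Nguyen-5 equation `sin(x**2)*cos(x) - 1`:

```python
import math

# Supported unary functions and binary operators (illustrative subset)
FUNCS = {"sin": math.sin, "cos": math.cos, "exp": math.exp,
         "log": math.log, "sqrt": math.sqrt, "tan": math.tan, "abs": abs}
BINOPS = {"+": lambda a, b: a + b, "-": lambda a, b: a - b,
          "*": lambda a, b: a * b, "/": lambda a, b: a / b,
          "**": lambda a, b: a ** b}

def eval_prefix(tokens, env):
    """Consume tokens left to right, recursing once per operand."""
    tok = tokens.pop(0)
    if tok in BINOPS:
        a = eval_prefix(tokens, env)
        b = eval_prefix(tokens, env)
        return BINOPS[tok](a, b)
    if tok in FUNCS:
        return FUNCS[tok](eval_prefix(tokens, env))
    return env[tok] if tok in env else float(tok)

# Prefix form of sin(x**2)*cos(x) - 1 evaluated at x = 0.5
val = eval_prefix("- * sin ** x 2 cos x 1".split(), {"x": 0.5})
assert abs(val - (math.sin(0.25) * math.cos(0.5) - 1)) < 1e-12
```

Running the converted prefix string through such an evaluator on the same test points as the infix original is exactly the equality check described above.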
189
+ ## Known Limitations
190
+
191
+ 1. **Very complex expressions**: there may be edge cases with deep nesting
192
+ 2. **Custom operators**: operators not mapped in SymPy are unsupported
193
+ 3. **Automatic simplification**: SymPy may simplify some expressions, changing their form but not their value
194
+
195
+ ## Status
196
+
197
+ - [x] Conversion script implemented
198
+ - [x] Tested on 10 examples (100% success)
199
+ - [ ] Full dataset conversion (~12k examples)
200
+ - [ ] Upload to HuggingFace Hub
201
+ - [ ] Test model training
202
+ - [ ] Infix vs prefix comparison
203
+
204
+ ## References
205
+
206
+ - **Polish Notation**: https://en.wikipedia.org/wiki/Polish_notation
207
+ - **SymPy Documentation**: https://docs.sympy.org/
208
+ - **Original Dataset**: https://huggingface.co/datasets/augustocsc/sintetico_natural
209
+
210
+ ---
211
+
212
+ **Created**: 2026-02-09
213
+ **Author**: Claude Sonnet 4.5 (co-authored)
214
+ **Last Updated**: 2026-02-09
2_training/README.md ADDED
@@ -0,0 +1,205 @@
1
+ # 2_training/ - Treinamento e Fine-tuning
2
+
3
+ Este diretório contém todos os scripts e configurações para treinamento de modelos LLM para symbolic regression.
4
+
5
+ ## Estrutura
6
+
7
+ ```
8
+ 2_training/
9
+ ├── supervised/ # Fine-tuning supervisionado
10
+ │ ├── train_with_json.py # Principal: treino com formato JSON
11
+ │ ├── train.py # Script base
12
+ │ ├── train_experiment.py # Experimentos controlados
13
+ │ └── iterative_sampling_sft.py # SFT iterativo
14
+
15
+ ├── reinforcement/ # Reinforcement Learning
16
+ │ ├── ppo_symbolic.py # Proximal Policy Optimization
17
+ │ ├── grpo_symbolic.py # Group Relative PO
18
+ │ ├── reinforce_*.py # REINFORCE algorithm
19
+ │ └── best_of_n_experiment.py # Best-of-N sampling
20
+
21
+ └── configs/ # Configurações
22
+ └── wandb_config.py # Wandb naming standards
23
+ ```
24
+
25
+ ## Métodos de Treinamento
26
+
27
+ ### 1. Supervised Fine-tuning (Recomendado)
28
+
29
+ Script principal: `supervised/train_with_json.py`
30
+
31
+ **Características**:
32
+ - LoRA fine-tuning (apenas 294K parâmetros treináveis)
33
+ - Formato JSON estruturado (80% valid rate)
34
+ - Early stopping automático
35
+ - Split train/validation 90/10
36
+ - Integração com Wandb
37
+
38
+ **Uso**:
39
+ ```bash
40
+ cd supervised
41
+ python train_with_json.py \
42
+ --model_size gpt2-medium \
43
+ --dataset_path ../../1_data/processed/700K \
44
+ --output_dir ../../models/gpt2/medium_test \
45
+ --num_train_epochs 3 \
46
+ --per_device_train_batch_size 4
47
+ ```
48
+
49
+ **Modelos suportados**:
50
+ - `gpt2` (124M params)
51
+ - `gpt2-medium` (355M params)
52
+ - `gpt2-large` (774M params)
53
+ - GPT-Neo, LLaMA, Phi (futuro)
54
+
55
+ ### 2. Reinforcement Learning
56
+
57
+ #### PPO (Proximal Policy Optimization)
58
+ Script: `reinforcement/ppo_symbolic.py`
59
+
60
+ **Quando usar**:
61
+ - Problemas complexos (Nguyen 4+)
62
+ - Otimização de R² score
63
+ - Após supervised fine-tuning
64
+
65
+ **Uso**:
66
+ ```bash
67
+ cd reinforcement
68
+ python ppo_symbolic.py \
69
+ --model_path ../../models/gpt2/base_700k_json \
70
+ --dataset ../../1_data/benchmarks/nguyen/nguyen_5.csv \
71
+ --epochs 20
72
+ ```
73
+
74
+ #### GRPO (Group Relative Policy Optimization)
75
+ Script: `reinforcement/grpo_symbolic.py`
76
+
77
+ **Vantagens**:
78
+ - Mais estável que PPO
79
+ - Melhor para multi-modal rewards
80
+ - Baseado em DeepSeek-R1
81
+
82
+ #### REINFORCE
83
+ Script: `reinforcement/reinforce_symbolic.py`
84
+
85
+ **Características**:
86
+ - Simples e eficaz
87
+ - EMA baseline
88
+ - Bom para benchmarks fáceis (Nguyen 1-3)
89
+
90
+ ## Configuração LoRA
91
+
92
+ Configuração padrão (todos os modelos):
93
+ ```python
94
+ {
95
+ "r": 8,
96
+ "lora_alpha": 32,
97
+ "target_modules": ["c_attn"],
98
+ "lora_dropout": 0.05
99
+ }
100
+ ```
101
+
102
+ **Resultado**: ~294K parâmetros treináveis (vs 124M-774M total)
103
+
104
+ ## Hiperparâmetros Recomendados
105
+
106
+ ### Por Tamanho de Modelo
107
+
108
+ | Modelo | Batch Size | Instance | VRAM | Tempo |
109
+ |--------|-----------|----------|------|-------|
110
+ | GPT-2 Base | 8 | g5.xlarge | 24GB | 2-3h |
111
+ | GPT-2 Medium | 4 | g5.xlarge | 24GB | 3-4h |
112
+ | GPT-2 Large | 2 | g5.2xlarge | 48GB | 4-5h |
113
+
114
+ ### Outros Hiperparâmetros
115
+ ```python
116
+ learning_rate = 5e-5
117
+ num_train_epochs = 3
118
+ gradient_accumulation_steps = 4
119
+ warmup_steps = 500
120
+ weight_decay = 0.01
121
+ early_stopping_patience = 3
122
+ seed = 42
123
+ ```
124
+
125
+ ## Formato de Dados
126
+
127
+ ### JSON Format (Recomendado)
128
+ ```json
129
+ {"vars": ["x_1", "x_2"], "ops": ["*", "+", "sin"], "cons": "C", "expr": "sin(x_1 + C*x_2)"}
130
+ ```
131
+
132
+ **Vantagens**:
133
+ - 80% valid expression rate
134
+ - Structured boundaries
135
+ - Lower loss (0.343 vs 0.415)
136
+
137
+ ## Wandb Tracking
138
+
139
+ Naming standard: `seriguela-{type}-{model}-{dataset}-{timestamp}`
140
+
141
+ Exemplos:
142
+ ```python
143
+ # Supervised
144
+ seriguela-supervised-medium-700k-20260204-120000
145
+
146
+ # PPO
147
+ seriguela-ppo-large-nguyen5-20260204-120000
148
+ ```
149
+
150
+ ## Deploy AWS
151
+
152
+ Scripts disponíveis em: `../../scripts/aws/`
153
+
154
+ Lançar treinamento:
155
+ ```bash
156
+ # Medium model
157
+ bash ../../scripts/aws/launch_medium_training.sh \
158
+ --wandb-key YOUR_KEY \
159
+ --hf-token YOUR_TOKEN
160
+
161
+ # Large model
162
+ bash ../../scripts/aws/launch_large_training.sh \
163
+ --wandb-key YOUR_KEY \
164
+ --hf-token YOUR_TOKEN
165
+ ```
166
+
167
+ ## Troubleshooting
168
+
169
+ ### OOM (Out of Memory)
170
+ - Reduzir `per_device_train_batch_size`
171
+ - Usar `gradient_accumulation_steps` maior
172
+ - Usar instância maior (g5.2xlarge)
173
+
174
+ ### Low Valid Rate
175
+ - Verificar formato de dados (deve ser JSON)
176
+ - Aumentar `num_train_epochs`
177
+ - Verificar conversão de dados
178
+
179
+ ### Early Stopping Prematuro
180
+ - Aumentar `early_stopping_patience`
181
+ - Verificar validation loss
182
+
183
+ ## Próximos Modelos (Planejados)
184
+
185
+ ### GPT-Neo (EleutherAI)
186
+ - 125M, 1.3B, 2.7B params
187
+ - Similar ao GPT-2
188
+ - Compatível com mesma pipeline
189
+
190
+ ### LLaMA 2/3 (Meta)
191
+ - 7B, 13B, 70B params
192
+ - Melhor performance
193
+ - Requer mais VRAM
194
+
195
+ ### Phi-2/3 (Microsoft)
196
+ - 2.7B params
197
+ - Otimizado para reasoning
198
+ - Bom para symbolic tasks
199
+
200
+ ## Referências
201
+
202
+ - LoRA: https://arxiv.org/abs/2106.09685
203
+ - PPO: https://arxiv.org/abs/1707.06347
204
+ - GRPO: DeepSeek-R1 technical report
205
+ - Dataset: https://huggingface.co/datasets/augustocsc/sintetico_natural
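The "~294K trainable parameters" figure quoted in the README can be reproduced from GPT-2's shapes. A back-of-the-envelope check, assuming the default config (r=8 LoRA on `c_attn` only, GPT-2 small):

```python
# GPT-2 small: 12 transformer blocks, hidden size 768; c_attn projects 768 -> 3*768
hidden, n_layers, r = 768, 12, 8
in_features, out_features = hidden, 3 * hidden
# Each LoRA pair adds A (r x in_features) plus B (out_features x r) parameters
per_layer = r * in_features + out_features * r
total = per_layer * n_layers
print(total)  # 294912, i.e. the ~294K quoted above
```

The same arithmetic explains why adding `c_proj` to `target_modules` (as the medium/large configs below do) raises the trainable-parameter count.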
2_training/configs/__init__.py ADDED
@@ -0,0 +1,22 @@
+ """
+ Seriguela - Configuration utilities.
+
+ This module provides standardized configuration utilities for
+ experiment tracking and naming conventions.
+ """
+
+ from .wandb_config import (
+     generate_run_name,
+     get_wandb_project_name,
+     get_run_tags,
+     setup_wandb_env,
+ )
+
+ __all__ = [
+     'generate_run_name',
+     'get_wandb_project_name',
+     'get_run_tags',
+     'setup_wandb_env',
+ ]
+
+ __version__ = '1.0.0'
2_training/configs/eval_dataset_download.sh ADDED
@@ -0,0 +1,6 @@
+ git clone https://huggingface.co/datasets/yoshitomo-matsubara/srsd-feynman_easy_dummy
+ git clone https://huggingface.co/datasets/yoshitomo-matsubara/srsd-feynman_medium_dummy
+ git clone https://huggingface.co/datasets/yoshitomo-matsubara/srsd-feynman_hard_dummy
+ git clone https://huggingface.co/datasets/yoshitomo-matsubara/srsd-feynman_easy
+ git clone https://huggingface.co/datasets/yoshitomo-matsubara/srsd-feynman_medium
+ git clone https://huggingface.co/datasets/yoshitomo-matsubara/srsd-feynman_hard
2_training/configs/model_config.json ADDED
@@ -0,0 +1 @@
+ {}
2_training/configs/peft_config.json ADDED
@@ -0,0 +1 @@
+ {}
2_training/configs/training.sh ADDED
@@ -0,0 +1,82 @@
+ CUDA_VISIBLE_DEVICES=0 python /home/augusto/symbo_repos/seringuela/scripts/train_test.py \
+     --dataset_repo_id augustocsc/sintetico_natural \
+     --data_dir 500k \
+     --output_dir ./output \
+     --push_to_hub \
+     --hub_model_id augustocsc/Se124M500KInfPrompt_EOS \
+     --source_data_column i_prompt \
+     --report_to wandb \
+     --run_name Se124M500KInfPrompt_EOS \
+     --model_name_or_path gpt2 \
+     --bf16 \
+     --eval_strategy steps \
+     --num_train_epochs 3 \
+     --per_device_train_batch_size 16 \
+     --per_device_eval_batch_size 16 \
+     --gradient_accumulation_steps 4 \
+     --dataloader_num_workers 8 \
+     --learning_rate 5e-5 \
+     --warmup_ratio 0.03 \
+     --weight_decay 0.01 \
+     --max_grad_norm 1.0 \
+     --lr_scheduler_type cosine \
+     --optim adamw_torch_fused \
+     --logging_steps 20 \
+     --eval_steps 500 \
+     --save_steps 1000 \
+     --save_total_limit 3
+
+
+ # CUDA_VISIBLE_DEVICES=1 python /home/augusto/symbo_repos/seringuela/scripts/train_test.py \
+ #     --dataset_repo_id augustocsc/sintetico_final \
+ #     --data_dir 100k \
+ #     --output_dir ./output \
+ #     --push_to_hub \
+ #     --hub_model_id augustocsc/Se124M100KInfPrompt_NT \
+ #     --source_data_column i_prompt \
+ #     --report_to wandb \
+ #     --run_name Se124M100KInfPrompt_NT \
+ #     --bf16 \
+ #     --eval_strategy steps \
+ #     --num_train_epochs 3 \
+ #     --per_device_train_batch_size 16 \
+ #     --per_device_eval_batch_size 16 \
+ #     --gradient_accumulation_steps 2 \
+ #     --dataloader_num_workers 8 \
+ #     --learning_rate 2e-5 \
+ #     --warmup_ratio 0.03 \
+ #     --weight_decay 0.01 \
+ #     --max_grad_norm 1.0 \
+ #     --lr_scheduler_type cosine \
+ #     --optim adamw_torch_fused \
+ #     --logging_steps 20 \
+ #     --eval_steps 500 \
+ #     --save_steps 1000 \
+ #     --save_total_limit 3
+
+ # CUDA_VISIBLE_DEVICES=0 python /home/augusto/symbo_repos/seringuela/scripts/train_test.py \
+ #     --dataset_repo_id augustocsc/sintetico_final \
+ #     --data_dir 100k \
+ #     --output_dir ./output \
+ #     --push_to_hub \
+ #     --hub_model_id augustocsc/Se124M100KInfPrompt_WT \
+ #     --source_data_column i_prompt \
+ #     --report_to wandb \
+ #     --run_name Se124M100KInfPrompt_WT \
+ #     --bf16 \
+ #     --eval_strategy steps \
+ #     --num_train_epochs 3 \
+ #     --per_device_train_batch_size 16 \
+ #     --per_device_eval_batch_size 16 \
+ #     --gradient_accumulation_steps 2 \
+ #     --dataloader_num_workers 8 \
+ #     --learning_rate 2e-5 \
+ #     --warmup_ratio 0.03 \
+ #     --weight_decay 0.01 \
+ #     --max_grad_norm 1.0 \
+ #     --lr_scheduler_type cosine \
+ #     --optim adamw_torch_fused \
+ #     --logging_steps 20 \
+ #     --eval_steps 500 \
+ #     --save_steps 1000 \
+ #     --save_total_limit 3
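For reference, `--warmup_ratio 0.03` in the uncommented run above translates into an absolute warmup step count roughly as follows (a sketch using the script's own values; the Trainer computes this internally from the actual dataloader length):

```python
# 500k examples, batch 16, gradient accumulation 4, 3 epochs (values from the script above)
n_examples, batch, accum, epochs = 500_000, 16, 4, 3
steps_per_epoch = -(-n_examples // (batch * accum))  # ceil division
total_steps = steps_per_epoch * epochs
warmup_steps = int(total_steps * 0.03)
print(steps_per_epoch, total_steps, warmup_steps)  # 7813 23439 703
```

This is why the ratio form is convenient: switching to the commented 100k runs below changes the warmup length automatically, with no flag edits.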
2_training/configs/training_args.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "output_dir": "./output",
+   "overwrite_output_dir": true,
+   "num_train_epochs": 50,
+   "per_device_train_batch_size": 8,
+   "gradient_accumulation_steps": 1,
+   "learning_rate": 5e-5,
+   "weight_decay": 0.01,
+   "warmup_steps": 0,
+   "fp16": true,
+   "seed": 42,
+   "per_device_eval_batch_size": 8,
+   "eval_strategy": "epoch",
+   "metric_for_best_model": "eval_loss",
+   "greater_is_better": false,
+   "eval_steps": null,
+   "load_best_model_at_end": true,
+   "save_strategy": "epoch",
+   "save_steps": null,
+   "save_total_limit": 2,
+   "logging_dir": "./output/logs",
+   "logging_steps": 100,
+   "report_to": "wandb",
+   "run_name": "Se124M100K",
+   "push_to_hub": true,
+   "hub_model_id": "augustocsc/Se124M100K",
+   "hub_token": null
+ }
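A quick consistency check on the arguments above. This is a hedged sketch of the constraints `transformers` enforces: `load_best_model_at_end=True` requires matching eval/save strategies, and the `*_steps` fields are unused under an `epoch` strategy:

```python
# Mirror of the relevant fields from training_args.json above
cfg = {
    "eval_strategy": "epoch", "eval_steps": None,
    "save_strategy": "epoch", "save_steps": None,
    "load_best_model_at_end": True, "metric_for_best_model": "eval_loss",
}

def check(cfg):
    for kind in ("eval", "save"):
        if cfg[f"{kind}_strategy"] == "epoch":
            # step counts are meaningless under an epoch-based strategy
            assert cfg[f"{kind}_steps"] is None
    if cfg["load_best_model_at_end"]:
        # Trainer requires eval and save strategies to match in this case
        assert cfg["eval_strategy"] == cfg["save_strategy"]
    return True

print(check(cfg))  # True
```

Running a check like this before instantiating `TrainingArguments` fails fast on a misconfigured JSON instead of mid-training.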
2_training/configs/training_large.json ADDED
@@ -0,0 +1,65 @@
+ {
+   "model_config": {
+     "model_name_or_path": "gpt2-large",
+     "model_size": "774M",
+     "description": "GPT-2 Large - 774M parameters"
+   },
+   "training_args": {
+     "num_train_epochs": 2,
+     "per_device_train_batch_size": 4,
+     "per_device_eval_batch_size": 4,
+     "gradient_accumulation_steps": 16,
+     "effective_batch_size": 64,
+     "learning_rate": 2e-5,
+     "weight_decay": 0.01,
+     "warmup_steps": 100,
+     "max_grad_norm": 1.0,
+     "lr_scheduler_type": "cosine",
+     "fp16": true,
+     "seed": 42,
+     "block_size": 128
+   },
+   "evaluation_args": {
+     "eval_strategy": "epoch",
+     "eval_steps": null,
+     "metric_for_best_model": "eval_loss",
+     "greater_is_better": false,
+     "load_best_model_at_end": true
+   },
+   "save_args": {
+     "save_strategy": "epoch",
+     "save_steps": null,
+     "save_total_limit": 2
+   },
+   "logging_args": {
+     "logging_dir": "./output/logs",
+     "logging_steps": 50,
+     "report_to": "wandb"
+   },
+   "lora_config": {
+     "r": 8,
+     "lora_alpha": 32,
+     "target_modules": ["c_attn", "c_proj"],
+     "lora_dropout": 0.05,
+     "bias": "none",
+     "task_type": "CAUSAL_LM"
+   },
+   "dataset_config": {
+     "dataset_repo_id": "augustocsc/sintetico_natural",
+     "data_dir": "700K",
+     "data_columns": {
+       "infix": "i_prompt_n",
+       "prefix": "p_prompt_n"
+     }
+   },
+   "hub_config": {
+     "push_to_hub": true,
+     "hub_model_id_template": "augustocsc/Se774M_700K_{format}",
+     "formats": ["infix", "prefix"]
+   },
+   "estimated_time": {
+     "per_epoch_minutes": 180,
+     "total_hours": 6,
+     "notes": "Estimated for AWS g5.xlarge with A10G GPU. May need gradient checkpointing for memory optimization."
+   }
+ }
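The `effective_batch_size: 64` field above is informational (4 × 16). At that batch size, the 700K split works out to roughly the following optimizer steps per epoch (a sketch; `drop_last` behavior and multi-GPU setups change the exact count):

```python
# Values from the training_args block above
n_examples = 700_000
effective_batch = 4 * 16  # per_device_train_batch_size * gradient_accumulation_steps
steps_per_epoch = -(-n_examples // effective_batch)  # ceil division
print(steps_per_epoch)  # 10938
```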
2_training/configs/training_medium.json ADDED
@@ -0,0 +1,65 @@
+ {
+   "model_config": {
+     "model_name_or_path": "gpt2-medium",
+     "model_size": "355M",
+     "description": "GPT-2 Medium - 355M parameters"
+   },
+   "training_args": {
+     "num_train_epochs": 2,
+     "per_device_train_batch_size": 8,
+     "per_device_eval_batch_size": 8,
+     "gradient_accumulation_steps": 8,
+     "effective_batch_size": 64,
+     "learning_rate": 3e-5,
+     "weight_decay": 0.01,
+     "warmup_steps": 100,
+     "max_grad_norm": 1.0,
+     "lr_scheduler_type": "cosine",
+     "fp16": true,
+     "seed": 42,
+     "block_size": 128
+   },
+   "evaluation_args": {
+     "eval_strategy": "epoch",
+     "eval_steps": null,
+     "metric_for_best_model": "eval_loss",
+     "greater_is_better": false,
+     "load_best_model_at_end": true
+   },
+   "save_args": {
+     "save_strategy": "epoch",
+     "save_steps": null,
+     "save_total_limit": 2
+   },
+   "logging_args": {
+     "logging_dir": "./output/logs",
+     "logging_steps": 50,
+     "report_to": "wandb"
+   },
+   "lora_config": {
+     "r": 8,
+     "lora_alpha": 32,
+     "target_modules": ["c_attn", "c_proj"],
+     "lora_dropout": 0.05,
+     "bias": "none",
+     "task_type": "CAUSAL_LM"
+   },
+   "dataset_config": {
+     "dataset_repo_id": "augustocsc/sintetico_natural",
+     "data_dir": "700K",
+     "data_columns": {
+       "infix": "i_prompt_n",
+       "prefix": "p_prompt_n"
+     }
+   },
+   "hub_config": {
+     "push_to_hub": true,
+     "hub_model_id_template": "augustocsc/Se355M_700K_{format}",
+     "formats": ["infix", "prefix"]
+   },
+   "estimated_time": {
+     "per_epoch_minutes": 90,
+     "total_hours": 3,
+     "notes": "Estimated for AWS g5.xlarge with A10G GPU"
+   }
+ }
2_training/configs/training_small.json ADDED
@@ -0,0 +1,65 @@
+ {
+   "model_config": {
+     "model_name_or_path": "gpt2",
+     "model_size": "124M",
+     "description": "GPT-2 Small - 124M parameters"
+   },
+   "training_args": {
+     "num_train_epochs": 3,
+     "per_device_train_batch_size": 16,
+     "per_device_eval_batch_size": 16,
+     "gradient_accumulation_steps": 4,
+     "effective_batch_size": 64,
+     "learning_rate": 5e-5,
+     "weight_decay": 0.01,
+     "warmup_steps": 100,
+     "max_grad_norm": 1.0,
+     "lr_scheduler_type": "cosine",
+     "fp16": true,
+     "seed": 42,
+     "block_size": 128
+   },
+   "evaluation_args": {
+     "eval_strategy": "epoch",
+     "eval_steps": null,
+     "metric_for_best_model": "eval_loss",
+     "greater_is_better": false,
+     "load_best_model_at_end": true
+   },
+   "save_args": {
+     "save_strategy": "epoch",
+     "save_steps": null,
+     "save_total_limit": 2
+   },
+   "logging_args": {
+     "logging_dir": "./output/logs",
+     "logging_steps": 50,
+     "report_to": "wandb"
+   },
+   "lora_config": {
+     "r": 8,
+     "lora_alpha": 32,
+     "target_modules": ["c_attn", "c_proj"],
+     "lora_dropout": 0.05,
+     "bias": "none",
+     "task_type": "CAUSAL_LM"
+   },
+   "dataset_config": {
+     "dataset_repo_id": "augustocsc/sintetico_natural",
+     "data_dir": "700K",
+     "data_columns": {
+       "infix": "i_prompt_n",
+       "prefix": "p_prompt_n"
+     }
+   },
+   "hub_config": {
+     "push_to_hub": true,
+     "hub_model_id_template": "augustocsc/Se124M_700K_{format}",
+     "formats": ["infix", "prefix"]
+   },
+   "estimated_time": {
+     "per_epoch_minutes": 40,
+     "total_hours": 2,
+     "notes": "Estimated for AWS g5.xlarge with A10G GPU"
+   }
+ }
2_training/configs/training_v3.json ADDED
@@ -0,0 +1,78 @@
+ {
+   "model_config": {
+     "model_name_or_path": "gpt2",
+     "model_size": "124M",
+     "description": "GPT-2 Small (124M) - v3 with proper end markers"
+   },
+   "training_args": {
+     "num_train_epochs": 3,
+     "per_device_train_batch_size": 8,
+     "per_device_eval_batch_size": 8,
+     "gradient_accumulation_steps": 4,
+     "effective_batch_size": 32,
+     "learning_rate": 5e-5,
+     "weight_decay": 0.01,
+     "warmup_steps": 100,
+     "max_grad_norm": 1.0,
+     "lr_scheduler_type": "cosine",
+     "fp16": true,
+     "seed": 42,
+     "block_size": 128
+   },
+   "evaluation_args": {
+     "eval_strategy": "epoch",
+     "eval_steps": null,
+     "metric_for_best_model": "eval_loss",
+     "greater_is_better": false,
+     "load_best_model_at_end": true
+   },
+   "save_args": {
+     "save_strategy": "epoch",
+     "save_steps": null,
+     "save_total_limit": 2
+   },
+   "logging_args": {
+     "logging_dir": "./output/logs",
+     "logging_steps": 50,
+     "report_to": "wandb"
+   },
+   "lora_config": {
+     "r": 8,
+     "lora_alpha": 32,
+     "target_modules": ["c_attn"],
+     "lora_dropout": 0.05,
+     "bias": "none",
+     "task_type": "CAUSAL_LM"
+   },
+   "dataset_config": {
+     "use_local_csvs": true,
+     "train_file": "./data/processed/700K_fixed/train_700K.csv",
+     "validation_file": "./data/processed/700K_fixed/validation_700K.csv",
+     "test_file": "./data/processed/700K_fixed/test_700K.csv",
+     "data_column": "text"
+   },
+   "hub_config": {
+     "push_to_hub": true,
+     "hub_model_id": "augustocsc/Se124M_700K_infix_v3"
+   },
+   "special_tokens": {
+     "start_token": "<|startofex|>",
+     "end_token": "<|endofex|>",
+     "notes": "End token configured as EOS token for proper stopping"
+   },
+   "estimated_time": {
+     "per_epoch_minutes": 45,
+     "total_hours": 2.25,
+     "notes": "Estimated for AWS g5.xlarge with A10G GPU, GPT-2 Small, 3 epochs"
+   },
+   "version_info": {
+     "model_version": "v3",
+     "improvements": [
+       "Training data includes proper <|endofex|> markers",
+       "100% validation rate on prepared dataset",
+       "Addresses v1 non-stopping issue and v2 garbage generation",
+       "Uses local CSVs with validated end markers"
+     ],
+     "training_date": "2026-02-01"
+   }
+ }
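The v3 fix described in `version_info` above wraps every training example in explicit markers so that generation can stop on `<|endofex|>` (configured as the EOS token). The data-preparation step can be sketched minimally (`wrap_example` is an illustrative helper, not the repo's actual preprocessing function):

```python
START, END = "<|startofex|>", "<|endofex|>"

def wrap_example(expr: str) -> str:
    """v3 data preparation: every sample carries an explicit end marker."""
    return f"{START}{expr}{END}"

sample = wrap_example("x_1 + C*x_2")
print(sample)  # <|startofex|>x_1 + C*x_2<|endofex|>
assert sample.endswith(END)  # END doubles as the EOS token, so generation stops
```

Without the end marker in the training text, the model never learns to emit EOS, which is exactly the v1 non-stopping failure this config addresses.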
2_training/configs/wandb_config.py ADDED
@@ -0,0 +1,221 @@
+ """
+ Wandb Configuration and Naming Standards for Seriguela Project
+
+ This module provides standardized naming conventions for Wandb experiment tracking.
+ """
+
+ import os
+ from datetime import datetime
+ from typing import Optional
+
+
+ # Default Wandb project name
+ DEFAULT_PROJECT = "seriguela"
+
+ # Alternative project name for experiments
+ EXPERIMENTS_PROJECT = "seriguela-experiments"
+
+
+ def get_wandb_project_name(use_experiments: bool = False) -> str:
+     """
+     Get the standard Wandb project name.
+
+     Args:
+         use_experiments: If True, use the experiments project name
+
+     Returns:
+         Project name string
+     """
+     return EXPERIMENTS_PROJECT if use_experiments else DEFAULT_PROJECT
+
+
+ def generate_run_name(
+     experiment_type: str,
+     model_size: str = "base",
+     dataset: Optional[str] = None,
+     extra_info: Optional[str] = None,
+     include_timestamp: bool = True
+ ) -> str:
+     """
+     Generate a standardized Wandb run name.
+
+     Naming Convention: seriguela-{type}-{model}-{dataset}-{extra}-{timestamp}
+
+     Args:
+         experiment_type: Type of experiment (supervised, ppo, grpo, reinforce, iterative-sft)
+         model_size: Model size (base, medium, large) or full name (gpt2, gpt2-medium)
+         dataset: Dataset identifier (700K, nguyen5, nguyen7, etc)
+         extra_info: Additional information (optional)
+         include_timestamp: Whether to include a timestamp suffix
+
+     Returns:
+         Formatted run name
+
+     Examples:
+         >>> generate_run_name("supervised", "medium", "700K")
+         'seriguela-supervised-medium-700k-20260203-143022'
+
+         >>> generate_run_name("ppo", "base", "nguyen5", "lr3e5")
+         'seriguela-ppo-base-nguyen5-lr3e5-20260203-143022'
+
+         >>> generate_run_name("grpo", "large", "nguyen7", include_timestamp=False)
+         'seriguela-grpo-large-nguyen7'
+     """
+     # Normalize model size
+     model_map = {
+         "gpt2": "base",
+         "gpt2-base": "base",
+         "124m": "base",
+         "gpt2-medium": "medium",
+         "355m": "medium",
+         "gpt2-large": "large",
+         "774m": "large"
+     }
+     model_size = model_map.get(model_size.lower(), model_size.lower())
+
+     # Build run name parts
+     parts = ["seriguela", experiment_type.lower()]
+
+     # Add model size
+     parts.append(model_size)
+
+     # Add dataset if provided
+     if dataset:
+         parts.append(dataset.lower().replace("_", "").replace("-", ""))
+
+     # Add extra info if provided
+     if extra_info:
+         parts.append(extra_info.lower().replace("_", ""))
+
+     # Add timestamp if requested
+     if include_timestamp:
+         timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+         parts.append(timestamp)
+
+     return "-".join(parts)
+
+
+ def get_run_tags(
+     experiment_type: str,
+     model_size: str,
+     dataset: Optional[str] = None,
+     success: Optional[bool] = None
+ ) -> list:
+     """
+     Generate standardized tags for Wandb runs.
+
+     Args:
+         experiment_type: Type of experiment
+         model_size: Model size
+         dataset: Dataset name
+         success: Whether the experiment was successful (optional)
+
+     Returns:
+         List of tags
+
+     Examples:
+         >>> get_run_tags("ppo", "medium", "nguyen5", True)
+         ['ppo', 'gpt2-medium', 'nguyen5', 'rl', 'success']
+     """
+     tags = [experiment_type.lower()]
+
+     # Add model size
+     if model_size.lower() in ["base", "124m", "gpt2"]:
+         tags.append("gpt2-base")
+     elif model_size.lower() in ["medium", "355m", "gpt2-medium"]:
+         tags.append("gpt2-medium")
+     elif model_size.lower() in ["large", "774m", "gpt2-large"]:
+         tags.append("gpt2-large")
+     else:
+         tags.append(model_size.lower())
+
+     # Add dataset
+     if dataset:
+         tags.append(dataset.lower())
+
+     # Add category based on experiment type
+     if experiment_type.lower() in ["ppo", "grpo", "reinforce"]:
+         tags.append("rl")
+     elif experiment_type.lower() in ["supervised", "sft"]:
+         tags.append("supervised")
+     elif experiment_type.lower() == "iterative-sft":
+         tags.append("iterative")
+
+     # Add success tag if provided
+     if success is not None:
+         tags.append("success" if success else "failed")
+
+     return tags
+
+
+ # Common experiment types
+ EXPERIMENT_TYPES = {
+     "SUPERVISED": "supervised",
+     "SFT": "sft",
+     "PPO": "ppo",
+     "GRPO": "grpo",
+     "REINFORCE": "reinforce",
+     "ITERATIVE_SFT": "iterative-sft",
+     "BEST_OF_N": "best-of-n",
+     "EVALUATION": "eval"
+ }
+
+ # Common datasets
+ DATASETS = {
+     "MAIN_700K": "700K",
+     "NGUYEN_1": "nguyen1",
+     "NGUYEN_5": "nguyen5",
+     "NGUYEN_7": "nguyen7",
+     "NGUYEN_10": "nguyen10",
+     "CUSTOM": "custom"
+ }
+
+
+ def setup_wandb_env():
+     """
+     Set up the Wandb environment from a credentials file.
+     Reads from ~/.tokens.txt if available.
+     """
+     tokens_file = os.path.expanduser("~/.tokens.txt")
+     if os.path.exists(tokens_file):
+         with open(tokens_file) as f:
+             for line in f:
+                 if "=" in line and not line.strip().startswith("#"):
+                     key, value = line.strip().split("=", 1)
+                     key = key.strip()
+                     value = value.strip()
+                     if key.lower() == "wandb":
+                         os.environ["WANDB_API_KEY"] = value
+                         print(f"[OK] Wandb API key loaded from {tokens_file}")
+                         return True
+
+     # Check if already in the environment
+     if "WANDB_API_KEY" in os.environ:
+         print("[OK] Wandb API key found in environment")
+         return True
+
+     print("[WARN] Wandb API key not found. Run 'wandb login' or add it to ~/.tokens.txt")
+     return False
+
+
+ if __name__ == "__main__":
+     # Example usage
+     print("Wandb Configuration Examples:\n")
+
+     print("1. Supervised training on the 700K dataset:")
+     print(f"   {generate_run_name('supervised', 'medium', '700K')}\n")
+
+     print("2. PPO on the Nguyen-5 benchmark:")
+     print(f"   {generate_run_name('ppo', 'base', 'nguyen5')}\n")
+
+     print("3. GRPO with a custom learning rate:")
+     print(f"   {generate_run_name('grpo', 'large', 'nguyen7', 'lr5e5')}\n")
+
+     print("4. Evaluation run (no timestamp):")
+     print(f"   {generate_run_name('eval', 'medium', 'nguyen5', include_timestamp=False)}\n")
+
+     print("5. Tags example:")
+     print(f"   {get_run_tags('ppo', 'medium', 'nguyen5', True)}\n")
+
+     print("6. Set up the Wandb environment:")
+     setup_wandb_env()
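Inverting the naming scheme is occasionally useful when aggregating finished runs. A hedged sketch of such a parser (illustrative only; not part of `wandb_config.py`):

```python
def parse_run_name(name: str) -> dict:
    """Invert seriguela-{type}-{model}-{dataset}-{timestamp} (illustrative only)."""
    parts = name.split("-")
    assert parts[0] == "seriguela", "not a seriguela run name"
    fields = {"experiment_type": parts[1], "model_size": parts[2]}
    # A trailing YYYYMMDD-HHMMSS pair is the timestamp added by generate_run_name
    if len(parts) >= 5 and parts[-2].isdigit() and parts[-1].isdigit():
        fields["timestamp"] = f"{parts[-2]}-{parts[-1]}"
        middle = parts[3:-2]
    else:
        middle = parts[3:]
    if middle:
        fields["dataset"] = middle[0]
    return fields

print(parse_run_name("seriguela-ppo-base-nguyen5-20260203-143022"))
```

Note that the scheme is only unambiguously invertible because `generate_run_name` strips hyphens from the dataset and extra-info fields before joining.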
2_training/reinforcement/best_of_n_experiment.py ADDED
@@ -0,0 +1,398 @@
+ #!/usr/bin/env python3
+ """
+ Best-of-N Sampling Experiment for Symbolic Regression
+
+ Instead of PPO (which has API compatibility issues with TRL 0.16+),
+ this script tests whether the base model can find correct expressions
+ through random sampling. If the model generates the correct expression
+ even occasionally, PPO should be able to learn to find it consistently.
+
+ This is a diagnostic experiment to understand model capabilities.
+ """
+
+ import os
+ import sys
+ import json
+ import argparse
+ import logging
+ import datetime
+ from pathlib import Path
+ from collections import defaultdict
+
+ import numpy as np
+ import torch
+ from tqdm import tqdm
+
+ # Add project root to path
+ PROJECT_ROOT = Path(__file__).parent.parent
+ sys.path.insert(0, str(PROJECT_ROOT))
+ sys.path.insert(0, str(PROJECT_ROOT / "classes"))
+
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from peft import PeftModel
+
+ from expression import Expression
+ from dataset import RegressionDataset
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(levelname)s - %(message)s'
+ )
+ logger = logging.getLogger(__name__)
+
+
+ class BestOfNSampler:
+     """Generate N expressions and find the best one for a given dataset."""
+
+     def __init__(self, model_path: str, device: str = None):
+         self.model_path = model_path
+
+         # Device setup
+         if device:
+             self.device = torch.device(device)
+         else:
+             self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         logger.info(f"Using device: {self.device}")
+
+         self._load_model()
+
+     def _load_model(self):
+         """Load the JSON format model with LoRA adapters."""
+         logger.info(f"Loading model from {self.model_path}")
+
+         # Load tokenizer from the trained model (has special tokens)
+         self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
+         self.tokenizer.pad_token = self.tokenizer.eos_token
+         logger.info(f"Tokenizer loaded with vocab size: {len(self.tokenizer)}")
+
+         # Load base GPT-2
+         base_model = AutoModelForCausalLM.from_pretrained(
+             "gpt2",
+             torch_dtype=torch.float16,
+         )
+
+         # Resize embeddings to match the tokenizer (handles special tokens)
+         if len(self.tokenizer) != base_model.config.vocab_size:
+             logger.info(f"Resizing embeddings: {base_model.config.vocab_size} -> {len(self.tokenizer)}")
+             base_model.resize_token_embeddings(len(self.tokenizer))
+
+         # Load the LoRA adapter
+         try:
+             model_with_lora = PeftModel.from_pretrained(base_model, self.model_path)
+             self.model = model_with_lora.merge_and_unload()
+             logger.info("LoRA adapter loaded and merged")
+         except Exception as e:
+             logger.warning(f"Could not load as PEFT model: {e}")
+             self.model = AutoModelForCausalLM.from_pretrained(self.model_path)
+
+         self.model = self.model.to(self.device)
+         self.model.eval()
+         logger.info("Model loaded successfully")
+
+     def build_prompt(self, n_vars: int) -> str:
+         """Build a JSON format prompt matching the training data."""
+         vars_list = [f"x_{i+1}" for i in range(n_vars)]
+         ops_list = ["+", "-", "*", "sin", "cos"]
+
+         prompt = json.dumps({
+             "vars": vars_list,
+             "ops": ops_list,
+             "cons": None,
+             "expr": ""
+         })[:-3]  # Strip the closing '""}' so the model completes the "expr" value
+
+         return prompt
+
+     def extract_expression(self, generated_text: str) -> str:
+         """Extract the expression from JSON format output.
+
+         Handles three output formats:
+         1. Standard JSON: "expr": "value"}
+         2. Model output: "expr": value"} (no quotes around the value)
+         3. Compact JSON: "expr":"value"}
+         """
+         try:
+             # Case 1: Standard JSON with quotes around the expression value
+             if '"expr": "' in generated_text:
+                 expr_start = generated_text.index('"expr": "') + len('"expr": "')
+                 remaining = generated_text[expr_start:]
+                 # Find the closing "}
+                 if '"}' in remaining:
+                     return remaining[:remaining.index('"}')].strip()
+                 # Fallback: find the first quote
+                 if '"' in remaining:
+                     return remaining[:remaining.index('"')].strip()
+                 return remaining.strip()
+
+             # Case 2: Model output WITHOUT quotes: "expr": value"}
+             # This is what the model actually generates
+             if '"expr": ' in generated_text:
+                 expr_start = generated_text.index('"expr": ') + len('"expr": ')
+                 remaining = generated_text[expr_start:]
+                 # Find the closing "} which ends the JSON object
+                 if '"}' in remaining:
+                     return remaining[:remaining.index('"}')].strip()
+                 # Fallback: find "{ which starts the next object
+                 if '"{' in remaining:
+                     return remaining[:remaining.index('"{')].strip().rstrip('}')
+                 return remaining.strip()
+
+             # Case 3: Compact JSON without a space
+             if '"expr":"' in generated_text:
+                 expr_start = generated_text.index('"expr":"') + len('"expr":"')
+                 remaining = generated_text[expr_start:]
+                 if '"}' in remaining:
+                     return remaining[:remaining.index('"}')].strip()
+                 if '"' in remaining:
+                     return remaining[:remaining.index('"')].strip()
+                 return remaining.strip()
+
+         except (ValueError, IndexError):
+             pass
+
+         # Last resort: split on "expr" and clean up
+         fallback = generated_text.split('"expr"')[-1].strip(' ":}')
+         if '"}' in fallback:
+             fallback = fallback[:fallback.index('"}')]
+         return fallback.strip()
+
+     def compute_r2(self, expression_str: str, X: np.ndarray, y: np.ndarray) -> float:
+         """Compute the R² score for an expression."""
+         if not expression_str or expression_str.isspace():
+             return -np.inf
+
+         # Replace the constant placeholder C with 1
+         if 'C' in expression_str:
+             expression_str = expression_str.replace('C', '1')
+
+         try:
+             expr = Expression(expression_str, is_prefix=False)
+
+             if not expr.is_valid_on_dataset(X):
+                 return -np.inf
+
+             y_pred = expr.evaluate(X)
+
+             if not np.all(np.isfinite(y_pred)):
+                 return -np.inf
+
+             ss_res = np.sum((y - y_pred) ** 2)
+             ss_tot = np.sum((y - np.mean(y)) ** 2)
+
+             if ss_tot == 0:
+                 return 0.0
+
+             return 1 - (ss_res / ss_tot)
+         except Exception:
+             return -np.inf
+
+     def sample_expressions(self, n_vars: int, n_samples: int = 100,
+                            temperature: float = 0.7) -> list:
+         """Generate N expression samples."""
+         prompt = self.build_prompt(n_vars)
+         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
+
+         expressions = []
+
+         debug_count = 0
+         for _ in tqdm(range(n_samples), desc="Sampling expressions"):
+             with torch.no_grad():
+                 output = self.model.generate(
+                     **inputs,
+                     max_new_tokens=50,
+                     do_sample=True,
+                     top_k=50,
205
+ top_p=0.9,
206
+ temperature=temperature,
207
+ pad_token_id=self.tokenizer.pad_token_id,
208
+ )
209
+
210
+ text = self.tokenizer.decode(output[0], skip_special_tokens=True)
211
+ expr_str = self.extract_expression(text)
212
+
213
+ # Debug: print first 5 extractions
214
+ if debug_count < 5:
215
+ logger.info(f"DEBUG [{debug_count}] raw text (last 80 chars): ...{text[-80:]}")
216
+ logger.info(f"DEBUG [{debug_count}] extracted: '{expr_str}'")
217
+ debug_count += 1
218
+
219
+ expressions.append(expr_str)
220
+
221
+ return expressions
222
+
223
+ def find_best_expression(self, X: np.ndarray, y: np.ndarray,
224
+ n_samples: int = 500, temperature: float = 0.7):
225
+ """Sample N expressions and find the best one for the dataset."""
226
+ n_vars = X.shape[1]
227
+
228
+ logger.info(f"Sampling {n_samples} expressions for {n_vars}-variable dataset...")
229
+ expressions = self.sample_expressions(n_vars, n_samples, temperature)
230
+
231
+ # Compute R² for each
232
+ results = []
233
+ unique_expressions = set()
234
+
235
+ for expr_str in tqdm(expressions, desc="Computing R² scores"):
236
+ if expr_str in unique_expressions:
237
+ continue
238
+ unique_expressions.add(expr_str)
239
+
240
+ r2 = self.compute_r2(expr_str, X, y)
241
+ results.append({
242
+ "expression": expr_str,
243
+ "r2": float(r2) if np.isfinite(r2) else None,
244
+ "is_valid": bool(np.isfinite(r2) and r2 > -1),
245
+ })
246
+
247
+ # Sort by R²
248
+ results.sort(key=lambda x: x["r2"] if x["r2"] is not None else -np.inf, reverse=True)
249
+
250
+ # Statistics
251
+ valid_count = sum(1 for r in results if r["is_valid"])
252
+ valid_r2s = [r["r2"] for r in results if r["r2"] is not None and r["r2"] > -1]
253
+
254
+ return {
255
+ "n_samples": n_samples,
256
+ "unique_expressions": len(unique_expressions),
257
+ "valid_count": valid_count,
258
+ "valid_rate": valid_count / len(unique_expressions) if unique_expressions else 0,
259
+ "best_r2": results[0]["r2"] if results and results[0]["r2"] else None,
260
+ "best_expression": results[0]["expression"] if results else None,
261
+ "mean_r2": float(np.mean(valid_r2s)) if valid_r2s else None,
262
+ "median_r2": float(np.median(valid_r2s)) if valid_r2s else None,
263
+ "top_10": results[:10],
264
+ }
265
+
266
+
267
+ def run_experiment(model_path: str, datasets_dir: str, n_samples: int = 500,
268
+ output_dir: str = "./output/best_of_n"):
269
+ """Run Best-of-N experiment on multiple datasets."""
270
+
271
+ output_dir = Path(output_dir)
272
+ output_dir.mkdir(parents=True, exist_ok=True)
273
+
274
+ # Test datasets
275
+ test_datasets = {
276
+ "add_x1_x2": {"formula": "x_1 + x_2", "difficulty": "easy"},
277
+ "mul_x1_x2": {"formula": "x_1 * x_2", "difficulty": "easy"},
278
+ "sub_x1_x2": {"formula": "x_1 - x_2", "difficulty": "easy"},
279
+ "sin_x1": {"formula": "sin(x_1)", "difficulty": "medium"},
280
+ "cos_x1": {"formula": "cos(x_1)", "difficulty": "medium"},
281
+ "square_x1": {"formula": "x_1 * x_1", "difficulty": "medium"},
282
+ "sin_x1_plus_x2": {"formula": "sin(x_1) + x_2", "difficulty": "hard"},
283
+ "x1_mul_sin_x2": {"formula": "x_1 * sin(x_2)", "difficulty": "hard"},
284
+ }
285
+
286
+ # Initialize sampler
287
+ sampler = BestOfNSampler(model_path)
288
+
289
+ timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
290
+
291
+ results = {
292
+ "timestamp": timestamp,
293
+ "model_path": model_path,
294
+ "n_samples": n_samples,
295
+ "datasets": {},
296
+ }
297
+
298
+ print("\n" + "=" * 70)
299
+ print("BEST-OF-N SAMPLING EXPERIMENT")
300
+ print("=" * 70)
301
+ print(f"Model: {model_path}")
302
+ print(f"Samples per dataset: {n_samples}")
303
+ print("=" * 70)
304
+
305
+ for dataset_name, info in test_datasets.items():
306
+ dataset_path = Path(datasets_dir) / f"{dataset_name}.csv"
307
+
308
+ if not dataset_path.exists():
309
+ logger.warning(f"Dataset not found: {dataset_path}")
310
+ continue
311
+
312
+ print(f"\n{'='*70}")
313
+ print(f"Dataset: {dataset_name}")
314
+ print(f"Ground truth: {info['formula']}")
315
+ print(f"Difficulty: {info['difficulty']}")
316
+ print(f"{'='*70}")
317
+
318
+ # Load dataset
319
+ reg = RegressionDataset(str(dataset_path.parent), dataset_path.name)
320
+ X, y = reg.get_numpy()
321
+
322
+ # Run Best-of-N
323
+ result = sampler.find_best_expression(X, y, n_samples)
324
+ result["ground_truth"] = info["formula"]
325
+ result["difficulty"] = info["difficulty"]
326
+
327
+ results["datasets"][dataset_name] = result
328
+
329
+ # Print results
330
+ print(f"\nResults:")
331
+ print(f" Valid expressions: {result['valid_count']}/{result['unique_expressions']} ({result['valid_rate']:.1%})")
332
+ print(f" Best R²: {result['best_r2']:.4f}" if result['best_r2'] else " Best R²: N/A")
333
+ print(f" Best expression: {result['best_expression']}")
334
+
335
+ if result['best_r2'] and result['best_r2'] > 0.99:
336
+ print(f" ✅ FOUND NEAR-PERFECT MATCH!")
337
+ elif result['best_r2'] and result['best_r2'] > 0.9:
338
+ print(f" ⚠️ Found good match (R² > 0.9)")
339
+ else:
340
+ print(f" ❌ No good match found")
341
+
342
+ print("\n Top 5 expressions:")
343
+ for i, expr in enumerate(result['top_10'][:5]):
344
+ r2_str = f"{expr['r2']:.4f}" if expr['r2'] else "N/A"
345
+ print(f" {i+1}. {expr['expression'][:40]:<40} R²={r2_str}")
346
+
347
+ # Save results
348
+ results_file = output_dir / f"best_of_n_results_{timestamp}.json"
349
+ with open(results_file, 'w') as f:
350
+ json.dump(results, f, indent=2)
351
+
352
+ print("\n" + "=" * 70)
353
+ print("SUMMARY")
354
+ print("=" * 70)
355
+
356
+ # Summary table
357
+ print(f"\n{'Dataset':<25} {'Difficulty':<10} {'Best R²':<10} {'Found?':<10}")
358
+ print("-" * 60)
359
+
360
+ success_count = 0
361
+ for name, res in results["datasets"].items():
362
+ r2 = res["best_r2"]
363
+ r2_str = f"{r2:.4f}" if r2 else "N/A"
364
+ found = "✅" if r2 and r2 > 0.99 else ("⚠️" if r2 and r2 > 0.9 else "❌")
365
+ if r2 and r2 > 0.99:
366
+ success_count += 1
367
+ print(f"{name:<25} {res['difficulty']:<10} {r2_str:<10} {found:<10}")
368
+
369
+ print("-" * 60)
370
+ print(f"Success rate (R² > 0.99): {success_count}/{len(results['datasets'])}")
371
+ print(f"\nResults saved to: {results_file}")
372
+
373
+ return results
374
+
375
+
376
+ def main():
377
+ parser = argparse.ArgumentParser(description="Best-of-N Sampling Experiment")
378
+ parser.add_argument("--model_path", type=str, default="./output/exp_a_json",
379
+ help="Path to trained model")
380
+ parser.add_argument("--datasets_dir", type=str, default="./data/ppo_test",
381
+ help="Directory containing test datasets")
382
+ parser.add_argument("--n_samples", type=int, default=500,
383
+ help="Number of samples per dataset")
384
+ parser.add_argument("--output_dir", type=str, default="./output/best_of_n",
385
+ help="Output directory for results")
386
+
387
+ args = parser.parse_args()
388
+
389
+ run_experiment(
390
+ model_path=args.model_path,
391
+ datasets_dir=args.datasets_dir,
392
+ n_samples=args.n_samples,
393
+ output_dir=args.output_dir,
394
+ )
395
+
396
+
397
+ if __name__ == "__main__":
398
+ main()
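The ranking logic above boils down to scoring every sampled candidate with the coefficient of determination and keeping the maximum. A minimal self-contained sketch of that Best-of-N selection, using plain NumPy callables as stand-ins for model-sampled expressions (the helper name `r2_score` and the toy candidates are illustrative, not repo code):

```python
import numpy as np

def r2_score(y, y_pred):
    """R² = 1 - SS_res / SS_tot; defined as 0.0 when y is constant."""
    ss_res = np.sum((y - y_pred) ** 2)
    ss_tot = np.sum((y - np.mean(y)) ** 2)
    return 0.0 if ss_tot == 0 else 1.0 - ss_res / ss_tot

# Toy dataset with ground truth y = x_1 + x_2
rng = np.random.default_rng(0)
X = rng.uniform(-1.0, 1.0, size=(100, 2))
y = X[:, 0] + X[:, 1]

# Stand-ins for sampled expressions: name -> predictions on X
candidates = {
    "x_1 + x_2": X[:, 0] + X[:, 1],
    "x_1 * x_2": X[:, 0] * X[:, 1],
    "sin(x_1)": np.sin(X[:, 0]),
}

scores = {name: r2_score(y, pred) for name, pred in candidates.items()}
best = max(scores, key=scores.get)
print(best)  # x_1 + x_2
```

Because the exact recovery `x_1 + x_2` has zero residual, its score is exactly 1.0, which is why the experiment uses R² > 0.99 as the "near-perfect match" threshold.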
2_training/reinforcement/debug_reinforce.py ADDED
@@ -0,0 +1,294 @@
+ #!/usr/bin/env python3
+ """
+ Debug version of REINFORCE that saves ALL expressions (valid and invalid).
+ """
+
+ import os
+ import sys
+ import json
+ import argparse
+ from pathlib import Path
+ from typing import List, Dict
+
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+
+ # Add project root to path
+ PROJECT_ROOT = Path(__file__).parent.parent
+ sys.path.insert(0, str(PROJECT_ROOT))
+ sys.path.insert(0, str(PROJECT_ROOT / "classes"))
+
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from peft import PeftModel, LoraConfig, get_peft_model
+ from expression import Expression
+
+
+ class DebugREINFORCE:
+     """REINFORCE that logs all expressions."""
+
+     def __init__(self, model_path: str, X: np.ndarray, y: np.ndarray, device: str = None):
+         self.X = X
+         self.y = y
+         self.n_vars = X.shape[1]
+
+         if device:
+             self.device = torch.device(device)
+         else:
+             self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+         # Load model
+         self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+         self.tokenizer.pad_token = self.tokenizer.eos_token
+
+         try:
+             base_model = AutoModelForCausalLM.from_pretrained("gpt2")
+             if len(self.tokenizer) != base_model.config.vocab_size:
+                 base_model.resize_token_embeddings(len(self.tokenizer))
+             model_with_lora = PeftModel.from_pretrained(base_model, model_path)
+             self.model = model_with_lora.merge_and_unload()
+         except Exception:
+             self.model = AutoModelForCausalLM.from_pretrained(model_path)
+
+         # Add a fresh LoRA adapter for RL fine-tuning
+         lora_config = LoraConfig(r=8, lora_alpha=16, target_modules=["c_attn"], lora_dropout=0.05, bias="none")
+         self.model = get_peft_model(self.model, lora_config)
+         self.model = self.model.to(self.device)
+         self.model.train()
+
+         # Build prompt (strip trailing '"}' so it ends at the opening quote of expr)
+         vars_list = [f"x_{i+1}" for i in range(self.n_vars)]
+         ops_list = ["+", "-", "*", "/", "sin", "cos", "sqrt", "log", "exp", "pow"]
+         self.prompt = json.dumps({"vars": vars_list, "ops": ops_list, "cons": "C", "expr": ""})[:-2]
+         self.prompt_ids = self.tokenizer(self.prompt, return_tensors="pt")["input_ids"].to(self.device)
+
+         # Optimizer
+         self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=5e-5)
+
+         # Exponential moving average baseline
+         self.baseline = 0.0
+         self.baseline_decay = 0.9
+
+         # Log of ALL generated expressions
+         self.all_expressions = []
+
+     def extract_expression(self, text: str) -> str:
+         """Extract the expression value from generated JSON text."""
+         try:
+             if '"expr": "' in text:
+                 start = text.index('"expr": "') + len('"expr": "')
+                 remaining = text[start:]
+                 for terminator in ['"}', '"']:
+                     if terminator in remaining:
+                         return remaining[:remaining.index(terminator)].strip()
+         except Exception:
+             pass
+         return text.strip()
+
+     def compute_r2(self, expression_str: str) -> dict:
+         """Compute R^2 and detailed error info for one expression."""
+         result = {
+             "expression": expression_str,
+             "r2": -1.0,
+             "is_valid": False,
+             "error_type": None,
+             "error_message": None,
+         }
+
+         if not expression_str or expression_str.isspace():
+             result["error_type"] = "empty"
+             return result
+
+         # Substitute the constant placeholder before parsing
+         test_expr = expression_str.replace('C', '1')
+
+         try:
+             expr = Expression(test_expr, is_prefix=False)
+
+             if not expr.is_valid_on_dataset(self.X):
+                 result["error_type"] = "invalid_on_dataset"
+                 result["error_message"] = "NaN/Inf on dataset"
+                 return result
+
+             y_pred = expr.evaluate(self.X)
+
+             if not np.all(np.isfinite(y_pred)):
+                 result["error_type"] = "non_finite_output"
+                 return result
+
+             ss_res = np.sum((self.y - y_pred) ** 2)
+             ss_tot = np.sum((self.y - np.mean(self.y)) ** 2)
+
+             if ss_tot == 0:
+                 r2 = 0.0
+             else:
+                 r2 = 1 - (ss_res / ss_tot)
+
+             result["r2"] = float(np.clip(r2, -1.0, 1.0))
+             result["is_valid"] = True
+
+         except Exception as e:
+             result["error_type"] = "parse_error"
+             result["error_message"] = str(e)[:100]
+
+         return result
+
+     def generate_batch(self, batch_size: int = 16, max_new_tokens: int = 50):
+         """Generate a batch of expressions and evaluate them."""
+         results = []
+
+         for _ in range(batch_size):
+             generated_ids = self.prompt_ids.clone()
+             generated_tokens = []
+
+             with torch.no_grad():
+                 for _ in range(max_new_tokens):
+                     outputs = self.model(generated_ids)
+                     logits = outputs.logits[:, -1, :] / 0.7
+
+                     probs = F.softmax(logits, dim=-1)
+                     next_token = torch.multinomial(probs, num_samples=1)
+
+                     generated_tokens.append(next_token.item())
+                     generated_ids = torch.cat([generated_ids, next_token], dim=1)
+
+                     if next_token.item() == self.tokenizer.eos_token_id:
+                         break
+
+                     text = self.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+                     if '"}' in text[len(self.prompt):]:
+                         break
+
+             text = self.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+             expr_str = self.extract_expression(text)
+
+             # Evaluate with detailed info
+             eval_result = self.compute_r2(expr_str)
+
+             # Recompute log probs WITH gradients for the REINFORCE update
+             if len(generated_tokens) > 0:
+                 full_ids = torch.cat([self.prompt_ids, torch.tensor([generated_tokens], device=self.device)], dim=1)
+                 outputs = self.model(full_ids[:, :-1])
+                 logits = outputs.logits / 0.7
+                 prompt_len = self.prompt_ids.shape[1]
+                 gen_logits = logits[:, prompt_len-1:, :]
+                 log_probs_all = F.log_softmax(gen_logits, dim=-1)
+                 target_tokens = torch.tensor(generated_tokens, device=self.device).unsqueeze(0)
+                 selected_log_probs = log_probs_all.gather(2, target_tokens.unsqueeze(-1)).squeeze(-1)
+                 total_log_prob = selected_log_probs.sum()
+             else:
+                 total_log_prob = torch.tensor(0.0, device=self.device, requires_grad=True)
+
+             eval_result["log_prob"] = total_log_prob
+             results.append(eval_result)
+
+             # Log ALL expressions
+             self.all_expressions.append(eval_result.copy())
+
+         return results
+
+     def train_step(self, batch_size: int = 16):
+         """One REINFORCE training step."""
+         results = self.generate_batch(batch_size)
+
+         # Rewards: R^2 for valid expressions, small penalty otherwise
+         rewards = [r["r2"] if r["is_valid"] else -0.1 for r in results]
+
+         # Update EMA baseline from valid samples only (use the validity flag
+         # rather than a reward threshold, since valid R^2 can be below -0.1)
+         valid_rewards = [r["r2"] for r in results if r["is_valid"]]
+         if valid_rewards:
+             mean_reward = np.mean(valid_rewards)
+             self.baseline = self.baseline_decay * self.baseline + (1 - self.baseline_decay) * mean_reward
+
+         # Advantages
+         advantages = [r - self.baseline for r in rewards]
+
+         # Policy gradient update
+         self.optimizer.zero_grad()
+         policy_loss = torch.tensor(0.0, device=self.device)
+
+         for result, advantage in zip(results, advantages):
+             if result["is_valid"] or result["error_type"] == "parse_error":
+                 policy_loss = policy_loss - result["log_prob"] * advantage
+
+         if len(results) > 0:
+             policy_loss = policy_loss / len(results)
+             policy_loss.backward()
+             torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
+             self.optimizer.step()
+
+         # Stats
+         valid_count = sum(1 for r in results if r["is_valid"])
+         valid_r2 = [r["r2"] for r in results if r["is_valid"]]
+
+         return {
+             "valid_count": valid_count,
+             "total_count": len(results),
+             "mean_r2": np.mean(valid_r2) if valid_r2 else -1.0,
+             "max_r2": max(r["r2"] for r in results),
+             "baseline": self.baseline,
+         }
+
+     def run(self, epochs: int = 10):
+         """Run training."""
+         print(f"Running debug REINFORCE for {epochs} epochs...")
+         print()
+
+         for epoch in range(1, epochs + 1):
+             stats = self.train_step()
+             print(f"Epoch {epoch:2d} | Valid: {stats['valid_count']}/{stats['total_count']} | Mean R²: {stats['mean_r2']:.4f} | Max R²: {stats['max_r2']:.4f}")
+
+         # Save ALL expressions
+         output_file = "debug_expressions.json"
+         with open(output_file, "w") as f:
+             json.dump({"all_expressions": self.all_expressions}, f, indent=2, default=str)
+
+         print()
+         print(f"Saved {len(self.all_expressions)} expressions to {output_file}")
+
+         # Analyze
+         valid = [e for e in self.all_expressions if e["is_valid"]]
+         invalid = [e for e in self.all_expressions if not e["is_valid"]]
+
+         print()
+         print("SUMMARY:")
+         print(f"  Total: {len(self.all_expressions)}")
+         print(f"  Valid: {len(valid)} ({100*len(valid)/len(self.all_expressions):.1f}%)")
+         print(f"  Invalid: {len(invalid)} ({100*len(invalid)/len(self.all_expressions):.1f}%)")
+
+         if invalid:
+             error_types = {}
+             for e in invalid:
+                 et = e.get("error_type", "unknown")
+                 error_types[et] = error_types.get(et, 0) + 1
+
+             print()
+             print("Invalid expression types:")
+             for et, count in sorted(error_types.items(), key=lambda x: -x[1]):
+                 print(f"  {et}: {count} ({100*count/len(invalid):.1f}%)")
+
+
+ def main():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--model_path", type=str, required=True)
+     parser.add_argument("--dataset", type=str, required=True)
+     parser.add_argument("--epochs", type=int, default=10)
+     args = parser.parse_args()
+
+     # Load dataset
+     import pandas as pd
+     df = pd.read_csv(args.dataset)
+     x_cols = [c for c in df.columns if c.startswith('x_')]
+     X = df[x_cols].values
+     y = df['y'].values
+
+     print(f"Dataset: {args.dataset}")
+     print(f"  Samples: {len(df)}, Variables: {len(x_cols)}")
+     print()
+
+     # Run
+     reinforce = DebugREINFORCE(args.model_path, X, y)
+     reinforce.run(epochs=args.epochs)
+
+
+ if __name__ == "__main__":
+     main()
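The variance-reduction trick in `train_step` is an exponential moving average baseline: each batch's mean reward is folded into a running estimate, and advantages are rewards minus that estimate. A minimal sketch of just that arithmetic, with the decay of 0.9 used above (`update_baseline` is an illustrative helper, not repo code):

```python
import numpy as np

def update_baseline(baseline, batch_rewards, decay=0.9):
    """EMA update: new = decay * old + (1 - decay) * mean(batch)."""
    return decay * baseline + (1 - decay) * float(np.mean(batch_rewards))

baseline = 0.0
baselines, advantages = [], []
for batch in [[0.2, 0.4], [0.6, 0.8]]:
    baseline = update_baseline(baseline, batch)
    baselines.append(baseline)
    # Advantage = reward - baseline; positive advantages are reinforced
    advantages.append([r - baseline for r in batch])

print(baselines)
```

With decay 0.9 the baseline reacts slowly (0.0 → 0.03 → 0.097 here), so a sudden batch of good rewards still yields large positive advantages before the baseline catches up.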
2_training/reinforcement/grpo_experiment.py ADDED
@@ -0,0 +1,344 @@
+ #!/usr/bin/env python3
+ """
+ GRPO Experiment for Symbolic Regression
+
+ GRPO (Group Relative Policy Optimization) supports custom reward functions
+ via the reward_funcs parameter, making it ideal for symbolic regression,
+ where we compute R^2 scores as rewards.
+
+ This is the recommended approach for TRL 0.27+ since the experimental PPO
+ trainer has compatibility issues.
+
+ Usage:
+     python scripts/grpo_experiment.py --dataset ./data/ppo_test/sin_x1.csv
+ """
+
+ import os
+ os.environ['TRL_EXPERIMENTAL_SILENCE'] = '1'
+
+ import sys
+ import json
+ import argparse
+ import logging
+ import datetime
+ from pathlib import Path
+ from typing import List
+
+ import numpy as np
+ import torch
+
+ # Add project root to path
+ PROJECT_ROOT = Path(__file__).parent.parent
+ sys.path.insert(0, str(PROJECT_ROOT))
+ sys.path.insert(0, str(PROJECT_ROOT / "classes"))
+
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from trl import GRPOConfig, GRPOTrainer
+ from datasets import Dataset
+ from peft import PeftModel
+
+ from expression import Expression
+ from dataset import RegressionDataset
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(levelname)s - %(message)s',
+ )
+ logger = logging.getLogger(__name__)
+
+
+ class SymbolicRegressionReward:
+     """
+     Reward function for symbolic regression.
+     Computes the R^2 score for each generated expression.
+     """
+
+     def __init__(self, X: np.ndarray, y: np.ndarray, tokenizer):
+         self.X = X
+         self.y = y
+         self.tokenizer = tokenizer
+         self.n_vars = X.shape[1]
+         self.best_r2 = -np.inf
+         self.best_expression = None
+         self.history = []
+
+     def extract_expression(self, text: str) -> str:
+         """Extract expression from JSON format output."""
+         try:
+             # Case 1: Standard JSON with quotes
+             if '"expr": "' in text:
+                 start = text.index('"expr": "') + len('"expr": "')
+                 remaining = text[start:]
+                 if '"}' in remaining:
+                     return remaining[:remaining.index('"}')].strip()
+                 if '"' in remaining:
+                     return remaining[:remaining.index('"')].strip()
+                 return remaining.strip()
+
+             # Case 2: Model output without quotes
+             if '"expr": ' in text:
+                 start = text.index('"expr": ') + len('"expr": ')
+                 remaining = text[start:]
+                 if '"}' in remaining:
+                     return remaining[:remaining.index('"}')].strip()
+                 return remaining.strip()
+
+         except (ValueError, IndexError):
+             pass
+
+         return text.split('"expr"')[-1].strip(' ":}')
+
+     def compute_r2(self, expression_str: str) -> float:
+         """Compute the R^2 score for an expression, clipped to [-1, 1]."""
+         if not expression_str or expression_str.isspace():
+             return -1.0
+
+         # Substitute the constant placeholder C with 1
+         if 'C' in expression_str:
+             expression_str = expression_str.replace('C', '1')
+
+         try:
+             expr = Expression(expression_str, is_prefix=False)
+
+             if not expr.is_valid_on_dataset(self.X):
+                 return -1.0
+
+             y_pred = expr.evaluate(self.X)
+
+             if not np.all(np.isfinite(y_pred)):
+                 return -1.0
+
+             ss_res = np.sum((self.y - y_pred) ** 2)
+             ss_tot = np.sum((self.y - np.mean(self.y)) ** 2)
+
+             if ss_tot == 0:
+                 return 0.0
+
+             r2 = 1 - (ss_res / ss_tot)
+             return float(np.clip(r2, -1.0, 1.0))
+         except Exception:
+             return -1.0
+
+     def __call__(self, completions: List[str], **kwargs) -> List[float]:
+         """
+         Compute rewards for a batch of completions.
+
+         Args:
+             completions: List of generated completion strings
+
+         Returns:
+             List of R^2 scores
+         """
+         rewards = []
+
+         for completion in completions:
+             # Extract expression from completion
+             expr_str = self.extract_expression(completion)
+
+             # Compute R^2
+             r2 = self.compute_r2(expr_str)
+             rewards.append(r2)
+
+             # Track best
+             if r2 > self.best_r2:
+                 self.best_r2 = r2
+                 self.best_expression = expr_str
+                 logger.info(f"New best R^2: {r2:.4f} - {expr_str}")
+
+         # Log batch statistics (cast to float so the history is JSON-serializable)
+         valid_rewards = [r for r in rewards if r > -1.0]
+         if valid_rewards:
+             self.history.append({
+                 "mean_r2": float(np.mean(valid_rewards)),
+                 "max_r2": float(max(valid_rewards)),
+                 "valid_rate": len(valid_rewards) / len(rewards),
+             })
+
+         return rewards
+
+
+ def build_prompt(n_vars: int) -> str:
+     """Build JSON format prompt matching training data."""
+     vars_list = [f"x_{i+1}" for i in range(n_vars)]
+     ops_list = ["+", "-", "*", "sin", "cos"]
+
+     prompt = json.dumps({
+         "vars": vars_list,
+         "ops": ops_list,
+         "cons": None,
+         "expr": ""
+     })[:-3]  # Strip the trailing '""}' so the model completes the value
+
+     return prompt
+
+
+ def run_grpo_experiment(
+     model_path: str,
+     dataset_path: str,
+     output_dir: str = "./output/grpo_results",
+     num_episodes: int = 100,
+     batch_size: int = 4,
+     learning_rate: float = 1e-5,
+     use_cpu: bool = False,
+ ):
+     """Run a GRPO experiment with the custom R^2 reward function."""
+
+     output_dir = Path(output_dir)
+     output_dir.mkdir(parents=True, exist_ok=True)
+
+     # Device setup
+     device = "cpu" if use_cpu else ("cuda" if torch.cuda.is_available() else "cpu")
+     logger.info(f"Using device: {device}")
+
+     # Load dataset
+     logger.info(f"Loading dataset from {dataset_path}")
+     dataset_path = Path(dataset_path)
+     reg = RegressionDataset(str(dataset_path.parent), dataset_path.name)
+     X, y = reg.get_numpy()
+     n_vars = X.shape[1]
+     logger.info(f"Dataset: {X.shape[0]} samples, {n_vars} variables")
+
+     # Load tokenizer and model
+     logger.info(f"Loading model from {model_path}")
+
+     # Check if model_path is a local path or a HuggingFace Hub model
+     if Path(model_path).exists():
+         # Load tokenizer from the trained model
+         tokenizer = AutoTokenizer.from_pretrained(model_path)
+         tokenizer.pad_token = tokenizer.eos_token
+
+         # Load base model and LoRA adapter
+         base_model = AutoModelForCausalLM.from_pretrained("gpt2")
+         if len(tokenizer) != base_model.config.vocab_size:
+             base_model.resize_token_embeddings(len(tokenizer))
+
+         try:
+             model_with_lora = PeftModel.from_pretrained(base_model, model_path)
+             model = model_with_lora.merge_and_unload()
+             logger.info("LoRA adapter loaded and merged")
+         except Exception as e:
+             logger.warning(f"Could not load LoRA: {e}")
+             model = AutoModelForCausalLM.from_pretrained(model_path)
+     else:
+         # Load from the HuggingFace Hub
+         tokenizer = AutoTokenizer.from_pretrained(model_path)
+         tokenizer.pad_token = tokenizer.eos_token
+         model = AutoModelForCausalLM.from_pretrained(model_path)
+
+     logger.info("Model loaded successfully")
+
+     # Build prompt and create dataset
+     prompt = build_prompt(n_vars)
+     logger.info(f"Prompt: {prompt}")
+
+     train_dataset = Dataset.from_dict({"prompt": [prompt] * num_episodes})
+
+     # Create reward function
+     reward_func = SymbolicRegressionReward(X, y, tokenizer)
+
+     # GRPO config
+     grpo_config = GRPOConfig(
+         output_dir=str(output_dir),
+         learning_rate=learning_rate,
+         per_device_train_batch_size=batch_size,
+         num_generations=batch_size,  # Generate batch_size samples per prompt
+         max_completion_length=50,
+         num_train_epochs=1,
+         report_to=[],
+         use_cpu=(device == "cpu"),
+         bf16=(device != "cpu"),
+         logging_steps=10,
+         save_strategy="epoch",
+     )
+
+     # Create trainer
+     logger.info("Creating GRPO Trainer...")
+     trainer = GRPOTrainer(
+         model=model,
+         args=grpo_config,
+         processing_class=tokenizer,
+         train_dataset=train_dataset,
+         reward_funcs=reward_func,
+     )
+
+     # Train
+     logger.info("=" * 60)
+     logger.info("GRPO SYMBOLIC REGRESSION EXPERIMENT")
+     logger.info("=" * 60)
+     logger.info(f"Dataset: {dataset_path}")
+     logger.info(f"Model: {model_path}")
+     logger.info(f"Episodes: {num_episodes}")
+     logger.info("=" * 60)
+
+     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+
+     try:
+         trainer.train()
+         logger.info("Training completed!")
+     except Exception as e:
+         logger.error(f"Training failed: {e}")
+         import traceback
+         traceback.print_exc()
+
+     # Results
+     logger.info("\n" + "=" * 60)
+     logger.info("RESULTS")
+     logger.info("=" * 60)
+     logger.info(f"Best R^2: {reward_func.best_r2:.4f}")
+     logger.info(f"Best expression: {reward_func.best_expression}")
+
+     # Save results
+     results = {
+         "timestamp": timestamp,
+         "model_path": model_path,
+         "dataset_path": str(dataset_path),
+         "best_r2": float(reward_func.best_r2),
+         "best_expression": reward_func.best_expression,
+         "history": reward_func.history,
+     }
+
+     results_file = output_dir / f"grpo_results_{timestamp}.json"
+     with open(results_file, 'w') as f:
+         json.dump(results, f, indent=2)
+
+     logger.info(f"Results saved to: {results_file}")
+
+     # Save model
+     trainer.save_model(str(output_dir / "final_model"))
+
+     return results
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="GRPO Symbolic Regression")
+     parser.add_argument("--model_path", type=str, default="gpt2",
+                         help="Path to model (local or HuggingFace)")
+     parser.add_argument("--dataset", type=str, default="./data/ppo_test/sin_x1.csv",
+                         help="Path to test dataset CSV")
+     parser.add_argument("--output_dir", type=str, default="./output/grpo_results",
+                         help="Output directory")
+     parser.add_argument("--num_episodes", type=int, default=100,
+                         help="Number of training episodes")
+     parser.add_argument("--batch_size", type=int, default=4,
+                         help="Batch size")
+     parser.add_argument("--lr", type=float, default=1e-5,
+                         help="Learning rate")
+     parser.add_argument("--cpu", action="store_true",
+                         help="Force CPU usage")
+
+     args = parser.parse_args()
+
+     run_grpo_experiment(
+         model_path=args.model_path,
+         dataset_path=args.dataset,
+         output_dir=args.output_dir,
+         num_episodes=args.num_episodes,
+         batch_size=args.batch_size,
+         learning_rate=args.lr,
+         use_cpu=args.cpu,
+     )
+
+
+ if __name__ == "__main__":
+     main()
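The "group relative" part of GRPO that the docstring refers to happens inside the trainer: the `num_generations` completions sampled for one prompt form a group, and each completion's advantage is its reward normalized against that group's statistics. A minimal sketch of that normalization, under the common formulation (r - mean) / (std + eps); the helper name `group_relative_advantages` is illustrative, not a TRL API:

```python
import numpy as np

def group_relative_advantages(rewards, eps=1e-8):
    """Advantage of each completion relative to its own generation group."""
    r = np.asarray(rewards, dtype=float)
    return (r - r.mean()) / (r.std() + eps)

# One prompt, num_generations=4 completions scored by R^2
adv = group_relative_advantages([0.9, 0.1, -1.0, 0.5])
```

This is why the reward function above only needs to return raw R² scores per completion: no separate value network or running baseline is required, because each group is its own baseline.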
2_training/reinforcement/grpo_improved.py ADDED
@@ -0,0 +1,625 @@
1
+ #!/usr/bin/env python3
+ """
+ Improved GRPO (Group Relative Policy Optimization) for Symbolic Regression
+
+ Improvements over basic GRPO:
+ 1. Filter invalid expressions before computing group statistics
+ 2. Reward shaping with softer penalties
+ 3. Hybrid baseline: group stats + exponential moving average
+ 4. Entropy bonus for exploration
+ 5. Advantage clipping to prevent extreme updates
+ 6. Minimum valid ratio check before updates
+ 7. Temperature annealing for better exploration/exploitation
+ """
+
+ import os
+ import sys
+ import json
+ import argparse
+ import logging
+ import datetime
+ from pathlib import Path
+ from typing import List, Dict, Tuple
+ from collections import deque
+
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+
+ # Add project root to path
+ PROJECT_ROOT = Path(__file__).parent.parent
+ sys.path.insert(0, str(PROJECT_ROOT))
+ sys.path.insert(0, str(PROJECT_ROOT / "classes"))
+
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from peft import PeftModel, LoraConfig, get_peft_model
+
+ from expression import Expression
+ from dataset import RegressionDataset
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(levelname)s - %(message)s',
+ )
+ logger = logging.getLogger(__name__)
+
+
+ class ImprovedGRPO:
+     """Improved GRPO for symbolic regression."""
+
+     def __init__(
+         self,
+         model_path: str,
+         X: np.ndarray,
+         y: np.ndarray,
+         output_dir: str = "./output/grpo",
+         learning_rate: float = 5e-5,
+         device: str = None,
+         group_size: int = 16,  # Larger groups for better statistics
+         entropy_coef: float = 0.01,
+         advantage_clip: float = 2.0,  # Clip extreme advantages
+         min_valid_ratio: float = 0.2,  # Minimum valid expressions to update
+     ):
+         self.X = X
+         self.y = y
+         self.n_vars = X.shape[1]
+         self.output_dir = Path(output_dir)
+         self.output_dir.mkdir(parents=True, exist_ok=True)
+         self.learning_rate = learning_rate
+         self.group_size = group_size
+         self.entropy_coef = entropy_coef
+         self.advantage_clip = advantage_clip
+         self.min_valid_ratio = min_valid_ratio
+
+         # Device
+         if device:
+             self.device = torch.device(device)
+         else:
+             self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         logger.info(f"Using device: {self.device}")
+
+         # Load model
+         self._load_model(model_path)
+
+         # Build prompt
+         self.prompt = self._build_prompt()
+         self.prompt_ids = self.tokenizer(self.prompt, return_tensors="pt")["input_ids"].to(self.device)
+
+         # Optimizer
+         self.optimizer = torch.optim.AdamW(
+             self.model.parameters(),
+             lr=learning_rate,
+             weight_decay=0.01
+         )
+
+         # Scheduler
+         self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
+             self.optimizer, T_0=10, T_mult=2
+         )
+
+         # Tracking
+         self.best_r2 = -np.inf
+         self.best_expression = None
+         self.history = []
+         self.discovered_expressions: Dict[str, float] = {}
+
+         # Hybrid baseline: EMA of valid rewards
+         self.ema_baseline = 0.0
+         self.ema_decay = 0.9
+         self.reward_buffer = deque(maxlen=100)
+
+         # Temperature annealing
+         self.initial_temp = 0.8
+         self.min_temp = 0.5
+         self.current_temp = self.initial_temp
+
+     def _load_model(self, model_path: str):
+         """Load model and tokenizer."""
+         logger.info(f"Loading model from {model_path}")
+
+         self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+         self.tokenizer.pad_token = self.tokenizer.eos_token
+
+         try:
+             logger.info("Attempting to load as LoRA adapter...")
+             base_model = AutoModelForCausalLM.from_pretrained("gpt2")
+             if len(self.tokenizer) != base_model.config.vocab_size:
+                 base_model.resize_token_embeddings(len(self.tokenizer))
+                 logger.info(f"Resized embeddings to {len(self.tokenizer)}")
+
+             model_with_lora = PeftModel.from_pretrained(base_model, model_path)
+             self.model = model_with_lora.merge_and_unload()
+             logger.info("LoRA adapter loaded and merged successfully")
+         except Exception as e:
+             logger.info(f"LoRA load failed ({e}), loading as standalone model...")
+             self.model = AutoModelForCausalLM.from_pretrained(model_path)
+
+         # Add LoRA for training
+         lora_config = LoraConfig(
+             r=8,
+             lora_alpha=16,
+             target_modules=["c_attn"],
+             lora_dropout=0.05,
+             bias="none",
+         )
+         self.model = get_peft_model(self.model, lora_config)
+         self.model = self.model.to(self.device)
+         self.model.train()
+
+         trainable = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
+         logger.info(f"Model loaded with {trainable} trainable params")
+
+     def _build_prompt(self, ops: list = None) -> str:
+         """Build JSON format prompt."""
+         vars_list = [f"x_{i+1}" for i in range(self.n_vars)]
+
+         if ops is None:
+             ops_list = ["+", "-", "*", "/", "sin", "cos", "sqrt", "log", "exp", "pow"]
+         else:
+             ops_list = ops
+
+         prompt = json.dumps({
+             "vars": vars_list,
+             "ops": ops_list,
+             "cons": "C",
+             "expr": ""
+         })
+         prompt = prompt[:-2]  # Drop the trailing '"}' so the model completes the open "expr" field
+         return prompt
+
+     def extract_expression(self, text: str) -> str:
+         """Extract expression from generated text."""
+         try:
+             eos_token = "<|endoftext|>"
+             if eos_token in text:
+                 text = text[:text.index(eos_token)]
+
+             if '"expr": "' in text:
+                 start = text.index('"expr": "') + len('"expr": "')
+                 remaining = text[start:]
+                 for terminator in ['"}', '"']:
+                     if terminator in remaining:
+                         return remaining[:remaining.index(terminator)].strip()
+                 return remaining.strip()
+
+             if '"expr": ' in text:
+                 start = text.index('"expr": ') + len('"expr": ')
+                 remaining = text[start:]
+                 if '"}' in remaining:
+                     return remaining[:remaining.index('"}')].strip()
+                 return remaining.strip(' "')
+
+         except (ValueError, IndexError):
+             pass
+
+         if '"expr"' in text:
+             return text.split('"expr"')[-1].strip(' ":{}')
+         return text.strip()
+
+     def compute_r2(self, expression_str: str) -> Tuple[float, bool]:
+         """Compute R^2 score."""
+         if not expression_str or expression_str.isspace():
+             return -1.0, False
+
+         if 'C' in expression_str:
+             expression_str = expression_str.replace('C', '1')
+
+         try:
+             expr = Expression(expression_str, is_prefix=False)
+             if not expr.is_valid_on_dataset(self.X):
+                 return -1.0, False
+
+             y_pred = expr.evaluate(self.X)
+             if not np.all(np.isfinite(y_pred)):
+                 return -1.0, False
+
+             ss_res = np.sum((self.y - y_pred) ** 2)
+             ss_tot = np.sum((self.y - np.mean(self.y)) ** 2)
+
+             if ss_tot == 0:
+                 return 0.0, True
+
+             r2 = 1 - (ss_res / ss_tot)
+             return float(np.clip(r2, -1.0, 1.0)), True
+         except Exception:
+             return -1.0, False
+
+     def shape_reward(self, r2: float, is_valid: bool) -> float:
+         """Shape reward for better learning signal."""
+         if not is_valid:
+             return -0.1  # Small penalty, not -1.0
+
+         # Bonus for high R²
+         if r2 >= 0.99:
+             return 2.0  # Big bonus for near-perfect
+         elif r2 >= 0.9:
+             return r2 * 1.5
+         elif r2 >= 0.5:
+             return r2 * 1.2
+         elif r2 >= 0:
+             return r2
+         else:
+             return r2 * 0.5  # Reduce negative penalty
+
+     def generate_group(self, max_new_tokens: int = 50) -> List[Dict]:
+         """Generate a group of expressions."""
+         results = []
+
+         for _ in range(self.group_size):
+             generated_ids = self.prompt_ids.clone()
+             generated_tokens = []
+
+             # Phase 1: Generate tokens
+             with torch.no_grad():
+                 for _ in range(max_new_tokens):
+                     outputs = self.model(generated_ids)
+                     logits = outputs.logits[:, -1, :] / self.current_temp
+
+                     probs = F.softmax(logits, dim=-1)
+                     next_token = torch.multinomial(probs, num_samples=1)
+                     generated_tokens.append(next_token.item())
+
+                     generated_ids = torch.cat([generated_ids, next_token], dim=1)
+
+                     if next_token.item() == self.tokenizer.eos_token_id:
+                         break
+
+                     text = self.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+                     if '"}' in text[len(self.prompt):]:
+                         break
+
+             # Decode and evaluate
+             text = self.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+             expr_str = self.extract_expression(text)
+             r2, is_valid = self.compute_r2(expr_str)
+             reward = self.shape_reward(r2, is_valid)
+
+             # Phase 2: Compute log probs with gradients
+             if len(generated_tokens) > 0:
+                 full_ids = torch.cat([
+                     self.prompt_ids,
+                     torch.tensor([generated_tokens], device=self.device)
+                 ], dim=1)
+
+                 outputs = self.model(full_ids[:, :-1])
+                 logits = outputs.logits / self.current_temp
+
+                 prompt_len = self.prompt_ids.shape[1]
+                 gen_logits = logits[:, prompt_len-1:, :]
+
+                 log_probs_all = F.log_softmax(gen_logits, dim=-1)
+                 probs_all = F.softmax(gen_logits, dim=-1)
+
+                 target_tokens = torch.tensor(generated_tokens, device=self.device).unsqueeze(0)
+                 selected_log_probs = log_probs_all.gather(2, target_tokens.unsqueeze(-1)).squeeze(-1)
+                 total_log_prob = selected_log_probs.sum()
+
+                 # Entropy for exploration
+                 entropy_per_pos = -(probs_all * log_probs_all).sum(dim=-1)
+                 total_entropy = entropy_per_pos.mean()
+             else:
+                 total_log_prob = torch.tensor(0.0, device=self.device, requires_grad=True)
+                 total_entropy = torch.tensor(0.0, device=self.device)
+
+             results.append({
+                 "text": text,
+                 "expression": expr_str,
+                 "r2": r2,
+                 "is_valid": is_valid,
+                 "reward": reward,
+                 "log_prob": total_log_prob,
+                 "entropy": total_entropy,
+             })
+
+             # Track best
+             if is_valid:
+                 self.discovered_expressions[expr_str] = max(
+                     self.discovered_expressions.get(expr_str, -np.inf), r2
+                 )
+                 self.reward_buffer.append(reward)
+
+                 if r2 > self.best_r2:
+                     self.best_r2 = r2
+                     self.best_expression = expr_str
+
+             if self.device.type == "cuda":
+                 torch.cuda.empty_cache()
+
+         return results
+
+     def compute_advantages(self, results: List[Dict]) -> Tuple[List[float], dict]:
+         """
+         Compute improved GRPO advantages.
+
+         Key improvement: Only use VALID expressions for group statistics.
+         Invalid expressions get a fixed small negative advantage.
+         """
+         valid_results = [r for r in results if r["is_valid"]]
+         valid_rewards = [r["reward"] for r in valid_results]
+
+         stats = {
+             "valid_count": len(valid_results),
+             "total_count": len(results),
+             "valid_ratio": len(valid_results) / len(results),
+         }
+
+         # If too few valid expressions, use EMA baseline only
+         if len(valid_rewards) < 2:
+             advantages = []
+             for r in results:
+                 if r["is_valid"]:
+                     adv = r["reward"] - self.ema_baseline
+                 else:
+                     adv = -0.5  # Fixed penalty for invalid
+                 advantages.append(adv)
+             stats["method"] = "ema_only"
+             return advantages, stats
+
+         # Compute group statistics from valid expressions only
+         group_mean = np.mean(valid_rewards)
+         group_std = np.std(valid_rewards)
+
+         # Update EMA baseline
+         self.ema_baseline = self.ema_decay * self.ema_baseline + (1 - self.ema_decay) * group_mean
+
+         # Hybrid baseline: combine group mean with EMA
+         hybrid_baseline = 0.7 * group_mean + 0.3 * self.ema_baseline
+
+         # Avoid division by zero
+         if group_std < 1e-8:
+             group_std = 1.0
+
+         # Compute advantages
+         advantages = []
+         for r in results:
+             if r["is_valid"]:
+                 # Normalized advantage for valid expressions
+                 adv = (r["reward"] - hybrid_baseline) / group_std
+                 # Clip to prevent extreme updates
+                 adv = np.clip(adv, -self.advantage_clip, self.advantage_clip)
+             else:
+                 # Small fixed penalty for invalid (doesn't pollute group stats)
+                 adv = -0.3
+             advantages.append(adv)
+
+         stats["method"] = "hybrid"
+         stats["group_mean"] = group_mean
+         stats["group_std"] = group_std
+         stats["ema_baseline"] = self.ema_baseline
+
+         return advantages, stats
+
+     def train_step(self, num_groups: int = 2) -> dict:
+         """Perform one training step."""
+         self.model.train()
+
+         all_results = []
+         all_advantages = []
+         total_policy_loss = 0.0
+         total_entropy_loss = 0.0
+         skipped_groups = 0
+
+         self.optimizer.zero_grad()
+
+         for _ in range(num_groups):
+             if self.device.type == "cuda":
+                 torch.cuda.empty_cache()
+
+             # Generate group
+             group_results = self.generate_group()
+             all_results.extend(group_results)
+
+             # Compute advantages
+             advantages, adv_stats = self.compute_advantages(group_results)
+             all_advantages.extend(advantages)
+
+             # Skip update if too few valid expressions
+             if adv_stats["valid_ratio"] < self.min_valid_ratio:
+                 skipped_groups += 1
+                 continue
+
+             # Compute loss
+             policy_loss = torch.tensor(0.0, device=self.device)
+             entropy_loss = torch.tensor(0.0, device=self.device)
+             valid_count = 0
+
+             for result, advantage in zip(group_results, advantages):
+                 if result["is_valid"] and advantage != 0:
+                     policy_loss = policy_loss - result["log_prob"] * advantage
+                     entropy_loss = entropy_loss - result["entropy"]
+                     valid_count += 1
+
+             if valid_count > 0:
+                 policy_loss = policy_loss / valid_count
+                 entropy_loss = entropy_loss / valid_count
+
+                 # Combined loss
+                 loss = policy_loss + self.entropy_coef * entropy_loss
+                 loss = loss / num_groups
+                 loss.backward()
+
+             total_policy_loss += policy_loss.item()
+             total_entropy_loss += entropy_loss.item()
+
+         # Only update if we had valid groups
+         if skipped_groups < num_groups:
+             torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
+             self.optimizer.step()
+             self.scheduler.step()
+
+         # Statistics
+         r2_values = [r["r2"] for r in all_results]
+         valid_mask = [r["is_valid"] for r in all_results]
+         valid_r2 = [r2 for r2, v in zip(r2_values, valid_mask) if v]
+
+         return {
+             "valid_count": int(sum(valid_mask)),
+             "total_count": len(all_results),
+             "valid_rate": sum(valid_mask) / len(all_results) if all_results else 0,
+             "mean_r2": float(np.mean(valid_r2)) if valid_r2 else 0.0,
+             "max_r2": float(max(r2_values)) if r2_values else 0.0,
+             "mean_advantage": float(np.mean(all_advantages)) if all_advantages else 0.0,
+             "ema_baseline": self.ema_baseline,
+             "policy_loss": total_policy_loss / max(num_groups - skipped_groups, 1),
+             "entropy_loss": total_entropy_loss / max(num_groups - skipped_groups, 1),
+             "lr": self.scheduler.get_last_lr()[0],
+             "temperature": self.current_temp,
+             "skipped_groups": skipped_groups,
+         }
+
+     def anneal_temperature(self, epoch: int, total_epochs: int):
+         """Anneal temperature from initial to minimum."""
+         progress = epoch / total_epochs
+         self.current_temp = self.initial_temp - progress * (self.initial_temp - self.min_temp)
+
+     def run(
+         self,
+         epochs: int = 50,
+         num_groups: int = 2,
+         target_r2: float = 0.99,
+         patience: int = 20,
+     ) -> dict:
+         """Run improved GRPO training."""
+         logger.info("=" * 60)
+         logger.info("IMPROVED GRPO SYMBOLIC REGRESSION")
+         logger.info("=" * 60)
+         logger.info(f"Epochs: {epochs}")
+         logger.info(f"Group size: {self.group_size}")
+         logger.info(f"Num groups: {num_groups}")
+         logger.info(f"Effective batch: {self.group_size * num_groups}")
+         logger.info(f"Entropy coef: {self.entropy_coef}")
+         logger.info(f"Advantage clip: {self.advantage_clip}")
+         logger.info(f"Min valid ratio: {self.min_valid_ratio}")
+         logger.info(f"Target R^2: {target_r2}")
+         logger.info("=" * 60)
+
+         no_improvement_count = 0
+         best_r2_at_start = self.best_r2
+
+         for epoch in range(1, epochs + 1):
+             # Anneal temperature
+             self.anneal_temperature(epoch, epochs)
+
+             stats = self.train_step(num_groups)
+             self.history.append({
+                 "epoch": epoch,
+                 **stats,
+                 "best_r2": self.best_r2,
+             })
+
+             logger.info(
+                 f"Epoch {epoch:3d} | "
+                 f"Valid: {stats['valid_count']}/{stats['total_count']} | "
+                 f"Mean R²: {stats['mean_r2']:.4f} | "
+                 f"Best: {self.best_r2:.4f} | "
+                 f"EMA: {stats['ema_baseline']:.3f} | "
+                 f"Temp: {stats['temperature']:.2f} | "
+                 f"LR: {stats['lr']:.2e}"
+             )
+
+             # Check for target
+             if self.best_r2 >= target_r2:
+                 logger.info(f"Target R^2 {target_r2} reached at epoch {epoch}!")
+                 break
+
+             # Early stopping
+             if self.best_r2 > best_r2_at_start:
+                 best_r2_at_start = self.best_r2
+                 no_improvement_count = 0
+             else:
+                 no_improvement_count += 1
+
+             if no_improvement_count >= patience:
+                 logger.info(f"No improvement for {patience} epochs. Early stopping.")
+                 break
+
+         # Final results
+         logger.info("")
+         logger.info("=" * 60)
+         logger.info("FINAL RESULTS")
+         logger.info("=" * 60)
+         logger.info(f"Best R^2: {self.best_r2:.4f}")
+         logger.info(f"Best expression: {self.best_expression}")
+         logger.info(f"Unique expressions discovered: {len(self.discovered_expressions)}")
+
+         top_exprs = sorted(
+             self.discovered_expressions.items(),
+             key=lambda x: x[1],
+             reverse=True
+         )[:5]
+         logger.info("Top 5 expressions:")
+         for expr, r2 in top_exprs:
+             logger.info(f"  R²={r2:.4f}: {expr}")
+
+         # Save results
+         results = {
+             "algorithm": "ImprovedGRPO",
+             "best_r2": self.best_r2,
+             "best_expression": self.best_expression,
+             "history": self.history,
+             "discovered_expressions": dict(list(self.discovered_expressions.items())[:100]),
+             "config": {
+                 "group_size": self.group_size,
+                 "num_groups": num_groups,
+                 "learning_rate": self.learning_rate,
+                 "entropy_coef": self.entropy_coef,
+                 "advantage_clip": self.advantage_clip,
+                 "min_valid_ratio": self.min_valid_ratio,
+             }
+         }
+
+         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+         output_path = self.output_dir / f"results_grpo_improved_{timestamp}.json"
+         with open(output_path, "w") as f:
+             json.dump(results, f, indent=2)
+         logger.info(f"Results saved to: {output_path}")
+
+         return results
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="Improved GRPO for Symbolic Regression")
+     parser.add_argument("--model_path", type=str, required=True)
+     parser.add_argument("--dataset", type=str, required=True)
+     parser.add_argument("--output_dir", type=str, default="./output/grpo")
+     parser.add_argument("--epochs", type=int, default=50)
+     parser.add_argument("--group_size", type=int, default=16)
+     parser.add_argument("--num_groups", type=int, default=2)
+     parser.add_argument("--learning_rate", type=float, default=5e-5)
+     parser.add_argument("--target_r2", type=float, default=0.99)
+     parser.add_argument("--entropy_coef", type=float, default=0.01)
+     args = parser.parse_args()
+
+     # Load dataset
+     import pandas as pd
+     df = pd.read_csv(args.dataset)
+
+     x_cols = [c for c in df.columns if c.startswith('x_')]
+     X = df[x_cols].values
+     y = df['y'].values
+
+     logger.info(f"Loaded dataset: {args.dataset}")
+     logger.info(f"  Samples: {len(df)}, Variables: {len(x_cols)}")
+
+     # Create trainer
+     grpo = ImprovedGRPO(
+         model_path=args.model_path,
+         X=X,
+         y=y,
+         output_dir=args.output_dir,
+         learning_rate=args.learning_rate,
+         group_size=args.group_size,
+         entropy_coef=args.entropy_coef,
+     )
+
+     # Run training
+     results = grpo.run(
+         epochs=args.epochs,
+         num_groups=args.num_groups,
+         target_r2=args.target_r2,
+     )
+
+
+ if __name__ == "__main__":
+     main()
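The valid-only group statistics, hybrid baseline, and advantage clipping used by `ImprovedGRPO.compute_advantages` can be exercised in isolation on toy rewards (a minimal standalone sketch, not part of the committed files; all numbers are invented):

```python
import numpy as np

def improved_advantages(rewards, valid, ema, clip=2.0, invalid_adv=-0.3):
    """Group stats from valid samples only, hybrid (group/EMA) baseline, clipping."""
    valid_rewards = [r for r, v in zip(rewards, valid) if v]
    mean, std = float(np.mean(valid_rewards)), float(np.std(valid_rewards))
    if std < 1e-8:  # avoid division by zero on a uniform group
        std = 1.0
    baseline = 0.7 * mean + 0.3 * ema  # blend group mean with running EMA
    return [
        float(np.clip((r - baseline) / std, -clip, clip)) if v else invalid_adv
        for r, v in zip(rewards, valid)
    ]

# Two valid samples and one invalid one; the invalid sample gets a
# fixed penalty and never contaminates the mean/std of the group.
advs = improved_advantages([1.5, 0.4, -0.1], [True, True, False], ema=0.2)
```

The design point this illustrates: because invalid generations are excluded from the statistics, a batch full of malformed expressions cannot drag the baseline down and reward the rest for merely parsing.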
2_training/reinforcement/grpo_symbolic.py ADDED
@@ -0,0 +1,539 @@
+ #!/usr/bin/env python3
+ """
+ GRPO (Group Relative Policy Optimization) for Symbolic Regression
+
+ Based on DeepSeek-R1 approach:
+ - Generate a group of N samples
+ - Compute advantages relative to group mean/std
+ - No external baseline needed
+
+ Comparison with REINFORCE:
+ - REINFORCE: advantage = reward - moving_average_baseline
+ - GRPO: advantage = (reward - group_mean) / group_std
+ """
+
+ import os
+ import sys
+ import json
+ import argparse
+ import logging
+ import datetime
+ from pathlib import Path
+ from typing import List, Dict, Tuple
+ from copy import deepcopy
+
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+
+ # Add project root to path
+ PROJECT_ROOT = Path(__file__).parent.parent
+ sys.path.insert(0, str(PROJECT_ROOT))
+ sys.path.insert(0, str(PROJECT_ROOT / "classes"))
+
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from peft import PeftModel, LoraConfig, get_peft_model
+
+ from expression import Expression
+ from dataset import RegressionDataset
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(levelname)s - %(message)s',
+ )
+ logger = logging.getLogger(__name__)
+
+
+ class GRPO:
+     """Group Relative Policy Optimization for symbolic regression."""
+
+     def __init__(
+         self,
+         model_path: str,
+         X: np.ndarray,
+         y: np.ndarray,
+         output_dir: str = "./output/grpo",
+         learning_rate: float = 5e-5,
+         device: str = None,
+         group_size: int = 8,  # Number of samples per group
+         kl_coef: float = 0.01,  # KL penalty coefficient
+         clip_range: float = 0.2,  # PPO-style clipping (optional)
+     ):
+         self.X = X
+         self.y = y
+         self.n_vars = X.shape[1]
+         self.output_dir = Path(output_dir)
+         self.output_dir.mkdir(parents=True, exist_ok=True)
+         self.learning_rate = learning_rate
+         self.group_size = group_size
+         self.kl_coef = kl_coef
+         self.clip_range = clip_range
+
+         # Device
+         if device:
+             self.device = torch.device(device)
+         else:
+             self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         logger.info(f"Using device: {self.device}")
+
+         # Load model
+         self._load_model(model_path)
+
+         # Keep reference model for KL penalty
+         self.ref_model = None  # Will be set after first update
+
+         # Build prompt
+         self.prompt = self._build_prompt()
+         self.prompt_ids = self.tokenizer(self.prompt, return_tensors="pt")["input_ids"].to(self.device)
+
+         # Optimizer
+         self.optimizer = torch.optim.AdamW(
+             self.model.parameters(),
+             lr=learning_rate,
+             weight_decay=0.01
+         )
+
+         # Scheduler
+         self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
+             self.optimizer, T_0=10, T_mult=2
+         )
+
+         # Tracking
+         self.best_r2 = -np.inf
+         self.best_expression = None
+         self.history = []
+         self.discovered_expressions: Dict[str, float] = {}
+
+     def _load_model(self, model_path: str):
+         """Load model and tokenizer."""
+         logger.info(f"Loading model from {model_path}")
+
+         self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+         self.tokenizer.pad_token = self.tokenizer.eos_token
+
+         try:
+             logger.info("Attempting to load as LoRA adapter...")
+             base_model = AutoModelForCausalLM.from_pretrained("gpt2")
+             if len(self.tokenizer) != base_model.config.vocab_size:
+                 base_model.resize_token_embeddings(len(self.tokenizer))
+                 logger.info(f"Resized embeddings to {len(self.tokenizer)}")
+
+             model_with_lora = PeftModel.from_pretrained(base_model, model_path)
+             self.model = model_with_lora.merge_and_unload()
+             logger.info("LoRA adapter loaded and merged successfully")
+         except Exception as e:
+             logger.info(f"LoRA load failed ({e}), loading as standalone model...")
+             self.model = AutoModelForCausalLM.from_pretrained(model_path)
+
+         # Add LoRA for training
+         lora_config = LoraConfig(
+             r=8,
+             lora_alpha=16,
+             target_modules=["c_attn"],
+             lora_dropout=0.05,
+             bias="none",
+         )
+         self.model = get_peft_model(self.model, lora_config)
+         self.model = self.model.to(self.device)
+         self.model.train()
+
+         trainable = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
+         logger.info(f"Model loaded with {trainable} trainable params")
+
+     def _build_prompt(self, ops: list = None) -> str:
+         """Build JSON format prompt."""
+         vars_list = [f"x_{i+1}" for i in range(self.n_vars)]
+
+         if ops is None:
+             ops_list = ["+", "-", "*", "/", "sin", "cos", "sqrt", "log", "exp", "pow"]
+         else:
+             ops_list = ops
+
+         prompt = json.dumps({
+             "vars": vars_list,
+             "ops": ops_list,
+             "cons": "C",
+             "expr": ""
+         })
+         prompt = prompt[:-2]  # Drop the trailing '"}' so the model completes the open "expr" field
+         return prompt
+
+     def extract_expression(self, text: str) -> str:
+         """Extract expression from generated text."""
+         try:
+             eos_token = "<|endoftext|>"
+             if eos_token in text:
+                 text = text[:text.index(eos_token)]
+
+             if '"expr": "' in text:
+                 start = text.index('"expr": "') + len('"expr": "')
+                 remaining = text[start:]
+                 for terminator in ['"}', '"']:
+                     if terminator in remaining:
+                         return remaining[:remaining.index(terminator)].strip()
+                 return remaining.strip()
+
+             if '"expr": ' in text:
+                 start = text.index('"expr": ') + len('"expr": ')
+                 remaining = text[start:]
+                 if '"}' in remaining:
+                     return remaining[:remaining.index('"}')].strip()
+                 return remaining.strip(' "')
+
+         except (ValueError, IndexError):
+             pass
+
+         if '"expr"' in text:
+             return text.split('"expr"')[-1].strip(' ":{}')
+         return text.strip()
+
+     def compute_r2(self, expression_str: str) -> Tuple[float, bool]:
+         """Compute R^2 score. Returns (score, is_valid)."""
+         if not expression_str or expression_str.isspace():
+             return -1.0, False
+
+         if 'C' in expression_str:
+             expression_str = expression_str.replace('C', '1')
+
+         try:
+             expr = Expression(expression_str, is_prefix=False)
+             if not expr.is_valid_on_dataset(self.X):
+                 return -1.0, False
+
+             y_pred = expr.evaluate(self.X)
+             if not np.all(np.isfinite(y_pred)):
+                 return -1.0, False
+
+             ss_res = np.sum((self.y - y_pred) ** 2)
+             ss_tot = np.sum((self.y - np.mean(self.y)) ** 2)
+
+             if ss_tot == 0:
+                 return 0.0, True
+
+             r2 = 1 - (ss_res / ss_tot)
+             return float(np.clip(r2, -1.0, 1.0)), True
+         except Exception:
+             return -1.0, False
+
+     def generate_group(
+         self,
+         temperature: float = 0.7,
+         max_new_tokens: int = 50
+     ) -> List[Dict]:
+         """Generate a group of expressions."""
+         results = []
+
+         for _ in range(self.group_size):
+             generated_ids = self.prompt_ids.clone()
+             generated_tokens = []
+
+             # Phase 1: Generate tokens without gradients
+             with torch.no_grad():
+                 for _ in range(max_new_tokens):
+                     outputs = self.model(generated_ids)
+                     logits = outputs.logits[:, -1, :] / temperature
+
+                     probs = F.softmax(logits, dim=-1)
+                     next_token = torch.multinomial(probs, num_samples=1)
+                     generated_tokens.append(next_token.item())
+
+                     generated_ids = torch.cat([generated_ids, next_token], dim=1)
+
+                     if next_token.item() == self.tokenizer.eos_token_id:
+                         break
+
+                     text = self.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+                     if '"}' in text[len(self.prompt):]:
+                         break
+
+             # Decode and extract expression
+             text = self.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+             expr_str = self.extract_expression(text)
+             r2, is_valid = self.compute_r2(expr_str)
+
+             # Phase 2: Efficient log prob computation
+             if len(generated_tokens) > 0:
+                 full_ids = torch.cat([
+                     self.prompt_ids,
+                     torch.tensor([generated_tokens], device=self.device)
+                 ], dim=1)
+
+                 outputs = self.model(full_ids[:, :-1])
+                 logits = outputs.logits / temperature
+
+                 prompt_len = self.prompt_ids.shape[1]
+                 gen_logits = logits[:, prompt_len-1:, :]
+
+                 log_probs_all = F.log_softmax(gen_logits, dim=-1)
+
+                 target_tokens = torch.tensor(generated_tokens, device=self.device).unsqueeze(0)
+                 selected_log_probs = log_probs_all.gather(2, target_tokens.unsqueeze(-1)).squeeze(-1)
+                 total_log_prob = selected_log_probs.sum()
+             else:
+                 total_log_prob = torch.tensor(0.0, device=self.device, requires_grad=True)
+
+             results.append({
+                 "text": text,
+                 "expression": expr_str,
+                 "r2": r2,
+                 "is_valid": is_valid,
+                 "log_prob": total_log_prob,
+                 "generated_tokens": generated_tokens,
+             })
+
+             # Track best
+             if is_valid:
+                 self.discovered_expressions[expr_str] = max(
+                     self.discovered_expressions.get(expr_str, -np.inf), r2
+                 )
+
+                 if r2 > self.best_r2:
+                     self.best_r2 = r2
+                     self.best_expression = expr_str
+
+             # Clear cache
+             if self.device.type == "cuda":
+                 torch.cuda.empty_cache()
+
+         return results
+
+ def compute_group_advantages(self, results: List[Dict]) -> tuple:
+ """
+ Compute GRPO advantages: (reward - mean) / std
+
+ This is the key difference from REINFORCE:
+ - REINFORCE uses an external moving-average baseline
+ - GRPO uses within-group statistics
+
+ Returns (advantages, group mean reward, group reward std).
+ """
+ # Get rewards (R² values, with penalty for invalid)
+ rewards = []
+ for r in results:
+ if r["is_valid"]:
+ rewards.append(r["r2"])
+ else:
+ rewards.append(-0.1) # Small penalty for invalid
+
+ rewards = np.array(rewards)
+
+ # Compute group statistics
+ mean_reward = np.mean(rewards)
+ std_reward = np.std(rewards)
+
+ # Avoid division by zero when all rewards in the group are identical
+ if std_reward < 1e-8:
+ std_reward = 1.0
+
+ # Compute normalized advantages
+ advantages = (rewards - mean_reward) / std_reward
+
+ return advantages.tolist(), mean_reward, std_reward
+
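The group-relative normalization is easy to sanity-check in isolation. A standalone sketch of the same computation (the `group_advantages` helper is illustrative; the trainer's method additionally applies the invalid-expression penalty):

```python
import numpy as np

def group_advantages(rewards, eps=1e-8):
    """GRPO-style advantages: (reward - group mean) / group std."""
    rewards = np.asarray(rewards, dtype=float)
    mean, std = rewards.mean(), rewards.std()
    if std < eps:
        std = 1.0  # degenerate group: all rewards identical
    return (rewards - mean) / std

# Within a group, advantages are zero-mean with unit variance, so samples
# above the group average are reinforced and those below are discouraged.
adv = group_advantages([0.9, 0.5, -0.1, 0.7])
```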
+ def train_step(self, num_groups: int = 4) -> dict:
+ """
+ Perform one GRPO training step.
+
+ Args:
+ num_groups: Number of groups to sample (effective batch = num_groups * group_size)
+ """
+ self.model.train()
+
+ all_results = []
+ all_advantages = []
+ total_loss = 0.0
+
+ self.optimizer.zero_grad()
+
+ # Generate multiple groups
+ for _ in range(num_groups):
+ if self.device.type == "cuda":
+ torch.cuda.empty_cache()
+
+ # Generate a group of samples
+ group_results = self.generate_group()
+ all_results.extend(group_results)
+
+ # Compute group-relative advantages
+ advantages, group_mean, group_std = self.compute_group_advantages(group_results)
+ all_advantages.extend(advantages)
+
+ # Compute loss for this group
+ group_loss = torch.tensor(0.0, device=self.device)
+ valid_count = 0
+
+ for result, advantage in zip(group_results, advantages):
+ if result["is_valid"]:
+ # Policy gradient loss with advantage
+ group_loss = group_loss - result["log_prob"] * advantage
+ valid_count += 1
+
+ if valid_count > 0:
+ group_loss = group_loss / valid_count
+ group_loss = group_loss / num_groups # Scale for gradient accumulation
+ group_loss.backward()
+ total_loss += group_loss.item()
+
+ # Gradient clipping
+ torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
+
+ # Update
+ self.optimizer.step()
+ self.scheduler.step()
+
+ # Statistics
+ r2_values = [r["r2"] for r in all_results]
+ valid_mask = [r["is_valid"] for r in all_results]
+ valid_r2 = [r2 for r2, v in zip(r2_values, valid_mask) if v]
+
+ return {
+ "valid_count": int(sum(valid_mask)),
+ "total_count": len(all_results),
+ "valid_rate": sum(valid_mask) / len(all_results),
+ "mean_r2": float(np.mean(valid_r2)) if valid_r2 else 0.0,
+ "max_r2": float(max(r2_values)),
+ "mean_advantage": float(np.mean(all_advantages)),
+ "std_advantage": float(np.std(all_advantages)),
+ "loss": total_loss,
+ "lr": self.scheduler.get_last_lr()[0],
+ }
+
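Dividing each group's loss by `num_groups` before `backward()` means the accumulated gradient equals that of the mean group loss, so the effective learning rate stays constant when `num_groups` changes. The arithmetic behind that scaling, sketched standalone:

```python
# Each of the num_groups backward() calls contributes grad(loss_g / num_groups);
# since gradients add across calls, the accumulated gradient equals
# grad(mean group loss), regardless of how many groups were sampled.
group_losses = [2.0, 4.0, 6.0, 8.0]
num_groups = len(group_losses)

accumulated = sum(loss / num_groups for loss in group_losses)
mean_loss = sum(group_losses) / num_groups
# accumulated == mean_loss
```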
+ def run(
+ self,
+ epochs: int = 50,
+ num_groups: int = 4,
+ target_r2: float = 0.99,
+ patience: int = 20,
+ ) -> dict:
+ """Run GRPO training."""
+ logger.info("=" * 60)
+ logger.info("GRPO SYMBOLIC REGRESSION")
+ logger.info("=" * 60)
+ logger.info(f"Epochs: {epochs}")
+ logger.info(f"Group size: {self.group_size}")
+ logger.info(f"Num groups: {num_groups}")
+ logger.info(f"Effective batch: {self.group_size * num_groups}")
+ logger.info(f"Target R²: {target_r2}")
+ logger.info("=" * 60)
+
+ no_improvement_count = 0
+ best_r2_so_far = self.best_r2
+
+ for epoch in range(1, epochs + 1):
+ stats = self.train_step(num_groups)
+ self.history.append({
+ "epoch": epoch,
+ **stats,
+ "best_r2": self.best_r2,
+ })
+
+ logger.info(
+ f"Epoch {epoch:3d} | "
+ f"Valid: {stats['valid_count']}/{stats['total_count']} | "
+ f"Mean R²: {stats['mean_r2']:.4f} | "
+ f"Best: {self.best_r2:.4f} | "
+ f"Adv μ: {stats['mean_advantage']:.3f} σ: {stats['std_advantage']:.3f} | "
+ f"LR: {stats['lr']:.2e}"
+ )
+
+ # Check for target
+ if self.best_r2 >= target_r2:
+ logger.info(f"Target R² {target_r2} reached at epoch {epoch}!")
+ break
+
+ # Early stopping on stagnation
+ if self.best_r2 > best_r2_so_far:
+ best_r2_so_far = self.best_r2
+ no_improvement_count = 0
+ else:
+ no_improvement_count += 1
+
+ if no_improvement_count >= patience:
+ logger.info(f"No improvement for {patience} epochs. Early stopping.")
+ break
+
+ # Final results
+ logger.info("")
+ logger.info("=" * 60)
+ logger.info("FINAL RESULTS")
+ logger.info("=" * 60)
+ logger.info(f"Best R²: {self.best_r2:.4f}")
+ logger.info(f"Best expression: {self.best_expression}")
+ logger.info(f"Unique expressions discovered: {len(self.discovered_expressions)}")
+
+ # Top expressions
+ top_exprs = sorted(
+ self.discovered_expressions.items(),
+ key=lambda x: x[1],
+ reverse=True
+ )[:5]
+ logger.info("Top 5 expressions:")
+ for expr, r2 in top_exprs:
+ logger.info(f" R²={r2:.4f}: {expr}")
+
+ # Save results
+ results = {
+ "algorithm": "GRPO",
+ "best_r2": self.best_r2,
+ "best_expression": self.best_expression,
+ "history": self.history,
+ "discovered_expressions": dict(list(self.discovered_expressions.items())[:100]),
+ "config": {
+ "group_size": self.group_size,
+ "num_groups": num_groups,
+ "learning_rate": self.learning_rate,
+ "kl_coef": self.kl_coef,
+ }
+ }
+
+ timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+ output_path = self.output_dir / f"results_grpo_{timestamp}.json"
+ with open(output_path, "w") as f:
+ json.dump(results, f, indent=2)
+ logger.info(f"Results saved to: {output_path}")
+
+ return results
+
+
+ def main():
+ parser = argparse.ArgumentParser(description="GRPO for Symbolic Regression")
+ parser.add_argument("--model_path", type=str, required=True)
+ parser.add_argument("--dataset", type=str, required=True)
+ parser.add_argument("--output_dir", type=str, default="./output/grpo")
+ parser.add_argument("--epochs", type=int, default=50)
+ parser.add_argument("--group_size", type=int, default=8)
+ parser.add_argument("--num_groups", type=int, default=4)
+ parser.add_argument("--learning_rate", type=float, default=5e-5)
+ parser.add_argument("--target_r2", type=float, default=0.99)
+ args = parser.parse_args()
+
+ # Load dataset
+ import pandas as pd
+ df = pd.read_csv(args.dataset)
+
+ x_cols = [c for c in df.columns if c.startswith('x_')]
+ X = df[x_cols].values
+ y = df['y'].values
+
+ logger.info(f"Loaded dataset: {args.dataset}")
+ logger.info(f" Samples: {len(df)}, Variables: {len(x_cols)}")
+
+ # Create GRPO trainer
+ grpo = GRPO(
+ model_path=args.model_path,
+ X=X,
+ y=y,
+ output_dir=args.output_dir,
+ learning_rate=args.learning_rate,
+ group_size=args.group_size,
+ )
+
+ # Run training (results are also written to output_dir as JSON)
+ grpo.run(
+ epochs=args.epochs,
+ num_groups=args.num_groups,
+ target_r2=args.target_r2,
+ )
+
+
+ if __name__ == "__main__":
+ main()