project: coconut
save_path: .........
name: gsm_coconut_eval
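# Run-mode flags. only_eval skips training and runs evaluation only; coconut
# enables latent (continuous-thought) reasoning, while cot / no_thoughts /
# no_cot select the CoT baselines and ablations from the Coconut setup.
# coconutgpt appears to be a fork-specific variant flag.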
only_eval: True
coconut: True
cot: False
no_thoughts: False
no_cot: False
coconutgpt: True
# visualize: True
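# Staged training curriculum (Coconut): c_thought = continuous thoughts per
# replaced CoT step; epochs_per_stage = epochs before advancing a stage;
# max_latent_stage caps the number of stages; pad_latent_to_max pads latent
# tokens to the final stage's length; uniform_prob = chance of sampling a
# random stage per example. Meanings follow the upstream Coconut args;
# verify against this fork's code.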
c_thought: 2
epochs_per_stage: 3
max_latent_stage: 10
pad_latent_to_max: True
save_only_improve: False
uniform_prob: 0.0
# Model and checkpoint paths (use relative or env variables)
model_id: /scratch/e1374322/meta-llama/Llama-3.2-1B-Instruct
load_model_path: ...
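# Logging, reproducibility, and precision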
wandb: False
seed: 0
resume: 0
bf16: False
# data
train_path: .../gsm/train.json
val_path: .../gsm/val.json
test_path: .../gsm/MultiArith.json
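# Note: train/val come from GSM (presumably GSM8K) while test_path points at
# MultiArith, i.e. this config evaluates out-of-domain transfer.
# Optimization hyperparameters (presumably unused here since only_eval is
# True, apart from the batch sizes).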
reset_optimizer: True
batch_size_training: 64
batch_size_validating: 1
debug: False
gradient_accumulation_steps: 1
num_epochs: 35
lr: !!float "1e-4"
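# The !!float tag is needed because YAML 1.1 loaders such as PyYAML parse a
# bare "1e-4" as a string (scientific notation needs a dot, e.g. 1.0e-4).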
weight_decay: 0.01
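# Fork-specific switches below; they do not appear in the upstream Coconut
# args, so the meanings are inferred from their names.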
w_prompt: False
mode: coconutgpt_same_word_embedding_len_vae
explain_mode: v1_aug
training_method: full
train_or_eval: eval
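# Usage sketch (assumes the upstream Coconut entry point and that this file
# lives at args/gsm_coconut_eval.yaml — both are assumptions for this fork):
#   torchrun --nnodes 1 --nproc_per_node 1 run.py args/gsm_coconut_eval.yaml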