| batch_size: 2 | |
| controller_dropout: 0.1 | |
| controller_layers: 2 | |
| controller_lr: 0.0001 | |
| dataset_name: gsm8k | |
| epochs: 1 | |
| eval_baseline: true | |
| eval_interval: 1 | |
| eval_samples: 5 | |
| hidden_size: 2560 | |
| lambda_accuracy: 1.0 | |
| lambda_flops: 0.005 | |
| log_interval: 10 | |
| max_ponder_steps: 3 | |
| model_name: microsoft/phi-2 | |
| output_dir: outputs/test_training | |
| save_interval: 1 | |
| threshold: 0.3 | |
| train_samples: 10 | |
| warmup_steps: 100 | |