seed: 0
exp_name: probit_llama8b_uf
lr: 5.0e-06
lr_mlp: 5.0e-05
batch_size: 4
eval_batch_size: 4
datasets:
  - uf
wandb:
  enabled: true
  entity: null
  project: rupo
local_dirs:
  - /workspace/gupo_cache
sample_during_eval: false
do_first_eval: false
local_run_dir: /workspace/gupo_cache/probit_llama8b_uf_2026-02-16_14-40-14_979841
gradient_accumulation_steps: 1
max_grad_norm: 5.0
max_grad_norm_mlp: 1.0
max_length: 512
max_prompt_length: 256
n_epochs: 2
n_examples: 80000
n_eval_examples: 128
optimizer: RMSprop
optimizer_mlp: RMSprop
warmup_steps: 150
activation_checkpointing: false
eval_every: 40000
minimum_log_interval_secs: 1.0
lora:
  enabled: false
  r: 8
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
  alpha: 32
  dropout: 0.05
model:
  name_or_path: meta-llama/Llama-3.1-8B-Instruct
  tokenizer_name_or_path: meta-llama/Llama-3.1-8B-Instruct
  archive: null
  block_name: LlamaDecoderLayer
  policy_dtype: float32
  fsdp_policy_mp: null
  reference_dtype: float16
  policy_quantization: 8bit
loss:
  name: probit
  beta: 0.9
  reference_free: false