# probit_llama8b_uf / config.yaml
# Uploaded by razozang ("Upload config.yaml"), commit dad1d10 (verified).
# Run configuration for experiment `probit_llama8b_uf`.
# NOTE(review): the nesting of the `wandb`, `lora`, `model` and `loss`
# sections is restored from key grouping; without it the two `enabled` keys
# collide at top level. Confirm section boundaries against the consumer.

# --- Run identity ---
seed: 0
exp_name: probit_llama8b_uf

# --- Learning rates and batch sizes ---
# `*_mlp` keys mirror their unsuffixed counterparts; presumably they apply to
# a separate MLP component -- TODO confirm against the training code.
lr: 5.0e-06
lr_mlp: 5.0e-05
batch_size: 4
eval_batch_size: 4

# --- Data ---
datasets:
- uf

# --- Weights & Biases logging ---
wandb:
  enabled: true
  entity: null
  project: rupo

# --- Local storage and evaluation behaviour ---
local_dirs:
- /workspace/gupo_cache
sample_during_eval: false
do_first_eval: false
local_run_dir: /workspace/gupo_cache/probit_llama8b_uf_2026-02-16_14-40-14_979841

# --- Optimisation schedule ---
gradient_accumulation_steps: 1
max_grad_norm: 5.0
max_grad_norm_mlp: 1.0
max_length: 512
max_prompt_length: 256
n_epochs: 2
n_examples: 80000
n_eval_examples: 128
optimizer: RMSprop
optimizer_mlp: RMSprop
warmup_steps: 150
activation_checkpointing: false
eval_every: 40000
minimum_log_interval_secs: 1.0

# --- LoRA adapter settings (disabled in this run: enabled is false) ---
lora:
  enabled: false
  r: 8
  target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  alpha: 32
  dropout: 0.05

# --- Model and precision settings ---
model:
  name_or_path: meta-llama/Llama-3.1-8B-Instruct
  tokenizer_name_or_path: meta-llama/Llama-3.1-8B-Instruct
  archive: null
  block_name: LlamaDecoderLayer
  policy_dtype: float32
  fsdp_policy_mp: null
  reference_dtype: float16
  policy_quantization: 8bit

# --- Loss settings ---
loss:
  name: probit
  beta: 0.9
  reference_free: false