# Optimizer Parameters
# AdamW configuration
#
# One key=value pair per line; comments are full-line only, because many
# INI parsers treat trailing text after a value as part of the value.
# NOTE(review): these keys sit outside any [section] header. Some parsers
# (e.g. stock Python configparser) reject global keys -- confirm which
# parser reads this file before adding or omitting a section.

# Learning rate (presumably the base/peak rate used by the scheduler -- confirm).
lr=5e-5

# AdamW beta coefficients and epsilon term.
beta1=0.9
beta2=0.999
eps=1e-8

# Weight-decay coefficient.
weight_decay=0.01

# Learning-rate schedule. Step counts are presumably optimizer steps -- confirm.
warmup_steps=500
total_steps=10000
scheduler=linear