| rank=64 | |
| lr=1e-4 | |
| mixed_precision="bf16" | |
| lr_scheduler="constant" | |
| lr_warmup_steps=0 | |
| size=256 | |
| batch=16 | |
| LoRA: target_modules=["to_k", "to_q", "to_v", "to_out.0", "add_k_proj", "add_v_proj"] | |
| rank=64 | |
| lr=1e-4 | |
| mixed_precision="bf16" | |
| lr_scheduler="constant" | |
| lr_warmup_steps=0 | |
| size=256 | |
| batch=16 | |
| LoRA: target_modules=["to_k", "to_q", "to_v", "to_out.0", "add_k_proj", "add_v_proj"] | |