# EVAFRILL-Mo 3B ORPO — Single GPU (H100 MIG 3g.40gb, 42.3GB VRAM)
#
# Base model: checkpoints/3b_final/checkpoint-0319772 (Pretrained, NOT SFT)
# Method: ORPO (SFT + Odds Ratio Preference) with LoRA
#
# [Design rationale]
# - ORPO learns SFT and alignment simultaneously -> can start from a pretrained model
# - No reference model required -> saves VRAM compared to DPO
# - LoRA rank=32: base(6GB) + LoRA(0.3GB) + optim(0.2GB) + act(~8GB) ~= 15GB
# - eff_batch: 1 x 16 grad_accum = 16
# Training hyperparameters consumed by the ORPO training script.
# NOTE(review): field semantics below are inferred from key names and the
# header comments — confirm against the trainer that reads this file.
train:
  max_steps: 10000        # total optimizer update steps (run length)
  batch_size: 1           # per-device micro-batch size
  grad_accum_steps: 16    # gradient accumulation -> effective batch = 1 x 16 = 16
  lr: 5.0e-6              # peak learning rate (low, typical for preference tuning)
  weight_decay: 0.01      # AdamW-style decoupled weight decay — TODO confirm optimizer
  warmup_steps: 500       # LR warmup steps (5% of max_steps)
  max_length: 1024        # max sequence length in tokens — presumably prompt+response; verify
  lambda_or: 1.0          # weight of the odds-ratio preference term added to the SFT loss
  use_lora: true          # train LoRA adapters instead of full fine-tuning
  lora_rank: 32           # LoRA rank r
  lora_alpha: 64          # LoRA alpha (scaling = alpha/r = 2.0 by convention)
  save_interval: 1000     # checkpoint every N steps
  log_interval: 10        # log metrics every N steps