EVAFRILL-Mo-3B/configs/orpo_3b_1gpu.yaml
# EVAFRILL-Mo 3B ORPO - Single GPU (H100 MIG 3g.40gb, 42.3GB VRAM)
#
# Base model: checkpoints/3b_final/checkpoint-0319772 (Pretrained, NOT SFT)
# Method: ORPO (SFT + Odds Ratio Preference) with LoRA
#
# [Design rationale]
# - ORPO learns SFT and preference alignment in one stage (objective sketched below) → start from the pretrained model
# - No reference model needed → uses less VRAM than DPO
# - LoRA rank=32: base (6GB) + LoRA (0.3GB) + optimizer (0.2GB) + activations (~8GB) ≈ 15GB
# - Effective batch: 1 × 16 grad_accum = 16
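#
# For reference, lambda_or below weights the odds-ratio term of the ORPO
# objective (Hong et al., 2024):
#   L = L_SFT + lambda_or * L_OR
#   L_OR = -log sigmoid( log( odds(y_w|x) / odds(y_l|x) ) )
#   odds(y|x) = P(y|x) / (1 - P(y|x)), with y_w = chosen, y_l = rejected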
train:
  max_steps: 10000
  batch_size: 1              # per-device micro-batch
  grad_accum_steps: 16       # effective batch = 1 × 16 = 16
  lr: 5.0e-6
  weight_decay: 0.01
  warmup_steps: 500
  max_length: 1024           # max tokens per sequence (prompt + completion)
  lambda_or: 1.0             # weight of the odds-ratio term in the ORPO loss
  use_lora: true
  lora_rank: 32
  lora_alpha: 64             # LoRA scaling: alpha / rank = 2.0
  save_interval: 1000        # steps between checkpoints
  log_interval: 10           # steps between log lines
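
The training entry point is not included alongside this config, so the following is only a minimal sketch of how a file like this could drive an ORPO + LoRA run on a single GPU. It assumes TRL's ORPOTrainer and peft's LoraConfig as the backend and uses a placeholder preference-dataset name and output directory; the repo's actual script may differ.

# Minimal sketch (assumptions: TRL/peft backend, placeholder dataset name).
import yaml
import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import ORPOConfig, ORPOTrainer

with open("configs/orpo_3b_1gpu.yaml") as f:
    cfg = yaml.safe_load(f)["train"]

base = "checkpoints/3b_final/checkpoint-0319772"  # pretrained base from the header
model = AutoModelForCausalLM.from_pretrained(base, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(base)

# Preference data with "prompt" / "chosen" / "rejected" columns (name is a placeholder).
dataset = load_dataset("my-org/preference-pairs", split="train")

args = ORPOConfig(
    output_dir="checkpoints/3b_orpo",
    max_steps=cfg["max_steps"],
    per_device_train_batch_size=cfg["batch_size"],
    gradient_accumulation_steps=cfg["grad_accum_steps"],
    learning_rate=cfg["lr"],
    weight_decay=cfg["weight_decay"],
    warmup_steps=cfg["warmup_steps"],
    max_length=cfg["max_length"],
    beta=cfg["lambda_or"],               # TRL calls the odds-ratio weight "beta"
    logging_steps=cfg["log_interval"],
    save_steps=cfg["save_interval"],
    bf16=True,
)

peft_config = LoraConfig(
    r=cfg["lora_rank"],
    lora_alpha=cfg["lora_alpha"],
    task_type="CAUSAL_LM",
)

trainer = ORPOTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    processing_class=tokenizer,          # `tokenizer=` in older TRL releases
    peft_config=peft_config,
)
trainer.train()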