# Codette-Reasoning/training/configs/default_training.yaml
model:
  name: meta-llama/Llama-3.1-8B-Instruct
  quantization: 4bit
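  # Assumption: "4bit" means the base model is loaded with bitsandbytes
  # 4-bit quantization (a QLoRA-style setup); the exact quant type
  # (e.g. NF4) is decided by the training script, which is not shown here.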

lora:
  rank: 16
  alpha: 32
  dropout: 0.05
  target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"]
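  # With standard LoRA scaling, alpha/rank = 32/16 = 2 scales the adapter
  # update. target_modules covers only the attention projections; the MLP
  # projections (gate/up/down) stay frozen.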

training:
  epochs: 3
  batch_size: 2
  gradient_accumulation_steps: 4
  learning_rate: 2e-4
  max_seq_length: 2048
  warmup_ratio: 0.03
  logging_steps: 10
  save_steps: 100
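  # Effective batch size per optimizer step: 2 * 4 = 8 sequences.
  # warmup_ratio 0.03 means roughly 3% of total steps are warmup.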

output:
  base_dir: ./adapters
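
# A minimal loading sketch (assumption: this config drives a PEFT/QLoRA
# training script; that script is not part of this file). Kept as comments
# so this file remains valid YAML:
#
#   import yaml
#   from peft import LoraConfig
#
#   cfg = yaml.safe_load(open("default_training.yaml"))
#   lora = LoraConfig(
#       r=cfg["lora"]["rank"],
#       lora_alpha=cfg["lora"]["alpha"],
#       lora_dropout=cfg["lora"]["dropout"],
#       target_modules=cfg["lora"]["target_modules"],
#   )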