# Viewer residue captured with this file (not configuration):
# "File size: 767 Bytes"; revision IDs 6e3f60b, 8b32d7f; line gutter 1-41.
---
# Evaluation schedule: strategy `steps` with an interval of 50.
# NOTE(review): presumably "evaluate every 50 optimizer steps" - confirm
# against the script that reads this file.
eval_strategy: steps
eval_steps: 50

# Logging interval (10), presumably counted in the same step unit.
logging_steps: 10
# LoRA adapter hyperparameters.
lora:
  # Rank and alpha; alpha / r = 128 / 64 = 2.
  # NOTE(review): presumably forwarded to a PEFT-style LoraConfig, where
  # that ratio scales the adapter update - confirm in the loader.
  r: 64
  alpha: 128
  dropout: 0.1
  # Module names that receive adapters. Judging by the names these are the
  # attention projections (q/k/v/o) followed by the MLP projections
  # (gate/up/down) - verify against the base model's module names.
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
# Base model identifier (looks like a Hugging Face Hub repo id - confirm).
model_name: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
# Length cap of 1024; presumably measured in tokens - TODO confirm.
max_length: 1024

# Checkpoint schedule: strategy `steps` with an interval of 50, the same
# interval this file uses for `eval_steps`.
save_strategy: steps
save_steps: 50
# Training-loop settings. Keys are grouped by concern; mapping order carries
# no meaning in YAML, so the grouping is purely for readers.
training:
  # Run length and batching.
  # NOTE(review): presumably 2 x 8 = 16 sequences per optimizer step per
  # device - confirm how the script combines these two values.
  num_epochs: 3
  batch_size: 2
  eval_batch_size: 2
  gradient_accumulation_steps: 8
  group_by_length: true
  gradient_checkpointing: false

  # Numeric precision: bf16 enabled, fp16 disabled.
  bf16: true
  fp16: false

  # Optimiser, learning-rate schedule and gradient clipping.
  optim: adamw_torch
  learning_rate: 0.0001
  lr_scheduler: cosine
  warmup_ratio: 0.1
  max_grad_norm: 1.0

  # Best-checkpoint selection and retention: a higher
  # `eval_decoder_accuracy` counts as better.
  # NOTE(review): if these are forwarded to Hugging Face TrainingArguments,
  # `load_best_model_at_end` needs matching save/eval strategies and a
  # `save_steps` that is a multiple of `eval_steps`; the top-level values
  # in this file (steps / 50 / 50) satisfy that - confirm the mapping.
  load_best_model_at_end: true
  metric_for_best_model: eval_decoder_accuracy
  greater_is_better: true
  save_total_limit: 2
# Settings under the `wandb` key: an on/off switch and a project name.
# NOTE(review): presumably Weights & Biases experiment tracking - confirm.
wandb:
  project: steg-finetune
  enabled: true