File size: 721 Bytes
6c351a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
{
  "model_name": "Qwen/Qwen2.5-Math-1.5B-Instruct",
  "lora_r": 8,
  "lora_alpha": 16,
  "lora_dropout": 0.05,
  "target_modules": [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj"
  ],
  "num_train_epochs": 10,
  "per_device_train_batch_size": 2,
  "gradient_accumulation_steps": 8,
  "learning_rate": 0.0002,
  "lr_scheduler_type": "cosine",
  "warmup_ratio": 0.05,
  "weight_decay": 0.01,
  "fp16": true,
  "max_seq_length": 512,
  "label_smoothing_configs": [
    0.0,
    0.02,
    0.05,
    0.1,
    0.2
  ],
  "perturbation_sigmas": [
    0.005,
    0.01,
    0.015,
    0.02,
    0.025,
    0.03
  ],
  "data_seed": 42,
  "data_size": 2000,
  "eval_questions": 300,
  "created_at": "2026-03-11 17:52:45"
}