RegTech-14B-Instruct / training_report.json
MwSpace's picture
Upload RegTech-14B-Instruct
4318e8c verified
{
"model_base": "Qwen/Qwen2.5-14B-Instruct",
"model_name": "RegTech-14B-Instruct",
"dataset": "./train.jsonl",
"env_file": "/home/ubuntu/sophia-core-server/.tuning/.env.14B",
"train_samples": 923,
"eval_samples": 102,
"params": {
"rank": 16,
"alpha": 32,
"dropout": 0.1,
"lr": 5e-06,
"scheduler": "cosine",
"epochs": 3,
"effective_batch": 4,
"max_seq_length": 4096,
"neftune_alpha": 5.0,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
]
},
"results": {
"total_steps": 693,
"final_train_loss": 1.1265,
"best_eval_loss": 1.2247475385665894,
"best_eval_step": 640,
"elapsed_minutes": 23.5
},
"loss_history": {
"train": [
[
5,
1.7295
],
[
10,
1.802
],
[
15,
2.0237
],
[
20,
1.7917
],
[
25,
2.0068
],
[
30,
1.9094
],
[
35,
1.8299
],
[
40,
1.7277
],
[
45,
1.8172
],
[
50,
1.7058
],
[
55,
1.3853
],
[
60,
1.7634
],
[
65,
1.5767
],
[
70,
1.754
],
[
75,
1.7128
],
[
80,
1.5807
],
[
85,
1.5355
],
[
90,
1.4244
],
[
95,
1.5826
],
[
100,
1.5446
],
[
105,
1.4907
],
[
110,
1.432
],
[
115,
1.5543
],
[
120,
1.3322
],
[
125,
1.2579
],
[
130,
1.7049
],
[
135,
1.2563
],
[
140,
1.6188
],
[
145,
1.3941
],
[
150,
1.456
],
[
155,
1.5397
],
[
160,
1.3587
],
[
165,
1.4827
],
[
170,
1.4644
],
[
175,
1.1337
],
[
180,
1.302
],
[
185,
1.4177
],
[
190,
1.3671
],
[
195,
1.3418
],
[
200,
1.0722
],
[
205,
1.315
],
[
210,
1.1977
],
[
215,
1.2221
],
[
220,
1.1184
],
[
225,
1.282
],
[
230,
1.1874
],
[
235,
1.4661
],
[
240,
1.1914
],
[
245,
1.1856
],
[
250,
1.0897
],
[
255,
1.1284
],
[
260,
1.27
],
[
265,
1.378
],
[
270,
1.1756
],
[
275,
1.1799
],
[
280,
1.3092
],
[
285,
1.3576
],
[
290,
1.2254
],
[
295,
1.076
],
[
300,
1.0677
],
[
305,
1.3067
],
[
310,
1.1462
],
[
315,
1.2189
],
[
320,
1.1138
],
[
325,
1.2075
],
[
330,
1.1388
],
[
335,
1.1792
],
[
340,
1.1284
],
[
345,
1.3012
],
[
350,
0.9941
],
[
355,
1.1375
],
[
360,
1.2087
],
[
365,
1.2662
],
[
370,
1.2091
],
[
375,
1.0686
],
[
380,
0.9329
],
[
385,
1.0478
],
[
390,
1.0606
],
[
395,
1.0607
],
[
400,
1.1947
],
[
405,
1.158
],
[
410,
1.2246
],
[
415,
1.0888
],
[
420,
1.1178
],
[
425,
1.0749
],
[
430,
1.3176
],
[
435,
1.0919
],
[
440,
1.0837
],
[
445,
0.9845
],
[
450,
1.0481
],
[
455,
1.1088
],
[
460,
0.9743
],
[
465,
1.3255
],
[
470,
1.0583
],
[
475,
1.1048
],
[
480,
0.9751
],
[
485,
0.9574
],
[
490,
1.1434
],
[
495,
1.2023
],
[
500,
1.1073
],
[
505,
1.053
],
[
510,
1.1742
],
[
515,
0.9444
],
[
520,
1.088
],
[
525,
1.0151
],
[
530,
1.0707
],
[
535,
1.0992
],
[
540,
1.0729
],
[
545,
0.9759
],
[
550,
1.0995
],
[
555,
1.1917
],
[
560,
1.0671
],
[
565,
1.1079
],
[
570,
0.8946
],
[
575,
1.0232
],
[
580,
1.0634
],
[
585,
1.0558
],
[
590,
1.1866
],
[
595,
0.9822
],
[
600,
1.0456
],
[
605,
0.8698
],
[
610,
1.1024
],
[
615,
1.1079
],
[
620,
1.0469
],
[
625,
1.0726
],
[
630,
1.0963
],
[
635,
1.0431
],
[
640,
1.0866
],
[
645,
1.0284
],
[
650,
1.2035
],
[
655,
1.0262
],
[
660,
0.9705
],
[
665,
1.0958
],
[
670,
1.1898
],
[
675,
1.0921
],
[
680,
0.9948
],
[
685,
1.1798
],
[
690,
1.1265
]
],
"eval": [
[
40,
2.2252511978149414
],
[
80,
1.9501206874847412
],
[
120,
1.708126187324524
],
[
160,
1.5921595096588135
],
[
200,
1.513823390007019
],
[
240,
1.4531652927398682
],
[
280,
1.400567650794983
],
[
320,
1.356279969215393
],
[
360,
1.3178586959838867
],
[
400,
1.288638949394226
],
[
440,
1.2657065391540527
],
[
480,
1.250612735748291
],
[
520,
1.2387887239456177
],
[
560,
1.2310446500778198
],
[
600,
1.2271382808685303
],
[
640,
1.2247475385665894
],
[
680,
1.2248592376708984
]
]
}
}