RegTech-32B-Instruct / training_report.json
MwSpace's picture
Upload RegTech-32B-Instruct
a0be32e verified
{
"model_base": "Qwen/Qwen2.5-32B-Instruct",
"model_name": "RegTech-32B-Instruct",
"dataset": "./train.jsonl",
"env_file": "/home/ubuntu/sophia-core-server/.tuning/.env.32B",
"train_samples": 923,
"eval_samples": 102,
"params": {
"rank": 16,
"alpha": 32,
"dropout": 0.1,
"lr": 1e-05,
"scheduler": "cosine",
"epochs": 3,
"effective_batch": 4,
"max_seq_length": 4096,
"neftune_alpha": 5.0,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
]
},
"results": {
"total_steps": 693,
"final_train_loss": 0.8432,
"best_eval_loss": 0.8133148550987244,
"best_eval_step": 640,
"elapsed_minutes": 40.0
},
"loss_history": {
"train": [
[
5,
1.7606
],
[
10,
1.8257
],
[
15,
2.0366
],
[
20,
1.8063
],
[
25,
1.9763
],
[
30,
1.9014
],
[
35,
1.8005
],
[
40,
1.6563
],
[
45,
1.6997
],
[
50,
1.5501
],
[
55,
1.273
],
[
60,
1.5863
],
[
65,
1.3741
],
[
70,
1.5504
],
[
75,
1.5004
],
[
80,
1.3517
],
[
85,
1.3193
],
[
90,
1.2519
],
[
95,
1.4066
],
[
100,
1.3787
],
[
105,
1.317
],
[
110,
1.273
],
[
115,
1.366
],
[
120,
1.1712
],
[
125,
1.1309
],
[
130,
1.5067
],
[
135,
1.1212
],
[
140,
1.4059
],
[
145,
1.2398
],
[
150,
1.2904
],
[
155,
1.3055
],
[
160,
1.1672
],
[
165,
1.2585
],
[
170,
1.252
],
[
175,
0.9654
],
[
180,
1.1089
],
[
185,
1.1518
],
[
190,
1.1216
],
[
195,
1.1329
],
[
200,
0.9045
],
[
205,
1.0882
],
[
210,
1.0312
],
[
215,
0.9879
],
[
220,
0.9716
],
[
225,
1.0379
],
[
230,
1.0032
],
[
235,
1.1349
],
[
240,
0.9745
],
[
245,
0.9717
],
[
250,
0.8808
],
[
255,
0.9055
],
[
260,
1.0285
],
[
265,
1.0887
],
[
270,
0.8846
],
[
275,
0.9121
],
[
280,
1.032
],
[
285,
1.043
],
[
290,
0.9623
],
[
295,
0.8949
],
[
300,
0.8456
],
[
305,
0.8745
],
[
310,
0.94
],
[
315,
0.8265
],
[
320,
0.9407
],
[
325,
0.9309
],
[
330,
0.8971
],
[
335,
0.8191
],
[
340,
0.7918
],
[
345,
0.9414
],
[
350,
0.783
],
[
355,
0.8103
],
[
360,
0.8443
],
[
365,
0.9374
],
[
370,
0.9564
],
[
375,
0.8063
],
[
380,
0.7323
],
[
385,
0.826
],
[
390,
0.8682
],
[
395,
0.8559
],
[
400,
0.9216
],
[
405,
0.8368
],
[
410,
0.9346
],
[
415,
0.8632
],
[
420,
0.8698
],
[
425,
0.7973
],
[
430,
0.8807
],
[
435,
0.7736
],
[
440,
0.7795
],
[
445,
0.768
],
[
450,
0.8426
],
[
455,
0.8065
],
[
460,
0.7986
],
[
465,
1.0072
],
[
470,
0.7926
],
[
475,
0.7541
],
[
480,
0.7158
],
[
485,
0.7268
],
[
490,
0.793
],
[
495,
0.9075
],
[
500,
0.8299
],
[
505,
0.8684
],
[
510,
0.8688
],
[
515,
0.6847
],
[
520,
0.7814
],
[
525,
0.7811
],
[
530,
0.7919
],
[
535,
0.8829
],
[
540,
0.7775
],
[
545,
0.7112
],
[
550,
0.7917
],
[
555,
0.769
],
[
560,
0.7328
],
[
565,
0.7695
],
[
570,
0.5831
],
[
575,
0.7399
],
[
580,
0.8321
],
[
585,
0.8167
],
[
590,
0.832
],
[
595,
0.7446
],
[
600,
0.698
],
[
605,
0.5891
],
[
610,
0.8106
],
[
615,
0.7455
],
[
620,
0.7738
],
[
625,
0.7939
],
[
630,
0.8003
],
[
635,
0.7349
],
[
640,
0.7863
],
[
645,
0.7968
],
[
650,
0.8229
],
[
655,
0.7529
],
[
660,
0.6354
],
[
665,
0.8064
],
[
670,
0.9099
],
[
675,
0.8156
],
[
680,
0.7462
],
[
685,
0.907
],
[
690,
0.8432
]
],
"eval": [
[
40,
2.072058916091919
],
[
80,
1.625858187675476
],
[
120,
1.4893486499786377
],
[
160,
1.359459400177002
],
[
200,
1.2217525243759155
],
[
240,
1.113280177116394
],
[
280,
1.0186439752578735
],
[
320,
0.937186598777771
],
[
360,
0.8894771337509155
],
[
400,
0.862065315246582
],
[
440,
0.8410201072692871
],
[
480,
0.8306860327720642
],
[
520,
0.8223150372505188
],
[
560,
0.8172081112861633
],
[
600,
0.8150315880775452
],
[
640,
0.8133148550987244
],
[
680,
0.8135352730751038
]
]
}
}