MwSpace committed on
Commit
14a198b
·
verified ·
1 Parent(s): f57c0d6

Upload RegTech-14B-Instruct

Browse files
model-00001-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60197165349792044d5e1489bbee9ad7e388190555d1659274f1901c55e76d1
3
  size 4986211280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43fc26c9c5311bfac0ffb290b01589fac271b941da368e3873bc4b1d81e63bc6
3
  size 4986211280
model-00002-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:996e83ae5f630926a0b9382d0651752fe4ce2e5aeb3e293dae14bbdbbf93d494
3
  size 4954847344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6ab33ca80c5d2a528877030b9f83c5b84c485b3fd5771a0da39fff3bf2da794
3
  size 4954847344
model-00003-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f66874e7a940286c158687f731db691dd605a311b0386b099f3af1d8f27ef22
3
  size 4954847392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1d255cf1f2bb92712e2d9a8cdab57f8f1752b4c9a2bb2c0e538a0d48c0fa424
3
  size 4954847392
model-00004-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd5ef99189d0880d8c9334412c09b4478b82547d405e7b11dc900ea173d51fb7
3
  size 4954847392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ef6318d2f290564f77ed5414b262e3964b98d26133e1320733ee4a8039cb9ff
3
  size 4954847392
model-00005-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b6eb5179e0987bbf7c7774737f273ede16f6f542da4d1db39264b4547d08e2a
3
  size 4954847392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6cfc7a2def3f023d31c9c67340113053b6df83aa6ef4f2598591f1c12ebba06
3
  size 4954847392
model-00006-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b16bb22f531dc046f89183e79d82ee7bbee454afa6568d439a61942fb97bae24
3
  size 4734533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1997e8d1ba66d7c2b4283582c2f8b054eba6f410b402fa8add05456dbdece9a0
3
  size 4734533160
training_report.json CHANGED
@@ -3,15 +3,15 @@
3
  "model_name": "RegTech-14B-Instruct",
4
  "dataset": "./train.jsonl",
5
  "env_file": "/home/ubuntu/sophia-core-server/.tuning/.env.14B",
6
- "train_samples": 923,
7
- "eval_samples": 102,
8
  "params": {
9
  "rank": 16,
10
- "alpha": 32,
11
  "dropout": 0.1,
12
- "lr": 5e-06,
13
  "scheduler": "cosine",
14
- "epochs": 3,
15
  "effective_batch": 4,
16
  "max_seq_length": 4096,
17
  "neftune_alpha": 5.0,
@@ -26,635 +26,546 @@
26
  ]
27
  },
28
  "results": {
29
- "total_steps": 693,
30
- "final_train_loss": 1.1265,
31
- "best_eval_loss": 1.2247475385665894,
32
- "best_eval_step": 640,
33
- "elapsed_minutes": 23.5
 
34
  },
35
  "loss_history": {
36
  "train": [
37
  [
38
  5,
39
- 1.7295
40
  ],
41
  [
42
  10,
43
- 1.802
44
  ],
45
  [
46
  15,
47
- 2.0237
48
  ],
49
  [
50
  20,
51
- 1.7917
52
  ],
53
  [
54
  25,
55
- 2.0068
56
  ],
57
  [
58
  30,
59
- 1.9094
60
  ],
61
  [
62
  35,
63
- 1.8299
64
  ],
65
  [
66
  40,
67
- 1.7277
68
  ],
69
  [
70
  45,
71
- 1.8172
72
  ],
73
  [
74
  50,
75
- 1.7058
76
  ],
77
  [
78
  55,
79
- 1.3853
80
  ],
81
  [
82
  60,
83
- 1.7634
84
  ],
85
  [
86
  65,
87
- 1.5767
88
  ],
89
  [
90
  70,
91
- 1.754
92
  ],
93
  [
94
  75,
95
- 1.7128
96
  ],
97
  [
98
  80,
99
- 1.5807
100
  ],
101
  [
102
  85,
103
- 1.5355
104
  ],
105
  [
106
  90,
107
- 1.4244
108
  ],
109
  [
110
  95,
111
- 1.5826
112
  ],
113
  [
114
  100,
115
- 1.5446
116
  ],
117
  [
118
  105,
119
- 1.4907
120
  ],
121
  [
122
  110,
123
- 1.432
124
  ],
125
  [
126
  115,
127
- 1.5543
128
  ],
129
  [
130
  120,
131
- 1.3322
132
  ],
133
  [
134
  125,
135
- 1.2579
136
  ],
137
  [
138
  130,
139
- 1.7049
140
  ],
141
  [
142
  135,
143
- 1.2563
144
  ],
145
  [
146
  140,
147
- 1.6188
148
  ],
149
  [
150
  145,
151
- 1.3941
152
  ],
153
  [
154
  150,
155
- 1.456
156
  ],
157
  [
158
  155,
159
- 1.5397
160
  ],
161
  [
162
  160,
163
- 1.3587
164
  ],
165
  [
166
  165,
167
- 1.4827
168
  ],
169
  [
170
  170,
171
- 1.4644
172
  ],
173
  [
174
  175,
175
- 1.1337
176
  ],
177
  [
178
  180,
179
- 1.302
180
  ],
181
  [
182
  185,
183
- 1.4177
184
  ],
185
  [
186
  190,
187
- 1.3671
188
  ],
189
  [
190
  195,
191
- 1.3418
192
  ],
193
  [
194
  200,
195
- 1.0722
196
  ],
197
  [
198
  205,
199
- 1.315
200
  ],
201
  [
202
  210,
203
- 1.1977
204
  ],
205
  [
206
  215,
207
- 1.2221
208
  ],
209
  [
210
  220,
211
- 1.1184
212
  ],
213
  [
214
  225,
215
- 1.282
216
  ],
217
  [
218
  230,
219
- 1.1874
220
  ],
221
  [
222
  235,
223
- 1.4661
224
  ],
225
  [
226
  240,
227
- 1.1914
228
  ],
229
  [
230
  245,
231
- 1.1856
232
  ],
233
  [
234
  250,
235
- 1.0897
236
  ],
237
  [
238
  255,
239
- 1.1284
240
  ],
241
  [
242
  260,
243
- 1.27
244
  ],
245
  [
246
  265,
247
- 1.378
248
  ],
249
  [
250
  270,
251
- 1.1756
252
  ],
253
  [
254
  275,
255
- 1.1799
256
  ],
257
  [
258
  280,
259
- 1.3092
260
  ],
261
  [
262
  285,
263
- 1.3576
264
  ],
265
  [
266
  290,
267
- 1.2254
268
  ],
269
  [
270
  295,
271
- 1.076
272
  ],
273
  [
274
  300,
275
- 1.0677
276
  ],
277
  [
278
  305,
279
- 1.3067
280
  ],
281
  [
282
  310,
283
- 1.1462
284
  ],
285
  [
286
  315,
287
- 1.2189
288
  ],
289
  [
290
  320,
291
- 1.1138
292
  ],
293
  [
294
  325,
295
- 1.2075
296
  ],
297
  [
298
  330,
299
- 1.1388
300
  ],
301
  [
302
  335,
303
- 1.1792
304
  ],
305
  [
306
  340,
307
- 1.1284
308
  ],
309
  [
310
  345,
311
- 1.3012
312
  ],
313
  [
314
  350,
315
- 0.9941
316
  ],
317
  [
318
  355,
319
- 1.1375
320
  ],
321
  [
322
  360,
323
- 1.2087
324
  ],
325
  [
326
  365,
327
- 1.2662
328
  ],
329
  [
330
  370,
331
- 1.2091
332
  ],
333
  [
334
  375,
335
- 1.0686
336
  ],
337
  [
338
  380,
339
- 0.9329
340
  ],
341
  [
342
  385,
343
- 1.0478
344
  ],
345
  [
346
  390,
347
- 1.0606
348
  ],
349
  [
350
  395,
351
- 1.0607
352
  ],
353
  [
354
  400,
355
- 1.1947
356
  ],
357
  [
358
  405,
359
- 1.158
360
  ],
361
  [
362
  410,
363
- 1.2246
364
  ],
365
  [
366
  415,
367
- 1.0888
368
  ],
369
  [
370
  420,
371
- 1.1178
372
  ],
373
  [
374
  425,
375
- 1.0749
376
  ],
377
  [
378
  430,
379
- 1.3176
380
  ],
381
  [
382
  435,
383
- 1.0919
384
  ],
385
  [
386
  440,
387
- 1.0837
388
  ],
389
  [
390
  445,
391
- 0.9845
392
  ],
393
  [
394
  450,
395
- 1.0481
396
  ],
397
  [
398
  455,
399
- 1.1088
400
  ],
401
  [
402
  460,
403
- 0.9743
404
  ],
405
  [
406
  465,
407
- 1.3255
408
  ],
409
  [
410
  470,
411
- 1.0583
412
  ],
413
  [
414
  475,
415
- 1.1048
416
  ],
417
  [
418
  480,
419
- 0.9751
420
  ],
421
  [
422
  485,
423
- 0.9574
424
  ],
425
  [
426
  490,
427
- 1.1434
428
  ],
429
  [
430
  495,
431
- 1.2023
432
  ],
433
  [
434
  500,
435
- 1.1073
436
  ],
437
  [
438
  505,
439
- 1.053
440
  ],
441
  [
442
  510,
443
- 1.1742
444
  ],
445
  [
446
  515,
447
- 0.9444
448
  ],
449
  [
450
  520,
451
- 1.088
452
  ],
453
  [
454
  525,
455
- 1.0151
456
  ],
457
  [
458
  530,
459
- 1.0707
460
- ],
461
- [
462
- 535,
463
- 1.0992
464
- ],
465
- [
466
- 540,
467
- 1.0729
468
- ],
469
- [
470
- 545,
471
- 0.9759
472
- ],
473
- [
474
- 550,
475
- 1.0995
476
- ],
477
- [
478
- 555,
479
- 1.1917
480
- ],
481
- [
482
- 560,
483
- 1.0671
484
- ],
485
- [
486
- 565,
487
- 1.1079
488
- ],
489
- [
490
- 570,
491
- 0.8946
492
- ],
493
- [
494
- 575,
495
- 1.0232
496
- ],
497
- [
498
- 580,
499
- 1.0634
500
- ],
501
- [
502
- 585,
503
- 1.0558
504
- ],
505
- [
506
- 590,
507
- 1.1866
508
- ],
509
- [
510
- 595,
511
- 0.9822
512
- ],
513
- [
514
- 600,
515
- 1.0456
516
- ],
517
- [
518
- 605,
519
- 0.8698
520
- ],
521
- [
522
- 610,
523
- 1.1024
524
- ],
525
- [
526
- 615,
527
- 1.1079
528
- ],
529
- [
530
- 620,
531
- 1.0469
532
- ],
533
- [
534
- 625,
535
- 1.0726
536
- ],
537
  [
538
- 630,
539
- 1.0963
540
  ],
541
  [
542
- 635,
543
- 1.0431
544
  ],
545
  [
546
- 640,
547
- 1.0866
548
  ],
549
  [
550
- 645,
551
- 1.0284
552
  ],
553
  [
554
- 650,
555
- 1.2035
556
  ],
557
  [
558
- 655,
559
- 1.0262
560
  ],
561
  [
562
- 660,
563
- 0.9705
564
  ],
565
  [
566
- 665,
567
- 1.0958
568
  ],
569
  [
570
- 670,
571
- 1.1898
572
  ],
573
  [
574
- 675,
575
- 1.0921
576
  ],
577
  [
578
- 680,
579
- 0.9948
580
  ],
581
  [
582
- 685,
583
- 1.1798
584
  ],
585
  [
586
- 690,
587
- 1.1265
588
  ]
589
  ],
590
- "eval": [
591
  [
592
  40,
593
- 2.2252511978149414
594
  ],
595
  [
596
  80,
597
- 1.9501206874847412
598
  ],
599
  [
600
  120,
601
- 1.708126187324524
602
  ],
603
  [
604
  160,
605
- 1.5921595096588135
606
  ],
607
  [
608
  200,
609
- 1.513823390007019
610
  ],
611
  [
612
  240,
613
- 1.4531652927398682
614
  ],
615
  [
616
  280,
617
- 1.400567650794983
618
  ],
619
  [
620
  320,
621
- 1.356279969215393
622
  ],
623
  [
624
  360,
625
- 1.3178586959838867
626
  ],
627
  [
628
  400,
629
- 1.288638949394226
630
  ],
631
  [
632
  440,
633
- 1.2657065391540527
634
  ],
635
  [
636
  480,
637
- 1.250612735748291
638
  ],
639
  [
640
  520,
641
- 1.2387887239456177
642
- ],
643
- [
644
- 560,
645
- 1.2310446500778198
646
- ],
647
- [
648
- 600,
649
- 1.2271382808685303
650
- ],
651
- [
652
- 640,
653
- 1.2247475385665894
654
- ],
655
- [
656
- 680,
657
- 1.2248592376708984
658
  ]
659
  ]
660
  }
 
3
  "model_name": "RegTech-14B-Instruct",
4
  "dataset": "./train.jsonl",
5
  "env_file": "/home/ubuntu/sophia-core-server/.tuning/.env.14B",
6
+ "train_samples": 2134,
7
+ "eval_samples": 237,
8
  "params": {
9
  "rank": 16,
10
+ "alpha": 16,
11
  "dropout": 0.1,
12
+ "lr": 2e-05,
13
  "scheduler": "cosine",
14
+ "epochs": 1,
15
  "effective_batch": 4,
16
  "max_seq_length": 4096,
17
  "neftune_alpha": 5.0,
 
26
  ]
27
  },
28
  "results": {
29
+ "total_steps": 534,
30
+ "final_train_loss": 1.073,
31
+ "best_eval_loss": 1.098220944404602,
32
+ "best_eval_step": 520,
33
+ "best_token_accuracy": 0.7624,
34
+ "elapsed_minutes": 19.9
35
  },
36
  "loss_history": {
37
  "train": [
38
  [
39
  5,
40
+ 1.8103
41
  ],
42
  [
43
  10,
44
+ 1.9463
45
  ],
46
  [
47
  15,
48
+ 1.7076
49
  ],
50
  [
51
  20,
52
+ 1.8542
53
  ],
54
  [
55
  25,
56
+ 1.9426
57
  ],
58
  [
59
  30,
60
+ 2.026
61
  ],
62
  [
63
  35,
64
+ 1.9177
65
  ],
66
  [
67
  40,
68
+ 1.7516
69
  ],
70
  [
71
  45,
72
+ 1.655
73
  ],
74
  [
75
  50,
76
+ 1.4414
77
  ],
78
  [
79
  55,
80
+ 1.4496
81
  ],
82
  [
83
  60,
84
+ 1.3549
85
  ],
86
  [
87
  65,
88
+ 1.3485
89
  ],
90
  [
91
  70,
92
+ 1.6647
93
  ],
94
  [
95
  75,
96
+ 1.3798
97
  ],
98
  [
99
  80,
100
+ 1.5445
101
  ],
102
  [
103
  85,
104
+ 1.3067
105
  ],
106
  [
107
  90,
108
+ 1.3387
109
  ],
110
  [
111
  95,
112
+ 1.2908
113
  ],
114
  [
115
  100,
116
+ 1.2998
117
  ],
118
  [
119
  105,
120
+ 1.3263
121
  ],
122
  [
123
  110,
124
+ 1.3664
125
  ],
126
  [
127
  115,
128
+ 1.2699
129
  ],
130
  [
131
  120,
132
+ 1.3007
133
  ],
134
  [
135
  125,
136
+ 1.3573
137
  ],
138
  [
139
  130,
140
+ 1.243
141
  ],
142
  [
143
  135,
144
+ 1.3071
145
  ],
146
  [
147
  140,
148
+ 1.2171
149
  ],
150
  [
151
  145,
152
+ 1.1797
153
  ],
154
  [
155
  150,
156
+ 1.1813
157
  ],
158
  [
159
  155,
160
+ 1.3488
161
  ],
162
  [
163
  160,
164
+ 1.1427
165
  ],
166
  [
167
  165,
168
+ 1.2909
169
  ],
170
  [
171
  170,
172
+ 1.1646
173
  ],
174
  [
175
  175,
176
+ 1.0927
177
  ],
178
  [
179
  180,
180
+ 1.1764
181
  ],
182
  [
183
  185,
184
+ 1.1671
185
  ],
186
  [
187
  190,
188
+ 1.2527
189
  ],
190
  [
191
  195,
192
+ 1.0971
193
  ],
194
  [
195
  200,
196
+ 1.0705
197
  ],
198
  [
199
  205,
200
+ 1.1283
201
  ],
202
  [
203
  210,
204
+ 1.1866
205
  ],
206
  [
207
  215,
208
+ 1.1713
209
  ],
210
  [
211
  220,
212
+ 1.0456
213
  ],
214
  [
215
  225,
216
+ 1.137
217
  ],
218
  [
219
  230,
220
+ 1.074
221
  ],
222
  [
223
  235,
224
+ 1.062
225
  ],
226
  [
227
  240,
228
+ 1.0085
229
  ],
230
  [
231
  245,
232
+ 1.0669
233
  ],
234
  [
235
  250,
236
+ 0.9657
237
  ],
238
  [
239
  255,
240
+ 1.0515
241
  ],
242
  [
243
  260,
244
+ 1.0492
245
  ],
246
  [
247
  265,
248
+ 1.1586
249
  ],
250
  [
251
  270,
252
+ 1.0454
253
  ],
254
  [
255
  275,
256
+ 1.1171
257
  ],
258
  [
259
  280,
260
+ 1.0412
261
  ],
262
  [
263
  285,
264
+ 0.9569
265
  ],
266
  [
267
  290,
268
+ 1.0504
269
  ],
270
  [
271
  295,
272
+ 1.0302
273
  ],
274
  [
275
  300,
276
+ 1.0497
277
  ],
278
  [
279
  305,
280
+ 0.9437
281
  ],
282
  [
283
  310,
284
+ 0.9805
285
  ],
286
  [
287
  315,
288
+ 1.0755
289
  ],
290
  [
291
  320,
292
+ 1.0508
293
  ],
294
  [
295
  325,
296
+ 1.0563
297
  ],
298
  [
299
  330,
300
+ 0.9653
301
  ],
302
  [
303
  335,
304
+ 0.9692
305
  ],
306
  [
307
  340,
308
+ 1.1218
309
  ],
310
  [
311
  345,
312
+ 1.0367
313
  ],
314
  [
315
  350,
316
+ 0.9287
317
  ],
318
  [
319
  355,
320
+ 1.0165
321
  ],
322
  [
323
  360,
324
+ 1.0356
325
  ],
326
  [
327
  365,
328
+ 1.0249
329
  ],
330
  [
331
  370,
332
+ 0.9885
333
  ],
334
  [
335
  375,
336
+ 0.9588
337
  ],
338
  [
339
  380,
340
+ 0.977
341
  ],
342
  [
343
  385,
344
+ 0.9921
345
  ],
346
  [
347
  390,
348
+ 0.9863
349
  ],
350
  [
351
  395,
352
+ 1.0569
353
  ],
354
  [
355
  400,
356
+ 0.982
357
  ],
358
  [
359
  405,
360
+ 1.0738
361
  ],
362
  [
363
  410,
364
+ 1.034
365
  ],
366
  [
367
  415,
368
+ 1.067
369
  ],
370
  [
371
  420,
372
+ 1.044
373
  ],
374
  [
375
  425,
376
+ 0.9622
377
  ],
378
  [
379
  430,
380
+ 0.9102
381
  ],
382
  [
383
  435,
384
+ 1.0164
385
  ],
386
  [
387
  440,
388
+ 0.9833
389
  ],
390
  [
391
  445,
392
+ 0.9276
393
  ],
394
  [
395
  450,
396
+ 0.8751
397
  ],
398
  [
399
  455,
400
+ 1.0087
401
  ],
402
  [
403
  460,
404
+ 1.032
405
  ],
406
  [
407
  465,
408
+ 1.0099
409
  ],
410
  [
411
  470,
412
+ 1.0805
413
  ],
414
  [
415
  475,
416
+ 1.0326
417
  ],
418
  [
419
  480,
420
+ 0.968
421
  ],
422
  [
423
  485,
424
+ 0.948
425
  ],
426
  [
427
  490,
428
+ 1.0297
429
  ],
430
  [
431
  495,
432
+ 1.0083
433
  ],
434
  [
435
  500,
436
+ 0.955
437
  ],
438
  [
439
  505,
440
+ 0.967
441
  ],
442
  [
443
  510,
444
+ 0.8891
445
  ],
446
  [
447
  515,
448
+ 0.9298
449
  ],
450
  [
451
  520,
452
+ 0.9516
453
  ],
454
  [
455
  525,
456
+ 0.9553
457
  ],
458
  [
459
  530,
460
+ 1.073
461
+ ]
462
+ ],
463
+ "eval": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
  [
465
+ 40,
466
+ 1.9159308671951294
467
  ],
468
  [
469
+ 80,
470
+ 1.5397088527679443
471
  ],
472
  [
473
+ 120,
474
+ 1.398988127708435
475
  ],
476
  [
477
+ 160,
478
+ 1.3029450178146362
479
  ],
480
  [
481
+ 200,
482
+ 1.2370014190673828
483
  ],
484
  [
485
+ 240,
486
+ 1.1978071928024292
487
  ],
488
  [
489
+ 280,
490
+ 1.1654218435287476
491
  ],
492
  [
493
+ 320,
494
+ 1.1381187438964844
495
  ],
496
  [
497
+ 360,
498
+ 1.1211411952972412
499
  ],
500
  [
501
+ 400,
502
+ 1.107499599456787
503
  ],
504
  [
505
+ 440,
506
+ 1.1020454168319702
507
  ],
508
  [
509
+ 480,
510
+ 1.0991754531860352
511
  ],
512
  [
513
+ 520,
514
+ 1.098220944404602
515
  ]
516
  ],
517
+ "token_accuracy": [
518
  [
519
  40,
520
+ 0.6898
521
  ],
522
  [
523
  80,
524
+ 0.7085
525
  ],
526
  [
527
  120,
528
+ 0.7226
529
  ],
530
  [
531
  160,
532
+ 0.7344
533
  ],
534
  [
535
  200,
536
+ 0.7426
537
  ],
538
  [
539
  240,
540
+ 0.7473
541
  ],
542
  [
543
  280,
544
+ 0.7522
545
  ],
546
  [
547
  320,
548
+ 0.7564
549
  ],
550
  [
551
  360,
552
+ 0.7585
553
  ],
554
  [
555
  400,
556
+ 0.7605
557
  ],
558
  [
559
  440,
560
+ 0.7618
561
  ],
562
  [
563
  480,
564
+ 0.7624
565
  ],
566
  [
567
  520,
568
+ 0.7624
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
569
  ]
570
  ]
571
  }