izzcw committed on
Commit
f1d54bb
·
verified ·
1 Parent(s): 74fc9d6

Training in progress, step 455

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f90e0ca67d2cc52ccb47b1c83996352cdfac911c0e6f78a0de5ac81ae6fd13f
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb9f73b7073a1c472f96b7fdd6e255357b74ecaf09c3ec3e4aac189951246175
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad49613a7fa10d8aba2580b04128ecacb5376654e08eb8d4576606608729d60d
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eb7c6a6b71e1d0762efdb2b9e55f1864e9edd6b63bb4166744424e8e8c7b331
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e27c340a2fe4adf17c7db760e8a1ab6044b06ae6db066b1f41422bf04c77b220
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:835c622f683ab3acb2d477bfb990200d8f1a40bd06ee52d9e0f124c961a24778
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efd78552e2b08a49b0ac9a99971ed4bfaabd27b3ef4d746b6fa1d67a9e76c1f2
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e77f0b127c89ca1c307564ff33a282bb4e802439a24e1a39cfc734f706097018
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -1,55 +1,55 @@
1
- {"current_steps": 10, "total_steps": 455, "loss": 0.6001, "lr": 2.173913043478261e-06, "epoch": 0.021938845468257234, "percentage": 2.2, "elapsed_time": "0:04:48", "remaining_time": "3:33:52"}
2
- {"current_steps": 20, "total_steps": 455, "loss": 0.4498, "lr": 4.347826086956522e-06, "epoch": 0.04387769093651447, "percentage": 4.4, "elapsed_time": "0:09:58", "remaining_time": "3:37:04"}
3
- {"current_steps": 30, "total_steps": 455, "loss": 0.4136, "lr": 6.521739130434783e-06, "epoch": 0.0658165364047717, "percentage": 6.59, "elapsed_time": "0:14:59", "remaining_time": "3:32:26"}
4
- {"current_steps": 40, "total_steps": 455, "loss": 0.3849, "lr": 8.695652173913044e-06, "epoch": 0.08775538187302893, "percentage": 8.79, "elapsed_time": "0:20:19", "remaining_time": "3:30:53"}
5
- {"current_steps": 50, "total_steps": 455, "loss": 0.4, "lr": 9.997640179574575e-06, "epoch": 0.10969422734128617, "percentage": 10.99, "elapsed_time": "0:25:34", "remaining_time": "3:27:09"}
6
- {"current_steps": 50, "total_steps": 455, "eval_loss": 0.4120236337184906, "epoch": 0.10969422734128617, "percentage": 10.99, "elapsed_time": "0:26:46", "remaining_time": "3:36:53"}
7
- {"current_steps": 60, "total_steps": 455, "loss": 0.3886, "lr": 9.971117774604978e-06, "epoch": 0.1316330728095434, "percentage": 13.19, "elapsed_time": "0:31:53", "remaining_time": "3:29:55"}
8
- {"current_steps": 70, "total_steps": 455, "loss": 0.398, "lr": 9.915280116903003e-06, "epoch": 0.15357191827780062, "percentage": 15.38, "elapsed_time": "0:36:52", "remaining_time": "3:22:46"}
9
- {"current_steps": 80, "total_steps": 455, "loss": 0.3763, "lr": 9.83045648755225e-06, "epoch": 0.17551076374605787, "percentage": 17.58, "elapsed_time": "0:41:57", "remaining_time": "3:16:40"}
10
- {"current_steps": 90, "total_steps": 455, "loss": 0.3693, "lr": 9.717147101241817e-06, "epoch": 0.1974496092143151, "percentage": 19.78, "elapsed_time": "0:47:06", "remaining_time": "3:11:01"}
11
- {"current_steps": 100, "total_steps": 455, "loss": 0.3674, "lr": 9.576020156442802e-06, "epoch": 0.21938845468257234, "percentage": 21.98, "elapsed_time": "0:51:58", "remaining_time": "3:04:30"}
12
- {"current_steps": 100, "total_steps": 455, "eval_loss": 0.36464354395866394, "epoch": 0.21938845468257234, "percentage": 21.98, "elapsed_time": "0:53:09", "remaining_time": "3:08:43"}
13
- {"current_steps": 110, "total_steps": 455, "loss": 0.3605, "lr": 9.407907894965138e-06, "epoch": 0.24132730015082957, "percentage": 24.18, "elapsed_time": "0:58:20", "remaining_time": "3:02:58"}
14
- {"current_steps": 120, "total_steps": 455, "loss": 0.3663, "lr": 9.213801694132014e-06, "epoch": 0.2632661456190868, "percentage": 26.37, "elapsed_time": "1:03:28", "remaining_time": "2:57:11"}
15
- {"current_steps": 130, "total_steps": 455, "loss": 0.3598, "lr": 8.994846220513872e-06, "epoch": 0.28520499108734404, "percentage": 28.57, "elapsed_time": "1:08:35", "remaining_time": "2:51:28"}
16
- {"current_steps": 140, "total_steps": 455, "loss": 0.3494, "lr": 8.752332679698128e-06, "epoch": 0.30714383655560124, "percentage": 30.77, "elapsed_time": "1:13:43", "remaining_time": "2:45:53"}
17
- {"current_steps": 150, "total_steps": 455, "loss": 0.3596, "lr": 8.48769120190144e-06, "epoch": 0.3290826820238585, "percentage": 32.97, "elapsed_time": "1:19:01", "remaining_time": "2:40:41"}
18
- {"current_steps": 150, "total_steps": 455, "eval_loss": 0.3322122395038605, "epoch": 0.3290826820238585, "percentage": 32.97, "elapsed_time": "1:20:13", "remaining_time": "2:43:07"}
19
- {"current_steps": 160, "total_steps": 455, "loss": 0.3502, "lr": 8.202482408327496e-06, "epoch": 0.35102152749211574, "percentage": 35.16, "elapsed_time": "1:25:30", "remaining_time": "2:37:39"}
20
- {"current_steps": 170, "total_steps": 455, "loss": 0.3361, "lr": 7.898388208004449e-06, "epoch": 0.372960372960373, "percentage": 37.36, "elapsed_time": "1:30:23", "remaining_time": "2:31:31"}
21
- {"current_steps": 180, "total_steps": 455, "loss": 0.3453, "lr": 7.577201879374114e-06, "epoch": 0.3948992184286302, "percentage": 39.56, "elapsed_time": "1:35:23", "remaining_time": "2:25:44"}
22
- {"current_steps": 190, "total_steps": 455, "loss": 0.3362, "lr": 7.240817495122936e-06, "epoch": 0.41683806389688743, "percentage": 41.76, "elapsed_time": "1:40:36", "remaining_time": "2:20:18"}
23
- {"current_steps": 200, "total_steps": 455, "loss": 0.3172, "lr": 6.891218752617715e-06, "epoch": 0.4387769093651447, "percentage": 43.96, "elapsed_time": "1:45:35", "remaining_time": "2:14:37"}
24
- {"current_steps": 200, "total_steps": 455, "eval_loss": 0.3266438841819763, "epoch": 0.4387769093651447, "percentage": 43.96, "elapsed_time": "1:46:47", "remaining_time": "2:16:09"}
25
- {"current_steps": 210, "total_steps": 455, "loss": 0.3325, "lr": 6.5304672758143014e-06, "epoch": 0.4607157548334019, "percentage": 46.15, "elapsed_time": "1:51:59", "remaining_time": "2:10:39"}
26
- {"current_steps": 220, "total_steps": 455, "loss": 0.3295, "lr": 6.160690457624223e-06, "epoch": 0.48265460030165913, "percentage": 48.35, "elapsed_time": "1:57:05", "remaining_time": "2:05:04"}
27
- {"current_steps": 230, "total_steps": 455, "loss": 0.3134, "lr": 5.784068914434239e-06, "epoch": 0.5045934457699164, "percentage": 50.55, "elapsed_time": "2:02:10", "remaining_time": "1:59:30"}
28
- {"current_steps": 240, "total_steps": 455, "loss": 0.3168, "lr": 5.40282362676094e-06, "epoch": 0.5265322912381736, "percentage": 52.75, "elapsed_time": "2:07:30", "remaining_time": "1:54:13"}
29
- {"current_steps": 250, "total_steps": 455, "loss": 0.2993, "lr": 5.019202841873434e-06, "epoch": 0.5484711367064308, "percentage": 54.95, "elapsed_time": "2:12:48", "remaining_time": "1:48:54"}
30
- {"current_steps": 250, "total_steps": 455, "eval_loss": 0.29728370904922485, "epoch": 0.5484711367064308, "percentage": 54.95, "elapsed_time": "2:14:01", "remaining_time": "1:49:53"}
31
- {"current_steps": 260, "total_steps": 455, "loss": 0.3141, "lr": 4.635468815620862e-06, "epoch": 0.5704099821746881, "percentage": 57.14, "elapsed_time": "2:19:11", "remaining_time": "1:44:23"}
32
- {"current_steps": 270, "total_steps": 455, "loss": 0.3017, "lr": 4.2538844716497075e-06, "epoch": 0.5923488276429453, "percentage": 59.34, "elapsed_time": "2:24:27", "remaining_time": "1:38:58"}
33
- {"current_steps": 280, "total_steps": 455, "loss": 0.2967, "lr": 3.876700056683026e-06, "epoch": 0.6142876731112025, "percentage": 61.54, "elapsed_time": "2:29:39", "remaining_time": "1:33:32"}
34
- {"current_steps": 290, "total_steps": 455, "loss": 0.2901, "lr": 3.5061398705569544e-06, "epoch": 0.6362265185794598, "percentage": 63.74, "elapsed_time": "2:34:53", "remaining_time": "1:28:07"}
35
- {"current_steps": 300, "total_steps": 455, "loss": 0.299, "lr": 3.144389149268983e-06, "epoch": 0.658165364047717, "percentage": 65.93, "elapsed_time": "2:39:58", "remaining_time": "1:22:39"}
36
- {"current_steps": 300, "total_steps": 455, "eval_loss": 0.2771042287349701, "epoch": 0.658165364047717, "percentage": 65.93, "elapsed_time": "2:41:11", "remaining_time": "1:23:16"}
37
- {"current_steps": 310, "total_steps": 455, "loss": 0.2898, "lr": 2.7935811783901878e-06, "epoch": 0.6801042095159742, "percentage": 68.13, "elapsed_time": "2:46:10", "remaining_time": "1:17:43"}
38
- {"current_steps": 320, "total_steps": 455, "loss": 0.2889, "lr": 2.455784712835084e-06, "epoch": 0.7020430549842315, "percentage": 70.33, "elapsed_time": "2:51:01", "remaining_time": "1:12:09"}
39
- {"current_steps": 330, "total_steps": 455, "loss": 0.2961, "lr": 2.1329917771761806e-06, "epoch": 0.7239819004524887, "percentage": 72.53, "elapsed_time": "2:56:12", "remaining_time": "1:06:44"}
40
- {"current_steps": 340, "total_steps": 455, "loss": 0.2857, "lr": 1.8271059184461781e-06, "epoch": 0.745920745920746, "percentage": 74.73, "elapsed_time": "3:01:23", "remaining_time": "1:01:21"}
41
- {"current_steps": 350, "total_steps": 455, "loss": 0.2896, "lr": 1.5399309807023942e-06, "epoch": 0.7678595913890032, "percentage": 76.92, "elapsed_time": "3:06:34", "remaining_time": "0:55:58"}
42
- {"current_steps": 350, "total_steps": 455, "eval_loss": 0.26701319217681885, "epoch": 0.7678595913890032, "percentage": 76.92, "elapsed_time": "3:07:46", "remaining_time": "0:56:19"}
43
- {"current_steps": 360, "total_steps": 455, "loss": 0.276, "lr": 1.2731604675510729e-06, "epoch": 0.7897984368572604, "percentage": 79.12, "elapsed_time": "3:12:57", "remaining_time": "0:50:55"}
44
- {"current_steps": 370, "total_steps": 455, "loss": 0.2762, "lr": 1.0283675553620281e-06, "epoch": 0.8117372823255177, "percentage": 81.32, "elapsed_time": "3:18:04", "remaining_time": "0:45:30"}
45
- {"current_steps": 380, "total_steps": 455, "loss": 0.2821, "lr": 8.069958160668256e-07, "epoch": 0.8336761277937749, "percentage": 83.52, "elapsed_time": "3:23:12", "remaining_time": "0:40:06"}
46
- {"current_steps": 390, "total_steps": 455, "loss": 0.2628, "lr": 6.10350704249219e-07, "epoch": 0.8556149732620321, "percentage": 85.71, "elapsed_time": "3:28:16", "remaining_time": "0:34:42"}
47
- {"current_steps": 400, "total_steps": 455, "loss": 0.2718, "lr": 4.3959185872947007e-07, "epoch": 0.8775538187302894, "percentage": 87.91, "elapsed_time": "3:33:26", "remaining_time": "0:29:20"}
48
- {"current_steps": 400, "total_steps": 455, "eval_loss": 0.2597688138484955, "epoch": 0.8775538187302894, "percentage": 87.91, "elapsed_time": "3:34:39", "remaining_time": "0:29:30"}
49
- {"current_steps": 410, "total_steps": 455, "loss": 0.2801, "lr": 2.9572626404096915e-07, "epoch": 0.8994926641985466, "percentage": 90.11, "elapsed_time": "3:39:39", "remaining_time": "0:24:06"}
50
- {"current_steps": 420, "total_steps": 455, "loss": 0.2651, "lr": 1.7960231212674095e-07, "epoch": 0.9214315096668038, "percentage": 92.31, "elapsed_time": "3:44:53", "remaining_time": "0:18:44"}
51
- {"current_steps": 430, "total_steps": 455, "loss": 0.28, "lr": 9.190479927466023e-08, "epoch": 0.9433703551350611, "percentage": 94.51, "elapsed_time": "3:50:02", "remaining_time": "0:13:22"}
52
- {"current_steps": 440, "total_steps": 455, "loss": 0.2743, "lr": 3.315088779506259e-08, "epoch": 0.9653092006033183, "percentage": 96.7, "elapsed_time": "3:55:12", "remaining_time": "0:08:01"}
53
- {"current_steps": 450, "total_steps": 455, "loss": 0.2792, "lr": 3.6870562551699627e-09, "epoch": 0.9872480460715755, "percentage": 98.9, "elapsed_time": "4:00:06", "remaining_time": "0:02:40"}
54
- {"current_steps": 450, "total_steps": 455, "eval_loss": 0.2568427622318268, "epoch": 0.9872480460715755, "percentage": 98.9, "elapsed_time": "4:01:19", "remaining_time": "0:02:40"}
55
- {"current_steps": 455, "total_steps": 455, "epoch": 0.9982174688057041, "percentage": 100.0, "elapsed_time": "4:04:41", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 10, "total_steps": 455, "loss": 0.6003, "lr": 2.173913043478261e-06, "epoch": 0.021938845468257234, "percentage": 2.2, "elapsed_time": "0:02:09", "remaining_time": "1:36:15"}
2
+ {"current_steps": 20, "total_steps": 455, "loss": 0.4488, "lr": 4.347826086956522e-06, "epoch": 0.04387769093651447, "percentage": 4.4, "elapsed_time": "0:04:21", "remaining_time": "1:34:38"}
3
+ {"current_steps": 30, "total_steps": 455, "loss": 0.4128, "lr": 6.521739130434783e-06, "epoch": 0.0658165364047717, "percentage": 6.59, "elapsed_time": "0:06:30", "remaining_time": "1:32:06"}
4
+ {"current_steps": 40, "total_steps": 455, "loss": 0.3861, "lr": 8.695652173913044e-06, "epoch": 0.08775538187302893, "percentage": 8.79, "elapsed_time": "0:08:38", "remaining_time": "1:29:44"}
5
+ {"current_steps": 50, "total_steps": 455, "loss": 0.3971, "lr": 9.997640179574575e-06, "epoch": 0.10969422734128617, "percentage": 10.99, "elapsed_time": "0:10:46", "remaining_time": "1:27:18"}
6
+ {"current_steps": 50, "total_steps": 455, "eval_loss": 0.40791964530944824, "epoch": 0.10969422734128617, "percentage": 10.99, "elapsed_time": "0:11:17", "remaining_time": "1:31:24"}
7
+ {"current_steps": 60, "total_steps": 455, "loss": 0.3866, "lr": 9.971117774604978e-06, "epoch": 0.1316330728095434, "percentage": 13.19, "elapsed_time": "0:13:24", "remaining_time": "1:28:14"}
8
+ {"current_steps": 70, "total_steps": 455, "loss": 0.3991, "lr": 9.915280116903003e-06, "epoch": 0.15357191827780062, "percentage": 15.38, "elapsed_time": "0:15:34", "remaining_time": "1:25:37"}
9
+ {"current_steps": 80, "total_steps": 455, "loss": 0.3761, "lr": 9.83045648755225e-06, "epoch": 0.17551076374605787, "percentage": 17.58, "elapsed_time": "0:17:44", "remaining_time": "1:23:09"}
10
+ {"current_steps": 90, "total_steps": 455, "loss": 0.3702, "lr": 9.717147101241817e-06, "epoch": 0.1974496092143151, "percentage": 19.78, "elapsed_time": "0:19:54", "remaining_time": "1:20:46"}
11
+ {"current_steps": 100, "total_steps": 455, "loss": 0.3678, "lr": 9.576020156442802e-06, "epoch": 0.21938845468257234, "percentage": 21.98, "elapsed_time": "0:22:03", "remaining_time": "1:18:19"}
12
+ {"current_steps": 100, "total_steps": 455, "eval_loss": 0.36698606610298157, "epoch": 0.21938845468257234, "percentage": 21.98, "elapsed_time": "0:22:34", "remaining_time": "1:20:07"}
13
+ {"current_steps": 110, "total_steps": 455, "loss": 0.3638, "lr": 9.407907894965138e-06, "epoch": 0.24132730015082957, "percentage": 24.18, "elapsed_time": "0:24:43", "remaining_time": "1:17:33"}
14
+ {"current_steps": 120, "total_steps": 455, "loss": 0.3657, "lr": 9.213801694132014e-06, "epoch": 0.2632661456190868, "percentage": 26.37, "elapsed_time": "0:26:53", "remaining_time": "1:15:03"}
15
+ {"current_steps": 130, "total_steps": 455, "loss": 0.3588, "lr": 8.994846220513872e-06, "epoch": 0.28520499108734404, "percentage": 28.57, "elapsed_time": "0:29:01", "remaining_time": "1:12:33"}
16
+ {"current_steps": 140, "total_steps": 455, "loss": 0.3497, "lr": 8.752332679698128e-06, "epoch": 0.30714383655560124, "percentage": 30.77, "elapsed_time": "0:31:10", "remaining_time": "1:10:08"}
17
+ {"current_steps": 150, "total_steps": 455, "loss": 0.3599, "lr": 8.48769120190144e-06, "epoch": 0.3290826820238585, "percentage": 32.97, "elapsed_time": "0:33:15", "remaining_time": "1:07:37"}
18
+ {"current_steps": 150, "total_steps": 455, "eval_loss": 0.33442166447639465, "epoch": 0.3290826820238585, "percentage": 32.97, "elapsed_time": "0:33:45", "remaining_time": "1:08:39"}
19
+ {"current_steps": 160, "total_steps": 455, "loss": 0.3526, "lr": 8.202482408327496e-06, "epoch": 0.35102152749211574, "percentage": 35.16, "elapsed_time": "0:35:53", "remaining_time": "1:06:11"}
20
+ {"current_steps": 170, "total_steps": 455, "loss": 0.3357, "lr": 7.898388208004449e-06, "epoch": 0.372960372960373, "percentage": 37.36, "elapsed_time": "0:38:03", "remaining_time": "1:03:47"}
21
+ {"current_steps": 180, "total_steps": 455, "loss": 0.3415, "lr": 7.577201879374114e-06, "epoch": 0.3948992184286302, "percentage": 39.56, "elapsed_time": "0:40:12", "remaining_time": "1:01:25"}
22
+ {"current_steps": 190, "total_steps": 455, "loss": 0.3376, "lr": 7.240817495122936e-06, "epoch": 0.41683806389688743, "percentage": 41.76, "elapsed_time": "0:42:20", "remaining_time": "0:59:03"}
23
+ {"current_steps": 200, "total_steps": 455, "loss": 0.3186, "lr": 6.891218752617715e-06, "epoch": 0.4387769093651447, "percentage": 43.96, "elapsed_time": "0:44:28", "remaining_time": "0:56:41"}
24
+ {"current_steps": 200, "total_steps": 455, "eval_loss": 0.32387444376945496, "epoch": 0.4387769093651447, "percentage": 43.96, "elapsed_time": "0:44:58", "remaining_time": "0:57:20"}
25
+ {"current_steps": 210, "total_steps": 455, "loss": 0.3317, "lr": 6.5304672758143014e-06, "epoch": 0.4607157548334019, "percentage": 46.15, "elapsed_time": "0:47:06", "remaining_time": "0:54:57"}
26
+ {"current_steps": 220, "total_steps": 455, "loss": 0.327, "lr": 6.160690457624223e-06, "epoch": 0.48265460030165913, "percentage": 48.35, "elapsed_time": "0:49:15", "remaining_time": "0:52:36"}
27
+ {"current_steps": 230, "total_steps": 455, "loss": 0.3146, "lr": 5.784068914434239e-06, "epoch": 0.5045934457699164, "percentage": 50.55, "elapsed_time": "0:51:25", "remaining_time": "0:50:18"}
28
+ {"current_steps": 240, "total_steps": 455, "loss": 0.3171, "lr": 5.40282362676094e-06, "epoch": 0.5265322912381736, "percentage": 52.75, "elapsed_time": "0:53:33", "remaining_time": "0:47:58"}
29
+ {"current_steps": 250, "total_steps": 455, "loss": 0.2979, "lr": 5.019202841873434e-06, "epoch": 0.5484711367064308, "percentage": 54.95, "elapsed_time": "0:55:39", "remaining_time": "0:45:38"}
30
+ {"current_steps": 250, "total_steps": 455, "eval_loss": 0.29967406392097473, "epoch": 0.5484711367064308, "percentage": 54.95, "elapsed_time": "0:56:10", "remaining_time": "0:46:03"}
31
+ {"current_steps": 260, "total_steps": 455, "loss": 0.3121, "lr": 4.635468815620862e-06, "epoch": 0.5704099821746881, "percentage": 57.14, "elapsed_time": "0:58:18", "remaining_time": "0:43:43"}
32
+ {"current_steps": 270, "total_steps": 455, "loss": 0.3036, "lr": 4.2538844716497075e-06, "epoch": 0.5923488276429453, "percentage": 59.34, "elapsed_time": "1:00:29", "remaining_time": "0:41:27"}
33
+ {"current_steps": 280, "total_steps": 455, "loss": 0.2978, "lr": 3.876700056683026e-06, "epoch": 0.6142876731112025, "percentage": 61.54, "elapsed_time": "1:02:41", "remaining_time": "0:39:10"}
34
+ {"current_steps": 290, "total_steps": 455, "loss": 0.2901, "lr": 3.5061398705569544e-06, "epoch": 0.6362265185794598, "percentage": 63.74, "elapsed_time": "1:04:49", "remaining_time": "0:36:52"}
35
+ {"current_steps": 300, "total_steps": 455, "loss": 0.2996, "lr": 3.144389149268983e-06, "epoch": 0.658165364047717, "percentage": 65.93, "elapsed_time": "1:06:58", "remaining_time": "0:34:35"}
36
+ {"current_steps": 300, "total_steps": 455, "eval_loss": 0.2765507102012634, "epoch": 0.658165364047717, "percentage": 65.93, "elapsed_time": "1:07:28", "remaining_time": "0:34:51"}
37
+ {"current_steps": 310, "total_steps": 455, "loss": 0.2885, "lr": 2.7935811783901878e-06, "epoch": 0.6801042095159742, "percentage": 68.13, "elapsed_time": "1:09:39", "remaining_time": "0:32:34"}
38
+ {"current_steps": 320, "total_steps": 455, "loss": 0.2898, "lr": 2.455784712835084e-06, "epoch": 0.7020430549842315, "percentage": 70.33, "elapsed_time": "1:11:47", "remaining_time": "0:30:17"}
39
+ {"current_steps": 330, "total_steps": 455, "loss": 0.2961, "lr": 2.1329917771761806e-06, "epoch": 0.7239819004524887, "percentage": 72.53, "elapsed_time": "1:14:00", "remaining_time": "0:28:01"}
40
+ {"current_steps": 340, "total_steps": 455, "loss": 0.2856, "lr": 1.8271059184461781e-06, "epoch": 0.745920745920746, "percentage": 74.73, "elapsed_time": "1:16:08", "remaining_time": "0:25:45"}
41
+ {"current_steps": 350, "total_steps": 455, "loss": 0.2887, "lr": 1.5399309807023942e-06, "epoch": 0.7678595913890032, "percentage": 76.92, "elapsed_time": "1:18:17", "remaining_time": "0:23:29"}
42
+ {"current_steps": 350, "total_steps": 455, "eval_loss": 0.2653253376483917, "epoch": 0.7678595913890032, "percentage": 76.92, "elapsed_time": "1:18:47", "remaining_time": "0:23:38"}
43
+ {"current_steps": 360, "total_steps": 455, "loss": 0.2752, "lr": 1.2731604675510729e-06, "epoch": 0.7897984368572604, "percentage": 79.12, "elapsed_time": "1:20:56", "remaining_time": "0:21:21"}
44
+ {"current_steps": 370, "total_steps": 455, "loss": 0.2744, "lr": 1.0283675553620281e-06, "epoch": 0.8117372823255177, "percentage": 81.32, "elapsed_time": "1:23:05", "remaining_time": "0:19:05"}
45
+ {"current_steps": 380, "total_steps": 455, "loss": 0.2813, "lr": 8.069958160668256e-07, "epoch": 0.8336761277937749, "percentage": 83.52, "elapsed_time": "1:25:14", "remaining_time": "0:16:49"}
46
+ {"current_steps": 390, "total_steps": 455, "loss": 0.2624, "lr": 6.10350704249219e-07, "epoch": 0.8556149732620321, "percentage": 85.71, "elapsed_time": "1:27:20", "remaining_time": "0:14:33"}
47
+ {"current_steps": 400, "total_steps": 455, "loss": 0.2696, "lr": 4.3959185872947007e-07, "epoch": 0.8775538187302894, "percentage": 87.91, "elapsed_time": "1:29:32", "remaining_time": "0:12:18"}
48
+ {"current_steps": 400, "total_steps": 455, "eval_loss": 0.25855177640914917, "epoch": 0.8775538187302894, "percentage": 87.91, "elapsed_time": "1:30:02", "remaining_time": "0:12:22"}
49
+ {"current_steps": 410, "total_steps": 455, "loss": 0.2795, "lr": 2.9572626404096915e-07, "epoch": 0.8994926641985466, "percentage": 90.11, "elapsed_time": "1:32:08", "remaining_time": "0:10:06"}
50
+ {"current_steps": 420, "total_steps": 455, "loss": 0.2661, "lr": 1.7960231212674095e-07, "epoch": 0.9214315096668038, "percentage": 92.31, "elapsed_time": "1:34:15", "remaining_time": "0:07:51"}
51
+ {"current_steps": 430, "total_steps": 455, "loss": 0.2787, "lr": 9.190479927466023e-08, "epoch": 0.9433703551350611, "percentage": 94.51, "elapsed_time": "1:36:25", "remaining_time": "0:05:36"}
52
+ {"current_steps": 440, "total_steps": 455, "loss": 0.2752, "lr": 3.315088779506259e-08, "epoch": 0.9653092006033183, "percentage": 96.7, "elapsed_time": "1:38:33", "remaining_time": "0:03:21"}
53
+ {"current_steps": 450, "total_steps": 455, "loss": 0.2784, "lr": 3.6870562551699627e-09, "epoch": 0.9872480460715755, "percentage": 98.9, "elapsed_time": "1:40:44", "remaining_time": "0:01:07"}
54
+ {"current_steps": 450, "total_steps": 455, "eval_loss": 0.2559176981449127, "epoch": 0.9872480460715755, "percentage": 98.9, "elapsed_time": "1:41:14", "remaining_time": "0:01:07"}
55
+ {"current_steps": 455, "total_steps": 455, "epoch": 0.9982174688057041, "percentage": 100.0, "elapsed_time": "1:43:16", "remaining_time": "0:00:00"}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8eed0fc114f6cbc201f43eb7ec93159071904282c4fbd1e7514887358f3126e9
3
  size 7608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da49d7bc6153266740c60b542e01d6ad6cb9a1d807163f1f309c8911fe0a18e7
3
  size 7608