izzcw commited on
Commit
9fa8cbd
·
verified ·
1 Parent(s): 779de29

Training in progress, step 500

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcc90af6550923a511aac4819a6d21a408ad541b284fd3dc4452a9123dab8cce
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d27713a849508d3f1b9a4c10cc9589c5fc54465ecd775765923e630ffad8902
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35f4514ded249891e8d0aa76278354de2ad67774dcc4898e2a6280f3503d705d
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2c9f5d1d300b5215f2803433565807c7a6bc60ea4ceaae80b81ed0a1cb000ae
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9d8609e9b17222bfbce229ee90dc78ef6fd49b82a2ccd848302ebe42f2e6ec4
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18521e5ec62ead98eba12a153c51cadfe3eb919328406b688d06b22a081cebff
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df24ca6f5f3c80be62f7db39cb3f681fd1c31a8679c4f5e5e61e0f365464a0c8
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67efdac01d671f49e02d8c72f4fe6fdd783d34109c8394b3163297d9cdf28710
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -1,180 +1,60 @@
1
- {"current_steps": 10, "total_steps": 3751, "loss": 0.5958, "lr": 2.6595744680851066e-07, "epoch": 0.002665911325124548, "percentage": 0.27, "elapsed_time": "0:02:24", "remaining_time": "14:59:12"}
2
- {"current_steps": 20, "total_steps": 3751, "loss": 0.5162, "lr": 5.319148936170213e-07, "epoch": 0.005331822650249096, "percentage": 0.53, "elapsed_time": "0:04:38", "remaining_time": "14:25:18"}
3
- {"current_steps": 30, "total_steps": 3751, "loss": 0.4332, "lr": 7.97872340425532e-07, "epoch": 0.007997733975373645, "percentage": 0.8, "elapsed_time": "0:07:01", "remaining_time": "14:31:32"}
4
- {"current_steps": 40, "total_steps": 3751, "loss": 0.3824, "lr": 1.0638297872340427e-06, "epoch": 0.010663645300498192, "percentage": 1.07, "elapsed_time": "0:09:18", "remaining_time": "14:23:50"}
5
- {"current_steps": 50, "total_steps": 3751, "loss": 0.3822, "lr": 1.3297872340425533e-06, "epoch": 0.01332955662562274, "percentage": 1.33, "elapsed_time": "0:11:38", "remaining_time": "14:21:16"}
6
- {"current_steps": 50, "total_steps": 3751, "eval_loss": 0.4726083278656006, "epoch": 0.01332955662562274, "percentage": 1.33, "elapsed_time": "0:16:01", "remaining_time": "19:46:19"}
7
- {"current_steps": 60, "total_steps": 3751, "loss": 0.3428, "lr": 1.595744680851064e-06, "epoch": 0.01599546795074729, "percentage": 1.6, "elapsed_time": "0:18:20", "remaining_time": "18:48:17"}
8
- {"current_steps": 70, "total_steps": 3751, "loss": 0.3327, "lr": 1.8617021276595745e-06, "epoch": 0.018661379275871838, "percentage": 1.87, "elapsed_time": "0:20:39", "remaining_time": "18:06:05"}
9
- {"current_steps": 80, "total_steps": 3751, "loss": 0.3085, "lr": 2.1276595744680853e-06, "epoch": 0.021327290600996383, "percentage": 2.13, "elapsed_time": "0:22:57", "remaining_time": "17:33:19"}
10
- {"current_steps": 90, "total_steps": 3751, "loss": 0.306, "lr": 2.393617021276596e-06, "epoch": 0.023993201926120932, "percentage": 2.4, "elapsed_time": "0:25:18", "remaining_time": "17:09:45"}
11
- {"current_steps": 100, "total_steps": 3751, "loss": 0.3024, "lr": 2.6595744680851065e-06, "epoch": 0.02665911325124548, "percentage": 2.67, "elapsed_time": "0:27:36", "remaining_time": "16:48:06"}
12
- {"current_steps": 100, "total_steps": 3751, "eval_loss": 0.31838247179985046, "epoch": 0.02665911325124548, "percentage": 2.67, "elapsed_time": "0:32:00", "remaining_time": "19:28:20"}
13
- {"current_steps": 110, "total_steps": 3751, "loss": 0.2991, "lr": 2.9255319148936174e-06, "epoch": 0.02932502457637003, "percentage": 2.93, "elapsed_time": "0:34:18", "remaining_time": "18:55:28"}
14
- {"current_steps": 120, "total_steps": 3751, "loss": 0.3045, "lr": 3.191489361702128e-06, "epoch": 0.03199093590149458, "percentage": 3.2, "elapsed_time": "0:36:36", "remaining_time": "18:27:56"}
15
- {"current_steps": 130, "total_steps": 3751, "loss": 0.3081, "lr": 3.457446808510639e-06, "epoch": 0.034656847226619124, "percentage": 3.47, "elapsed_time": "0:38:53", "remaining_time": "18:03:03"}
16
- {"current_steps": 140, "total_steps": 3751, "loss": 0.3135, "lr": 3.723404255319149e-06, "epoch": 0.037322758551743676, "percentage": 3.73, "elapsed_time": "0:41:11", "remaining_time": "17:42:29"}
17
- {"current_steps": 150, "total_steps": 3751, "loss": 0.3003, "lr": 3.98936170212766e-06, "epoch": 0.03998866987686822, "percentage": 4.0, "elapsed_time": "0:43:30", "remaining_time": "17:24:30"}
18
- {"current_steps": 150, "total_steps": 3751, "eval_loss": 0.3203234076499939, "epoch": 0.03998866987686822, "percentage": 4.0, "elapsed_time": "0:47:54", "remaining_time": "19:09:55"}
19
- {"current_steps": 160, "total_steps": 3751, "loss": 0.3066, "lr": 4.255319148936171e-06, "epoch": 0.04265458120199277, "percentage": 4.27, "elapsed_time": "0:50:14", "remaining_time": "18:47:33"}
20
- {"current_steps": 170, "total_steps": 3751, "loss": 0.31, "lr": 4.521276595744681e-06, "epoch": 0.04532049252711732, "percentage": 4.53, "elapsed_time": "0:52:31", "remaining_time": "18:26:25"}
21
- {"current_steps": 180, "total_steps": 3751, "loss": 0.2978, "lr": 4.787234042553192e-06, "epoch": 0.047986403852241864, "percentage": 4.8, "elapsed_time": "0:54:51", "remaining_time": "18:08:19"}
22
- {"current_steps": 190, "total_steps": 3751, "loss": 0.3012, "lr": 5.053191489361703e-06, "epoch": 0.05065231517736641, "percentage": 5.07, "elapsed_time": "0:57:11", "remaining_time": "17:52:00"}
23
- {"current_steps": 200, "total_steps": 3751, "loss": 0.3059, "lr": 5.319148936170213e-06, "epoch": 0.05331822650249096, "percentage": 5.33, "elapsed_time": "0:59:34", "remaining_time": "17:37:53"}
24
- {"current_steps": 200, "total_steps": 3751, "eval_loss": 0.3241155743598938, "epoch": 0.05331822650249096, "percentage": 5.33, "elapsed_time": "1:03:58", "remaining_time": "18:55:52"}
25
- {"current_steps": 210, "total_steps": 3751, "loss": 0.3128, "lr": 5.5851063829787235e-06, "epoch": 0.05598413782761551, "percentage": 5.6, "elapsed_time": "1:06:11", "remaining_time": "18:36:15"}
26
- {"current_steps": 220, "total_steps": 3751, "loss": 0.3113, "lr": 5.851063829787235e-06, "epoch": 0.05865004915274006, "percentage": 5.87, "elapsed_time": "1:08:31", "remaining_time": "18:19:45"}
27
- {"current_steps": 230, "total_steps": 3751, "loss": 0.3104, "lr": 6.117021276595745e-06, "epoch": 0.061315960477864605, "percentage": 6.13, "elapsed_time": "1:10:50", "remaining_time": "18:04:35"}
28
- {"current_steps": 240, "total_steps": 3751, "loss": 0.3052, "lr": 6.382978723404256e-06, "epoch": 0.06398187180298916, "percentage": 6.4, "elapsed_time": "1:13:07", "remaining_time": "17:49:47"}
29
- {"current_steps": 250, "total_steps": 3751, "loss": 0.2985, "lr": 6.648936170212767e-06, "epoch": 0.0666477831281137, "percentage": 6.66, "elapsed_time": "1:15:28", "remaining_time": "17:36:55"}
30
- {"current_steps": 250, "total_steps": 3751, "eval_loss": 0.3189364969730377, "epoch": 0.0666477831281137, "percentage": 6.66, "elapsed_time": "1:19:51", "remaining_time": "18:38:25"}
31
- {"current_steps": 260, "total_steps": 3751, "loss": 0.3127, "lr": 6.914893617021278e-06, "epoch": 0.06931369445323825, "percentage": 6.93, "elapsed_time": "1:22:09", "remaining_time": "18:23:11"}
32
- {"current_steps": 270, "total_steps": 3751, "loss": 0.3138, "lr": 7.1808510638297875e-06, "epoch": 0.07197960577836279, "percentage": 7.2, "elapsed_time": "1:24:26", "remaining_time": "18:08:46"}
33
- {"current_steps": 280, "total_steps": 3751, "loss": 0.3039, "lr": 7.446808510638298e-06, "epoch": 0.07464551710348735, "percentage": 7.46, "elapsed_time": "1:26:47", "remaining_time": "17:55:49"}
34
- {"current_steps": 290, "total_steps": 3751, "loss": 0.3146, "lr": 7.71276595744681e-06, "epoch": 0.0773114284286119, "percentage": 7.73, "elapsed_time": "1:29:08", "remaining_time": "17:43:52"}
35
- {"current_steps": 300, "total_steps": 3751, "loss": 0.3138, "lr": 7.97872340425532e-06, "epoch": 0.07997733975373644, "percentage": 8.0, "elapsed_time": "1:31:28", "remaining_time": "17:32:11"}
36
- {"current_steps": 300, "total_steps": 3751, "eval_loss": 0.32009848952293396, "epoch": 0.07997733975373644, "percentage": 8.0, "elapsed_time": "1:35:51", "remaining_time": "18:22:41"}
37
- {"current_steps": 310, "total_steps": 3751, "loss": 0.3095, "lr": 8.24468085106383e-06, "epoch": 0.08264325107886099, "percentage": 8.26, "elapsed_time": "1:38:07", "remaining_time": "18:09:10"}
38
- {"current_steps": 320, "total_steps": 3751, "loss": 0.3237, "lr": 8.510638297872341e-06, "epoch": 0.08530916240398553, "percentage": 8.53, "elapsed_time": "1:40:24", "remaining_time": "17:56:32"}
39
- {"current_steps": 330, "total_steps": 3751, "loss": 0.3212, "lr": 8.776595744680852e-06, "epoch": 0.08797507372911008, "percentage": 8.8, "elapsed_time": "1:42:44", "remaining_time": "17:45:01"}
40
- {"current_steps": 340, "total_steps": 3751, "loss": 0.311, "lr": 9.042553191489362e-06, "epoch": 0.09064098505423464, "percentage": 9.06, "elapsed_time": "1:45:02", "remaining_time": "17:33:44"}
41
- {"current_steps": 350, "total_steps": 3751, "loss": 0.3231, "lr": 9.308510638297872e-06, "epoch": 0.09330689637935918, "percentage": 9.33, "elapsed_time": "1:47:22", "remaining_time": "17:23:20"}
42
- {"current_steps": 350, "total_steps": 3751, "eval_loss": 0.3210580050945282, "epoch": 0.09330689637935918, "percentage": 9.33, "elapsed_time": "1:51:45", "remaining_time": "18:06:00"}
43
- {"current_steps": 360, "total_steps": 3751, "loss": 0.3175, "lr": 9.574468085106385e-06, "epoch": 0.09597280770448373, "percentage": 9.6, "elapsed_time": "1:54:00", "remaining_time": "17:53:57"}
44
- {"current_steps": 370, "total_steps": 3751, "loss": 0.3171, "lr": 9.840425531914895e-06, "epoch": 0.09863871902960827, "percentage": 9.86, "elapsed_time": "1:56:23", "remaining_time": "17:43:33"}
45
- {"current_steps": 380, "total_steps": 3751, "loss": 0.319, "lr": 9.999965341346946e-06, "epoch": 0.10130463035473282, "percentage": 10.13, "elapsed_time": "1:58:44", "remaining_time": "17:33:18"}
46
- {"current_steps": 390, "total_steps": 3751, "loss": 0.3141, "lr": 9.999575437018172e-06, "epoch": 0.10397054167985738, "percentage": 10.4, "elapsed_time": "2:01:08", "remaining_time": "17:23:56"}
47
- {"current_steps": 400, "total_steps": 3751, "loss": 0.3176, "lr": 9.998752338940612e-06, "epoch": 0.10663645300498192, "percentage": 10.66, "elapsed_time": "2:03:30", "remaining_time": "17:14:39"}
48
- {"current_steps": 400, "total_steps": 3751, "eval_loss": 0.31982332468032837, "epoch": 0.10663645300498192, "percentage": 10.66, "elapsed_time": "2:07:53", "remaining_time": "17:51:26"}
49
- {"current_steps": 410, "total_steps": 3751, "loss": 0.3014, "lr": 9.997496118432509e-06, "epoch": 0.10930236433010647, "percentage": 10.93, "elapsed_time": "2:10:11", "remaining_time": "17:40:55"}
50
- {"current_steps": 420, "total_steps": 3751, "loss": 0.3197, "lr": 9.995806884340483e-06, "epoch": 0.11196827565523101, "percentage": 11.2, "elapsed_time": "2:12:28", "remaining_time": "17:30:41"}
51
- {"current_steps": 430, "total_steps": 3751, "loss": 0.3144, "lr": 9.99368478303009e-06, "epoch": 0.11463418698035556, "percentage": 11.46, "elapsed_time": "2:14:48", "remaining_time": "17:21:09"}
52
- {"current_steps": 440, "total_steps": 3751, "loss": 0.319, "lr": 9.991129998373145e-06, "epoch": 0.11730009830548012, "percentage": 11.73, "elapsed_time": "2:17:09", "remaining_time": "17:12:05"}
53
- {"current_steps": 450, "total_steps": 3751, "loss": 0.3073, "lr": 9.988142751731797e-06, "epoch": 0.11996600963060466, "percentage": 12.0, "elapsed_time": "2:19:27", "remaining_time": "17:03:00"}
54
- {"current_steps": 450, "total_steps": 3751, "eval_loss": 0.3180363178253174, "epoch": 0.11996600963060466, "percentage": 12.0, "elapsed_time": "2:23:50", "remaining_time": "17:35:12"}
55
- {"current_steps": 460, "total_steps": 3751, "loss": 0.3164, "lr": 9.984723301939337e-06, "epoch": 0.12263192095572921, "percentage": 12.26, "elapsed_time": "2:26:09", "remaining_time": "17:25:41"}
56
- {"current_steps": 470, "total_steps": 3751, "loss": 0.3167, "lr": 9.980871945277777e-06, "epoch": 0.12529783228085375, "percentage": 12.53, "elapsed_time": "2:28:28", "remaining_time": "17:16:29"}
57
- {"current_steps": 480, "total_steps": 3751, "loss": 0.3192, "lr": 9.976589015452178e-06, "epoch": 0.12796374360597831, "percentage": 12.8, "elapsed_time": "2:30:46", "remaining_time": "17:07:31"}
58
- {"current_steps": 490, "total_steps": 3751, "loss": 0.3112, "lr": 9.97187488356174e-06, "epoch": 0.13062965493110285, "percentage": 13.06, "elapsed_time": "2:33:05", "remaining_time": "16:58:53"}
59
- {"current_steps": 500, "total_steps": 3751, "loss": 0.3116, "lr": 9.966729958067638e-06, "epoch": 0.1332955662562274, "percentage": 13.33, "elapsed_time": "2:35:23", "remaining_time": "16:50:20"}
60
- {"current_steps": 500, "total_steps": 3751, "eval_loss": 0.30701354146003723, "epoch": 0.1332955662562274, "percentage": 13.33, "elapsed_time": "2:39:46", "remaining_time": "17:18:53"}
61
- {"current_steps": 510, "total_steps": 3751, "loss": 0.3037, "lr": 9.961154684757636e-06, "epoch": 0.13596147758135196, "percentage": 13.6, "elapsed_time": "2:43:08", "remaining_time": "17:16:42"}
62
- {"current_steps": 520, "total_steps": 3751, "loss": 0.3027, "lr": 9.955149546707465e-06, "epoch": 0.1386273889064765, "percentage": 13.86, "elapsed_time": "2:45:25", "remaining_time": "17:07:54"}
63
- {"current_steps": 530, "total_steps": 3751, "loss": 0.2957, "lr": 9.948715064238956e-06, "epoch": 0.14129330023160105, "percentage": 14.13, "elapsed_time": "2:47:49", "remaining_time": "16:59:54"}
64
- {"current_steps": 540, "total_steps": 3751, "loss": 0.2993, "lr": 9.941851794874969e-06, "epoch": 0.14395921155672559, "percentage": 14.4, "elapsed_time": "2:50:05", "remaining_time": "16:51:26"}
65
- {"current_steps": 550, "total_steps": 3751, "loss": 0.3142, "lr": 9.934560333291077e-06, "epoch": 0.14662512288185015, "percentage": 14.66, "elapsed_time": "2:52:24", "remaining_time": "16:43:24"}
66
- {"current_steps": 550, "total_steps": 3751, "eval_loss": 0.3080333173274994, "epoch": 0.14662512288185015, "percentage": 14.66, "elapsed_time": "2:56:47", "remaining_time": "17:08:57"}
67
- {"current_steps": 560, "total_steps": 3751, "loss": 0.302, "lr": 9.926841311264037e-06, "epoch": 0.1492910342069747, "percentage": 14.93, "elapsed_time": "2:59:04", "remaining_time": "17:00:25"}
68
- {"current_steps": 570, "total_steps": 3751, "loss": 0.3098, "lr": 9.918695397617064e-06, "epoch": 0.15195694553209924, "percentage": 15.2, "elapsed_time": "3:01:25", "remaining_time": "16:52:28"}
69
- {"current_steps": 580, "total_steps": 3751, "loss": 0.3001, "lr": 9.91012329816186e-06, "epoch": 0.1546228568572238, "percentage": 15.46, "elapsed_time": "3:03:48", "remaining_time": "16:44:53"}
70
- {"current_steps": 590, "total_steps": 3751, "loss": 0.3014, "lr": 9.901125755637473e-06, "epoch": 0.15728876818234833, "percentage": 15.73, "elapsed_time": "3:06:07", "remaining_time": "16:37:11"}
71
- {"current_steps": 600, "total_steps": 3751, "loss": 0.3046, "lr": 9.89170354964594e-06, "epoch": 0.15995467950747289, "percentage": 16.0, "elapsed_time": "3:08:26", "remaining_time": "16:29:37"}
72
- {"current_steps": 600, "total_steps": 3751, "eval_loss": 0.3062264621257782, "epoch": 0.15995467950747289, "percentage": 16.0, "elapsed_time": "3:12:49", "remaining_time": "16:52:40"}
73
- {"current_steps": 610, "total_steps": 3751, "loss": 0.2994, "lr": 9.881857496584726e-06, "epoch": 0.16262059083259742, "percentage": 16.26, "elapsed_time": "3:15:11", "remaining_time": "16:45:03"}
74
- {"current_steps": 620, "total_steps": 3751, "loss": 0.2953, "lr": 9.871588449575999e-06, "epoch": 0.16528650215772198, "percentage": 16.53, "elapsed_time": "3:17:30", "remaining_time": "16:37:25"}
75
- {"current_steps": 630, "total_steps": 3751, "loss": 0.3035, "lr": 9.860897298392712e-06, "epoch": 0.16795241348284654, "percentage": 16.8, "elapsed_time": "3:19:47", "remaining_time": "16:29:43"}
76
- {"current_steps": 640, "total_steps": 3751, "loss": 0.3005, "lr": 9.849784969381488e-06, "epoch": 0.17061832480797107, "percentage": 17.06, "elapsed_time": "3:22:06", "remaining_time": "16:22:26"}
77
- {"current_steps": 650, "total_steps": 3751, "loss": 0.2853, "lr": 9.83825242538238e-06, "epoch": 0.17328423613309563, "percentage": 17.33, "elapsed_time": "3:24:26", "remaining_time": "16:15:19"}
78
- {"current_steps": 650, "total_steps": 3751, "eval_loss": 0.30302974581718445, "epoch": 0.17328423613309563, "percentage": 17.33, "elapsed_time": "3:28:49", "remaining_time": "16:36:16"}
79
- {"current_steps": 660, "total_steps": 3751, "loss": 0.28, "lr": 9.826300665645432e-06, "epoch": 0.17595014745822016, "percentage": 17.6, "elapsed_time": "3:31:06", "remaining_time": "16:28:42"}
80
- {"current_steps": 670, "total_steps": 3751, "loss": 0.2843, "lr": 9.813930725744095e-06, "epoch": 0.17861605878334472, "percentage": 17.86, "elapsed_time": "3:33:29", "remaining_time": "16:21:44"}
81
- {"current_steps": 680, "total_steps": 3751, "loss": 0.302, "lr": 9.801143677485509e-06, "epoch": 0.18128197010846928, "percentage": 18.13, "elapsed_time": "3:35:51", "remaining_time": "16:14:52"}
82
- {"current_steps": 690, "total_steps": 3751, "loss": 0.3038, "lr": 9.787940628817627e-06, "epoch": 0.1839478814335938, "percentage": 18.4, "elapsed_time": "3:38:11", "remaining_time": "16:07:56"}
83
- {"current_steps": 700, "total_steps": 3751, "loss": 0.2933, "lr": 9.774322723733216e-06, "epoch": 0.18661379275871837, "percentage": 18.66, "elapsed_time": "3:40:28", "remaining_time": "16:00:56"}
84
- {"current_steps": 700, "total_steps": 3751, "eval_loss": 0.2952025234699249, "epoch": 0.18661379275871837, "percentage": 18.66, "elapsed_time": "3:44:51", "remaining_time": "16:20:03"}
85
- {"current_steps": 710, "total_steps": 3751, "loss": 0.2799, "lr": 9.760291142170739e-06, "epoch": 0.1892797040838429, "percentage": 18.93, "elapsed_time": "3:47:11", "remaining_time": "16:13:05"}
86
- {"current_steps": 720, "total_steps": 3751, "loss": 0.2938, "lr": 9.745847099912116e-06, "epoch": 0.19194561540896746, "percentage": 19.19, "elapsed_time": "3:49:31", "remaining_time": "16:06:13"}
87
- {"current_steps": 730, "total_steps": 3751, "loss": 0.2936, "lr": 9.73099184847738e-06, "epoch": 0.19461152673409202, "percentage": 19.46, "elapsed_time": "3:51:46", "remaining_time": "15:59:09"}
88
- {"current_steps": 740, "total_steps": 3751, "loss": 0.2846, "lr": 9.715726675016238e-06, "epoch": 0.19727743805921655, "percentage": 19.73, "elapsed_time": "3:54:06", "remaining_time": "15:52:32"}
89
- {"current_steps": 750, "total_steps": 3751, "loss": 0.2862, "lr": 9.700052902196541e-06, "epoch": 0.1999433493843411, "percentage": 19.99, "elapsed_time": "3:56:28", "remaining_time": "15:46:12"}
90
- {"current_steps": 750, "total_steps": 3751, "eval_loss": 0.2914896607398987, "epoch": 0.1999433493843411, "percentage": 19.99, "elapsed_time": "4:00:51", "remaining_time": "16:03:47"}
91
- {"current_steps": 760, "total_steps": 3751, "loss": 0.2914, "lr": 9.68397188808969e-06, "epoch": 0.20260926070946564, "percentage": 20.26, "elapsed_time": "4:03:07", "remaining_time": "15:56:51"}
92
- {"current_steps": 770, "total_steps": 3751, "loss": 0.299, "lr": 9.667485026052956e-06, "epoch": 0.2052751720345902, "percentage": 20.53, "elapsed_time": "4:05:23", "remaining_time": "15:50:01"}
93
- {"current_steps": 780, "total_steps": 3751, "loss": 0.2953, "lr": 9.650593744608754e-06, "epoch": 0.20794108335971476, "percentage": 20.79, "elapsed_time": "4:07:49", "remaining_time": "15:43:59"}
94
- {"current_steps": 790, "total_steps": 3751, "loss": 0.2913, "lr": 9.633299507320862e-06, "epoch": 0.2106069946848393, "percentage": 21.06, "elapsed_time": "4:10:14", "remaining_time": "15:37:54"}
95
- {"current_steps": 800, "total_steps": 3751, "loss": 0.2867, "lr": 9.615603812667618e-06, "epoch": 0.21327290600996385, "percentage": 21.33, "elapsed_time": "4:12:30", "remaining_time": "15:31:25"}
96
- {"current_steps": 800, "total_steps": 3751, "eval_loss": 0.2883636951446533, "epoch": 0.21327290600996385, "percentage": 21.33, "elapsed_time": "4:16:53", "remaining_time": "15:47:36"}
97
- {"current_steps": 810, "total_steps": 3751, "loss": 0.2891, "lr": 9.597508193912077e-06, "epoch": 0.21593881733508838, "percentage": 21.59, "elapsed_time": "4:19:10", "remaining_time": "15:41:02"}
98
- {"current_steps": 820, "total_steps": 3751, "loss": 0.2853, "lr": 9.579014218969158e-06, "epoch": 0.21860472866021294, "percentage": 21.86, "elapsed_time": "4:21:34", "remaining_time": "15:34:57"}
99
- {"current_steps": 830, "total_steps": 3751, "loss": 0.2936, "lr": 9.560123490269795e-06, "epoch": 0.2212706399853375, "percentage": 22.13, "elapsed_time": "4:23:55", "remaining_time": "15:28:47"}
100
- {"current_steps": 840, "total_steps": 3751, "loss": 0.282, "lr": 9.540837644622091e-06, "epoch": 0.22393655131046203, "percentage": 22.39, "elapsed_time": "4:26:15", "remaining_time": "15:22:43"}
101
- {"current_steps": 850, "total_steps": 3751, "loss": 0.2871, "lr": 9.521158353069494e-06, "epoch": 0.2266024626355866, "percentage": 22.66, "elapsed_time": "4:28:34", "remaining_time": "15:16:36"}
102
- {"current_steps": 850, "total_steps": 3751, "eval_loss": 0.2833527624607086, "epoch": 0.2266024626355866, "percentage": 22.66, "elapsed_time": "4:32:57", "remaining_time": "15:31:35"}
103
- {"current_steps": 860, "total_steps": 3751, "loss": 0.2869, "lr": 9.501087320746007e-06, "epoch": 0.22926837396071112, "percentage": 22.93, "elapsed_time": "4:35:17", "remaining_time": "15:25:24"}
104
- {"current_steps": 870, "total_steps": 3751, "loss": 0.2857, "lr": 9.480626286728445e-06, "epoch": 0.23193428528583568, "percentage": 23.19, "elapsed_time": "4:37:36", "remaining_time": "15:19:16"}
105
- {"current_steps": 880, "total_steps": 3751, "loss": 0.2826, "lr": 9.459777023885754e-06, "epoch": 0.23460019661096024, "percentage": 23.46, "elapsed_time": "4:39:50", "remaining_time": "15:12:58"}
106
- {"current_steps": 890, "total_steps": 3751, "loss": 0.2834, "lr": 9.438541338725397e-06, "epoch": 0.23726610793608477, "percentage": 23.73, "elapsed_time": "4:42:10", "remaining_time": "15:07:04"}
107
- {"current_steps": 900, "total_steps": 3751, "loss": 0.2921, "lr": 9.416921071236821e-06, "epoch": 0.23993201926120933, "percentage": 23.99, "elapsed_time": "4:44:30", "remaining_time": "15:01:15"}
108
- {"current_steps": 900, "total_steps": 3751, "eval_loss": 0.27931955456733704, "epoch": 0.23993201926120933, "percentage": 23.99, "elapsed_time": "4:48:53", "remaining_time": "15:15:10"}
109
- {"current_steps": 910, "total_steps": 3751, "loss": 0.2886, "lr": 9.394918094732044e-06, "epoch": 0.24259793058633386, "percentage": 24.26, "elapsed_time": "4:51:19", "remaining_time": "15:09:31"}
110
- {"current_steps": 920, "total_steps": 3751, "loss": 0.2839, "lr": 9.37253431568332e-06, "epoch": 0.24526384191145842, "percentage": 24.53, "elapsed_time": "4:53:37", "remaining_time": "15:03:33"}
111
- {"current_steps": 930, "total_steps": 3751, "loss": 0.2893, "lr": 9.349771673557966e-06, "epoch": 0.24792975323658298, "percentage": 24.79, "elapsed_time": "4:55:57", "remaining_time": "14:57:44"}
112
- {"current_steps": 940, "total_steps": 3751, "loss": 0.289, "lr": 9.326632140650311e-06, "epoch": 0.2505956645617075, "percentage": 25.06, "elapsed_time": "4:58:16", "remaining_time": "14:51:58"}
113
- {"current_steps": 950, "total_steps": 3751, "loss": 0.2848, "lr": 9.303117721910801e-06, "epoch": 0.25326157588683207, "percentage": 25.33, "elapsed_time": "5:00:36", "remaining_time": "14:46:18"}
114
- {"current_steps": 950, "total_steps": 3751, "eval_loss": 0.28279709815979004, "epoch": 0.25326157588683207, "percentage": 25.33, "elapsed_time": "5:04:59", "remaining_time": "14:59:14"}
115
- {"current_steps": 960, "total_steps": 3751, "loss": 0.2745, "lr": 9.279230454772282e-06, "epoch": 0.25592748721195663, "percentage": 25.59, "elapsed_time": "5:07:20", "remaining_time": "14:53:33"}
116
- {"current_steps": 970, "total_steps": 3751, "loss": 0.2772, "lr": 9.25497240897346e-06, "epoch": 0.2585933985370812, "percentage": 25.86, "elapsed_time": "5:09:39", "remaining_time": "14:47:48"}
117
- {"current_steps": 980, "total_steps": 3751, "loss": 0.2804, "lr": 9.23034568637957e-06, "epoch": 0.2612593098622057, "percentage": 26.13, "elapsed_time": "5:11:59", "remaining_time": "14:42:09"}
118
- {"current_steps": 990, "total_steps": 3751, "loss": 0.2869, "lr": 9.205352420800253e-06, "epoch": 0.26392522118733025, "percentage": 26.39, "elapsed_time": "5:14:16", "remaining_time": "14:36:29"}
119
- {"current_steps": 1000, "total_steps": 3751, "loss": 0.2896, "lr": 9.179994777804677e-06, "epoch": 0.2665911325124548, "percentage": 26.66, "elapsed_time": "5:16:35", "remaining_time": "14:30:56"}
120
- {"current_steps": 1000, "total_steps": 3751, "eval_loss": 0.2778474688529968, "epoch": 0.2665911325124548, "percentage": 26.66, "elapsed_time": "5:20:58", "remaining_time": "14:43:01"}
121
- {"current_steps": 1010, "total_steps": 3751, "loss": 0.2851, "lr": 9.154274954533895e-06, "epoch": 0.26925704383757937, "percentage": 26.93, "elapsed_time": "5:24:32", "remaining_time": "14:40:45"}
122
- {"current_steps": 1020, "total_steps": 3751, "loss": 0.2791, "lr": 9.128195179510466e-06, "epoch": 0.2719229551627039, "percentage": 27.19, "elapsed_time": "5:26:53", "remaining_time": "14:35:15"}
123
- {"current_steps": 1030, "total_steps": 3751, "loss": 0.275, "lr": 9.101757712445369e-06, "epoch": 0.27458886648782843, "percentage": 27.46, "elapsed_time": "5:29:14", "remaining_time": "14:29:46"}
124
- {"current_steps": 1040, "total_steps": 3751, "loss": 0.274, "lr": 9.07496484404221e-06, "epoch": 0.277254777812953, "percentage": 27.73, "elapsed_time": "5:31:34", "remaining_time": "14:24:19"}
125
- {"current_steps": 1050, "total_steps": 3751, "loss": 0.2791, "lr": 9.04781889579873e-06, "epoch": 0.27992068913807755, "percentage": 27.99, "elapsed_time": "5:33:52", "remaining_time": "14:18:51"}
126
- {"current_steps": 1050, "total_steps": 3751, "eval_loss": 0.2756091356277466, "epoch": 0.27992068913807755, "percentage": 27.99, "elapsed_time": "5:38:16", "remaining_time": "14:30:09"}
127
- {"current_steps": 1060, "total_steps": 3751, "loss": 0.2797, "lr": 9.020322219805674e-06, "epoch": 0.2825866004632021, "percentage": 28.26, "elapsed_time": "5:40:39", "remaining_time": "14:24:48"}
128
- {"current_steps": 1070, "total_steps": 3751, "loss": 0.2827, "lr": 8.99247719854297e-06, "epoch": 0.28525251178832667, "percentage": 28.53, "elapsed_time": "5:43:00", "remaining_time": "14:19:27"}
129
- {"current_steps": 1080, "total_steps": 3751, "loss": 0.2789, "lr": 8.964286244673315e-06, "epoch": 0.28791842311345117, "percentage": 28.79, "elapsed_time": "5:45:18", "remaining_time": "14:13:59"}
130
- {"current_steps": 1090, "total_steps": 3751, "loss": 0.2709, "lr": 8.935751800833117e-06, "epoch": 0.29058433443857573, "percentage": 29.06, "elapsed_time": "5:47:36", "remaining_time": "14:08:35"}
131
- {"current_steps": 1100, "total_steps": 3751, "loss": 0.2666, "lr": 8.906876339420851e-06, "epoch": 0.2932502457637003, "percentage": 29.33, "elapsed_time": "5:49:56", "remaining_time": "14:03:22"}
132
- {"current_steps": 1100, "total_steps": 3751, "eval_loss": 0.27202168107032776, "epoch": 0.2932502457637003, "percentage": 29.33, "elapsed_time": "5:54:20", "remaining_time": "14:13:57"}
133
- {"current_steps": 1110, "total_steps": 3751, "loss": 0.2735, "lr": 8.877662362382844e-06, "epoch": 0.29591615708882485, "percentage": 29.59, "elapsed_time": "5:56:39", "remaining_time": "14:08:34"}
134
- {"current_steps": 1120, "total_steps": 3751, "loss": 0.268, "lr": 8.848112400996473e-06, "epoch": 0.2985820684139494, "percentage": 29.86, "elapsed_time": "5:59:03", "remaining_time": "14:03:27"}
135
- {"current_steps": 1130, "total_steps": 3751, "loss": 0.2735, "lr": 8.818229015650862e-06, "epoch": 0.3012479797390739, "percentage": 30.13, "elapsed_time": "6:01:24", "remaining_time": "13:58:16"}
136
- {"current_steps": 1140, "total_steps": 3751, "loss": 0.2769, "lr": 8.788014795625018e-06, "epoch": 0.30391389106419847, "percentage": 30.39, "elapsed_time": "6:03:44", "remaining_time": "13:53:06"}
137
- {"current_steps": 1150, "total_steps": 3751, "loss": 0.2695, "lr": 8.757472358863481e-06, "epoch": 0.30657980238932303, "percentage": 30.66, "elapsed_time": "6:06:03", "remaining_time": "13:47:55"}
138
- {"current_steps": 1150, "total_steps": 3751, "eval_loss": 0.2705162465572357, "epoch": 0.30657980238932303, "percentage": 30.66, "elapsed_time": "6:10:27", "remaining_time": "13:57:51"}
139
- {"current_steps": 1160, "total_steps": 3751, "loss": 0.267, "lr": 8.726604351749503e-06, "epoch": 0.3092457137144476, "percentage": 30.93, "elapsed_time": "6:12:45", "remaining_time": "13:52:35"}
140
- {"current_steps": 1170, "total_steps": 3751, "loss": 0.277, "lr": 8.69541344887573e-06, "epoch": 0.3119116250395721, "percentage": 31.19, "elapsed_time": "6:15:04", "remaining_time": "13:47:25"}
141
- {"current_steps": 1180, "total_steps": 3751, "loss": 0.2693, "lr": 8.66390235281248e-06, "epoch": 0.31457753636469665, "percentage": 31.46, "elapsed_time": "6:17:21", "remaining_time": "13:42:10"}
142
- {"current_steps": 1190, "total_steps": 3751, "loss": 0.2715, "lr": 8.632073793873548e-06, "epoch": 0.3172434476898212, "percentage": 31.72, "elapsed_time": "6:19:44", "remaining_time": "13:37:13"}
143
- {"current_steps": 1200, "total_steps": 3751, "loss": 0.2763, "lr": 8.599930529879669e-06, "epoch": 0.31990935901494577, "percentage": 31.99, "elapsed_time": "6:22:08", "remaining_time": "13:32:23"}
144
- {"current_steps": 1200, "total_steps": 3751, "eval_loss": 0.2758665978908539, "epoch": 0.31990935901494577, "percentage": 31.99, "elapsed_time": "6:26:32", "remaining_time": "13:41:43"}
145
- {"current_steps": 1210, "total_steps": 3751, "loss": 0.2637, "lr": 8.567475345919532e-06, "epoch": 0.32257527034007033, "percentage": 32.26, "elapsed_time": "6:28:54", "remaining_time": "13:36:42"}
146
- {"current_steps": 1220, "total_steps": 3751, "loss": 0.2734, "lr": 8.534711054108487e-06, "epoch": 0.32524118166519483, "percentage": 32.52, "elapsed_time": "6:31:13", "remaining_time": "13:31:37"}
147
- {"current_steps": 1230, "total_steps": 3751, "loss": 0.2642, "lr": 8.501640493344866e-06, "epoch": 0.3279070929903194, "percentage": 32.79, "elapsed_time": "6:33:29", "remaining_time": "13:26:29"}
148
- {"current_steps": 1240, "total_steps": 3751, "loss": 0.2625, "lr": 8.468266529064025e-06, "epoch": 0.33057300431544395, "percentage": 33.06, "elapsed_time": "6:35:44", "remaining_time": "13:21:23"}
149
- {"current_steps": 1250, "total_steps": 3751, "loss": 0.268, "lr": 8.434592052990044e-06, "epoch": 0.3332389156405685, "percentage": 33.32, "elapsed_time": "6:38:01", "remaining_time": "13:16:22"}
150
- {"current_steps": 1250, "total_steps": 3751, "eval_loss": 0.26985007524490356, "epoch": 0.3332389156405685, "percentage": 33.32, "elapsed_time": "6:42:24", "remaining_time": "13:25:09"}
151
- {"current_steps": 1260, "total_steps": 3751, "loss": 0.2719, "lr": 8.400619982885183e-06, "epoch": 0.33590482696569307, "percentage": 33.59, "elapsed_time": "6:44:42", "remaining_time": "13:20:06"}
152
- {"current_steps": 1270, "total_steps": 3751, "loss": 0.2698, "lr": 8.366353262297069e-06, "epoch": 0.3385707382908176, "percentage": 33.86, "elapsed_time": "6:47:03", "remaining_time": "13:15:11"}
153
- {"current_steps": 1280, "total_steps": 3751, "loss": 0.2792, "lr": 8.331794860303644e-06, "epoch": 0.34123664961594213, "percentage": 34.12, "elapsed_time": "6:49:21", "remaining_time": "13:10:15"}
154
- {"current_steps": 1290, "total_steps": 3751, "loss": 0.2628, "lr": 8.296947771255905e-06, "epoch": 0.3439025609410667, "percentage": 34.39, "elapsed_time": "6:51:38", "remaining_time": "13:05:19"}
155
- {"current_steps": 1300, "total_steps": 3751, "loss": 0.2694, "lr": 8.261815014518465e-06, "epoch": 0.34656847226619125, "percentage": 34.66, "elapsed_time": "6:53:57", "remaining_time": "13:00:27"}
156
- {"current_steps": 1300, "total_steps": 3751, "eval_loss": 0.2712825834751129, "epoch": 0.34656847226619125, "percentage": 34.66, "elapsed_time": "6:58:20", "remaining_time": "13:08:44"}
157
- {"current_steps": 1310, "total_steps": 3751, "loss": 0.2722, "lr": 8.226399634207929e-06, "epoch": 0.3492343835913158, "percentage": 34.92, "elapsed_time": "7:00:41", "remaining_time": "13:03:53"}
158
- {"current_steps": 1320, "total_steps": 3751, "loss": 0.2645, "lr": 8.190704698929128e-06, "epoch": 0.3519002949164403, "percentage": 35.19, "elapsed_time": "7:03:00", "remaining_time": "12:59:01"}
159
- {"current_steps": 1330, "total_steps": 3751, "loss": 0.2604, "lr": 8.154733301509249e-06, "epoch": 0.3545662062415649, "percentage": 35.46, "elapsed_time": "7:05:18", "remaining_time": "12:54:11"}
160
- {"current_steps": 1340, "total_steps": 3751, "loss": 0.2671, "lr": 8.118488558729846e-06, "epoch": 0.35723211756668943, "percentage": 35.72, "elapsed_time": "7:07:41", "remaining_time": "12:49:31"}
161
- {"current_steps": 1350, "total_steps": 3751, "loss": 0.2674, "lr": 8.081973611056784e-06, "epoch": 0.359898028891814, "percentage": 35.99, "elapsed_time": "7:10:02", "remaining_time": "12:44:50"}
162
- {"current_steps": 1350, "total_steps": 3751, "eval_loss": 0.2661799490451813, "epoch": 0.359898028891814, "percentage": 35.99, "elapsed_time": "7:14:26", "remaining_time": "12:52:39"}
163
- {"current_steps": 1360, "total_steps": 3751, "loss": 0.261, "lr": 8.045191622368128e-06, "epoch": 0.36256394021693855, "percentage": 36.26, "elapsed_time": "7:16:44", "remaining_time": "12:47:49"}
164
- {"current_steps": 1370, "total_steps": 3751, "loss": 0.2642, "lr": 8.008145779680011e-06, "epoch": 0.36522985154206306, "percentage": 36.52, "elapsed_time": "7:19:04", "remaining_time": "12:43:06"}
165
- {"current_steps": 1380, "total_steps": 3751, "loss": 0.2667, "lr": 7.970839292870488e-06, "epoch": 0.3678957628671876, "percentage": 36.79, "elapsed_time": "7:21:20", "remaining_time": "12:38:16"}
166
- {"current_steps": 1390, "total_steps": 3751, "loss": 0.2561, "lr": 7.933275394401407e-06, "epoch": 0.3705616741923122, "percentage": 37.06, "elapsed_time": "7:23:38", "remaining_time": "12:33:33"}
167
- {"current_steps": 1400, "total_steps": 3751, "loss": 0.2668, "lr": 7.89545733903834e-06, "epoch": 0.37322758551743673, "percentage": 37.32, "elapsed_time": "7:25:55", "remaining_time": "12:28:49"}
168
- {"current_steps": 1400, "total_steps": 3751, "eval_loss": 0.26635491847991943, "epoch": 0.37322758551743673, "percentage": 37.32, "elapsed_time": "7:30:18", "remaining_time": "12:36:12"}
169
- {"current_steps": 1410, "total_steps": 3751, "loss": 0.2547, "lr": 7.857388403568564e-06, "epoch": 0.3758934968425613, "percentage": 37.59, "elapsed_time": "7:32:38", "remaining_time": "12:31:31"}
170
- {"current_steps": 1420, "total_steps": 3751, "loss": 0.2646, "lr": 7.819071886517134e-06, "epoch": 0.3785594081676858, "percentage": 37.86, "elapsed_time": "7:34:57", "remaining_time": "12:26:50"}
171
- {"current_steps": 1430, "total_steps": 3751, "loss": 0.2579, "lr": 7.780511107861095e-06, "epoch": 0.38122531949281036, "percentage": 38.12, "elapsed_time": "7:37:14", "remaining_time": "12:22:07"}
172
- {"current_steps": 1440, "total_steps": 3751, "loss": 0.2527, "lr": 7.741709408741804e-06, "epoch": 0.3838912308179349, "percentage": 38.39, "elapsed_time": "7:39:32", "remaining_time": "12:17:29"}
173
- {"current_steps": 1450, "total_steps": 3751, "loss": 0.262, "lr": 7.702670151175435e-06, "epoch": 0.3865571421430595, "percentage": 38.66, "elapsed_time": "7:41:52", "remaining_time": "12:12:56"}
174
- {"current_steps": 1450, "total_steps": 3751, "eval_loss": 0.269025057554245, "epoch": 0.3865571421430595, "percentage": 38.66, "elapsed_time": "7:46:15", "remaining_time": "12:19:54"}
175
- {"current_steps": 1460, "total_steps": 3751, "loss": 0.2603, "lr": 7.663396717761687e-06, "epoch": 0.38922305346818403, "percentage": 38.92, "elapsed_time": "7:48:36", "remaining_time": "12:15:19"}
176
- {"current_steps": 1470, "total_steps": 3751, "loss": 0.2588, "lr": 7.6238925113906715e-06, "epoch": 0.39188896479330854, "percentage": 39.19, "elapsed_time": "7:50:52", "remaining_time": "12:10:39"}
177
- {"current_steps": 1480, "total_steps": 3751, "loss": 0.2687, "lr": 7.5841609549480854e-06, "epoch": 0.3945548761184331, "percentage": 39.46, "elapsed_time": "7:53:09", "remaining_time": "12:06:02"}
178
- {"current_steps": 1490, "total_steps": 3751, "loss": 0.2524, "lr": 7.544205491018626e-06, "epoch": 0.39722078744355765, "percentage": 39.72, "elapsed_time": "7:55:31", "remaining_time": "12:01:35"}
179
- {"current_steps": 1500, "total_steps": 3751, "loss": 0.2481, "lr": 7.5040295815877e-06, "epoch": 0.3998866987686822, "percentage": 39.99, "elapsed_time": "7:57:51", "remaining_time": "11:57:06"}
180
- {"current_steps": 1500, "total_steps": 3751, "eval_loss": 0.25703608989715576, "epoch": 0.3998866987686822, "percentage": 39.99, "elapsed_time": "8:02:14", "remaining_time": "12:03:41"}
 
1
+ {"current_steps": 10, "total_steps": 3751, "loss": 0.5957, "lr": 2.6595744680851066e-07, "epoch": 0.002665911325124548, "percentage": 0.27, "elapsed_time": "0:02:24", "remaining_time": "14:58:16"}
2
+ {"current_steps": 20, "total_steps": 3751, "loss": 0.5163, "lr": 5.319148936170213e-07, "epoch": 0.005331822650249096, "percentage": 0.53, "elapsed_time": "0:04:37", "remaining_time": "14:23:41"}
3
+ {"current_steps": 30, "total_steps": 3751, "loss": 0.4334, "lr": 7.97872340425532e-07, "epoch": 0.007997733975373645, "percentage": 0.8, "elapsed_time": "0:07:00", "remaining_time": "14:30:12"}
4
+ {"current_steps": 40, "total_steps": 3751, "loss": 0.3824, "lr": 1.0638297872340427e-06, "epoch": 0.010663645300498192, "percentage": 1.07, "elapsed_time": "0:09:17", "remaining_time": "14:21:59"}
5
+ {"current_steps": 50, "total_steps": 3751, "loss": 0.3821, "lr": 1.3297872340425533e-06, "epoch": 0.01332955662562274, "percentage": 1.33, "elapsed_time": "0:11:36", "remaining_time": "14:19:22"}
6
+ {"current_steps": 50, "total_steps": 3751, "eval_loss": 0.4735161364078522, "epoch": 0.01332955662562274, "percentage": 1.33, "elapsed_time": "0:15:59", "remaining_time": "19:43:59"}
7
+ {"current_steps": 60, "total_steps": 3751, "loss": 0.3431, "lr": 1.595744680851064e-06, "epoch": 0.01599546795074729, "percentage": 1.6, "elapsed_time": "0:18:18", "remaining_time": "18:45:49"}
8
+ {"current_steps": 70, "total_steps": 3751, "loss": 0.3327, "lr": 1.8617021276595745e-06, "epoch": 0.018661379275871838, "percentage": 1.87, "elapsed_time": "0:20:36", "remaining_time": "18:03:51"}
9
+ {"current_steps": 80, "total_steps": 3751, "loss": 0.3079, "lr": 2.1276595744680853e-06, "epoch": 0.021327290600996383, "percentage": 2.13, "elapsed_time": "0:22:54", "remaining_time": "17:31:07"}
10
+ {"current_steps": 90, "total_steps": 3751, "loss": 0.306, "lr": 2.393617021276596e-06, "epoch": 0.023993201926120932, "percentage": 2.4, "elapsed_time": "0:25:15", "remaining_time": "17:07:34"}
11
+ {"current_steps": 100, "total_steps": 3751, "loss": 0.302, "lr": 2.6595744680851065e-06, "epoch": 0.02665911325124548, "percentage": 2.67, "elapsed_time": "0:27:33", "remaining_time": "16:46:04"}
12
+ {"current_steps": 100, "total_steps": 3751, "eval_loss": 0.3177907168865204, "epoch": 0.02665911325124548, "percentage": 2.67, "elapsed_time": "0:31:56", "remaining_time": "19:26:13"}
13
+ {"current_steps": 110, "total_steps": 3751, "loss": 0.2978, "lr": 2.9255319148936174e-06, "epoch": 0.02932502457637003, "percentage": 2.93, "elapsed_time": "0:34:14", "remaining_time": "18:53:34"}
14
+ {"current_steps": 120, "total_steps": 3751, "loss": 0.3046, "lr": 3.191489361702128e-06, "epoch": 0.03199093590149458, "percentage": 3.2, "elapsed_time": "0:36:33", "remaining_time": "18:26:07"}
15
+ {"current_steps": 130, "total_steps": 3751, "loss": 0.3074, "lr": 3.457446808510639e-06, "epoch": 0.034656847226619124, "percentage": 3.47, "elapsed_time": "0:38:49", "remaining_time": "18:01:18"}
16
+ {"current_steps": 140, "total_steps": 3751, "loss": 0.3147, "lr": 3.723404255319149e-06, "epoch": 0.037322758551743676, "percentage": 3.73, "elapsed_time": "0:41:07", "remaining_time": "17:40:42"}
17
+ {"current_steps": 150, "total_steps": 3751, "loss": 0.2988, "lr": 3.98936170212766e-06, "epoch": 0.03998866987686822, "percentage": 4.0, "elapsed_time": "0:43:25", "remaining_time": "17:22:41"}
18
+ {"current_steps": 150, "total_steps": 3751, "eval_loss": 0.32532769441604614, "epoch": 0.03998866987686822, "percentage": 4.0, "elapsed_time": "0:47:48", "remaining_time": "19:07:55"}
19
+ {"current_steps": 160, "total_steps": 3751, "loss": 0.3049, "lr": 4.255319148936171e-06, "epoch": 0.04265458120199277, "percentage": 4.27, "elapsed_time": "0:50:09", "remaining_time": "18:45:35"}
20
+ {"current_steps": 170, "total_steps": 3751, "loss": 0.3077, "lr": 4.521276595744681e-06, "epoch": 0.04532049252711732, "percentage": 4.53, "elapsed_time": "0:52:26", "remaining_time": "18:24:31"}
21
+ {"current_steps": 180, "total_steps": 3751, "loss": 0.2957, "lr": 4.787234042553192e-06, "epoch": 0.047986403852241864, "percentage": 4.8, "elapsed_time": "0:54:46", "remaining_time": "18:06:31"}
22
+ {"current_steps": 190, "total_steps": 3751, "loss": 0.2993, "lr": 5.053191489361703e-06, "epoch": 0.05065231517736641, "percentage": 5.07, "elapsed_time": "0:57:06", "remaining_time": "17:50:12"}
23
+ {"current_steps": 200, "total_steps": 3751, "loss": 0.3054, "lr": 5.319148936170213e-06, "epoch": 0.05331822650249096, "percentage": 5.33, "elapsed_time": "0:59:28", "remaining_time": "17:36:07"}
24
+ {"current_steps": 200, "total_steps": 3751, "eval_loss": 0.3250272572040558, "epoch": 0.05331822650249096, "percentage": 5.33, "elapsed_time": "1:03:52", "remaining_time": "18:53:58"}
25
+ {"current_steps": 210, "total_steps": 3751, "loss": 0.3117, "lr": 5.5851063829787235e-06, "epoch": 0.05598413782761551, "percentage": 5.6, "elapsed_time": "1:06:05", "remaining_time": "18:34:26"}
26
+ {"current_steps": 220, "total_steps": 3751, "loss": 0.3102, "lr": 5.851063829787235e-06, "epoch": 0.05865004915274006, "percentage": 5.87, "elapsed_time": "1:08:24", "remaining_time": "18:17:54"}
27
+ {"current_steps": 230, "total_steps": 3751, "loss": 0.3096, "lr": 6.117021276595745e-06, "epoch": 0.061315960477864605, "percentage": 6.13, "elapsed_time": "1:10:43", "remaining_time": "18:02:42"}
28
+ {"current_steps": 240, "total_steps": 3751, "loss": 0.3046, "lr": 6.382978723404256e-06, "epoch": 0.06398187180298916, "percentage": 6.4, "elapsed_time": "1:13:00", "remaining_time": "17:47:59"}
29
+ {"current_steps": 250, "total_steps": 3751, "loss": 0.2967, "lr": 6.648936170212767e-06, "epoch": 0.0666477831281137, "percentage": 6.66, "elapsed_time": "1:15:20", "remaining_time": "17:35:08"}
30
+ {"current_steps": 250, "total_steps": 3751, "eval_loss": 0.3232134282588959, "epoch": 0.0666477831281137, "percentage": 6.66, "elapsed_time": "1:19:43", "remaining_time": "18:36:31"}
31
+ {"current_steps": 260, "total_steps": 3751, "loss": 0.3112, "lr": 6.914893617021278e-06, "epoch": 0.06931369445323825, "percentage": 6.93, "elapsed_time": "1:22:01", "remaining_time": "18:21:19"}
32
+ {"current_steps": 270, "total_steps": 3751, "loss": 0.314, "lr": 7.1808510638297875e-06, "epoch": 0.07197960577836279, "percentage": 7.2, "elapsed_time": "1:24:18", "remaining_time": "18:06:57"}
33
+ {"current_steps": 280, "total_steps": 3751, "loss": 0.3041, "lr": 7.446808510638298e-06, "epoch": 0.07464551710348735, "percentage": 7.46, "elapsed_time": "1:26:38", "remaining_time": "17:54:04"}
34
+ {"current_steps": 290, "total_steps": 3751, "loss": 0.3136, "lr": 7.71276595744681e-06, "epoch": 0.0773114284286119, "percentage": 7.73, "elapsed_time": "1:28:59", "remaining_time": "17:42:08"}
35
+ {"current_steps": 300, "total_steps": 3751, "loss": 0.3137, "lr": 7.97872340425532e-06, "epoch": 0.07997733975373644, "percentage": 8.0, "elapsed_time": "1:31:19", "remaining_time": "17:30:30"}
36
+ {"current_steps": 300, "total_steps": 3751, "eval_loss": 0.3207298815250397, "epoch": 0.07997733975373644, "percentage": 8.0, "elapsed_time": "1:35:42", "remaining_time": "18:20:55"}
37
+ {"current_steps": 310, "total_steps": 3751, "loss": 0.3092, "lr": 8.24468085106383e-06, "epoch": 0.08264325107886099, "percentage": 8.26, "elapsed_time": "1:37:58", "remaining_time": "18:07:26"}
38
+ {"current_steps": 320, "total_steps": 3751, "loss": 0.3255, "lr": 8.510638297872341e-06, "epoch": 0.08530916240398553, "percentage": 8.53, "elapsed_time": "1:40:14", "remaining_time": "17:54:48"}
39
+ {"current_steps": 330, "total_steps": 3751, "loss": 0.3219, "lr": 8.776595744680852e-06, "epoch": 0.08797507372911008, "percentage": 8.8, "elapsed_time": "1:42:34", "remaining_time": "17:43:19"}
40
+ {"current_steps": 340, "total_steps": 3751, "loss": 0.3093, "lr": 9.042553191489362e-06, "epoch": 0.09064098505423464, "percentage": 9.06, "elapsed_time": "1:44:51", "remaining_time": "17:32:01"}
41
+ {"current_steps": 350, "total_steps": 3751, "loss": 0.3221, "lr": 9.308510638297872e-06, "epoch": 0.09330689637935918, "percentage": 9.33, "elapsed_time": "1:47:12", "remaining_time": "17:21:41"}
42
+ {"current_steps": 350, "total_steps": 3751, "eval_loss": 0.3211060166358948, "epoch": 0.09330689637935918, "percentage": 9.33, "elapsed_time": "1:51:35", "remaining_time": "18:04:16"}
43
+ {"current_steps": 360, "total_steps": 3751, "loss": 0.3209, "lr": 9.574468085106385e-06, "epoch": 0.09597280770448373, "percentage": 9.6, "elapsed_time": "1:53:50", "remaining_time": "17:52:15"}
44
+ {"current_steps": 370, "total_steps": 3751, "loss": 0.3188, "lr": 9.840425531914895e-06, "epoch": 0.09863871902960827, "percentage": 9.86, "elapsed_time": "1:56:12", "remaining_time": "17:41:53"}
45
+ {"current_steps": 380, "total_steps": 3751, "loss": 0.3202, "lr": 9.999965341346946e-06, "epoch": 0.10130463035473282, "percentage": 10.13, "elapsed_time": "1:58:33", "remaining_time": "17:31:41"}
46
+ {"current_steps": 390, "total_steps": 3751, "loss": 0.3143, "lr": 9.999575437018172e-06, "epoch": 0.10397054167985738, "percentage": 10.4, "elapsed_time": "2:00:56", "remaining_time": "17:22:19"}
47
+ {"current_steps": 400, "total_steps": 3751, "loss": 0.3188, "lr": 9.998752338940612e-06, "epoch": 0.10663645300498192, "percentage": 10.66, "elapsed_time": "2:03:18", "remaining_time": "17:13:01"}
48
+ {"current_steps": 400, "total_steps": 3751, "eval_loss": 0.3204084634780884, "epoch": 0.10663645300498192, "percentage": 10.66, "elapsed_time": "2:07:41", "remaining_time": "17:49:45"}
49
+ {"current_steps": 410, "total_steps": 3751, "loss": 0.3026, "lr": 9.997496118432509e-06, "epoch": 0.10930236433010647, "percentage": 10.93, "elapsed_time": "2:09:59", "remaining_time": "17:39:15"}
50
+ {"current_steps": 420, "total_steps": 3751, "loss": 0.3192, "lr": 9.995806884340483e-06, "epoch": 0.11196827565523101, "percentage": 11.2, "elapsed_time": "2:12:16", "remaining_time": "17:29:02"}
51
+ {"current_steps": 430, "total_steps": 3751, "loss": 0.3143, "lr": 9.99368478303009e-06, "epoch": 0.11463418698035556, "percentage": 11.46, "elapsed_time": "2:14:36", "remaining_time": "17:19:33"}
52
+ {"current_steps": 440, "total_steps": 3751, "loss": 0.3189, "lr": 9.991129998373145e-06, "epoch": 0.11730009830548012, "percentage": 11.73, "elapsed_time": "2:16:56", "remaining_time": "17:10:27"}
53
+ {"current_steps": 450, "total_steps": 3751, "loss": 0.308, "lr": 9.988142751731797e-06, "epoch": 0.11996600963060466, "percentage": 12.0, "elapsed_time": "2:19:14", "remaining_time": "17:01:22"}
54
+ {"current_steps": 450, "total_steps": 3751, "eval_loss": 0.31486886739730835, "epoch": 0.11996600963060466, "percentage": 12.0, "elapsed_time": "2:23:37", "remaining_time": "17:33:31"}
55
+ {"current_steps": 460, "total_steps": 3751, "loss": 0.3172, "lr": 9.984723301939337e-06, "epoch": 0.12263192095572921, "percentage": 12.26, "elapsed_time": "2:25:55", "remaining_time": "17:24:02"}
56
+ {"current_steps": 470, "total_steps": 3751, "loss": 0.3147, "lr": 9.980871945277777e-06, "epoch": 0.12529783228085375, "percentage": 12.53, "elapsed_time": "2:28:14", "remaining_time": "17:14:49"}
57
+ {"current_steps": 480, "total_steps": 3751, "loss": 0.3211, "lr": 9.976589015452178e-06, "epoch": 0.12796374360597831, "percentage": 12.8, "elapsed_time": "2:30:32", "remaining_time": "17:05:51"}
58
+ {"current_steps": 490, "total_steps": 3751, "loss": 0.3109, "lr": 9.97187488356174e-06, "epoch": 0.13062965493110285, "percentage": 13.06, "elapsed_time": "2:32:51", "remaining_time": "16:57:15"}
59
+ {"current_steps": 500, "total_steps": 3751, "loss": 0.3123, "lr": 9.966729958067638e-06, "epoch": 0.1332955662562274, "percentage": 13.33, "elapsed_time": "2:35:08", "remaining_time": "16:48:43"}
60
+ {"current_steps": 500, "total_steps": 3751, "eval_loss": 0.3106406331062317, "epoch": 0.1332955662562274, "percentage": 13.33, "elapsed_time": "2:39:31", "remaining_time": "17:17:13"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:190b515c198fa058ac40fafe72169d2e3fb3ded866a00f99421114fe2da47712
3
  size 7544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22ebf1c25989dc4abd15cd7d7c48b7dd4e74040858c521200ef3704895f769e7
3
  size 7544