izzcw commited on
Commit
bf7419a
·
verified ·
1 Parent(s): 0a16aa8

Training in progress, step 1500

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28e1eba4924ea6f1cc1fed1146d78da8e7c910f953e27b92b7ff18bdb789bb8e
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:523a0067116a19088da5083c6b3f3a8df3104711862192248d8b08a77371f78b
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:954a4a2421b5463efa1257e415a8625e24f4ddb57a1de1494b5b8429901c2bca
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b236534f44f4136a68e8c9f4ce32206afde6eaff511932aa255370858aa37dd4
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e961290765389b32679e23f4d0a1a25ef2fdd09c5d2e83911438f9d849e0d1c
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b59419c3a2afea91b6a628f7b83cfa0ec5bf2b546ec93d1196e6df90fc2529ef
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cbd8b8f0ca0d977b205a5c45698942a25612652a4585b56689b683e84beebd0
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d819e4ff11fa8f2ad835f05112229473559e21148fd66b84416d97d21dd8834
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -1,180 +1,180 @@
1
- {"current_steps": 10, "total_steps": 4953, "loss": 0.7552, "lr": 2.0161290322580645e-07, "epoch": 0.0020186217859756253, "percentage": 0.2, "elapsed_time": "0:02:21", "remaining_time": "19:23:29"}
2
- {"current_steps": 20, "total_steps": 4953, "loss": 0.6515, "lr": 4.032258064516129e-07, "epoch": 0.0040372435719512505, "percentage": 0.4, "elapsed_time": "0:04:38", "remaining_time": "19:05:06"}
3
- {"current_steps": 30, "total_steps": 4953, "loss": 0.4798, "lr": 6.048387096774194e-07, "epoch": 0.006055865357926875, "percentage": 0.61, "elapsed_time": "0:06:52", "remaining_time": "18:49:22"}
4
- {"current_steps": 40, "total_steps": 4953, "loss": 0.4291, "lr": 8.064516129032258e-07, "epoch": 0.008074487143902501, "percentage": 0.81, "elapsed_time": "0:09:10", "remaining_time": "18:45:55"}
5
- {"current_steps": 50, "total_steps": 4953, "loss": 0.3989, "lr": 1.0080645161290323e-06, "epoch": 0.010093108929878126, "percentage": 1.01, "elapsed_time": "0:11:30", "remaining_time": "18:49:04"}
6
- {"current_steps": 50, "total_steps": 4953, "eval_loss": 0.47881850600242615, "epoch": 0.010093108929878126, "percentage": 1.01, "elapsed_time": "0:17:16", "remaining_time": "1 day, 4:14:03"}
7
- {"current_steps": 60, "total_steps": 4953, "loss": 0.371, "lr": 1.2096774193548388e-06, "epoch": 0.01211173071585375, "percentage": 1.21, "elapsed_time": "0:19:32", "remaining_time": "1 day, 2:34:08"}
8
- {"current_steps": 70, "total_steps": 4953, "loss": 0.3349, "lr": 1.4112903225806455e-06, "epoch": 0.014130352501829375, "percentage": 1.41, "elapsed_time": "0:21:53", "remaining_time": "1 day, 1:27:25"}
9
- {"current_steps": 80, "total_steps": 4953, "loss": 0.338, "lr": 1.6129032258064516e-06, "epoch": 0.016148974287805002, "percentage": 1.62, "elapsed_time": "0:24:11", "remaining_time": "1 day, 0:33:27"}
10
- {"current_steps": 90, "total_steps": 4953, "loss": 0.3292, "lr": 1.8145161290322583e-06, "epoch": 0.018167596073780625, "percentage": 1.82, "elapsed_time": "0:26:30", "remaining_time": "23:52:02"}
11
- {"current_steps": 100, "total_steps": 4953, "loss": 0.3196, "lr": 2.0161290322580646e-06, "epoch": 0.02018621785975625, "percentage": 2.02, "elapsed_time": "0:28:51", "remaining_time": "23:20:18"}
12
- {"current_steps": 100, "total_steps": 4953, "eval_loss": 0.3391382694244385, "epoch": 0.02018621785975625, "percentage": 2.02, "elapsed_time": "0:34:36", "remaining_time": "1 day, 3:59:30"}
13
- {"current_steps": 110, "total_steps": 4953, "loss": 0.3189, "lr": 2.217741935483871e-06, "epoch": 0.022204839645731878, "percentage": 2.22, "elapsed_time": "0:36:53", "remaining_time": "1 day, 3:04:04"}
14
- {"current_steps": 120, "total_steps": 4953, "loss": 0.2938, "lr": 2.4193548387096776e-06, "epoch": 0.0242234614317075, "percentage": 2.42, "elapsed_time": "0:39:09", "remaining_time": "1 day, 2:16:50"}
15
- {"current_steps": 130, "total_steps": 4953, "loss": 0.3257, "lr": 2.620967741935484e-06, "epoch": 0.026242083217683128, "percentage": 2.62, "elapsed_time": "0:41:26", "remaining_time": "1 day, 1:37:40"}
16
- {"current_steps": 140, "total_steps": 4953, "loss": 0.3025, "lr": 2.822580645161291e-06, "epoch": 0.02826070500365875, "percentage": 2.83, "elapsed_time": "0:43:48", "remaining_time": "1 day, 1:06:17"}
17
- {"current_steps": 150, "total_steps": 4953, "loss": 0.3058, "lr": 3.024193548387097e-06, "epoch": 0.030279326789634377, "percentage": 3.03, "elapsed_time": "0:46:09", "remaining_time": "1 day, 0:38:07"}
18
- {"current_steps": 150, "total_steps": 4953, "eval_loss": 0.3168211281299591, "epoch": 0.030279326789634377, "percentage": 3.03, "elapsed_time": "0:51:54", "remaining_time": "1 day, 3:42:18"}
19
- {"current_steps": 160, "total_steps": 4953, "loss": 0.3154, "lr": 3.225806451612903e-06, "epoch": 0.032297948575610004, "percentage": 3.23, "elapsed_time": "0:54:15", "remaining_time": "1 day, 3:05:19"}
20
- {"current_steps": 170, "total_steps": 4953, "loss": 0.3215, "lr": 3.4274193548387097e-06, "epoch": 0.03431657036158563, "percentage": 3.43, "elapsed_time": "0:56:35", "remaining_time": "1 day, 2:32:16"}
21
- {"current_steps": 180, "total_steps": 4953, "loss": 0.2946, "lr": 3.6290322580645166e-06, "epoch": 0.03633519214756125, "percentage": 3.63, "elapsed_time": "0:58:56", "remaining_time": "1 day, 2:02:51"}
22
- {"current_steps": 190, "total_steps": 4953, "loss": 0.2977, "lr": 3.830645161290323e-06, "epoch": 0.03835381393353688, "percentage": 3.84, "elapsed_time": "1:01:11", "remaining_time": "1 day, 1:33:58"}
23
- {"current_steps": 200, "total_steps": 4953, "loss": 0.3127, "lr": 4.032258064516129e-06, "epoch": 0.0403724357195125, "percentage": 4.04, "elapsed_time": "1:03:30", "remaining_time": "1 day, 1:09:14"}
24
- {"current_steps": 200, "total_steps": 4953, "eval_loss": 0.3119991421699524, "epoch": 0.0403724357195125, "percentage": 4.04, "elapsed_time": "1:09:15", "remaining_time": "1 day, 3:25:57"}
25
- {"current_steps": 210, "total_steps": 4953, "loss": 0.2987, "lr": 4.233870967741936e-06, "epoch": 0.042391057505488126, "percentage": 4.24, "elapsed_time": "1:11:35", "remaining_time": "1 day, 2:56:55"}
26
- {"current_steps": 220, "total_steps": 4953, "loss": 0.3074, "lr": 4.435483870967742e-06, "epoch": 0.044409679291463756, "percentage": 4.44, "elapsed_time": "1:13:53", "remaining_time": "1 day, 2:29:35"}
27
- {"current_steps": 230, "total_steps": 4953, "loss": 0.3099, "lr": 4.637096774193548e-06, "epoch": 0.04642830107743938, "percentage": 4.64, "elapsed_time": "1:16:14", "remaining_time": "1 day, 2:05:43"}
28
- {"current_steps": 240, "total_steps": 4953, "loss": 0.3252, "lr": 4.838709677419355e-06, "epoch": 0.048446922863415, "percentage": 4.85, "elapsed_time": "1:18:33", "remaining_time": "1 day, 1:42:32"}
29
- {"current_steps": 250, "total_steps": 4953, "loss": 0.3182, "lr": 5.040322580645161e-06, "epoch": 0.050465544649390626, "percentage": 5.05, "elapsed_time": "1:20:52", "remaining_time": "1 day, 1:21:31"}
30
- {"current_steps": 250, "total_steps": 4953, "eval_loss": 0.3168998956680298, "epoch": 0.050465544649390626, "percentage": 5.05, "elapsed_time": "1:26:38", "remaining_time": "1 day, 3:09:45"}
31
- {"current_steps": 260, "total_steps": 4953, "loss": 0.3259, "lr": 5.241935483870968e-06, "epoch": 0.052484166435366256, "percentage": 5.25, "elapsed_time": "1:28:56", "remaining_time": "1 day, 2:45:27"}
32
- {"current_steps": 270, "total_steps": 4953, "loss": 0.2974, "lr": 5.443548387096774e-06, "epoch": 0.05450278822134188, "percentage": 5.45, "elapsed_time": "1:31:13", "remaining_time": "1 day, 2:22:15"}
33
- {"current_steps": 280, "total_steps": 4953, "loss": 0.2931, "lr": 5.645161290322582e-06, "epoch": 0.0565214100073175, "percentage": 5.65, "elapsed_time": "1:33:28", "remaining_time": "1 day, 2:00:03"}
34
- {"current_steps": 290, "total_steps": 4953, "loss": 0.313, "lr": 5.846774193548388e-06, "epoch": 0.05854003179329313, "percentage": 5.86, "elapsed_time": "1:35:43", "remaining_time": "1 day, 1:39:09"}
35
- {"current_steps": 300, "total_steps": 4953, "loss": 0.305, "lr": 6.048387096774194e-06, "epoch": 0.060558653579268755, "percentage": 6.06, "elapsed_time": "1:37:58", "remaining_time": "1 day, 1:19:42"}
36
- {"current_steps": 300, "total_steps": 4953, "eval_loss": 0.3143216371536255, "epoch": 0.060558653579268755, "percentage": 6.06, "elapsed_time": "1:43:44", "remaining_time": "1 day, 2:48:56"}
37
- {"current_steps": 310, "total_steps": 4953, "loss": 0.3124, "lr": 6.25e-06, "epoch": 0.06257727536524438, "percentage": 6.26, "elapsed_time": "1:46:00", "remaining_time": "1 day, 2:27:46"}
38
- {"current_steps": 320, "total_steps": 4953, "loss": 0.3218, "lr": 6.451612903225806e-06, "epoch": 0.06459589715122001, "percentage": 6.46, "elapsed_time": "1:48:16", "remaining_time": "1 day, 2:07:40"}
39
- {"current_steps": 330, "total_steps": 4953, "loss": 0.3037, "lr": 6.653225806451613e-06, "epoch": 0.06661451893719562, "percentage": 6.66, "elapsed_time": "1:50:34", "remaining_time": "1 day, 1:48:59"}
40
- {"current_steps": 340, "total_steps": 4953, "loss": 0.3258, "lr": 6.854838709677419e-06, "epoch": 0.06863314072317125, "percentage": 6.86, "elapsed_time": "1:52:55", "remaining_time": "1 day, 1:32:12"}
41
- {"current_steps": 350, "total_steps": 4953, "loss": 0.3213, "lr": 7.056451612903227e-06, "epoch": 0.07065176250914688, "percentage": 7.07, "elapsed_time": "1:55:13", "remaining_time": "1 day, 1:15:27"}
42
- {"current_steps": 350, "total_steps": 4953, "eval_loss": 0.319449782371521, "epoch": 0.07065176250914688, "percentage": 7.07, "elapsed_time": "2:00:59", "remaining_time": "1 day, 2:31:07"}
43
- {"current_steps": 360, "total_steps": 4953, "loss": 0.3113, "lr": 7.258064516129033e-06, "epoch": 0.0726703842951225, "percentage": 7.27, "elapsed_time": "2:03:15", "remaining_time": "1 day, 2:12:32"}
44
- {"current_steps": 370, "total_steps": 4953, "loss": 0.3169, "lr": 7.459677419354839e-06, "epoch": 0.07468900608109813, "percentage": 7.47, "elapsed_time": "2:05:33", "remaining_time": "1 day, 1:55:17"}
45
- {"current_steps": 380, "total_steps": 4953, "loss": 0.3207, "lr": 7.661290322580646e-06, "epoch": 0.07670762786707376, "percentage": 7.67, "elapsed_time": "2:07:55", "remaining_time": "1 day, 1:39:22"}
46
- {"current_steps": 390, "total_steps": 4953, "loss": 0.3251, "lr": 7.862903225806451e-06, "epoch": 0.07872624965304938, "percentage": 7.87, "elapsed_time": "2:10:09", "remaining_time": "1 day, 1:22:55"}
47
- {"current_steps": 400, "total_steps": 4953, "loss": 0.307, "lr": 8.064516129032258e-06, "epoch": 0.080744871439025, "percentage": 8.08, "elapsed_time": "2:12:29", "remaining_time": "1 day, 1:08:05"}
48
- {"current_steps": 400, "total_steps": 4953, "eval_loss": 0.33380112051963806, "epoch": 0.080744871439025, "percentage": 8.08, "elapsed_time": "2:18:14", "remaining_time": "1 day, 2:13:33"}
49
- {"current_steps": 410, "total_steps": 4953, "loss": 0.313, "lr": 8.266129032258065e-06, "epoch": 0.08276349322500064, "percentage": 8.28, "elapsed_time": "2:20:31", "remaining_time": "1 day, 1:57:00"}
50
- {"current_steps": 420, "total_steps": 4953, "loss": 0.3186, "lr": 8.467741935483872e-06, "epoch": 0.08478211501097625, "percentage": 8.48, "elapsed_time": "2:22:48", "remaining_time": "1 day, 1:41:15"}
51
- {"current_steps": 430, "total_steps": 4953, "loss": 0.3187, "lr": 8.669354838709677e-06, "epoch": 0.08680073679695188, "percentage": 8.68, "elapsed_time": "2:25:05", "remaining_time": "1 day, 1:26:13"}
52
- {"current_steps": 440, "total_steps": 4953, "loss": 0.3213, "lr": 8.870967741935484e-06, "epoch": 0.08881935858292751, "percentage": 8.88, "elapsed_time": "2:27:23", "remaining_time": "1 day, 1:11:49"}
53
- {"current_steps": 450, "total_steps": 4953, "loss": 0.321, "lr": 9.072580645161291e-06, "epoch": 0.09083798036890313, "percentage": 9.09, "elapsed_time": "2:29:42", "remaining_time": "1 day, 0:58:07"}
54
- {"current_steps": 450, "total_steps": 4953, "eval_loss": 0.3182971179485321, "epoch": 0.09083798036890313, "percentage": 9.09, "elapsed_time": "2:35:27", "remaining_time": "1 day, 1:55:41"}
55
- {"current_steps": 460, "total_steps": 4953, "loss": 0.31, "lr": 9.274193548387097e-06, "epoch": 0.09285660215487876, "percentage": 9.29, "elapsed_time": "2:37:43", "remaining_time": "1 day, 1:40:33"}
56
- {"current_steps": 470, "total_steps": 4953, "loss": 0.3261, "lr": 9.475806451612905e-06, "epoch": 0.09487522394085438, "percentage": 9.49, "elapsed_time": "2:40:00", "remaining_time": "1 day, 1:26:12"}
57
- {"current_steps": 480, "total_steps": 4953, "loss": 0.3212, "lr": 9.67741935483871e-06, "epoch": 0.09689384572683, "percentage": 9.69, "elapsed_time": "2:42:19", "remaining_time": "1 day, 1:12:41"}
58
- {"current_steps": 490, "total_steps": 4953, "loss": 0.3119, "lr": 9.879032258064517e-06, "epoch": 0.09891246751280564, "percentage": 9.89, "elapsed_time": "2:44:36", "remaining_time": "1 day, 0:59:20"}
59
- {"current_steps": 500, "total_steps": 4953, "loss": 0.3166, "lr": 9.99998012650816e-06, "epoch": 0.10093108929878125, "percentage": 10.09, "elapsed_time": "2:46:55", "remaining_time": "1 day, 0:46:40"}
60
- {"current_steps": 500, "total_steps": 4953, "eval_loss": 0.33069753646850586, "epoch": 0.10093108929878125, "percentage": 10.09, "elapsed_time": "2:52:40", "remaining_time": "1 day, 1:37:53"}
61
- {"current_steps": 510, "total_steps": 4953, "loss": 0.3197, "lr": 9.999756551539276e-06, "epoch": 0.10294971108475688, "percentage": 10.3, "elapsed_time": "2:56:01", "remaining_time": "1 day, 1:33:28"}
62
- {"current_steps": 520, "total_steps": 4953, "loss": 0.3272, "lr": 9.999284570881733e-06, "epoch": 0.10496833287073251, "percentage": 10.5, "elapsed_time": "2:58:19", "remaining_time": "1 day, 1:20:10"}
63
- {"current_steps": 530, "total_steps": 4953, "loss": 0.3169, "lr": 9.99856420798521e-06, "epoch": 0.10698695465670813, "percentage": 10.7, "elapsed_time": "3:00:40", "remaining_time": "1 day, 1:07:45"}
64
- {"current_steps": 540, "total_steps": 4953, "loss": 0.3196, "lr": 9.997595498639894e-06, "epoch": 0.10900557644268376, "percentage": 10.9, "elapsed_time": "3:03:02", "remaining_time": "1 day, 0:55:49"}
65
- {"current_steps": 550, "total_steps": 4953, "loss": 0.3229, "lr": 9.996378490974716e-06, "epoch": 0.11102419822865939, "percentage": 11.1, "elapsed_time": "3:05:20", "remaining_time": "1 day, 0:43:42"}
66
- {"current_steps": 550, "total_steps": 4953, "eval_loss": 0.31820276379585266, "epoch": 0.11102419822865939, "percentage": 11.1, "elapsed_time": "3:11:05", "remaining_time": "1 day, 1:29:45"}
67
- {"current_steps": 560, "total_steps": 4953, "loss": 0.3158, "lr": 9.994913245454944e-06, "epoch": 0.113042820014635, "percentage": 11.31, "elapsed_time": "3:13:20", "remaining_time": "1 day, 1:16:44"}
68
- {"current_steps": 570, "total_steps": 4953, "loss": 0.3229, "lr": 9.993199834879187e-06, "epoch": 0.11506144180061063, "percentage": 11.51, "elapsed_time": "3:15:35", "remaining_time": "1 day, 1:03:58"}
69
- {"current_steps": 580, "total_steps": 4953, "loss": 0.3112, "lr": 9.991238344375774e-06, "epoch": 0.11708006358658626, "percentage": 11.71, "elapsed_time": "3:17:55", "remaining_time": "1 day, 0:52:20"}
70
- {"current_steps": 590, "total_steps": 4953, "loss": 0.3175, "lr": 9.989028871398531e-06, "epoch": 0.11909868537256188, "percentage": 11.91, "elapsed_time": "3:20:14", "remaining_time": "1 day, 0:40:44"}
71
- {"current_steps": 600, "total_steps": 4953, "loss": 0.3033, "lr": 9.986571525721938e-06, "epoch": 0.12111730715853751, "percentage": 12.11, "elapsed_time": "3:22:32", "remaining_time": "1 day, 0:29:26"}
72
- {"current_steps": 600, "total_steps": 4953, "eval_loss": 0.31507593393325806, "epoch": 0.12111730715853751, "percentage": 12.11, "elapsed_time": "3:28:17", "remaining_time": "1 day, 1:11:11"}
73
- {"current_steps": 610, "total_steps": 4953, "loss": 0.3117, "lr": 9.983866429435663e-06, "epoch": 0.12313592894451314, "percentage": 12.32, "elapsed_time": "3:30:36", "remaining_time": "1 day, 0:59:30"}
74
- {"current_steps": 620, "total_steps": 4953, "loss": 0.3215, "lr": 9.980913716938514e-06, "epoch": 0.12515455073048876, "percentage": 12.52, "elapsed_time": "3:32:59", "remaining_time": "1 day, 0:48:29"}
75
- {"current_steps": 630, "total_steps": 4953, "loss": 0.2962, "lr": 9.977713534931752e-06, "epoch": 0.12717317251646437, "percentage": 12.72, "elapsed_time": "3:35:15", "remaining_time": "1 day, 0:37:03"}
76
- {"current_steps": 640, "total_steps": 4953, "loss": 0.3141, "lr": 9.974266042411796e-06, "epoch": 0.12919179430244002, "percentage": 12.92, "elapsed_time": "3:37:31", "remaining_time": "1 day, 0:25:56"}
77
- {"current_steps": 650, "total_steps": 4953, "loss": 0.3097, "lr": 9.970571410662342e-06, "epoch": 0.13121041608841563, "percentage": 13.12, "elapsed_time": "3:39:48", "remaining_time": "1 day, 0:15:04"}
78
- {"current_steps": 650, "total_steps": 4953, "eval_loss": 0.30941498279571533, "epoch": 0.13121041608841563, "percentage": 13.12, "elapsed_time": "3:45:33", "remaining_time": "1 day, 0:53:11"}
79
- {"current_steps": 660, "total_steps": 4953, "loss": 0.3051, "lr": 9.966629823245833e-06, "epoch": 0.13322903787439125, "percentage": 13.33, "elapsed_time": "3:47:52", "remaining_time": "1 day, 0:42:12"}
80
- {"current_steps": 670, "total_steps": 4953, "loss": 0.3135, "lr": 9.962441475994353e-06, "epoch": 0.1352476596603669, "percentage": 13.53, "elapsed_time": "3:50:10", "remaining_time": "1 day, 0:31:25"}
81
- {"current_steps": 680, "total_steps": 4953, "loss": 0.3038, "lr": 9.95800657699989e-06, "epoch": 0.1372662814463425, "percentage": 13.73, "elapsed_time": "3:52:28", "remaining_time": "1 day, 0:20:46"}
82
- {"current_steps": 690, "total_steps": 4953, "loss": 0.305, "lr": 9.953325346604e-06, "epoch": 0.13928490323231812, "percentage": 13.93, "elapsed_time": "3:54:49", "remaining_time": "1 day, 0:10:46"}
83
- {"current_steps": 700, "total_steps": 4953, "loss": 0.3076, "lr": 9.94839801738686e-06, "epoch": 0.14130352501829377, "percentage": 14.13, "elapsed_time": "3:57:10", "remaining_time": "1 day, 0:01:00"}
84
- {"current_steps": 700, "total_steps": 4953, "eval_loss": 0.31086432933807373, "epoch": 0.14130352501829377, "percentage": 14.13, "elapsed_time": "4:02:55", "remaining_time": "1 day, 0:35:58"}
85
- {"current_steps": 710, "total_steps": 4953, "loss": 0.3056, "lr": 9.94322483415571e-06, "epoch": 0.14332214680426938, "percentage": 14.33, "elapsed_time": "4:05:15", "remaining_time": "1 day, 0:25:39"}
86
- {"current_steps": 720, "total_steps": 4953, "loss": 0.3069, "lr": 9.937806053932693e-06, "epoch": 0.145340768590245, "percentage": 14.54, "elapsed_time": "4:07:36", "remaining_time": "1 day, 0:15:45"}
87
- {"current_steps": 730, "total_steps": 4953, "loss": 0.2979, "lr": 9.932141945942084e-06, "epoch": 0.14735939037622064, "percentage": 14.74, "elapsed_time": "4:09:54", "remaining_time": "1 day, 0:05:40"}
88
- {"current_steps": 740, "total_steps": 4953, "loss": 0.2957, "lr": 9.926232791596915e-06, "epoch": 0.14937801216219626, "percentage": 14.94, "elapsed_time": "4:12:10", "remaining_time": "23:55:41"}
89
- {"current_steps": 750, "total_steps": 4953, "loss": 0.313, "lr": 9.920078884484992e-06, "epoch": 0.15139663394817188, "percentage": 15.14, "elapsed_time": "4:14:28", "remaining_time": "23:46:06"}
90
- {"current_steps": 750, "total_steps": 4953, "eval_loss": 0.30409327149391174, "epoch": 0.15139663394817188, "percentage": 15.14, "elapsed_time": "4:20:14", "remaining_time": "1 day, 0:18:22"}
91
- {"current_steps": 760, "total_steps": 4953, "loss": 0.2944, "lr": 9.913680530354308e-06, "epoch": 0.15341525573414752, "percentage": 15.34, "elapsed_time": "4:22:31", "remaining_time": "1 day, 0:08:20"}
92
- {"current_steps": 770, "total_steps": 4953, "loss": 0.3051, "lr": 9.907038047097853e-06, "epoch": 0.15543387752012314, "percentage": 15.55, "elapsed_time": "4:24:52", "remaining_time": "23:58:53"}
93
- {"current_steps": 780, "total_steps": 4953, "loss": 0.2995, "lr": 9.900151764737821e-06, "epoch": 0.15745249930609875, "percentage": 15.75, "elapsed_time": "4:27:10", "remaining_time": "23:49:22"}
94
- {"current_steps": 790, "total_steps": 4953, "loss": 0.3041, "lr": 9.893022025409217e-06, "epoch": 0.1594711210920744, "percentage": 15.95, "elapsed_time": "4:29:27", "remaining_time": "23:39:57"}
95
- {"current_steps": 800, "total_steps": 4953, "loss": 0.3031, "lr": 9.885649183342844e-06, "epoch": 0.16148974287805, "percentage": 16.15, "elapsed_time": "4:31:45", "remaining_time": "23:30:46"}
96
- {"current_steps": 800, "total_steps": 4953, "eval_loss": 0.307149738073349, "epoch": 0.16148974287805, "percentage": 16.15, "elapsed_time": "4:37:30", "remaining_time": "1 day, 0:00:38"}
97
- {"current_steps": 810, "total_steps": 4953, "loss": 0.2952, "lr": 9.878033604847725e-06, "epoch": 0.16350836466402563, "percentage": 16.35, "elapsed_time": "4:39:49", "remaining_time": "23:51:13"}
98
- {"current_steps": 820, "total_steps": 4953, "loss": 0.3026, "lr": 9.870175668292882e-06, "epoch": 0.16552698645000127, "percentage": 16.56, "elapsed_time": "4:42:03", "remaining_time": "23:41:36"}
99
- {"current_steps": 830, "total_steps": 4953, "loss": 0.2917, "lr": 9.862075764088555e-06, "epoch": 0.1675456082359769, "percentage": 16.76, "elapsed_time": "4:44:23", "remaining_time": "23:32:44"}
100
- {"current_steps": 840, "total_steps": 4953, "loss": 0.2951, "lr": 9.853734294666794e-06, "epoch": 0.1695642300219525, "percentage": 16.96, "elapsed_time": "4:46:39", "remaining_time": "23:23:35"}
101
- {"current_steps": 850, "total_steps": 4953, "loss": 0.3083, "lr": 9.845151674461471e-06, "epoch": 0.17158285180792815, "percentage": 17.16, "elapsed_time": "4:48:57", "remaining_time": "23:14:47"}
102
- {"current_steps": 850, "total_steps": 4953, "eval_loss": 0.30141887068748474, "epoch": 0.17158285180792815, "percentage": 17.16, "elapsed_time": "4:54:42", "remaining_time": "23:42:34"}
103
- {"current_steps": 860, "total_steps": 4953, "loss": 0.2948, "lr": 9.83632832988768e-06, "epoch": 0.17360147359390377, "percentage": 17.36, "elapsed_time": "4:57:01", "remaining_time": "23:33:37"}
104
- {"current_steps": 870, "total_steps": 4953, "loss": 0.3053, "lr": 9.827264699320567e-06, "epoch": 0.17562009537987938, "percentage": 17.57, "elapsed_time": "4:59:23", "remaining_time": "23:25:02"}
105
- {"current_steps": 880, "total_steps": 4953, "loss": 0.3025, "lr": 9.817961233073531e-06, "epoch": 0.17763871716585503, "percentage": 17.77, "elapsed_time": "5:01:43", "remaining_time": "23:16:29"}
106
- {"current_steps": 890, "total_steps": 4953, "loss": 0.2959, "lr": 9.808418393375872e-06, "epoch": 0.17965733895183064, "percentage": 17.97, "elapsed_time": "5:04:02", "remaining_time": "23:07:58"}
107
- {"current_steps": 900, "total_steps": 4953, "loss": 0.2797, "lr": 9.798636654349804e-06, "epoch": 0.18167596073780626, "percentage": 18.17, "elapsed_time": "5:06:15", "remaining_time": "22:59:13"}
108
- {"current_steps": 900, "total_steps": 4953, "eval_loss": 0.29934194684028625, "epoch": 0.18167596073780626, "percentage": 18.17, "elapsed_time": "5:12:01", "remaining_time": "23:25:08"}
109
- {"current_steps": 910, "total_steps": 4953, "loss": 0.2929, "lr": 9.788616501986916e-06, "epoch": 0.18369458252378187, "percentage": 18.37, "elapsed_time": "5:14:19", "remaining_time": "23:16:31"}
110
- {"current_steps": 920, "total_steps": 4953, "loss": 0.2921, "lr": 9.778358434124014e-06, "epoch": 0.18571320430975752, "percentage": 18.57, "elapsed_time": "5:16:41", "remaining_time": "23:08:15"}
111
- {"current_steps": 930, "total_steps": 4953, "loss": 0.3062, "lr": 9.767862960418397e-06, "epoch": 0.18773182609573313, "percentage": 18.78, "elapsed_time": "5:18:58", "remaining_time": "22:59:49"}
112
- {"current_steps": 940, "total_steps": 4953, "loss": 0.2929, "lr": 9.75713060232253e-06, "epoch": 0.18975044788170875, "percentage": 18.98, "elapsed_time": "5:21:14", "remaining_time": "22:51:23"}
113
- {"current_steps": 950, "total_steps": 4953, "loss": 0.2822, "lr": 9.746161893058137e-06, "epoch": 0.1917690696676844, "percentage": 19.18, "elapsed_time": "5:23:30", "remaining_time": "22:43:10"}
114
- {"current_steps": 950, "total_steps": 4953, "eval_loss": 0.2959352433681488, "epoch": 0.1917690696676844, "percentage": 19.18, "elapsed_time": "5:29:16", "remaining_time": "23:07:25"}
115
- {"current_steps": 960, "total_steps": 4953, "loss": 0.2956, "lr": 9.734957377589707e-06, "epoch": 0.19378769145366, "percentage": 19.38, "elapsed_time": "5:31:30", "remaining_time": "22:58:52"}
116
- {"current_steps": 970, "total_steps": 4953, "loss": 0.2819, "lr": 9.723517612597417e-06, "epoch": 0.19580631323963563, "percentage": 19.58, "elapsed_time": "5:33:48", "remaining_time": "22:50:39"}
117
- {"current_steps": 980, "total_steps": 4953, "loss": 0.2992, "lr": 9.711843166449486e-06, "epoch": 0.19782493502561127, "percentage": 19.79, "elapsed_time": "5:36:08", "remaining_time": "22:42:43"}
118
- {"current_steps": 990, "total_steps": 4953, "loss": 0.294, "lr": 9.69993461917392e-06, "epoch": 0.1998435568115869, "percentage": 19.99, "elapsed_time": "5:38:26", "remaining_time": "22:34:47"}
119
- {"current_steps": 1000, "total_steps": 4953, "loss": 0.3024, "lr": 9.687792562429705e-06, "epoch": 0.2018621785975625, "percentage": 20.19, "elapsed_time": "5:40:47", "remaining_time": "22:27:07"}
120
- {"current_steps": 1000, "total_steps": 4953, "eval_loss": 0.2942771911621094, "epoch": 0.2018621785975625, "percentage": 20.19, "elapsed_time": "5:46:32", "remaining_time": "22:49:52"}
121
- {"current_steps": 1010, "total_steps": 4953, "loss": 0.2927, "lr": 9.675417599477406e-06, "epoch": 0.20388080038353815, "percentage": 20.39, "elapsed_time": "5:49:56", "remaining_time": "22:46:09"}
122
- {"current_steps": 1020, "total_steps": 4953, "loss": 0.2851, "lr": 9.6628103451492e-06, "epoch": 0.20589942216951376, "percentage": 20.59, "elapsed_time": "5:52:13", "remaining_time": "22:38:09"}
123
- {"current_steps": 1030, "total_steps": 4953, "loss": 0.2958, "lr": 9.649971425818321e-06, "epoch": 0.20791804395548938, "percentage": 20.8, "elapsed_time": "5:54:32", "remaining_time": "22:30:21"}
124
- {"current_steps": 1040, "total_steps": 4953, "loss": 0.3014, "lr": 9.636901479367948e-06, "epoch": 0.20993666574146502, "percentage": 21.0, "elapsed_time": "5:56:51", "remaining_time": "22:22:39"}
125
- {"current_steps": 1050, "total_steps": 4953, "loss": 0.2918, "lr": 9.623601155159507e-06, "epoch": 0.21195528752744064, "percentage": 21.2, "elapsed_time": "5:59:07", "remaining_time": "22:14:56"}
126
- {"current_steps": 1050, "total_steps": 4953, "eval_loss": 0.29167234897613525, "epoch": 0.21195528752744064, "percentage": 21.2, "elapsed_time": "6:04:53", "remaining_time": "22:36:19"}
127
- {"current_steps": 1060, "total_steps": 4953, "loss": 0.2951, "lr": 9.610071114000411e-06, "epoch": 0.21397390931341626, "percentage": 21.4, "elapsed_time": "6:07:10", "remaining_time": "22:28:30"}
128
- {"current_steps": 1070, "total_steps": 4953, "loss": 0.2958, "lr": 9.596312028111234e-06, "epoch": 0.2159925310993919, "percentage": 21.6, "elapsed_time": "6:09:28", "remaining_time": "22:20:49"}
129
- {"current_steps": 1080, "total_steps": 4953, "loss": 0.3025, "lr": 9.582324581092295e-06, "epoch": 0.21801115288536752, "percentage": 21.8, "elapsed_time": "6:11:47", "remaining_time": "22:13:15"}
130
- {"current_steps": 1090, "total_steps": 4953, "loss": 0.2928, "lr": 9.568109467889716e-06, "epoch": 0.22002977467134313, "percentage": 22.01, "elapsed_time": "6:14:02", "remaining_time": "22:05:38"}
131
- {"current_steps": 1100, "total_steps": 4953, "loss": 0.2907, "lr": 9.55366739476088e-06, "epoch": 0.22204839645731878, "percentage": 22.21, "elapsed_time": "6:16:19", "remaining_time": "21:58:11"}
132
- {"current_steps": 1100, "total_steps": 4953, "eval_loss": 0.2893689274787903, "epoch": 0.22204839645731878, "percentage": 22.21, "elapsed_time": "6:22:05", "remaining_time": "22:18:20"}
133
- {"current_steps": 1110, "total_steps": 4953, "loss": 0.2893, "lr": 9.538999079239346e-06, "epoch": 0.2240670182432944, "percentage": 22.41, "elapsed_time": "6:24:25", "remaining_time": "22:10:55"}
134
- {"current_steps": 1120, "total_steps": 4953, "loss": 0.2789, "lr": 9.524105250099205e-06, "epoch": 0.22608564002927, "percentage": 22.61, "elapsed_time": "6:26:44", "remaining_time": "22:03:33"}
135
- {"current_steps": 1130, "total_steps": 4953, "loss": 0.2785, "lr": 9.508986647318862e-06, "epoch": 0.22810426181524565, "percentage": 22.81, "elapsed_time": "6:29:04", "remaining_time": "21:56:17"}
136
- {"current_steps": 1140, "total_steps": 4953, "loss": 0.2819, "lr": 9.493644022044275e-06, "epoch": 0.23012288360122127, "percentage": 23.02, "elapsed_time": "6:31:24", "remaining_time": "21:49:08"}
137
- {"current_steps": 1150, "total_steps": 4953, "loss": 0.2842, "lr": 9.478078136551641e-06, "epoch": 0.23214150538719688, "percentage": 23.22, "elapsed_time": "6:33:45", "remaining_time": "21:42:07"}
138
- {"current_steps": 1150, "total_steps": 4953, "eval_loss": 0.2872868776321411, "epoch": 0.23214150538719688, "percentage": 23.22, "elapsed_time": "6:39:30", "remaining_time": "22:01:09"}
139
- {"current_steps": 1160, "total_steps": 4953, "loss": 0.278, "lr": 9.462289764209518e-06, "epoch": 0.23416012717317253, "percentage": 23.42, "elapsed_time": "6:41:50", "remaining_time": "21:53:56"}
140
- {"current_steps": 1170, "total_steps": 4953, "loss": 0.2883, "lr": 9.446279689440394e-06, "epoch": 0.23617874895914814, "percentage": 23.62, "elapsed_time": "6:44:12", "remaining_time": "21:46:55"}
141
- {"current_steps": 1180, "total_steps": 4953, "loss": 0.2847, "lr": 9.430048707681732e-06, "epoch": 0.23819737074512376, "percentage": 23.82, "elapsed_time": "6:46:27", "remaining_time": "21:39:39"}
142
- {"current_steps": 1190, "total_steps": 4953, "loss": 0.2867, "lr": 9.413597625346438e-06, "epoch": 0.2402159925310994, "percentage": 24.03, "elapsed_time": "6:48:44", "remaining_time": "21:32:30"}
143
- {"current_steps": 1200, "total_steps": 4953, "loss": 0.2834, "lr": 9.396927259782793e-06, "epoch": 0.24223461431707502, "percentage": 24.23, "elapsed_time": "6:51:00", "remaining_time": "21:25:27"}
144
- {"current_steps": 1200, "total_steps": 4953, "eval_loss": 0.2943769097328186, "epoch": 0.24223461431707502, "percentage": 24.23, "elapsed_time": "6:56:46", "remaining_time": "21:43:26"}
145
- {"current_steps": 1210, "total_steps": 4953, "loss": 0.2966, "lr": 9.380038439233852e-06, "epoch": 0.24425323610305064, "percentage": 24.43, "elapsed_time": "6:59:06", "remaining_time": "21:36:27"}
146
- {"current_steps": 1220, "total_steps": 4953, "loss": 0.2727, "lr": 9.36293200279629e-06, "epoch": 0.24627185788902628, "percentage": 24.63, "elapsed_time": "7:01:23", "remaining_time": "21:29:24"}
147
- {"current_steps": 1230, "total_steps": 4953, "loss": 0.287, "lr": 9.345608800378716e-06, "epoch": 0.2482904796750019, "percentage": 24.83, "elapsed_time": "7:03:43", "remaining_time": "21:22:32"}
148
- {"current_steps": 1240, "total_steps": 4953, "loss": 0.2905, "lr": 9.328069692659437e-06, "epoch": 0.2503091014609775, "percentage": 25.04, "elapsed_time": "7:06:04", "remaining_time": "21:15:50"}
149
- {"current_steps": 1250, "total_steps": 4953, "loss": 0.279, "lr": 9.310315551043713e-06, "epoch": 0.25232772324695313, "percentage": 25.24, "elapsed_time": "7:08:27", "remaining_time": "21:09:15"}
150
- {"current_steps": 1250, "total_steps": 4953, "eval_loss": 0.2850056290626526, "epoch": 0.25232772324695313, "percentage": 25.24, "elapsed_time": "7:14:12", "remaining_time": "21:26:18"}
151
- {"current_steps": 1260, "total_steps": 4953, "loss": 0.2915, "lr": 9.292347257620442e-06, "epoch": 0.25434634503292874, "percentage": 25.44, "elapsed_time": "7:16:31", "remaining_time": "21:19:24"}
152
- {"current_steps": 1270, "total_steps": 4953, "loss": 0.2783, "lr": 9.274165705118356e-06, "epoch": 0.2563649668189044, "percentage": 25.64, "elapsed_time": "7:18:48", "remaining_time": "21:12:33"}
153
- {"current_steps": 1280, "total_steps": 4953, "loss": 0.2821, "lr": 9.255771796861649e-06, "epoch": 0.25838358860488003, "percentage": 25.84, "elapsed_time": "7:21:06", "remaining_time": "21:05:47"}
154
- {"current_steps": 1290, "total_steps": 4953, "loss": 0.2925, "lr": 9.237166446725108e-06, "epoch": 0.26040221039085565, "percentage": 26.04, "elapsed_time": "7:23:24", "remaining_time": "20:59:05"}
155
- {"current_steps": 1300, "total_steps": 4953, "loss": 0.2759, "lr": 9.2183505790887e-06, "epoch": 0.26242083217683126, "percentage": 26.25, "elapsed_time": "7:25:42", "remaining_time": "20:52:25"}
156
- {"current_steps": 1300, "total_steps": 4953, "eval_loss": 0.28468266129493713, "epoch": 0.26242083217683126, "percentage": 26.25, "elapsed_time": "7:31:27", "remaining_time": "21:08:35"}
157
- {"current_steps": 1310, "total_steps": 4953, "loss": 0.2793, "lr": 9.199325128791658e-06, "epoch": 0.2644394539628069, "percentage": 26.45, "elapsed_time": "7:33:44", "remaining_time": "21:01:50"}
158
- {"current_steps": 1320, "total_steps": 4953, "loss": 0.2949, "lr": 9.180091041086022e-06, "epoch": 0.2664580757487825, "percentage": 26.65, "elapsed_time": "7:36:05", "remaining_time": "20:55:17"}
159
- {"current_steps": 1330, "total_steps": 4953, "loss": 0.2686, "lr": 9.160649271589679e-06, "epoch": 0.26847669753475817, "percentage": 26.85, "elapsed_time": "7:38:21", "remaining_time": "20:48:34"}
160
- {"current_steps": 1340, "total_steps": 4953, "loss": 0.2717, "lr": 9.14100078623889e-06, "epoch": 0.2704953193207338, "percentage": 27.05, "elapsed_time": "7:40:41", "remaining_time": "20:42:08"}
161
- {"current_steps": 1350, "total_steps": 4953, "loss": 0.2891, "lr": 9.121146561240293e-06, "epoch": 0.2725139411067094, "percentage": 27.26, "elapsed_time": "7:43:01", "remaining_time": "20:35:46"}
162
- {"current_steps": 1350, "total_steps": 4953, "eval_loss": 0.27978071570396423, "epoch": 0.2725139411067094, "percentage": 27.26, "elapsed_time": "7:48:47", "remaining_time": "20:51:08"}
163
- {"current_steps": 1360, "total_steps": 4953, "loss": 0.2807, "lr": 9.101087583022401e-06, "epoch": 0.274532562892685, "percentage": 27.46, "elapsed_time": "7:51:06", "remaining_time": "20:44:36"}
164
- {"current_steps": 1370, "total_steps": 4953, "loss": 0.2834, "lr": 9.080824848186603e-06, "epoch": 0.27655118467866063, "percentage": 27.66, "elapsed_time": "7:53:23", "remaining_time": "20:38:04"}
165
- {"current_steps": 1380, "total_steps": 4953, "loss": 0.2736, "lr": 9.060359363457631e-06, "epoch": 0.27856980646463625, "percentage": 27.86, "elapsed_time": "7:55:42", "remaining_time": "20:31:40"}
166
- {"current_steps": 1390, "total_steps": 4953, "loss": 0.2782, "lr": 9.03969214563356e-06, "epoch": 0.2805884282506119, "percentage": 28.06, "elapsed_time": "7:57:59", "remaining_time": "20:25:15"}
167
- {"current_steps": 1400, "total_steps": 4953, "loss": 0.279, "lr": 9.018824221535282e-06, "epoch": 0.28260705003658754, "percentage": 28.27, "elapsed_time": "8:00:18", "remaining_time": "20:18:56"}
168
- {"current_steps": 1400, "total_steps": 4953, "eval_loss": 0.2792831361293793, "epoch": 0.28260705003658754, "percentage": 28.27, "elapsed_time": "8:06:03", "remaining_time": "20:33:31"}
169
- {"current_steps": 1410, "total_steps": 4953, "loss": 0.2649, "lr": 8.997756627955489e-06, "epoch": 0.28462567182256315, "percentage": 28.47, "elapsed_time": "8:08:21", "remaining_time": "20:27:07"}
170
- {"current_steps": 1420, "total_steps": 4953, "loss": 0.2713, "lr": 8.976490411607165e-06, "epoch": 0.28664429360853877, "percentage": 28.67, "elapsed_time": "8:10:38", "remaining_time": "20:20:43"}
171
- {"current_steps": 1430, "total_steps": 4953, "loss": 0.2746, "lr": 8.955026629071574e-06, "epoch": 0.2886629153945144, "percentage": 28.87, "elapsed_time": "8:12:57", "remaining_time": "20:14:29"}
172
- {"current_steps": 1440, "total_steps": 4953, "loss": 0.2795, "lr": 8.933366346745778e-06, "epoch": 0.29068153718049, "percentage": 29.07, "elapsed_time": "8:15:14", "remaining_time": "20:08:11"}
173
- {"current_steps": 1450, "total_steps": 4953, "loss": 0.2861, "lr": 8.911510640789644e-06, "epoch": 0.2927001589664657, "percentage": 29.28, "elapsed_time": "8:17:31", "remaining_time": "20:01:58"}
174
- {"current_steps": 1450, "total_steps": 4953, "eval_loss": 0.27748537063598633, "epoch": 0.2927001589664657, "percentage": 29.28, "elapsed_time": "8:23:17", "remaining_time": "20:15:51"}
175
- {"current_steps": 1460, "total_steps": 4953, "loss": 0.2769, "lr": 8.889460597072378e-06, "epoch": 0.2947187807524413, "percentage": 29.48, "elapsed_time": "8:25:40", "remaining_time": "20:09:47"}
176
- {"current_steps": 1470, "total_steps": 4953, "loss": 0.2831, "lr": 8.867217311118583e-06, "epoch": 0.2967374025384169, "percentage": 29.68, "elapsed_time": "8:27:58", "remaining_time": "20:03:34"}
177
- {"current_steps": 1480, "total_steps": 4953, "loss": 0.265, "lr": 8.84478188805382e-06, "epoch": 0.2987560243243925, "percentage": 29.88, "elapsed_time": "8:30:15", "remaining_time": "19:57:24"}
178
- {"current_steps": 1490, "total_steps": 4953, "loss": 0.2684, "lr": 8.822155442549702e-06, "epoch": 0.30077464611036814, "percentage": 30.08, "elapsed_time": "8:32:34", "remaining_time": "19:51:18"}
179
- {"current_steps": 1500, "total_steps": 4953, "loss": 0.2754, "lr": 8.799339098768525e-06, "epoch": 0.30279326789634375, "percentage": 30.28, "elapsed_time": "8:34:52", "remaining_time": "19:45:14"}
180
- {"current_steps": 1500, "total_steps": 4953, "eval_loss": 0.27722880244255066, "epoch": 0.30279326789634375, "percentage": 30.28, "elapsed_time": "8:40:37", "remaining_time": "19:58:28"}
 
1
+ {"current_steps": 10, "total_steps": 4953, "loss": 0.755, "lr": 2.0161290322580645e-07, "epoch": 0.0020186217859756253, "percentage": 0.2, "elapsed_time": "0:02:20", "remaining_time": "19:21:04"}
2
+ {"current_steps": 20, "total_steps": 4953, "loss": 0.6516, "lr": 4.032258064516129e-07, "epoch": 0.0040372435719512505, "percentage": 0.4, "elapsed_time": "0:04:38", "remaining_time": "19:04:04"}
3
+ {"current_steps": 30, "total_steps": 4953, "loss": 0.4801, "lr": 6.048387096774194e-07, "epoch": 0.006055865357926875, "percentage": 0.61, "elapsed_time": "0:06:53", "remaining_time": "18:49:33"}
4
+ {"current_steps": 40, "total_steps": 4953, "loss": 0.4294, "lr": 8.064516129032258e-07, "epoch": 0.008074487143902501, "percentage": 0.81, "elapsed_time": "0:09:10", "remaining_time": "18:46:40"}
5
+ {"current_steps": 50, "total_steps": 4953, "loss": 0.3997, "lr": 1.0080645161290323e-06, "epoch": 0.010093108929878126, "percentage": 1.01, "elapsed_time": "0:11:31", "remaining_time": "18:49:52"}
6
+ {"current_steps": 50, "total_steps": 4953, "eval_loss": 0.4787580370903015, "epoch": 0.010093108929878126, "percentage": 1.01, "elapsed_time": "0:17:17", "remaining_time": "1 day, 4:14:57"}
7
+ {"current_steps": 60, "total_steps": 4953, "loss": 0.3709, "lr": 1.2096774193548388e-06, "epoch": 0.01211173071585375, "percentage": 1.21, "elapsed_time": "0:19:33", "remaining_time": "1 day, 2:35:14"}
8
+ {"current_steps": 70, "total_steps": 4953, "loss": 0.3343, "lr": 1.4112903225806455e-06, "epoch": 0.014130352501829375, "percentage": 1.41, "elapsed_time": "0:21:54", "remaining_time": "1 day, 1:28:32"}
9
+ {"current_steps": 80, "total_steps": 4953, "loss": 0.3382, "lr": 1.6129032258064516e-06, "epoch": 0.016148974287805002, "percentage": 1.62, "elapsed_time": "0:24:12", "remaining_time": "1 day, 0:34:34"}
10
+ {"current_steps": 90, "total_steps": 4953, "loss": 0.3292, "lr": 1.8145161290322583e-06, "epoch": 0.018167596073780625, "percentage": 1.82, "elapsed_time": "0:26:31", "remaining_time": "23:53:13"}
11
+ {"current_steps": 100, "total_steps": 4953, "loss": 0.3194, "lr": 2.0161290322580646e-06, "epoch": 0.02018621785975625, "percentage": 2.02, "elapsed_time": "0:28:52", "remaining_time": "23:21:28"}
12
+ {"current_steps": 100, "total_steps": 4953, "eval_loss": 0.34027454257011414, "epoch": 0.02018621785975625, "percentage": 2.02, "elapsed_time": "0:34:38", "remaining_time": "1 day, 4:00:55"}
13
+ {"current_steps": 110, "total_steps": 4953, "loss": 0.3186, "lr": 2.217741935483871e-06, "epoch": 0.022204839645731878, "percentage": 2.22, "elapsed_time": "0:36:54", "remaining_time": "1 day, 3:05:20"}
14
+ {"current_steps": 120, "total_steps": 4953, "loss": 0.2938, "lr": 2.4193548387096776e-06, "epoch": 0.0242234614317075, "percentage": 2.42, "elapsed_time": "0:39:11", "remaining_time": "1 day, 2:18:10"}
15
+ {"current_steps": 130, "total_steps": 4953, "loss": 0.3267, "lr": 2.620967741935484e-06, "epoch": 0.026242083217683128, "percentage": 2.62, "elapsed_time": "0:41:28", "remaining_time": "1 day, 1:38:57"}
16
+ {"current_steps": 140, "total_steps": 4953, "loss": 0.3025, "lr": 2.822580645161291e-06, "epoch": 0.02826070500365875, "percentage": 2.83, "elapsed_time": "0:43:50", "remaining_time": "1 day, 1:07:29"}
17
+ {"current_steps": 150, "total_steps": 4953, "loss": 0.3038, "lr": 3.024193548387097e-06, "epoch": 0.030279326789634377, "percentage": 3.03, "elapsed_time": "0:46:12", "remaining_time": "1 day, 0:39:28"}
18
+ {"current_steps": 150, "total_steps": 4953, "eval_loss": 0.31383150815963745, "epoch": 0.030279326789634377, "percentage": 3.03, "elapsed_time": "0:51:57", "remaining_time": "1 day, 3:43:49"}
19
+ {"current_steps": 160, "total_steps": 4953, "loss": 0.3161, "lr": 3.225806451612903e-06, "epoch": 0.032297948575610004, "percentage": 3.23, "elapsed_time": "0:54:18", "remaining_time": "1 day, 3:06:49"}
20
+ {"current_steps": 170, "total_steps": 4953, "loss": 0.3215, "lr": 3.4274193548387097e-06, "epoch": 0.03431657036158563, "percentage": 3.43, "elapsed_time": "0:56:38", "remaining_time": "1 day, 2:33:47"}
21
+ {"current_steps": 180, "total_steps": 4953, "loss": 0.2951, "lr": 3.6290322580645166e-06, "epoch": 0.03633519214756125, "percentage": 3.63, "elapsed_time": "0:58:59", "remaining_time": "1 day, 2:04:19"}
22
+ {"current_steps": 190, "total_steps": 4953, "loss": 0.2971, "lr": 3.830645161290323e-06, "epoch": 0.03835381393353688, "percentage": 3.84, "elapsed_time": "1:01:14", "remaining_time": "1 day, 1:35:25"}
23
+ {"current_steps": 200, "total_steps": 4953, "loss": 0.3132, "lr": 4.032258064516129e-06, "epoch": 0.0403724357195125, "percentage": 4.04, "elapsed_time": "1:03:33", "remaining_time": "1 day, 1:10:39"}
24
+ {"current_steps": 200, "total_steps": 4953, "eval_loss": 0.3134636878967285, "epoch": 0.0403724357195125, "percentage": 4.04, "elapsed_time": "1:09:19", "remaining_time": "1 day, 3:27:32"}
25
+ {"current_steps": 210, "total_steps": 4953, "loss": 0.2994, "lr": 4.233870967741936e-06, "epoch": 0.042391057505488126, "percentage": 4.24, "elapsed_time": "1:11:39", "remaining_time": "1 day, 2:58:24"}
26
+ {"current_steps": 220, "total_steps": 4953, "loss": 0.3081, "lr": 4.435483870967742e-06, "epoch": 0.044409679291463756, "percentage": 4.44, "elapsed_time": "1:13:57", "remaining_time": "1 day, 2:31:07"}
27
+ {"current_steps": 230, "total_steps": 4953, "loss": 0.3111, "lr": 4.637096774193548e-06, "epoch": 0.04642830107743938, "percentage": 4.64, "elapsed_time": "1:16:19", "remaining_time": "1 day, 2:07:13"}
28
+ {"current_steps": 240, "total_steps": 4953, "loss": 0.3243, "lr": 4.838709677419355e-06, "epoch": 0.048446922863415, "percentage": 4.85, "elapsed_time": "1:18:37", "remaining_time": "1 day, 1:44:00"}
29
+ {"current_steps": 250, "total_steps": 4953, "loss": 0.3184, "lr": 5.040322580645161e-06, "epoch": 0.050465544649390626, "percentage": 5.05, "elapsed_time": "1:20:57", "remaining_time": "1 day, 1:22:58"}
30
+ {"current_steps": 250, "total_steps": 4953, "eval_loss": 0.3162420094013214, "epoch": 0.050465544649390626, "percentage": 5.05, "elapsed_time": "1:26:43", "remaining_time": "1 day, 3:11:20"}
31
+ {"current_steps": 260, "total_steps": 4953, "loss": 0.3254, "lr": 5.241935483870968e-06, "epoch": 0.052484166435366256, "percentage": 5.25, "elapsed_time": "1:29:01", "remaining_time": "1 day, 2:46:58"}
32
+ {"current_steps": 270, "total_steps": 4953, "loss": 0.2962, "lr": 5.443548387096774e-06, "epoch": 0.05450278822134188, "percentage": 5.45, "elapsed_time": "1:31:18", "remaining_time": "1 day, 2:23:43"}
33
+ {"current_steps": 280, "total_steps": 4953, "loss": 0.2922, "lr": 5.645161290322582e-06, "epoch": 0.0565214100073175, "percentage": 5.65, "elapsed_time": "1:33:33", "remaining_time": "1 day, 2:01:30"}
34
+ {"current_steps": 290, "total_steps": 4953, "loss": 0.3134, "lr": 5.846774193548388e-06, "epoch": 0.05854003179329313, "percentage": 5.86, "elapsed_time": "1:35:48", "remaining_time": "1 day, 1:40:35"}
35
+ {"current_steps": 300, "total_steps": 4953, "loss": 0.3041, "lr": 6.048387096774194e-06, "epoch": 0.060558653579268755, "percentage": 6.06, "elapsed_time": "1:38:04", "remaining_time": "1 day, 1:21:07"}
36
+ {"current_steps": 300, "total_steps": 4953, "eval_loss": 0.31838396191596985, "epoch": 0.060558653579268755, "percentage": 6.06, "elapsed_time": "1:43:49", "remaining_time": "1 day, 2:50:25"}
37
+ {"current_steps": 310, "total_steps": 4953, "loss": 0.3125, "lr": 6.25e-06, "epoch": 0.06257727536524438, "percentage": 6.26, "elapsed_time": "1:46:06", "remaining_time": "1 day, 2:29:15"}
38
+ {"current_steps": 320, "total_steps": 4953, "loss": 0.3206, "lr": 6.451612903225806e-06, "epoch": 0.06459589715122001, "percentage": 6.46, "elapsed_time": "1:48:22", "remaining_time": "1 day, 2:09:10"}
39
+ {"current_steps": 330, "total_steps": 4953, "loss": 0.3053, "lr": 6.653225806451613e-06, "epoch": 0.06661451893719562, "percentage": 6.66, "elapsed_time": "1:50:40", "remaining_time": "1 day, 1:50:28"}
40
+ {"current_steps": 340, "total_steps": 4953, "loss": 0.3274, "lr": 6.854838709677419e-06, "epoch": 0.06863314072317125, "percentage": 6.86, "elapsed_time": "1:53:02", "remaining_time": "1 day, 1:33:39"}
41
+ {"current_steps": 350, "total_steps": 4953, "loss": 0.3214, "lr": 7.056451612903227e-06, "epoch": 0.07065176250914688, "percentage": 7.07, "elapsed_time": "1:55:20", "remaining_time": "1 day, 1:16:52"}
42
+ {"current_steps": 350, "total_steps": 4953, "eval_loss": 0.3188110589981079, "epoch": 0.07065176250914688, "percentage": 7.07, "elapsed_time": "2:01:05", "remaining_time": "1 day, 2:32:35"}
43
+ {"current_steps": 360, "total_steps": 4953, "loss": 0.3106, "lr": 7.258064516129033e-06, "epoch": 0.0726703842951225, "percentage": 7.27, "elapsed_time": "2:03:22", "remaining_time": "1 day, 2:13:58"}
44
+ {"current_steps": 370, "total_steps": 4953, "loss": 0.3156, "lr": 7.459677419354839e-06, "epoch": 0.07468900608109813, "percentage": 7.47, "elapsed_time": "2:05:40", "remaining_time": "1 day, 1:56:43"}
45
+ {"current_steps": 380, "total_steps": 4953, "loss": 0.3189, "lr": 7.661290322580646e-06, "epoch": 0.07670762786707376, "percentage": 7.67, "elapsed_time": "2:08:01", "remaining_time": "1 day, 1:40:46"}
46
+ {"current_steps": 390, "total_steps": 4953, "loss": 0.3246, "lr": 7.862903225806451e-06, "epoch": 0.07872624965304938, "percentage": 7.87, "elapsed_time": "2:10:17", "remaining_time": "1 day, 1:24:19"}
47
+ {"current_steps": 400, "total_steps": 4953, "loss": 0.3057, "lr": 8.064516129032258e-06, "epoch": 0.080744871439025, "percentage": 8.08, "elapsed_time": "2:12:36", "remaining_time": "1 day, 1:09:29"}
48
+ {"current_steps": 400, "total_steps": 4953, "eval_loss": 0.33126088976860046, "epoch": 0.080744871439025, "percentage": 8.08, "elapsed_time": "2:18:22", "remaining_time": "1 day, 2:14:59"}
49
+ {"current_steps": 410, "total_steps": 4953, "loss": 0.3142, "lr": 8.266129032258065e-06, "epoch": 0.08276349322500064, "percentage": 8.28, "elapsed_time": "2:20:38", "remaining_time": "1 day, 1:58:25"}
50
+ {"current_steps": 420, "total_steps": 4953, "loss": 0.3183, "lr": 8.467741935483872e-06, "epoch": 0.08478211501097625, "percentage": 8.48, "elapsed_time": "2:22:56", "remaining_time": "1 day, 1:42:41"}
51
+ {"current_steps": 430, "total_steps": 4953, "loss": 0.3177, "lr": 8.669354838709677e-06, "epoch": 0.08680073679695188, "percentage": 8.68, "elapsed_time": "2:25:13", "remaining_time": "1 day, 1:27:39"}
52
+ {"current_steps": 440, "total_steps": 4953, "loss": 0.3199, "lr": 8.870967741935484e-06, "epoch": 0.08881935858292751, "percentage": 8.88, "elapsed_time": "2:27:32", "remaining_time": "1 day, 1:13:14"}
53
+ {"current_steps": 450, "total_steps": 4953, "loss": 0.3195, "lr": 9.072580645161291e-06, "epoch": 0.09083798036890313, "percentage": 9.09, "elapsed_time": "2:29:51", "remaining_time": "1 day, 0:59:32"}
54
+ {"current_steps": 450, "total_steps": 4953, "eval_loss": 0.317932665348053, "epoch": 0.09083798036890313, "percentage": 9.09, "elapsed_time": "2:35:36", "remaining_time": "1 day, 1:57:08"}
55
+ {"current_steps": 460, "total_steps": 4953, "loss": 0.3107, "lr": 9.274193548387097e-06, "epoch": 0.09285660215487876, "percentage": 9.29, "elapsed_time": "2:37:52", "remaining_time": "1 day, 1:41:59"}
56
+ {"current_steps": 470, "total_steps": 4953, "loss": 0.3264, "lr": 9.475806451612905e-06, "epoch": 0.09487522394085438, "percentage": 9.49, "elapsed_time": "2:40:09", "remaining_time": "1 day, 1:27:36"}
57
+ {"current_steps": 480, "total_steps": 4953, "loss": 0.3183, "lr": 9.67741935483871e-06, "epoch": 0.09689384572683, "percentage": 9.69, "elapsed_time": "2:42:28", "remaining_time": "1 day, 1:14:04"}
58
+ {"current_steps": 490, "total_steps": 4953, "loss": 0.3103, "lr": 9.879032258064517e-06, "epoch": 0.09891246751280564, "percentage": 9.89, "elapsed_time": "2:44:45", "remaining_time": "1 day, 1:00:42"}
59
+ {"current_steps": 500, "total_steps": 4953, "loss": 0.318, "lr": 9.99998012650816e-06, "epoch": 0.10093108929878125, "percentage": 10.09, "elapsed_time": "2:47:04", "remaining_time": "1 day, 0:48:00"}
60
+ {"current_steps": 500, "total_steps": 4953, "eval_loss": 0.3283348083496094, "epoch": 0.10093108929878125, "percentage": 10.09, "elapsed_time": "2:52:50", "remaining_time": "1 day, 1:39:16"}
61
+ {"current_steps": 510, "total_steps": 4953, "loss": 0.3172, "lr": 9.999756551539276e-06, "epoch": 0.10294971108475688, "percentage": 10.3, "elapsed_time": "2:56:15", "remaining_time": "1 day, 1:35:27"}
62
+ {"current_steps": 520, "total_steps": 4953, "loss": 0.3258, "lr": 9.999284570881733e-06, "epoch": 0.10496833287073251, "percentage": 10.5, "elapsed_time": "2:58:32", "remaining_time": "1 day, 1:22:05"}
63
+ {"current_steps": 530, "total_steps": 4953, "loss": 0.3178, "lr": 9.99856420798521e-06, "epoch": 0.10698695465670813, "percentage": 10.7, "elapsed_time": "3:00:53", "remaining_time": "1 day, 1:09:37"}
64
+ {"current_steps": 540, "total_steps": 4953, "loss": 0.3206, "lr": 9.997595498639894e-06, "epoch": 0.10900557644268376, "percentage": 10.9, "elapsed_time": "3:03:15", "remaining_time": "1 day, 0:57:37"}
65
+ {"current_steps": 550, "total_steps": 4953, "loss": 0.3233, "lr": 9.996378490974716e-06, "epoch": 0.11102419822865939, "percentage": 11.1, "elapsed_time": "3:05:33", "remaining_time": "1 day, 0:45:27"}
66
+ {"current_steps": 550, "total_steps": 4953, "eval_loss": 0.31938090920448303, "epoch": 0.11102419822865939, "percentage": 11.1, "elapsed_time": "3:11:19", "remaining_time": "1 day, 1:31:35"}
67
+ {"current_steps": 560, "total_steps": 4953, "loss": 0.3132, "lr": 9.994913245454944e-06, "epoch": 0.113042820014635, "percentage": 11.31, "elapsed_time": "3:13:34", "remaining_time": "1 day, 1:18:31"}
68
+ {"current_steps": 570, "total_steps": 4953, "loss": 0.321, "lr": 9.993199834879187e-06, "epoch": 0.11506144180061063, "percentage": 11.51, "elapsed_time": "3:15:48", "remaining_time": "1 day, 1:05:42"}
69
+ {"current_steps": 580, "total_steps": 4953, "loss": 0.3114, "lr": 9.991238344375774e-06, "epoch": 0.11708006358658626, "percentage": 11.71, "elapsed_time": "3:18:09", "remaining_time": "1 day, 0:54:01"}
70
+ {"current_steps": 590, "total_steps": 4953, "loss": 0.3144, "lr": 9.989028871398531e-06, "epoch": 0.11909868537256188, "percentage": 11.91, "elapsed_time": "3:20:27", "remaining_time": "1 day, 0:42:22"}
71
+ {"current_steps": 600, "total_steps": 4953, "loss": 0.2996, "lr": 9.986571525721938e-06, "epoch": 0.12111730715853751, "percentage": 12.11, "elapsed_time": "3:22:45", "remaining_time": "1 day, 0:31:02"}
72
+ {"current_steps": 600, "total_steps": 4953, "eval_loss": 0.3137419521808624, "epoch": 0.12111730715853751, "percentage": 12.11, "elapsed_time": "3:28:31", "remaining_time": "1 day, 1:12:49"}
73
+ {"current_steps": 610, "total_steps": 4953, "loss": 0.308, "lr": 9.983866429435663e-06, "epoch": 0.12313592894451314, "percentage": 12.32, "elapsed_time": "3:30:50", "remaining_time": "1 day, 1:01:07"}
74
+ {"current_steps": 620, "total_steps": 4953, "loss": 0.321, "lr": 9.980913716938514e-06, "epoch": 0.12515455073048876, "percentage": 12.52, "elapsed_time": "3:33:12", "remaining_time": "1 day, 0:50:03"}
75
+ {"current_steps": 630, "total_steps": 4953, "loss": 0.2976, "lr": 9.977713534931752e-06, "epoch": 0.12717317251646437, "percentage": 12.72, "elapsed_time": "3:35:28", "remaining_time": "1 day, 0:38:34"}
76
+ {"current_steps": 640, "total_steps": 4953, "loss": 0.3123, "lr": 9.974266042411796e-06, "epoch": 0.12919179430244002, "percentage": 12.92, "elapsed_time": "3:37:44", "remaining_time": "1 day, 0:27:23"}
77
+ {"current_steps": 650, "total_steps": 4953, "loss": 0.3089, "lr": 9.970571410662342e-06, "epoch": 0.13121041608841563, "percentage": 13.12, "elapsed_time": "3:40:01", "remaining_time": "1 day, 0:16:30"}
78
+ {"current_steps": 650, "total_steps": 4953, "eval_loss": 0.30751538276672363, "epoch": 0.13121041608841563, "percentage": 13.12, "elapsed_time": "3:45:46", "remaining_time": "1 day, 0:54:38"}
79
+ {"current_steps": 660, "total_steps": 4953, "loss": 0.3046, "lr": 9.966629823245833e-06, "epoch": 0.13322903787439125, "percentage": 13.33, "elapsed_time": "3:48:05", "remaining_time": "1 day, 0:43:38"}
80
+ {"current_steps": 670, "total_steps": 4953, "loss": 0.3144, "lr": 9.962441475994353e-06, "epoch": 0.1352476596603669, "percentage": 13.53, "elapsed_time": "3:50:23", "remaining_time": "1 day, 0:32:47"}
81
+ {"current_steps": 680, "total_steps": 4953, "loss": 0.3031, "lr": 9.95800657699989e-06, "epoch": 0.1372662814463425, "percentage": 13.73, "elapsed_time": "3:52:40", "remaining_time": "1 day, 0:22:06"}
82
+ {"current_steps": 690, "total_steps": 4953, "loss": 0.3058, "lr": 9.953325346604e-06, "epoch": 0.13928490323231812, "percentage": 13.93, "elapsed_time": "3:55:01", "remaining_time": "1 day, 0:12:02"}
83
+ {"current_steps": 700, "total_steps": 4953, "loss": 0.3085, "lr": 9.94839801738686e-06, "epoch": 0.14130352501829377, "percentage": 14.13, "elapsed_time": "3:57:22", "remaining_time": "1 day, 0:02:12"}
84
+ {"current_steps": 700, "total_steps": 4953, "eval_loss": 0.30891653895378113, "epoch": 0.14130352501829377, "percentage": 14.13, "elapsed_time": "4:03:07", "remaining_time": "1 day, 0:37:11"}
85
+ {"current_steps": 710, "total_steps": 4953, "loss": 0.3036, "lr": 9.94322483415571e-06, "epoch": 0.14332214680426938, "percentage": 14.33, "elapsed_time": "4:05:26", "remaining_time": "1 day, 0:26:48"}
86
+ {"current_steps": 720, "total_steps": 4953, "loss": 0.3077, "lr": 9.937806053932693e-06, "epoch": 0.145340768590245, "percentage": 14.54, "elapsed_time": "4:07:48", "remaining_time": "1 day, 0:16:51"}
87
+ {"current_steps": 730, "total_steps": 4953, "loss": 0.2969, "lr": 9.932141945942084e-06, "epoch": 0.14735939037622064, "percentage": 14.74, "elapsed_time": "4:10:05", "remaining_time": "1 day, 0:06:45"}
88
+ {"current_steps": 740, "total_steps": 4953, "loss": 0.2952, "lr": 9.926232791596915e-06, "epoch": 0.14937801216219626, "percentage": 14.94, "elapsed_time": "4:12:21", "remaining_time": "23:56:43"}
89
+ {"current_steps": 750, "total_steps": 4953, "loss": 0.3129, "lr": 9.920078884484992e-06, "epoch": 0.15139663394817188, "percentage": 15.14, "elapsed_time": "4:14:39", "remaining_time": "23:47:05"}
90
+ {"current_steps": 750, "total_steps": 4953, "eval_loss": 0.30419299006462097, "epoch": 0.15139663394817188, "percentage": 15.14, "elapsed_time": "4:20:24", "remaining_time": "1 day, 0:19:21"}
91
+ {"current_steps": 760, "total_steps": 4953, "loss": 0.2936, "lr": 9.913680530354308e-06, "epoch": 0.15341525573414752, "percentage": 15.34, "elapsed_time": "4:22:41", "remaining_time": "1 day, 0:09:18"}
92
+ {"current_steps": 770, "total_steps": 4953, "loss": 0.3047, "lr": 9.907038047097853e-06, "epoch": 0.15543387752012314, "percentage": 15.55, "elapsed_time": "4:25:02", "remaining_time": "23:59:47"}
93
+ {"current_steps": 780, "total_steps": 4953, "loss": 0.2996, "lr": 9.900151764737821e-06, "epoch": 0.15745249930609875, "percentage": 15.75, "elapsed_time": "4:27:20", "remaining_time": "23:50:14"}
94
+ {"current_steps": 790, "total_steps": 4953, "loss": 0.3047, "lr": 9.893022025409217e-06, "epoch": 0.1594711210920744, "percentage": 15.95, "elapsed_time": "4:29:36", "remaining_time": "23:40:46"}
95
+ {"current_steps": 800, "total_steps": 4953, "loss": 0.307, "lr": 9.885649183342844e-06, "epoch": 0.16148974287805, "percentage": 16.15, "elapsed_time": "4:31:54", "remaining_time": "23:31:33"}
96
+ {"current_steps": 800, "total_steps": 4953, "eval_loss": 0.30816882848739624, "epoch": 0.16148974287805, "percentage": 16.15, "elapsed_time": "4:37:40", "remaining_time": "1 day, 0:01:26"}
97
+ {"current_steps": 810, "total_steps": 4953, "loss": 0.298, "lr": 9.878033604847725e-06, "epoch": 0.16350836466402563, "percentage": 16.35, "elapsed_time": "4:39:58", "remaining_time": "23:51:59"}
98
+ {"current_steps": 820, "total_steps": 4953, "loss": 0.2998, "lr": 9.870175668292882e-06, "epoch": 0.16552698645000127, "percentage": 16.56, "elapsed_time": "4:42:11", "remaining_time": "23:42:21"}
99
+ {"current_steps": 830, "total_steps": 4953, "loss": 0.2904, "lr": 9.862075764088555e-06, "epoch": 0.1675456082359769, "percentage": 16.76, "elapsed_time": "4:44:32", "remaining_time": "23:33:28"}
100
+ {"current_steps": 840, "total_steps": 4953, "loss": 0.2928, "lr": 9.853734294666794e-06, "epoch": 0.1695642300219525, "percentage": 16.96, "elapsed_time": "4:46:47", "remaining_time": "23:24:17"}
101
+ {"current_steps": 850, "total_steps": 4953, "loss": 0.3101, "lr": 9.845151674461471e-06, "epoch": 0.17158285180792815, "percentage": 17.16, "elapsed_time": "4:49:05", "remaining_time": "23:15:28"}
102
+ {"current_steps": 850, "total_steps": 4953, "eval_loss": 0.3016580045223236, "epoch": 0.17158285180792815, "percentage": 17.16, "elapsed_time": "4:54:51", "remaining_time": "23:43:16"}
103
+ {"current_steps": 860, "total_steps": 4953, "loss": 0.2964, "lr": 9.83632832988768e-06, "epoch": 0.17360147359390377, "percentage": 17.36, "elapsed_time": "4:57:09", "remaining_time": "23:34:17"}
104
+ {"current_steps": 870, "total_steps": 4953, "loss": 0.3078, "lr": 9.827264699320567e-06, "epoch": 0.17562009537987938, "percentage": 17.57, "elapsed_time": "4:59:31", "remaining_time": "23:25:41"}
105
+ {"current_steps": 880, "total_steps": 4953, "loss": 0.3008, "lr": 9.817961233073531e-06, "epoch": 0.17763871716585503, "percentage": 17.77, "elapsed_time": "5:01:51", "remaining_time": "23:17:06"}
106
+ {"current_steps": 890, "total_steps": 4953, "loss": 0.295, "lr": 9.808418393375872e-06, "epoch": 0.17965733895183064, "percentage": 17.97, "elapsed_time": "5:04:09", "remaining_time": "23:08:33"}
107
+ {"current_steps": 900, "total_steps": 4953, "loss": 0.2798, "lr": 9.798636654349804e-06, "epoch": 0.18167596073780626, "percentage": 18.17, "elapsed_time": "5:06:23", "remaining_time": "22:59:46"}
108
+ {"current_steps": 900, "total_steps": 4953, "eval_loss": 0.2997885048389435, "epoch": 0.18167596073780626, "percentage": 18.17, "elapsed_time": "5:12:08", "remaining_time": "23:25:41"}
109
+ {"current_steps": 910, "total_steps": 4953, "loss": 0.2894, "lr": 9.788616501986916e-06, "epoch": 0.18369458252378187, "percentage": 18.37, "elapsed_time": "5:14:27", "remaining_time": "23:17:03"}
110
+ {"current_steps": 920, "total_steps": 4953, "loss": 0.2913, "lr": 9.778358434124014e-06, "epoch": 0.18571320430975752, "percentage": 18.57, "elapsed_time": "5:16:48", "remaining_time": "23:08:45"}
111
+ {"current_steps": 930, "total_steps": 4953, "loss": 0.3003, "lr": 9.767862960418397e-06, "epoch": 0.18773182609573313, "percentage": 18.78, "elapsed_time": "5:19:05", "remaining_time": "23:00:19"}
112
+ {"current_steps": 940, "total_steps": 4953, "loss": 0.2927, "lr": 9.75713060232253e-06, "epoch": 0.18975044788170875, "percentage": 18.98, "elapsed_time": "5:21:20", "remaining_time": "22:51:51"}
113
+ {"current_steps": 950, "total_steps": 4953, "loss": 0.2833, "lr": 9.746161893058137e-06, "epoch": 0.1917690696676844, "percentage": 19.18, "elapsed_time": "5:23:36", "remaining_time": "22:43:36"}
114
+ {"current_steps": 950, "total_steps": 4953, "eval_loss": 0.2945195138454437, "epoch": 0.1917690696676844, "percentage": 19.18, "elapsed_time": "5:29:22", "remaining_time": "23:07:51"}
115
+ {"current_steps": 960, "total_steps": 4953, "loss": 0.2944, "lr": 9.734957377589707e-06, "epoch": 0.19378769145366, "percentage": 19.38, "elapsed_time": "5:31:36", "remaining_time": "22:59:17"}
116
+ {"current_steps": 970, "total_steps": 4953, "loss": 0.2802, "lr": 9.723517612597417e-06, "epoch": 0.19580631323963563, "percentage": 19.58, "elapsed_time": "5:33:54", "remaining_time": "22:51:03"}
117
+ {"current_steps": 980, "total_steps": 4953, "loss": 0.2982, "lr": 9.711843166449486e-06, "epoch": 0.19782493502561127, "percentage": 19.79, "elapsed_time": "5:36:13", "remaining_time": "22:43:05"}
118
+ {"current_steps": 990, "total_steps": 4953, "loss": 0.2958, "lr": 9.69993461917392e-06, "epoch": 0.1998435568115869, "percentage": 19.99, "elapsed_time": "5:38:31", "remaining_time": "22:35:08"}
119
+ {"current_steps": 1000, "total_steps": 4953, "loss": 0.3025, "lr": 9.687792562429705e-06, "epoch": 0.2018621785975625, "percentage": 20.19, "elapsed_time": "5:40:52", "remaining_time": "22:27:27"}
120
+ {"current_steps": 1000, "total_steps": 4953, "eval_loss": 0.29460784792900085, "epoch": 0.2018621785975625, "percentage": 20.19, "elapsed_time": "5:46:37", "remaining_time": "22:50:12"}
121
+ {"current_steps": 1010, "total_steps": 4953, "loss": 0.2942, "lr": 9.675417599477406e-06, "epoch": 0.20388080038353815, "percentage": 20.39, "elapsed_time": "5:50:11", "remaining_time": "22:47:06"}
122
+ {"current_steps": 1020, "total_steps": 4953, "loss": 0.2844, "lr": 9.6628103451492e-06, "epoch": 0.20589942216951376, "percentage": 20.59, "elapsed_time": "5:52:28", "remaining_time": "22:39:05"}
123
+ {"current_steps": 1030, "total_steps": 4953, "loss": 0.2955, "lr": 9.649971425818321e-06, "epoch": 0.20791804395548938, "percentage": 20.8, "elapsed_time": "5:54:46", "remaining_time": "22:31:16"}
124
+ {"current_steps": 1040, "total_steps": 4953, "loss": 0.3005, "lr": 9.636901479367948e-06, "epoch": 0.20993666574146502, "percentage": 21.0, "elapsed_time": "5:57:05", "remaining_time": "22:23:34"}
125
+ {"current_steps": 1050, "total_steps": 4953, "loss": 0.2925, "lr": 9.623601155159507e-06, "epoch": 0.21195528752744064, "percentage": 21.2, "elapsed_time": "5:59:22", "remaining_time": "22:15:49"}
126
+ {"current_steps": 1050, "total_steps": 4953, "eval_loss": 0.2931045889854431, "epoch": 0.21195528752744064, "percentage": 21.2, "elapsed_time": "6:05:07", "remaining_time": "22:37:13"}
127
+ {"current_steps": 1060, "total_steps": 4953, "loss": 0.2955, "lr": 9.610071114000411e-06, "epoch": 0.21397390931341626, "percentage": 21.4, "elapsed_time": "6:07:25", "remaining_time": "22:29:23"}
128
+ {"current_steps": 1070, "total_steps": 4953, "loss": 0.2956, "lr": 9.596312028111234e-06, "epoch": 0.2159925310993919, "percentage": 21.6, "elapsed_time": "6:09:42", "remaining_time": "22:21:41"}
129
+ {"current_steps": 1080, "total_steps": 4953, "loss": 0.3009, "lr": 9.582324581092295e-06, "epoch": 0.21801115288536752, "percentage": 21.8, "elapsed_time": "6:12:00", "remaining_time": "22:14:04"}
130
+ {"current_steps": 1090, "total_steps": 4953, "loss": 0.2937, "lr": 9.568109467889716e-06, "epoch": 0.22002977467134313, "percentage": 22.01, "elapsed_time": "6:14:16", "remaining_time": "22:06:25"}
131
+ {"current_steps": 1100, "total_steps": 4953, "loss": 0.2903, "lr": 9.55366739476088e-06, "epoch": 0.22204839645731878, "percentage": 22.21, "elapsed_time": "6:16:33", "remaining_time": "21:58:57"}
132
+ {"current_steps": 1100, "total_steps": 4953, "eval_loss": 0.2908966839313507, "epoch": 0.22204839645731878, "percentage": 22.21, "elapsed_time": "6:22:18", "remaining_time": "22:19:06"}
133
+ {"current_steps": 1110, "total_steps": 4953, "loss": 0.2892, "lr": 9.538999079239346e-06, "epoch": 0.2240670182432944, "percentage": 22.41, "elapsed_time": "6:24:38", "remaining_time": "22:11:40"}
134
+ {"current_steps": 1120, "total_steps": 4953, "loss": 0.2787, "lr": 9.524105250099205e-06, "epoch": 0.22608564002927, "percentage": 22.61, "elapsed_time": "6:26:56", "remaining_time": "22:04:15"}
135
+ {"current_steps": 1130, "total_steps": 4953, "loss": 0.2782, "lr": 9.508986647318862e-06, "epoch": 0.22810426181524565, "percentage": 22.81, "elapsed_time": "6:29:16", "remaining_time": "21:56:58"}
136
+ {"current_steps": 1140, "total_steps": 4953, "loss": 0.2817, "lr": 9.493644022044275e-06, "epoch": 0.23012288360122127, "percentage": 23.02, "elapsed_time": "6:31:36", "remaining_time": "21:49:48"}
137
+ {"current_steps": 1150, "total_steps": 4953, "loss": 0.2843, "lr": 9.478078136551641e-06, "epoch": 0.23214150538719688, "percentage": 23.22, "elapsed_time": "6:33:56", "remaining_time": "21:42:45"}
138
+ {"current_steps": 1150, "total_steps": 4953, "eval_loss": 0.2857635021209717, "epoch": 0.23214150538719688, "percentage": 23.22, "elapsed_time": "6:39:41", "remaining_time": "22:01:46"}
139
+ {"current_steps": 1160, "total_steps": 4953, "loss": 0.2791, "lr": 9.462289764209518e-06, "epoch": 0.23416012717317253, "percentage": 23.42, "elapsed_time": "6:42:01", "remaining_time": "21:54:32"}
140
+ {"current_steps": 1170, "total_steps": 4953, "loss": 0.2898, "lr": 9.446279689440394e-06, "epoch": 0.23617874895914814, "percentage": 23.62, "elapsed_time": "6:44:22", "remaining_time": "21:47:30"}
141
+ {"current_steps": 1180, "total_steps": 4953, "loss": 0.2857, "lr": 9.430048707681732e-06, "epoch": 0.23819737074512376, "percentage": 23.82, "elapsed_time": "6:46:38", "remaining_time": "21:40:12"}
142
+ {"current_steps": 1190, "total_steps": 4953, "loss": 0.2853, "lr": 9.413597625346438e-06, "epoch": 0.2402159925310994, "percentage": 24.03, "elapsed_time": "6:48:54", "remaining_time": "21:33:02"}
143
+ {"current_steps": 1200, "total_steps": 4953, "loss": 0.2833, "lr": 9.396927259782793e-06, "epoch": 0.24223461431707502, "percentage": 24.23, "elapsed_time": "6:51:10", "remaining_time": "21:25:58"}
144
+ {"current_steps": 1200, "total_steps": 4953, "eval_loss": 0.2929224669933319, "epoch": 0.24223461431707502, "percentage": 24.23, "elapsed_time": "6:56:55", "remaining_time": "21:43:57"}
145
+ {"current_steps": 1210, "total_steps": 4953, "loss": 0.2957, "lr": 9.380038439233852e-06, "epoch": 0.24425323610305064, "percentage": 24.43, "elapsed_time": "6:59:16", "remaining_time": "21:36:57"}
146
+ {"current_steps": 1220, "total_steps": 4953, "loss": 0.2724, "lr": 9.36293200279629e-06, "epoch": 0.24627185788902628, "percentage": 24.63, "elapsed_time": "7:01:33", "remaining_time": "21:29:53"}
147
+ {"current_steps": 1230, "total_steps": 4953, "loss": 0.2874, "lr": 9.345608800378716e-06, "epoch": 0.2482904796750019, "percentage": 24.83, "elapsed_time": "7:03:52", "remaining_time": "21:23:00"}
148
+ {"current_steps": 1240, "total_steps": 4953, "loss": 0.2894, "lr": 9.328069692659437e-06, "epoch": 0.2503091014609775, "percentage": 25.04, "elapsed_time": "7:06:13", "remaining_time": "21:16:17"}
149
+ {"current_steps": 1250, "total_steps": 4953, "loss": 0.2797, "lr": 9.310315551043713e-06, "epoch": 0.25232772324695313, "percentage": 25.24, "elapsed_time": "7:08:36", "remaining_time": "21:09:41"}
150
+ {"current_steps": 1250, "total_steps": 4953, "eval_loss": 0.28447580337524414, "epoch": 0.25232772324695313, "percentage": 25.24, "elapsed_time": "7:14:21", "remaining_time": "21:26:43"}
151
+ {"current_steps": 1260, "total_steps": 4953, "loss": 0.292, "lr": 9.292347257620442e-06, "epoch": 0.25434634503292874, "percentage": 25.44, "elapsed_time": "7:16:39", "remaining_time": "21:19:48"}
152
+ {"current_steps": 1270, "total_steps": 4953, "loss": 0.2776, "lr": 9.274165705118356e-06, "epoch": 0.2563649668189044, "percentage": 25.64, "elapsed_time": "7:18:56", "remaining_time": "21:12:54"}
153
+ {"current_steps": 1280, "total_steps": 4953, "loss": 0.2821, "lr": 9.255771796861649e-06, "epoch": 0.25838358860488003, "percentage": 25.84, "elapsed_time": "7:21:14", "remaining_time": "21:06:08"}
154
+ {"current_steps": 1290, "total_steps": 4953, "loss": 0.2926, "lr": 9.237166446725108e-06, "epoch": 0.26040221039085565, "percentage": 26.04, "elapsed_time": "7:23:31", "remaining_time": "20:59:24"}
155
+ {"current_steps": 1300, "total_steps": 4953, "loss": 0.2752, "lr": 9.2183505790887e-06, "epoch": 0.26242083217683126, "percentage": 26.25, "elapsed_time": "7:25:48", "remaining_time": "20:52:43"}
156
+ {"current_steps": 1300, "total_steps": 4953, "eval_loss": 0.28487566113471985, "epoch": 0.26242083217683126, "percentage": 26.25, "elapsed_time": "7:31:33", "remaining_time": "21:08:53"}
157
+ {"current_steps": 1310, "total_steps": 4953, "loss": 0.2784, "lr": 9.199325128791658e-06, "epoch": 0.2644394539628069, "percentage": 26.45, "elapsed_time": "7:33:50", "remaining_time": "21:02:06"}
158
+ {"current_steps": 1320, "total_steps": 4953, "loss": 0.2932, "lr": 9.180091041086022e-06, "epoch": 0.2664580757487825, "percentage": 26.65, "elapsed_time": "7:36:11", "remaining_time": "20:55:32"}
159
+ {"current_steps": 1330, "total_steps": 4953, "loss": 0.2697, "lr": 9.160649271589679e-06, "epoch": 0.26847669753475817, "percentage": 26.85, "elapsed_time": "7:38:26", "remaining_time": "20:48:49"}
160
+ {"current_steps": 1340, "total_steps": 4953, "loss": 0.272, "lr": 9.14100078623889e-06, "epoch": 0.2704953193207338, "percentage": 27.05, "elapsed_time": "7:40:46", "remaining_time": "20:42:21"}
161
+ {"current_steps": 1350, "total_steps": 4953, "loss": 0.2896, "lr": 9.121146561240293e-06, "epoch": 0.2725139411067094, "percentage": 27.26, "elapsed_time": "7:43:06", "remaining_time": "20:35:59"}
162
+ {"current_steps": 1350, "total_steps": 4953, "eval_loss": 0.27954837679862976, "epoch": 0.2725139411067094, "percentage": 27.26, "elapsed_time": "7:48:51", "remaining_time": "20:51:20"}
163
+ {"current_steps": 1360, "total_steps": 4953, "loss": 0.2792, "lr": 9.101087583022401e-06, "epoch": 0.274532562892685, "percentage": 27.46, "elapsed_time": "7:51:10", "remaining_time": "20:44:48"}
164
+ {"current_steps": 1370, "total_steps": 4953, "loss": 0.283, "lr": 9.080824848186603e-06, "epoch": 0.27655118467866063, "percentage": 27.66, "elapsed_time": "7:53:27", "remaining_time": "20:38:15"}
165
+ {"current_steps": 1380, "total_steps": 4953, "loss": 0.2725, "lr": 9.060359363457631e-06, "epoch": 0.27856980646463625, "percentage": 27.86, "elapsed_time": "7:55:46", "remaining_time": "20:31:50"}
166
+ {"current_steps": 1390, "total_steps": 4953, "loss": 0.2793, "lr": 9.03969214563356e-06, "epoch": 0.2805884282506119, "percentage": 28.06, "elapsed_time": "7:58:03", "remaining_time": "20:25:25"}
167
+ {"current_steps": 1400, "total_steps": 4953, "loss": 0.2781, "lr": 9.018824221535282e-06, "epoch": 0.28260705003658754, "percentage": 28.27, "elapsed_time": "8:00:21", "remaining_time": "20:19:05"}
168
+ {"current_steps": 1400, "total_steps": 4953, "eval_loss": 0.2789679765701294, "epoch": 0.28260705003658754, "percentage": 28.27, "elapsed_time": "8:06:07", "remaining_time": "20:33:41"}
169
+ {"current_steps": 1410, "total_steps": 4953, "loss": 0.267, "lr": 8.997756627955489e-06, "epoch": 0.28462567182256315, "percentage": 28.47, "elapsed_time": "8:08:24", "remaining_time": "20:27:15"}
170
+ {"current_steps": 1420, "total_steps": 4953, "loss": 0.2725, "lr": 8.976490411607165e-06, "epoch": 0.28664429360853877, "percentage": 28.67, "elapsed_time": "8:10:41", "remaining_time": "20:20:52"}
171
+ {"current_steps": 1430, "total_steps": 4953, "loss": 0.2747, "lr": 8.955026629071574e-06, "epoch": 0.2886629153945144, "percentage": 28.87, "elapsed_time": "8:13:01", "remaining_time": "20:14:37"}
172
+ {"current_steps": 1440, "total_steps": 4953, "loss": 0.278, "lr": 8.933366346745778e-06, "epoch": 0.29068153718049, "percentage": 29.07, "elapsed_time": "8:15:17", "remaining_time": "20:08:18"}
173
+ {"current_steps": 1450, "total_steps": 4953, "loss": 0.2853, "lr": 8.911510640789644e-06, "epoch": 0.2927001589664657, "percentage": 29.28, "elapsed_time": "8:17:34", "remaining_time": "20:02:04"}
174
+ {"current_steps": 1450, "total_steps": 4953, "eval_loss": 0.2782709002494812, "epoch": 0.2927001589664657, "percentage": 29.28, "elapsed_time": "8:23:19", "remaining_time": "20:15:58"}
175
+ {"current_steps": 1460, "total_steps": 4953, "loss": 0.2769, "lr": 8.889460597072378e-06, "epoch": 0.2947187807524413, "percentage": 29.48, "elapsed_time": "8:25:42", "remaining_time": "20:09:54"}
176
+ {"current_steps": 1470, "total_steps": 4953, "loss": 0.2821, "lr": 8.867217311118583e-06, "epoch": 0.2967374025384169, "percentage": 29.68, "elapsed_time": "8:28:00", "remaining_time": "20:03:40"}
177
+ {"current_steps": 1480, "total_steps": 4953, "loss": 0.2638, "lr": 8.84478188805382e-06, "epoch": 0.2987560243243925, "percentage": 29.88, "elapsed_time": "8:30:18", "remaining_time": "19:57:29"}
178
+ {"current_steps": 1490, "total_steps": 4953, "loss": 0.2693, "lr": 8.822155442549702e-06, "epoch": 0.30077464611036814, "percentage": 30.08, "elapsed_time": "8:32:36", "remaining_time": "19:51:23"}
179
+ {"current_steps": 1500, "total_steps": 4953, "loss": 0.2758, "lr": 8.799339098768525e-06, "epoch": 0.30279326789634375, "percentage": 30.28, "elapsed_time": "8:34:54", "remaining_time": "19:45:19"}
180
+ {"current_steps": 1500, "total_steps": 4953, "eval_loss": 0.27583077549934387, "epoch": 0.30279326789634375, "percentage": 30.28, "elapsed_time": "8:40:39", "remaining_time": "19:58:33"}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b24821dcd93ebe4a6264dcc1c2944b7ab905e539a9bf15ce50d9c0072c8071c
3
  size 7544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65fed208eb9328d1ad4245f012e27e451f39de1beca8d2a19d2380cd4a4ad511
3
  size 7544