sedrickkeh commited on
Commit
8417768
·
verified ·
1 Parent(s): 0118cb8

Training in progress, epoch 0

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:befeb9f6fe6bc91634b89944d4d0d6061670084aae2ebe2f7baebec6f1cc1cd2
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7974697220db89903a1e52c85aabb493b91c08af8f790ddd35d8f451ccbca550
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eed4bf5ac5c1d7804248c8c4de16ea6835b2566017019e92464a44351a75cda0
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dd315599590e0c1296335dfb37d73cee841438594f8b592761cc053c330db33
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a10eaec3142e44afe4768e7e009c7de1e7b754634c0d8304e51346206e374a97
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6019b6da49b2642d2c8780c589d667b917457dfd9e5a4692ebe2ab05e7bb0568
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b1bf9fcca80776260f0d0b31bf05e013752f724c3977660973ea98f9c6e5c03
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:478df5a84c3c87b23bcaa9bb4bc05e15876992ce185ae028853ef14b1ff90bea
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -1,127 +1,47 @@
1
- {"current_steps": 1, "total_steps": 126, "loss": 1.1119, "lr": 7.692307692307694e-07, "epoch": 0.023529411764705882, "percentage": 0.79, "elapsed_time": "0:00:34", "remaining_time": "1:12:52"}
2
- {"current_steps": 2, "total_steps": 126, "loss": 1.1537, "lr": 1.5384615384615387e-06, "epoch": 0.047058823529411764, "percentage": 1.59, "elapsed_time": "0:01:10", "remaining_time": "1:12:39"}
3
- {"current_steps": 3, "total_steps": 126, "loss": 1.1205, "lr": 2.307692307692308e-06, "epoch": 0.07058823529411765, "percentage": 2.38, "elapsed_time": "0:01:39", "remaining_time": "1:08:07"}
4
- {"current_steps": 4, "total_steps": 126, "loss": 1.0476, "lr": 3.0769230769230774e-06, "epoch": 0.09411764705882353, "percentage": 3.17, "elapsed_time": "0:02:05", "remaining_time": "1:03:39"}
5
- {"current_steps": 5, "total_steps": 126, "loss": 1.0753, "lr": 3.846153846153847e-06, "epoch": 0.11764705882352941, "percentage": 3.97, "elapsed_time": "0:02:41", "remaining_time": "1:05:18"}
6
- {"current_steps": 6, "total_steps": 126, "loss": 0.9761, "lr": 4.615384615384616e-06, "epoch": 0.1411764705882353, "percentage": 4.76, "elapsed_time": "0:03:12", "remaining_time": "1:04:06"}
7
- {"current_steps": 7, "total_steps": 126, "loss": 0.9581, "lr": 5.384615384615385e-06, "epoch": 0.16470588235294117, "percentage": 5.56, "elapsed_time": "0:03:47", "remaining_time": "1:04:21"}
8
- {"current_steps": 8, "total_steps": 126, "loss": 1.0193, "lr": 6.153846153846155e-06, "epoch": 0.18823529411764706, "percentage": 6.35, "elapsed_time": "0:04:19", "remaining_time": "1:03:45"}
9
- {"current_steps": 9, "total_steps": 126, "loss": 0.951, "lr": 6.923076923076923e-06, "epoch": 0.21176470588235294, "percentage": 7.14, "elapsed_time": "0:04:53", "remaining_time": "1:03:39"}
10
- {"current_steps": 10, "total_steps": 126, "loss": 1.0139, "lr": 7.692307692307694e-06, "epoch": 0.23529411764705882, "percentage": 7.94, "elapsed_time": "0:05:24", "remaining_time": "1:02:45"}
11
- {"current_steps": 11, "total_steps": 126, "loss": 0.9087, "lr": 8.461538461538462e-06, "epoch": 0.25882352941176473, "percentage": 8.73, "elapsed_time": "0:05:51", "remaining_time": "1:01:13"}
12
- {"current_steps": 12, "total_steps": 126, "loss": 1.0375, "lr": 9.230769230769232e-06, "epoch": 0.2823529411764706, "percentage": 9.52, "elapsed_time": "0:06:25", "remaining_time": "1:00:58"}
13
- {"current_steps": 13, "total_steps": 126, "loss": 0.9651, "lr": 1e-05, "epoch": 0.3058823529411765, "percentage": 10.32, "elapsed_time": "0:06:59", "remaining_time": "1:00:43"}
14
- {"current_steps": 14, "total_steps": 126, "loss": 0.9098, "lr": 9.998067787472772e-06, "epoch": 0.32941176470588235, "percentage": 11.11, "elapsed_time": "0:07:27", "remaining_time": "0:59:38"}
15
- {"current_steps": 15, "total_steps": 126, "loss": 0.8308, "lr": 9.992272643269181e-06, "epoch": 0.35294117647058826, "percentage": 11.9, "elapsed_time": "0:07:50", "remaining_time": "0:58:04"}
16
- {"current_steps": 16, "total_steps": 126, "loss": 0.9148, "lr": 9.982619046369321e-06, "epoch": 0.3764705882352941, "percentage": 12.7, "elapsed_time": "0:08:23", "remaining_time": "0:57:44"}
17
- {"current_steps": 17, "total_steps": 126, "loss": 0.8948, "lr": 9.96911445789354e-06, "epoch": 0.4, "percentage": 13.49, "elapsed_time": "0:08:58", "remaining_time": "0:57:32"}
18
- {"current_steps": 18, "total_steps": 126, "loss": 0.8592, "lr": 9.951769315335843e-06, "epoch": 0.4235294117647059, "percentage": 14.29, "elapsed_time": "0:09:33", "remaining_time": "0:57:21"}
19
- {"current_steps": 19, "total_steps": 126, "loss": 0.8315, "lr": 9.930597024496933e-06, "epoch": 0.4470588235294118, "percentage": 15.08, "elapsed_time": "0:10:03", "remaining_time": "0:56:39"}
20
- {"current_steps": 20, "total_steps": 126, "loss": 0.808, "lr": 9.905613949123036e-06, "epoch": 0.47058823529411764, "percentage": 15.87, "elapsed_time": "0:10:30", "remaining_time": "0:55:42"}
21
- {"current_steps": 21, "total_steps": 126, "loss": 0.8833, "lr": 9.87683939825864e-06, "epoch": 0.49411764705882355, "percentage": 16.67, "elapsed_time": "0:10:59", "remaining_time": "0:54:55"}
22
- {"current_steps": 22, "total_steps": 126, "loss": 0.873, "lr": 9.844295611322804e-06, "epoch": 0.5176470588235295, "percentage": 17.46, "elapsed_time": "0:11:30", "remaining_time": "0:54:24"}
23
- {"current_steps": 23, "total_steps": 126, "loss": 0.7802, "lr": 9.808007740920647e-06, "epoch": 0.5411764705882353, "percentage": 18.25, "elapsed_time": "0:11:56", "remaining_time": "0:53:27"}
24
- {"current_steps": 24, "total_steps": 126, "loss": 0.8134, "lr": 9.768003833403278e-06, "epoch": 0.5647058823529412, "percentage": 19.05, "elapsed_time": "0:12:23", "remaining_time": "0:52:41"}
25
- {"current_steps": 25, "total_steps": 126, "loss": 0.8358, "lr": 9.724314807191197e-06, "epoch": 0.5882352941176471, "percentage": 19.84, "elapsed_time": "0:12:58", "remaining_time": "0:52:26"}
26
- {"current_steps": 26, "total_steps": 126, "loss": 0.8229, "lr": 9.6769744288779e-06, "epoch": 0.611764705882353, "percentage": 20.63, "elapsed_time": "0:13:27", "remaining_time": "0:51:46"}
27
- {"current_steps": 27, "total_steps": 126, "loss": 0.7928, "lr": 9.626019287132202e-06, "epoch": 0.6352941176470588, "percentage": 21.43, "elapsed_time": "0:13:51", "remaining_time": "0:50:49"}
28
- {"current_steps": 28, "total_steps": 126, "loss": 0.8129, "lr": 9.571488764419381e-06, "epoch": 0.6588235294117647, "percentage": 22.22, "elapsed_time": "0:14:26", "remaining_time": "0:50:33"}
29
- {"current_steps": 29, "total_steps": 126, "loss": 0.8572, "lr": 9.51342500656308e-06, "epoch": 0.6823529411764706, "percentage": 23.02, "elapsed_time": "0:14:59", "remaining_time": "0:50:07"}
30
- {"current_steps": 30, "total_steps": 126, "loss": 0.8103, "lr": 9.451872890171419e-06, "epoch": 0.7058823529411765, "percentage": 23.81, "elapsed_time": "0:15:27", "remaining_time": "0:49:28"}
31
- {"current_steps": 31, "total_steps": 126, "loss": 0.897, "lr": 9.386879987952549e-06, "epoch": 0.7294117647058823, "percentage": 24.6, "elapsed_time": "0:15:55", "remaining_time": "0:48:48"}
32
- {"current_steps": 32, "total_steps": 126, "loss": 0.818, "lr": 9.318496531946411e-06, "epoch": 0.7529411764705882, "percentage": 25.4, "elapsed_time": "0:16:26", "remaining_time": "0:48:17"}
33
- {"current_steps": 33, "total_steps": 126, "loss": 0.8332, "lr": 9.246775374701139e-06, "epoch": 0.7764705882352941, "percentage": 26.19, "elapsed_time": "0:16:52", "remaining_time": "0:47:33"}
34
- {"current_steps": 34, "total_steps": 126, "loss": 0.8584, "lr": 9.171771948424138e-06, "epoch": 0.8, "percentage": 26.98, "elapsed_time": "0:17:30", "remaining_time": "0:47:21"}
35
- {"current_steps": 35, "total_steps": 126, "loss": 0.8726, "lr": 9.093544222139338e-06, "epoch": 0.8235294117647058, "percentage": 27.78, "elapsed_time": "0:18:07", "remaining_time": "0:47:06"}
36
- {"current_steps": 36, "total_steps": 126, "loss": 0.7851, "lr": 9.012152656883824e-06, "epoch": 0.8470588235294118, "percentage": 28.57, "elapsed_time": "0:18:34", "remaining_time": "0:46:27"}
37
- {"current_steps": 37, "total_steps": 126, "loss": 0.8347, "lr": 8.927660158978392e-06, "epoch": 0.8705882352941177, "percentage": 29.37, "elapsed_time": "0:19:07", "remaining_time": "0:46:00"}
38
- {"current_steps": 38, "total_steps": 126, "loss": 0.8419, "lr": 8.84013203140821e-06, "epoch": 0.8941176470588236, "percentage": 30.16, "elapsed_time": "0:19:42", "remaining_time": "0:45:39"}
39
- {"current_steps": 39, "total_steps": 126, "loss": 0.776, "lr": 8.749635923351108e-06, "epoch": 0.9176470588235294, "percentage": 30.95, "elapsed_time": "0:20:12", "remaining_time": "0:45:05"}
40
- {"current_steps": 40, "total_steps": 126, "loss": 0.7207, "lr": 8.656241777892544e-06, "epoch": 0.9411764705882353, "percentage": 31.75, "elapsed_time": "0:20:38", "remaining_time": "0:44:23"}
41
- {"current_steps": 41, "total_steps": 126, "loss": 0.7694, "lr": 8.56002177796765e-06, "epoch": 0.9647058823529412, "percentage": 32.54, "elapsed_time": "0:21:06", "remaining_time": "0:43:44"}
42
- {"current_steps": 42, "total_steps": 126, "loss": 0.7732, "lr": 8.461050290572114e-06, "epoch": 0.9882352941176471, "percentage": 33.33, "elapsed_time": "0:21:38", "remaining_time": "0:43:17"}
43
- {"current_steps": 43, "total_steps": 126, "loss": 1.3119, "lr": 8.359403809285054e-06, "epoch": 1.0156862745098039, "percentage": 34.13, "elapsed_time": "0:22:55", "remaining_time": "0:44:14"}
44
- {"current_steps": 44, "total_steps": 126, "loss": 0.7665, "lr": 8.255160895148263e-06, "epoch": 1.0392156862745099, "percentage": 34.92, "elapsed_time": "0:23:25", "remaining_time": "0:43:38"}
45
  {"current_steps": 45, "total_steps": 126, "loss": 0.7368, "lr": 8.14840211594757e-06, "epoch": 1.0627450980392157, "percentage": 35.71, "elapsed_time": "0:23:54", "remaining_time": "0:43:01"}
46
  {"current_steps": 46, "total_steps": 126, "loss": 0.7976, "lr": 8.039209983943201e-06, "epoch": 1.0862745098039215, "percentage": 36.51, "elapsed_time": "0:24:22", "remaining_time": "0:42:23"}
47
  {"current_steps": 47, "total_steps": 126, "loss": 0.7109, "lr": 7.927668892097288e-06, "epoch": 1.1098039215686275, "percentage": 37.3, "elapsed_time": "0:24:45", "remaining_time": "0:41:37"}
48
- {"current_steps": 48, "total_steps": 126, "loss": 0.738, "lr": 7.81386504884782e-06, "epoch": 1.1333333333333333, "percentage": 38.1, "elapsed_time": "0:25:19", "remaining_time": "0:41:08"}
49
- {"current_steps": 49, "total_steps": 126, "loss": 0.8269, "lr": 7.697886411479422e-06, "epoch": 1.156862745098039, "percentage": 38.89, "elapsed_time": "0:25:48", "remaining_time": "0:40:33"}
50
- {"current_steps": 50, "total_steps": 126, "loss": 0.7993, "lr": 7.579822618142505e-06, "epoch": 1.1803921568627451, "percentage": 39.68, "elapsed_time": "0:26:19", "remaining_time": "0:40:01"}
51
- {"current_steps": 51, "total_steps": 126, "loss": 0.8325, "lr": 7.459764918573264e-06, "epoch": 1.203921568627451, "percentage": 40.48, "elapsed_time": "0:26:52", "remaining_time": "0:39:31"}
52
- {"current_steps": 52, "total_steps": 126, "loss": 0.73, "lr": 7.3378061035681415e-06, "epoch": 1.227450980392157, "percentage": 41.27, "elapsed_time": "0:27:19", "remaining_time": "0:38:53"}
53
- {"current_steps": 53, "total_steps": 126, "loss": 0.7398, "lr": 7.2140404332671986e-06, "epoch": 1.2509803921568627, "percentage": 42.06, "elapsed_time": "0:27:47", "remaining_time": "0:38:16"}
54
- {"current_steps": 54, "total_steps": 126, "loss": 0.8289, "lr": 7.088563564301874e-06, "epoch": 1.2745098039215685, "percentage": 42.86, "elapsed_time": "0:28:18", "remaining_time": "0:37:45"}
55
- {"current_steps": 55, "total_steps": 126, "loss": 0.7454, "lr": 6.961472475863406e-06, "epoch": 1.2980392156862746, "percentage": 43.65, "elapsed_time": "0:28:57", "remaining_time": "0:37:22"}
56
- {"current_steps": 56, "total_steps": 126, "loss": 0.6694, "lr": 6.832865394749065e-06, "epoch": 1.3215686274509804, "percentage": 44.44, "elapsed_time": "0:29:25", "remaining_time": "0:36:46"}
57
- {"current_steps": 57, "total_steps": 126, "loss": 0.8093, "lr": 6.702841719444141e-06, "epoch": 1.3450980392156864, "percentage": 45.24, "elapsed_time": "0:29:57", "remaining_time": "0:36:16"}
58
- {"current_steps": 58, "total_steps": 126, "loss": 0.7096, "lr": 6.571501943298335e-06, "epoch": 1.3686274509803922, "percentage": 46.03, "elapsed_time": "0:30:28", "remaining_time": "0:35:43"}
59
- {"current_steps": 59, "total_steps": 126, "loss": 0.8138, "lr": 6.4389475768559675e-06, "epoch": 1.392156862745098, "percentage": 46.83, "elapsed_time": "0:30:56", "remaining_time": "0:35:07"}
60
- {"current_steps": 60, "total_steps": 126, "loss": 0.6619, "lr": 6.305281069399989e-06, "epoch": 1.415686274509804, "percentage": 47.62, "elapsed_time": "0:31:19", "remaining_time": "0:34:27"}
61
- {"current_steps": 61, "total_steps": 126, "loss": 0.693, "lr": 6.17060572977047e-06, "epoch": 1.4392156862745098, "percentage": 48.41, "elapsed_time": "0:31:51", "remaining_time": "0:33:56"}
62
- {"current_steps": 62, "total_steps": 126, "loss": 0.7561, "lr": 6.035025646518747e-06, "epoch": 1.4627450980392158, "percentage": 49.21, "elapsed_time": "0:32:29", "remaining_time": "0:33:31"}
63
- {"current_steps": 63, "total_steps": 126, "loss": 0.7799, "lr": 5.898645607458941e-06, "epoch": 1.4862745098039216, "percentage": 50.0, "elapsed_time": "0:32:57", "remaining_time": "0:32:57"}
64
- {"current_steps": 64, "total_steps": 126, "loss": 0.7374, "lr": 5.761571018679025e-06, "epoch": 1.5098039215686274, "percentage": 50.79, "elapsed_time": "0:33:24", "remaining_time": "0:32:21"}
65
- {"current_steps": 65, "total_steps": 126, "loss": 0.8134, "lr": 5.623907823074044e-06, "epoch": 1.5333333333333332, "percentage": 51.59, "elapsed_time": "0:34:01", "remaining_time": "0:31:56"}
66
- {"current_steps": 66, "total_steps": 126, "loss": 0.7933, "lr": 5.48576241846443e-06, "epoch": 1.5568627450980392, "percentage": 52.38, "elapsed_time": "0:34:37", "remaining_time": "0:31:28"}
67
- {"current_steps": 67, "total_steps": 126, "loss": 0.7209, "lr": 5.347241575362729e-06, "epoch": 1.5803921568627453, "percentage": 53.17, "elapsed_time": "0:35:09", "remaining_time": "0:30:57"}
68
- {"current_steps": 68, "total_steps": 126, "loss": 0.7746, "lr": 5.208452354452275e-06, "epoch": 1.603921568627451, "percentage": 53.97, "elapsed_time": "0:35:45", "remaining_time": "0:30:29"}
69
- {"current_steps": 69, "total_steps": 126, "loss": 0.7635, "lr": 5.069502023841576e-06, "epoch": 1.6274509803921569, "percentage": 54.76, "elapsed_time": "0:36:21", "remaining_time": "0:30:01"}
70
- {"current_steps": 70, "total_steps": 126, "loss": 0.7708, "lr": 4.9304979761584256e-06, "epoch": 1.6509803921568627, "percentage": 55.56, "elapsed_time": "0:36:49", "remaining_time": "0:29:27"}
71
- {"current_steps": 71, "total_steps": 126, "loss": 0.6827, "lr": 4.791547645547727e-06, "epoch": 1.6745098039215687, "percentage": 56.35, "elapsed_time": "0:37:17", "remaining_time": "0:28:53"}
72
- {"current_steps": 72, "total_steps": 126, "loss": 0.794, "lr": 4.652758424637271e-06, "epoch": 1.6980392156862745, "percentage": 57.14, "elapsed_time": "0:37:49", "remaining_time": "0:28:22"}
73
- {"current_steps": 73, "total_steps": 126, "loss": 0.7368, "lr": 4.514237581535571e-06, "epoch": 1.7215686274509805, "percentage": 57.94, "elapsed_time": "0:38:18", "remaining_time": "0:27:48"}
74
- {"current_steps": 74, "total_steps": 126, "loss": 0.6936, "lr": 4.3760921769259585e-06, "epoch": 1.7450980392156863, "percentage": 58.73, "elapsed_time": "0:38:47", "remaining_time": "0:27:15"}
75
- {"current_steps": 75, "total_steps": 126, "loss": 0.7475, "lr": 4.2384289813209754e-06, "epoch": 1.768627450980392, "percentage": 59.52, "elapsed_time": "0:39:15", "remaining_time": "0:26:41"}
76
- {"current_steps": 76, "total_steps": 126, "loss": 0.7358, "lr": 4.101354392541061e-06, "epoch": 1.792156862745098, "percentage": 60.32, "elapsed_time": "0:39:46", "remaining_time": "0:26:09"}
77
- {"current_steps": 77, "total_steps": 126, "loss": 0.7331, "lr": 3.964974353481254e-06, "epoch": 1.815686274509804, "percentage": 61.11, "elapsed_time": "0:40:16", "remaining_time": "0:25:37"}
78
- {"current_steps": 78, "total_steps": 126, "loss": 0.7295, "lr": 3.829394270229531e-06, "epoch": 1.83921568627451, "percentage": 61.9, "elapsed_time": "0:40:48", "remaining_time": "0:25:06"}
79
- {"current_steps": 79, "total_steps": 126, "loss": 0.642, "lr": 3.694718930600012e-06, "epoch": 1.8627450980392157, "percentage": 62.7, "elapsed_time": "0:41:22", "remaining_time": "0:24:37"}
80
- {"current_steps": 80, "total_steps": 126, "loss": 0.7889, "lr": 3.5610524231440324e-06, "epoch": 1.8862745098039215, "percentage": 63.49, "elapsed_time": "0:41:56", "remaining_time": "0:24:06"}
81
- {"current_steps": 81, "total_steps": 126, "loss": 0.7499, "lr": 3.428498056701665e-06, "epoch": 1.9098039215686273, "percentage": 64.29, "elapsed_time": "0:42:24", "remaining_time": "0:23:33"}
82
- {"current_steps": 82, "total_steps": 126, "loss": 0.7663, "lr": 3.2971582805558622e-06, "epoch": 1.9333333333333333, "percentage": 65.08, "elapsed_time": "0:42:56", "remaining_time": "0:23:02"}
83
- {"current_steps": 83, "total_steps": 126, "loss": 0.7652, "lr": 3.167134605250938e-06, "epoch": 1.9568627450980394, "percentage": 65.87, "elapsed_time": "0:43:27", "remaining_time": "0:22:30"}
84
- {"current_steps": 84, "total_steps": 126, "loss": 0.7709, "lr": 3.0385275241365965e-06, "epoch": 1.9803921568627452, "percentage": 66.67, "elapsed_time": "0:44:03", "remaining_time": "0:22:01"}
85
- {"current_steps": 85, "total_steps": 126, "loss": 1.2373, "lr": 2.9114364356981274e-06, "epoch": 2.007843137254902, "percentage": 67.46, "elapsed_time": "0:45:31", "remaining_time": "0:21:57"}
86
- {"current_steps": 86, "total_steps": 126, "loss": 0.7255, "lr": 2.7859595667328027e-06, "epoch": 2.0313725490196077, "percentage": 68.25, "elapsed_time": "0:46:03", "remaining_time": "0:21:25"}
87
- {"current_steps": 87, "total_steps": 126, "loss": 0.6407, "lr": 2.6621938964318593e-06, "epoch": 2.0549019607843135, "percentage": 69.05, "elapsed_time": "0:46:30", "remaining_time": "0:20:50"}
88
- {"current_steps": 88, "total_steps": 126, "loss": 0.6873, "lr": 2.5402350814267364e-06, "epoch": 2.0784313725490198, "percentage": 69.84, "elapsed_time": "0:46:56", "remaining_time": "0:20:16"}
89
- {"current_steps": 89, "total_steps": 126, "loss": 0.6542, "lr": 2.4201773818574956e-06, "epoch": 2.1019607843137256, "percentage": 70.63, "elapsed_time": "0:47:27", "remaining_time": "0:19:43"}
90
- {"current_steps": 90, "total_steps": 126, "loss": 0.6809, "lr": 2.302113588520578e-06, "epoch": 2.1254901960784314, "percentage": 71.43, "elapsed_time": "0:47:57", "remaining_time": "0:19:10"}
91
- {"current_steps": 91, "total_steps": 126, "loss": 0.6087, "lr": 2.1861349511521817e-06, "epoch": 2.149019607843137, "percentage": 72.22, "elapsed_time": "0:48:27", "remaining_time": "0:18:38"}
92
- {"current_steps": 92, "total_steps": 126, "loss": 0.9135, "lr": 2.072331107902713e-06, "epoch": 2.172549019607843, "percentage": 73.02, "elapsed_time": "0:49:00", "remaining_time": "0:18:06"}
93
- {"current_steps": 93, "total_steps": 126, "loss": 0.6736, "lr": 1.960790016056801e-06, "epoch": 2.196078431372549, "percentage": 73.81, "elapsed_time": "0:49:30", "remaining_time": "0:17:34"}
94
- {"current_steps": 94, "total_steps": 126, "loss": 0.6972, "lr": 1.8515978840524302e-06, "epoch": 2.219607843137255, "percentage": 74.6, "elapsed_time": "0:50:00", "remaining_time": "0:17:01"}
95
- {"current_steps": 95, "total_steps": 126, "loss": 0.7224, "lr": 1.7448391048517378e-06, "epoch": 2.243137254901961, "percentage": 75.4, "elapsed_time": "0:50:32", "remaining_time": "0:16:29"}
96
- {"current_steps": 96, "total_steps": 126, "loss": 0.7225, "lr": 1.640596190714947e-06, "epoch": 2.2666666666666666, "percentage": 76.19, "elapsed_time": "0:51:01", "remaining_time": "0:15:56"}
97
- {"current_steps": 97, "total_steps": 126, "loss": 0.7208, "lr": 1.5389497094278861e-06, "epoch": 2.2901960784313724, "percentage": 76.98, "elapsed_time": "0:51:26", "remaining_time": "0:15:22"}
98
- {"current_steps": 98, "total_steps": 126, "loss": 0.6706, "lr": 1.4399782220323515e-06, "epoch": 2.313725490196078, "percentage": 77.78, "elapsed_time": "0:51:51", "remaining_time": "0:14:48"}
99
- {"current_steps": 99, "total_steps": 126, "loss": 0.7925, "lr": 1.3437582221074574e-06, "epoch": 2.3372549019607844, "percentage": 78.57, "elapsed_time": "0:52:26", "remaining_time": "0:14:18"}
100
- {"current_steps": 100, "total_steps": 126, "loss": 0.7385, "lr": 1.250364076648894e-06, "epoch": 2.3607843137254902, "percentage": 79.37, "elapsed_time": "0:53:00", "remaining_time": "0:13:46"}
101
- {"current_steps": 101, "total_steps": 126, "loss": 0.6665, "lr": 1.1598679685917901e-06, "epoch": 2.384313725490196, "percentage": 80.16, "elapsed_time": "0:53:35", "remaining_time": "0:13:15"}
102
- {"current_steps": 102, "total_steps": 126, "loss": 0.8291, "lr": 1.0723398410216085e-06, "epoch": 2.407843137254902, "percentage": 80.95, "elapsed_time": "0:54:12", "remaining_time": "0:12:45"}
103
- {"current_steps": 103, "total_steps": 126, "loss": 0.6668, "lr": 9.878473431161767e-07, "epoch": 2.431372549019608, "percentage": 81.75, "elapsed_time": "0:54:37", "remaining_time": "0:12:11"}
104
- {"current_steps": 104, "total_steps": 126, "loss": 0.6017, "lr": 9.064557778606631e-07, "epoch": 2.454901960784314, "percentage": 82.54, "elapsed_time": "0:55:03", "remaining_time": "0:11:38"}
105
- {"current_steps": 105, "total_steps": 126, "loss": 0.7824, "lr": 8.282280515758639e-07, "epoch": 2.4784313725490197, "percentage": 83.33, "elapsed_time": "0:55:34", "remaining_time": "0:11:06"}
106
- {"current_steps": 106, "total_steps": 126, "loss": 0.7446, "lr": 7.532246252988617e-07, "epoch": 2.5019607843137255, "percentage": 84.13, "elapsed_time": "0:56:07", "remaining_time": "0:10:35"}
107
- {"current_steps": 107, "total_steps": 126, "loss": 0.713, "lr": 6.815034680535915e-07, "epoch": 2.5254901960784313, "percentage": 84.92, "elapsed_time": "0:56:39", "remaining_time": "0:10:03"}
108
- {"current_steps": 108, "total_steps": 126, "loss": 0.7409, "lr": 6.131200120474512e-07, "epoch": 2.549019607843137, "percentage": 85.71, "elapsed_time": "0:57:08", "remaining_time": "0:09:31"}
109
- {"current_steps": 109, "total_steps": 126, "loss": 0.7501, "lr": 5.481271098285818e-07, "epoch": 2.572549019607843, "percentage": 86.51, "elapsed_time": "0:57:42", "remaining_time": "0:09:00"}
110
- {"current_steps": 110, "total_steps": 126, "loss": 0.6082, "lr": 4.865749934369224e-07, "epoch": 2.596078431372549, "percentage": 87.3, "elapsed_time": "0:58:10", "remaining_time": "0:08:27"}
111
- {"current_steps": 111, "total_steps": 126, "loss": 0.7516, "lr": 4.2851123558061927e-07, "epoch": 2.619607843137255, "percentage": 88.1, "elapsed_time": "0:58:44", "remaining_time": "0:07:56"}
112
- {"current_steps": 112, "total_steps": 126, "loss": 0.6589, "lr": 3.739807128677986e-07, "epoch": 2.6431372549019607, "percentage": 88.89, "elapsed_time": "0:59:09", "remaining_time": "0:07:23"}
113
- {"current_steps": 113, "total_steps": 126, "loss": 0.7008, "lr": 3.230255711220992e-07, "epoch": 2.6666666666666665, "percentage": 89.68, "elapsed_time": "0:59:35", "remaining_time": "0:06:51"}
114
- {"current_steps": 114, "total_steps": 126, "loss": 0.7579, "lr": 2.756851928088056e-07, "epoch": 2.6901960784313728, "percentage": 90.48, "elapsed_time": "1:00:06", "remaining_time": "0:06:19"}
115
- {"current_steps": 115, "total_steps": 126, "loss": 0.8005, "lr": 2.3199616659672352e-07, "epoch": 2.7137254901960786, "percentage": 91.27, "elapsed_time": "1:00:36", "remaining_time": "0:05:47"}
116
- {"current_steps": 116, "total_steps": 126, "loss": 0.6913, "lr": 1.9199225907935492e-07, "epoch": 2.7372549019607844, "percentage": 92.06, "elapsed_time": "1:01:06", "remaining_time": "0:05:16"}
117
- {"current_steps": 117, "total_steps": 126, "loss": 0.6749, "lr": 1.5570438867719695e-07, "epoch": 2.76078431372549, "percentage": 92.86, "elapsed_time": "1:01:34", "remaining_time": "0:04:44"}
118
- {"current_steps": 118, "total_steps": 126, "loss": 0.93, "lr": 1.2316060174136e-07, "epoch": 2.784313725490196, "percentage": 93.65, "elapsed_time": "1:02:09", "remaining_time": "0:04:12"}
119
- {"current_steps": 119, "total_steps": 126, "loss": 0.5853, "lr": 9.43860508769645e-08, "epoch": 2.8078431372549018, "percentage": 94.44, "elapsed_time": "1:02:39", "remaining_time": "0:03:41"}
120
- {"current_steps": 120, "total_steps": 126, "loss": 0.7548, "lr": 6.940297550306895e-08, "epoch": 2.831372549019608, "percentage": 95.24, "elapsed_time": "1:03:08", "remaining_time": "0:03:09"}
121
- {"current_steps": 121, "total_steps": 126, "loss": 0.7453, "lr": 4.823068466415615e-08, "epoch": 2.854901960784314, "percentage": 96.03, "elapsed_time": "1:03:38", "remaining_time": "0:02:37"}
122
- {"current_steps": 122, "total_steps": 126, "loss": 0.8001, "lr": 3.088554210646133e-08, "epoch": 2.8784313725490196, "percentage": 96.83, "elapsed_time": "1:04:09", "remaining_time": "0:02:06"}
123
- {"current_steps": 123, "total_steps": 126, "loss": 0.7289, "lr": 1.7380953630678488e-08, "epoch": 2.9019607843137254, "percentage": 97.62, "elapsed_time": "1:04:45", "remaining_time": "0:01:34"}
124
- {"current_steps": 124, "total_steps": 126, "loss": 0.6974, "lr": 7.727356730820035e-09, "epoch": 2.9254901960784316, "percentage": 98.41, "elapsed_time": "1:05:16", "remaining_time": "0:01:03"}
125
- {"current_steps": 125, "total_steps": 126, "loss": 0.765, "lr": 1.9322125272297488e-09, "epoch": 2.9490196078431374, "percentage": 99.21, "elapsed_time": "1:05:52", "remaining_time": "0:00:31"}
126
- {"current_steps": 126, "total_steps": 126, "loss": 0.6713, "lr": 0.0, "epoch": 2.9725490196078432, "percentage": 100.0, "elapsed_time": "1:06:23", "remaining_time": "0:00:00"}
127
- {"current_steps": 126, "total_steps": 126, "epoch": 2.9725490196078432, "percentage": 100.0, "elapsed_time": "1:08:07", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 1, "total_steps": 126, "loss": 1.1119, "lr": 7.692307692307694e-07, "epoch": 0.023529411764705882, "percentage": 0.79, "elapsed_time": "0:00:34", "remaining_time": "1:12:44"}
2
+ {"current_steps": 2, "total_steps": 126, "loss": 1.1537, "lr": 1.5384615384615387e-06, "epoch": 0.047058823529411764, "percentage": 1.59, "elapsed_time": "0:01:10", "remaining_time": "1:12:34"}
3
+ {"current_steps": 3, "total_steps": 126, "loss": 1.1207, "lr": 2.307692307692308e-06, "epoch": 0.07058823529411765, "percentage": 2.38, "elapsed_time": "0:01:39", "remaining_time": "1:08:05"}
4
+ {"current_steps": 4, "total_steps": 126, "loss": 1.0477, "lr": 3.0769230769230774e-06, "epoch": 0.09411764705882353, "percentage": 3.17, "elapsed_time": "0:02:05", "remaining_time": "1:03:35"}
5
+ {"current_steps": 5, "total_steps": 126, "loss": 1.0753, "lr": 3.846153846153847e-06, "epoch": 0.11764705882352941, "percentage": 3.97, "elapsed_time": "0:02:41", "remaining_time": "1:05:16"}
6
+ {"current_steps": 6, "total_steps": 126, "loss": 0.9761, "lr": 4.615384615384616e-06, "epoch": 0.1411764705882353, "percentage": 4.76, "elapsed_time": "0:03:12", "remaining_time": "1:04:05"}
7
+ {"current_steps": 7, "total_steps": 126, "loss": 0.958, "lr": 5.384615384615385e-06, "epoch": 0.16470588235294117, "percentage": 5.56, "elapsed_time": "0:03:47", "remaining_time": "1:04:19"}
8
+ {"current_steps": 8, "total_steps": 126, "loss": 1.0194, "lr": 6.153846153846155e-06, "epoch": 0.18823529411764706, "percentage": 6.35, "elapsed_time": "0:04:19", "remaining_time": "1:03:43"}
9
+ {"current_steps": 9, "total_steps": 126, "loss": 0.9508, "lr": 6.923076923076923e-06, "epoch": 0.21176470588235294, "percentage": 7.14, "elapsed_time": "0:04:53", "remaining_time": "1:03:38"}
10
+ {"current_steps": 10, "total_steps": 126, "loss": 1.0138, "lr": 7.692307692307694e-06, "epoch": 0.23529411764705882, "percentage": 7.94, "elapsed_time": "0:05:24", "remaining_time": "1:02:40"}
11
+ {"current_steps": 11, "total_steps": 126, "loss": 0.9089, "lr": 8.461538461538462e-06, "epoch": 0.25882352941176473, "percentage": 8.73, "elapsed_time": "0:05:50", "remaining_time": "1:01:08"}
12
+ {"current_steps": 12, "total_steps": 126, "loss": 1.0376, "lr": 9.230769230769232e-06, "epoch": 0.2823529411764706, "percentage": 9.52, "elapsed_time": "0:06:24", "remaining_time": "1:00:52"}
13
+ {"current_steps": 13, "total_steps": 126, "loss": 0.9651, "lr": 1e-05, "epoch": 0.3058823529411765, "percentage": 10.32, "elapsed_time": "0:06:58", "remaining_time": "1:00:36"}
14
+ {"current_steps": 14, "total_steps": 126, "loss": 0.9098, "lr": 9.998067787472772e-06, "epoch": 0.32941176470588235, "percentage": 11.11, "elapsed_time": "0:07:26", "remaining_time": "0:59:30"}
15
+ {"current_steps": 15, "total_steps": 126, "loss": 0.8308, "lr": 9.992272643269181e-06, "epoch": 0.35294117647058826, "percentage": 11.9, "elapsed_time": "0:07:49", "remaining_time": "0:57:56"}
16
+ {"current_steps": 16, "total_steps": 126, "loss": 0.9148, "lr": 9.982619046369321e-06, "epoch": 0.3764705882352941, "percentage": 12.7, "elapsed_time": "0:08:22", "remaining_time": "0:57:36"}
17
+ {"current_steps": 17, "total_steps": 126, "loss": 0.8949, "lr": 9.96911445789354e-06, "epoch": 0.4, "percentage": 13.49, "elapsed_time": "0:08:57", "remaining_time": "0:57:23"}
18
+ {"current_steps": 18, "total_steps": 126, "loss": 0.8593, "lr": 9.951769315335843e-06, "epoch": 0.4235294117647059, "percentage": 14.29, "elapsed_time": "0:09:31", "remaining_time": "0:57:11"}
19
+ {"current_steps": 19, "total_steps": 126, "loss": 0.8316, "lr": 9.930597024496933e-06, "epoch": 0.4470588235294118, "percentage": 15.08, "elapsed_time": "0:10:01", "remaining_time": "0:56:30"}
20
+ {"current_steps": 20, "total_steps": 126, "loss": 0.8079, "lr": 9.905613949123036e-06, "epoch": 0.47058823529411764, "percentage": 15.87, "elapsed_time": "0:10:28", "remaining_time": "0:55:33"}
21
+ {"current_steps": 21, "total_steps": 126, "loss": 0.8833, "lr": 9.87683939825864e-06, "epoch": 0.49411764705882355, "percentage": 16.67, "elapsed_time": "0:10:57", "remaining_time": "0:54:47"}
22
+ {"current_steps": 22, "total_steps": 126, "loss": 0.8729, "lr": 9.844295611322804e-06, "epoch": 0.5176470588235295, "percentage": 17.46, "elapsed_time": "0:11:28", "remaining_time": "0:54:16"}
23
+ {"current_steps": 23, "total_steps": 126, "loss": 0.7801, "lr": 9.808007740920647e-06, "epoch": 0.5411764705882353, "percentage": 18.25, "elapsed_time": "0:11:54", "remaining_time": "0:53:20"}
24
+ {"current_steps": 24, "total_steps": 126, "loss": 0.8134, "lr": 9.768003833403278e-06, "epoch": 0.5647058823529412, "percentage": 19.05, "elapsed_time": "0:12:21", "remaining_time": "0:52:33"}
25
+ {"current_steps": 25, "total_steps": 126, "loss": 0.8359, "lr": 9.724314807191197e-06, "epoch": 0.5882352941176471, "percentage": 19.84, "elapsed_time": "0:12:56", "remaining_time": "0:52:18"}
26
+ {"current_steps": 26, "total_steps": 126, "loss": 0.8229, "lr": 9.6769744288779e-06, "epoch": 0.611764705882353, "percentage": 20.63, "elapsed_time": "0:13:25", "remaining_time": "0:51:39"}
27
+ {"current_steps": 27, "total_steps": 126, "loss": 0.7927, "lr": 9.626019287132202e-06, "epoch": 0.6352941176470588, "percentage": 21.43, "elapsed_time": "0:13:50", "remaining_time": "0:50:43"}
28
+ {"current_steps": 28, "total_steps": 126, "loss": 0.8129, "lr": 9.571488764419381e-06, "epoch": 0.6588235294117647, "percentage": 22.22, "elapsed_time": "0:14:25", "remaining_time": "0:50:27"}
29
+ {"current_steps": 29, "total_steps": 126, "loss": 0.8572, "lr": 9.51342500656308e-06, "epoch": 0.6823529411764706, "percentage": 23.02, "elapsed_time": "0:14:57", "remaining_time": "0:50:02"}
30
+ {"current_steps": 30, "total_steps": 126, "loss": 0.8103, "lr": 9.451872890171419e-06, "epoch": 0.7058823529411765, "percentage": 23.81, "elapsed_time": "0:15:26", "remaining_time": "0:49:23"}
31
+ {"current_steps": 31, "total_steps": 126, "loss": 0.8969, "lr": 9.386879987952549e-06, "epoch": 0.7294117647058823, "percentage": 24.6, "elapsed_time": "0:15:54", "remaining_time": "0:48:43"}
32
+ {"current_steps": 32, "total_steps": 126, "loss": 0.818, "lr": 9.318496531946411e-06, "epoch": 0.7529411764705882, "percentage": 25.4, "elapsed_time": "0:16:24", "remaining_time": "0:48:12"}
33
+ {"current_steps": 33, "total_steps": 126, "loss": 0.8332, "lr": 9.246775374701139e-06, "epoch": 0.7764705882352941, "percentage": 26.19, "elapsed_time": "0:16:50", "remaining_time": "0:47:27"}
34
+ {"current_steps": 34, "total_steps": 126, "loss": 0.8585, "lr": 9.171771948424138e-06, "epoch": 0.8, "percentage": 26.98, "elapsed_time": "0:17:28", "remaining_time": "0:47:16"}
35
+ {"current_steps": 35, "total_steps": 126, "loss": 0.8725, "lr": 9.093544222139338e-06, "epoch": 0.8235294117647058, "percentage": 27.78, "elapsed_time": "0:18:05", "remaining_time": "0:47:01"}
36
+ {"current_steps": 36, "total_steps": 126, "loss": 0.785, "lr": 9.012152656883824e-06, "epoch": 0.8470588235294118, "percentage": 28.57, "elapsed_time": "0:18:33", "remaining_time": "0:46:22"}
37
+ {"current_steps": 37, "total_steps": 126, "loss": 0.8348, "lr": 8.927660158978392e-06, "epoch": 0.8705882352941177, "percentage": 29.37, "elapsed_time": "0:19:05", "remaining_time": "0:45:56"}
38
+ {"current_steps": 38, "total_steps": 126, "loss": 0.8418, "lr": 8.84013203140821e-06, "epoch": 0.8941176470588236, "percentage": 30.16, "elapsed_time": "0:19:41", "remaining_time": "0:45:35"}
39
+ {"current_steps": 39, "total_steps": 126, "loss": 0.776, "lr": 8.749635923351108e-06, "epoch": 0.9176470588235294, "percentage": 30.95, "elapsed_time": "0:20:11", "remaining_time": "0:45:01"}
40
+ {"current_steps": 40, "total_steps": 126, "loss": 0.7207, "lr": 8.656241777892544e-06, "epoch": 0.9411764705882353, "percentage": 31.75, "elapsed_time": "0:20:36", "remaining_time": "0:44:19"}
41
+ {"current_steps": 41, "total_steps": 126, "loss": 0.7694, "lr": 8.56002177796765e-06, "epoch": 0.9647058823529412, "percentage": 32.54, "elapsed_time": "0:21:04", "remaining_time": "0:43:41"}
42
+ {"current_steps": 42, "total_steps": 126, "loss": 0.7733, "lr": 8.461050290572114e-06, "epoch": 0.9882352941176471, "percentage": 33.33, "elapsed_time": "0:21:36", "remaining_time": "0:43:13"}
43
+ {"current_steps": 43, "total_steps": 126, "loss": 1.312, "lr": 8.359403809285054e-06, "epoch": 1.0156862745098039, "percentage": 34.13, "elapsed_time": "0:22:54", "remaining_time": "0:44:13"}
44
+ {"current_steps": 44, "total_steps": 126, "loss": 0.7666, "lr": 8.255160895148263e-06, "epoch": 1.0392156862745099, "percentage": 34.92, "elapsed_time": "0:23:24", "remaining_time": "0:43:38"}
45
  {"current_steps": 45, "total_steps": 126, "loss": 0.7368, "lr": 8.14840211594757e-06, "epoch": 1.0627450980392157, "percentage": 35.71, "elapsed_time": "0:23:54", "remaining_time": "0:43:01"}
46
  {"current_steps": 46, "total_steps": 126, "loss": 0.7976, "lr": 8.039209983943201e-06, "epoch": 1.0862745098039215, "percentage": 36.51, "elapsed_time": "0:24:22", "remaining_time": "0:42:23"}
47
  {"current_steps": 47, "total_steps": 126, "loss": 0.7109, "lr": 7.927668892097288e-06, "epoch": 1.1098039215686275, "percentage": 37.3, "elapsed_time": "0:24:45", "remaining_time": "0:41:37"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40a079931b469d81d5f19ac87678ca737c2bcb7821248c92afd315ce7fb1099c
3
  size 7416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88a6042a7b339d73d27ffb8edbc0f720ad30911ebfa3258b1a6f1dbcf8c4ce6f
3
  size 7416