sedrickkeh commited on
Commit
7e94887
·
verified ·
1 Parent(s): 8ab21e9

Training in progress, epoch 1

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7df587f9b27900e80c724f54f24be5001279fe9a5b548c5d0397955aba950b1d
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93a2183942d7c82a211dc7c1cf8a2c323db3cf3745baa8b7daa3d24679ce3e54
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e5b68159d1f8f0a69b7884d39e4a440292b6d9486879e39ae2eddd1c1676cd5
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a21b3b61b0c39721847a308cc3f061c2441a6bfca155a2ee0bb9bf264ff687
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73aa434b405be5c5de0c66774499298ea7ef3a1ea9b8f0edbd938faf1e4c2f4a
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8529b56131d510f9ab66cfd6f97d6d41dd8f073e02be4d8f8fb877ce39dfc8a9
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df07a88306c1f935b398c3da93e18634d6b7979f40e2d4ecbd944d2a62683174
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69406788fcb9e941805ea91b07ff0cef0df01b72f359516f8ced83fa5470265a
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -83,3 +83,87 @@
83
  {"current_steps": 83, "total_steps": 249, "loss": 0.9051, "lr": 8.43498463017451e-06, "epoch": 0.996, "percentage": 33.33, "elapsed_time": "1:17:08", "remaining_time": "2:34:17"}
84
  {"current_steps": 84, "total_steps": 249, "loss": 1.5002, "lr": 8.383691540076372e-06, "epoch": 1.008, "percentage": 33.73, "elapsed_time": "1:19:10", "remaining_time": "2:35:30"}
85
  {"current_steps": 85, "total_steps": 249, "loss": 0.826, "lr": 8.331732889760021e-06, "epoch": 1.02, "percentage": 34.14, "elapsed_time": "1:20:13", "remaining_time": "2:34:47"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  {"current_steps": 83, "total_steps": 249, "loss": 0.9051, "lr": 8.43498463017451e-06, "epoch": 0.996, "percentage": 33.33, "elapsed_time": "1:17:08", "remaining_time": "2:34:17"}
84
  {"current_steps": 84, "total_steps": 249, "loss": 1.5002, "lr": 8.383691540076372e-06, "epoch": 1.008, "percentage": 33.73, "elapsed_time": "1:19:10", "remaining_time": "2:35:30"}
85
  {"current_steps": 85, "total_steps": 249, "loss": 0.826, "lr": 8.331732889760021e-06, "epoch": 1.02, "percentage": 34.14, "elapsed_time": "1:20:13", "remaining_time": "2:34:47"}
86
+ {"current_steps": 86, "total_steps": 249, "loss": 0.9041, "lr": 8.279118899309121e-06, "epoch": 1.032, "percentage": 34.54, "elapsed_time": "1:21:15", "remaining_time": "2:34:00"}
87
+ {"current_steps": 87, "total_steps": 249, "loss": 0.8642, "lr": 8.22585991771044e-06, "epoch": 1.044, "percentage": 34.94, "elapsed_time": "1:22:16", "remaining_time": "2:33:11"}
88
+ {"current_steps": 88, "total_steps": 249, "loss": 0.8225, "lr": 8.171966420818227e-06, "epoch": 1.056, "percentage": 35.34, "elapsed_time": "1:23:18", "remaining_time": "2:32:24"}
89
+ {"current_steps": 89, "total_steps": 249, "loss": 0.9152, "lr": 8.117449009293668e-06, "epoch": 1.068, "percentage": 35.74, "elapsed_time": "1:24:11", "remaining_time": "2:31:21"}
90
+ {"current_steps": 90, "total_steps": 249, "loss": 0.7907, "lr": 8.062318406519751e-06, "epoch": 1.08, "percentage": 36.14, "elapsed_time": "1:25:07", "remaining_time": "2:30:22"}
91
+ {"current_steps": 91, "total_steps": 249, "loss": 0.9164, "lr": 8.00658545649203e-06, "epoch": 1.092, "percentage": 36.55, "elapsed_time": "1:26:04", "remaining_time": "2:29:27"}
92
+ {"current_steps": 92, "total_steps": 249, "loss": 0.8928, "lr": 7.950261121685642e-06, "epoch": 1.104, "percentage": 36.95, "elapsed_time": "1:27:04", "remaining_time": "2:28:35"}
93
+ {"current_steps": 93, "total_steps": 249, "loss": 0.8455, "lr": 7.89335648089903e-06, "epoch": 1.116, "percentage": 37.35, "elapsed_time": "1:27:54", "remaining_time": "2:27:27"}
94
+ {"current_steps": 94, "total_steps": 249, "loss": 0.7902, "lr": 7.835882727074779e-06, "epoch": 1.1280000000000001, "percentage": 37.75, "elapsed_time": "1:28:47", "remaining_time": "2:26:24"}
95
+ {"current_steps": 95, "total_steps": 249, "loss": 0.917, "lr": 7.777851165098012e-06, "epoch": 1.1400000000000001, "percentage": 38.15, "elapsed_time": "1:29:47", "remaining_time": "2:25:33"}
96
+ {"current_steps": 96, "total_steps": 249, "loss": 0.94, "lr": 7.719273209572745e-06, "epoch": 1.152, "percentage": 38.55, "elapsed_time": "1:30:48", "remaining_time": "2:24:43"}
97
+ {"current_steps": 97, "total_steps": 249, "loss": 0.7936, "lr": 7.660160382576683e-06, "epoch": 1.164, "percentage": 38.96, "elapsed_time": "1:31:54", "remaining_time": "2:24:01"}
98
+ {"current_steps": 98, "total_steps": 249, "loss": 0.8666, "lr": 7.600524311394873e-06, "epoch": 1.176, "percentage": 39.36, "elapsed_time": "1:32:51", "remaining_time": "2:23:04"}
99
+ {"current_steps": 99, "total_steps": 249, "loss": 0.8397, "lr": 7.540376726232648e-06, "epoch": 1.188, "percentage": 39.76, "elapsed_time": "1:33:32", "remaining_time": "2:21:44"}
100
+ {"current_steps": 100, "total_steps": 249, "loss": 0.8552, "lr": 7.4797294579083405e-06, "epoch": 1.2, "percentage": 40.16, "elapsed_time": "1:34:19", "remaining_time": "2:20:32"}
101
+ {"current_steps": 101, "total_steps": 249, "loss": 0.8907, "lr": 7.4185944355261996e-06, "epoch": 1.212, "percentage": 40.56, "elapsed_time": "1:35:21", "remaining_time": "2:19:43"}
102
+ {"current_steps": 102, "total_steps": 249, "loss": 0.8099, "lr": 7.3569836841299905e-06, "epoch": 1.224, "percentage": 40.96, "elapsed_time": "1:36:16", "remaining_time": "2:18:44"}
103
+ {"current_steps": 103, "total_steps": 249, "loss": 0.7832, "lr": 7.294909322337689e-06, "epoch": 1.236, "percentage": 41.37, "elapsed_time": "1:37:11", "remaining_time": "2:17:45"}
104
+ {"current_steps": 104, "total_steps": 249, "loss": 0.9434, "lr": 7.232383559957815e-06, "epoch": 1.248, "percentage": 41.77, "elapsed_time": "1:38:06", "remaining_time": "2:16:46"}
105
+ {"current_steps": 105, "total_steps": 249, "loss": 0.8623, "lr": 7.169418695587791e-06, "epoch": 1.26, "percentage": 42.17, "elapsed_time": "1:39:01", "remaining_time": "2:15:48"}
106
+ {"current_steps": 106, "total_steps": 249, "loss": 0.8438, "lr": 7.106027114194856e-06, "epoch": 1.272, "percentage": 42.57, "elapsed_time": "1:39:54", "remaining_time": "2:14:47"}
107
+ {"current_steps": 107, "total_steps": 249, "loss": 0.8188, "lr": 7.042221284679982e-06, "epoch": 1.284, "percentage": 42.97, "elapsed_time": "1:40:47", "remaining_time": "2:13:45"}
108
+ {"current_steps": 108, "total_steps": 249, "loss": 0.9154, "lr": 6.978013757425295e-06, "epoch": 1.296, "percentage": 43.37, "elapsed_time": "1:41:43", "remaining_time": "2:12:48"}
109
+ {"current_steps": 109, "total_steps": 249, "loss": 0.8314, "lr": 6.913417161825449e-06, "epoch": 1.308, "percentage": 43.78, "elapsed_time": "1:42:37", "remaining_time": "2:11:48"}
110
+ {"current_steps": 110, "total_steps": 249, "loss": 0.823, "lr": 6.848444203803476e-06, "epoch": 1.32, "percentage": 44.18, "elapsed_time": "1:43:36", "remaining_time": "2:10:55"}
111
+ {"current_steps": 111, "total_steps": 249, "loss": 0.8475, "lr": 6.783107663311566e-06, "epoch": 1.332, "percentage": 44.58, "elapsed_time": "1:44:24", "remaining_time": "2:09:48"}
112
+ {"current_steps": 112, "total_steps": 249, "loss": 0.8092, "lr": 6.717420391817306e-06, "epoch": 1.3439999999999999, "percentage": 44.98, "elapsed_time": "1:45:10", "remaining_time": "2:08:38"}
113
+ {"current_steps": 113, "total_steps": 249, "loss": 0.8584, "lr": 6.651395309775837e-06, "epoch": 1.3559999999999999, "percentage": 45.38, "elapsed_time": "1:46:06", "remaining_time": "2:07:42"}
114
+ {"current_steps": 114, "total_steps": 249, "loss": 0.7915, "lr": 6.585045404088442e-06, "epoch": 1.3679999999999999, "percentage": 45.78, "elapsed_time": "1:46:58", "remaining_time": "2:06:40"}
115
+ {"current_steps": 115, "total_steps": 249, "loss": 0.9026, "lr": 6.518383725548074e-06, "epoch": 1.38, "percentage": 46.18, "elapsed_time": "1:48:03", "remaining_time": "2:05:54"}
116
+ {"current_steps": 116, "total_steps": 249, "loss": 0.84, "lr": 6.451423386272312e-06, "epoch": 1.392, "percentage": 46.59, "elapsed_time": "1:49:05", "remaining_time": "2:05:04"}
117
+ {"current_steps": 117, "total_steps": 249, "loss": 0.7698, "lr": 6.384177557124247e-06, "epoch": 1.404, "percentage": 46.99, "elapsed_time": "1:50:01", "remaining_time": "2:04:07"}
118
+ {"current_steps": 118, "total_steps": 249, "loss": 0.9143, "lr": 6.3166594651218235e-06, "epoch": 1.416, "percentage": 47.39, "elapsed_time": "1:50:57", "remaining_time": "2:03:11"}
119
+ {"current_steps": 119, "total_steps": 249, "loss": 0.8552, "lr": 6.248882390836135e-06, "epoch": 1.428, "percentage": 47.79, "elapsed_time": "1:51:50", "remaining_time": "2:02:10"}
120
+ {"current_steps": 120, "total_steps": 249, "loss": 0.808, "lr": 6.180859665779173e-06, "epoch": 1.44, "percentage": 48.19, "elapsed_time": "1:52:39", "remaining_time": "2:01:06"}
121
+ {"current_steps": 121, "total_steps": 249, "loss": 0.9172, "lr": 6.112604669781572e-06, "epoch": 1.452, "percentage": 48.59, "elapsed_time": "1:53:40", "remaining_time": "2:00:14"}
122
+ {"current_steps": 122, "total_steps": 249, "loss": 0.8035, "lr": 6.04413082836085e-06, "epoch": 1.464, "percentage": 49.0, "elapsed_time": "1:54:31", "remaining_time": "1:59:13"}
123
+ {"current_steps": 123, "total_steps": 249, "loss": 0.7913, "lr": 5.975451610080643e-06, "epoch": 1.476, "percentage": 49.4, "elapsed_time": "1:55:28", "remaining_time": "1:58:17"}
124
+ {"current_steps": 124, "total_steps": 249, "loss": 0.8627, "lr": 5.906580523901493e-06, "epoch": 1.488, "percentage": 49.8, "elapsed_time": "1:56:24", "remaining_time": "1:57:20"}
125
+ {"current_steps": 125, "total_steps": 249, "loss": 0.8117, "lr": 5.837531116523683e-06, "epoch": 1.5, "percentage": 50.2, "elapsed_time": "1:57:20", "remaining_time": "1:56:24"}
126
+ {"current_steps": 126, "total_steps": 249, "loss": 0.7876, "lr": 5.768316969722651e-06, "epoch": 1.512, "percentage": 50.6, "elapsed_time": "1:58:07", "remaining_time": "1:55:19"}
127
+ {"current_steps": 127, "total_steps": 249, "loss": 0.7882, "lr": 5.698951697677498e-06, "epoch": 1.524, "percentage": 51.0, "elapsed_time": "1:59:12", "remaining_time": "1:54:30"}
128
+ {"current_steps": 128, "total_steps": 249, "loss": 0.8077, "lr": 5.629448944293128e-06, "epoch": 1.536, "percentage": 51.41, "elapsed_time": "1:59:58", "remaining_time": "1:53:24"}
129
+ {"current_steps": 129, "total_steps": 249, "loss": 0.9296, "lr": 5.559822380516539e-06, "epoch": 1.548, "percentage": 51.81, "elapsed_time": "2:01:01", "remaining_time": "1:52:34"}
130
+ {"current_steps": 130, "total_steps": 249, "loss": 0.8909, "lr": 5.490085701647805e-06, "epoch": 1.56, "percentage": 52.21, "elapsed_time": "2:02:06", "remaining_time": "1:51:46"}
131
+ {"current_steps": 131, "total_steps": 249, "loss": 0.8243, "lr": 5.420252624646238e-06, "epoch": 1.572, "percentage": 52.61, "elapsed_time": "2:03:02", "remaining_time": "1:50:49"}
132
+ {"current_steps": 132, "total_steps": 249, "loss": 0.9343, "lr": 5.350336885432337e-06, "epoch": 1.584, "percentage": 53.01, "elapsed_time": "2:03:59", "remaining_time": "1:49:54"}
133
+ {"current_steps": 133, "total_steps": 249, "loss": 0.8446, "lr": 5.2803522361859596e-06, "epoch": 1.596, "percentage": 53.41, "elapsed_time": "2:04:53", "remaining_time": "1:48:55"}
134
+ {"current_steps": 134, "total_steps": 249, "loss": 0.825, "lr": 5.210312442641327e-06, "epoch": 1.608, "percentage": 53.82, "elapsed_time": "2:05:53", "remaining_time": "1:48:02"}
135
+ {"current_steps": 135, "total_steps": 249, "loss": 0.906, "lr": 5.140231281379345e-06, "epoch": 1.62, "percentage": 54.22, "elapsed_time": "2:06:52", "remaining_time": "1:47:08"}
136
+ {"current_steps": 136, "total_steps": 249, "loss": 0.7803, "lr": 5.070122537117812e-06, "epoch": 1.6320000000000001, "percentage": 54.62, "elapsed_time": "2:07:47", "remaining_time": "1:46:10"}
137
+ {"current_steps": 137, "total_steps": 249, "loss": 0.8701, "lr": 5e-06, "epoch": 1.6440000000000001, "percentage": 55.02, "elapsed_time": "2:08:44", "remaining_time": "1:45:14"}
138
+ {"current_steps": 138, "total_steps": 249, "loss": 0.7693, "lr": 4.92987746288219e-06, "epoch": 1.6560000000000001, "percentage": 55.42, "elapsed_time": "2:09:34", "remaining_time": "1:44:13"}
139
+ {"current_steps": 139, "total_steps": 249, "loss": 0.9239, "lr": 4.859768718620656e-06, "epoch": 1.6680000000000001, "percentage": 55.82, "elapsed_time": "2:10:23", "remaining_time": "1:43:11"}
140
+ {"current_steps": 140, "total_steps": 249, "loss": 0.8102, "lr": 4.789687557358676e-06, "epoch": 1.6800000000000002, "percentage": 56.22, "elapsed_time": "2:11:10", "remaining_time": "1:42:07"}
141
+ {"current_steps": 141, "total_steps": 249, "loss": 0.8653, "lr": 4.719647763814041e-06, "epoch": 1.692, "percentage": 56.63, "elapsed_time": "2:12:10", "remaining_time": "1:41:14"}
142
+ {"current_steps": 142, "total_steps": 249, "loss": 0.9439, "lr": 4.649663114567663e-06, "epoch": 1.704, "percentage": 57.03, "elapsed_time": "2:13:02", "remaining_time": "1:40:14"}
143
+ {"current_steps": 143, "total_steps": 249, "loss": 0.8445, "lr": 4.579747375353763e-06, "epoch": 1.716, "percentage": 57.43, "elapsed_time": "2:14:02", "remaining_time": "1:39:21"}
144
+ {"current_steps": 144, "total_steps": 249, "loss": 0.7356, "lr": 4.509914298352197e-06, "epoch": 1.728, "percentage": 57.83, "elapsed_time": "2:14:53", "remaining_time": "1:38:21"}
145
+ {"current_steps": 145, "total_steps": 249, "loss": 0.9677, "lr": 4.4401776194834615e-06, "epoch": 1.74, "percentage": 58.23, "elapsed_time": "2:15:48", "remaining_time": "1:37:24"}
146
+ {"current_steps": 146, "total_steps": 249, "loss": 0.8576, "lr": 4.3705510557068746e-06, "epoch": 1.752, "percentage": 58.63, "elapsed_time": "2:16:46", "remaining_time": "1:36:29"}
147
+ {"current_steps": 147, "total_steps": 249, "loss": 0.8079, "lr": 4.3010483023225045e-06, "epoch": 1.764, "percentage": 59.04, "elapsed_time": "2:17:45", "remaining_time": "1:35:35"}
148
+ {"current_steps": 148, "total_steps": 249, "loss": 0.8219, "lr": 4.231683030277349e-06, "epoch": 1.776, "percentage": 59.44, "elapsed_time": "2:18:36", "remaining_time": "1:34:35"}
149
+ {"current_steps": 149, "total_steps": 249, "loss": 0.8778, "lr": 4.162468883476319e-06, "epoch": 1.788, "percentage": 59.84, "elapsed_time": "2:19:23", "remaining_time": "1:33:33"}
150
+ {"current_steps": 150, "total_steps": 249, "loss": 0.8278, "lr": 4.0934194760985095e-06, "epoch": 1.8, "percentage": 60.24, "elapsed_time": "2:20:22", "remaining_time": "1:32:38"}
151
+ {"current_steps": 151, "total_steps": 249, "loss": 0.8468, "lr": 4.02454838991936e-06, "epoch": 1.812, "percentage": 60.64, "elapsed_time": "2:21:21", "remaining_time": "1:31:44"}
152
+ {"current_steps": 152, "total_steps": 249, "loss": 0.8676, "lr": 3.955869171639151e-06, "epoch": 1.8239999999999998, "percentage": 61.04, "elapsed_time": "2:22:11", "remaining_time": "1:30:44"}
153
+ {"current_steps": 153, "total_steps": 249, "loss": 0.6974, "lr": 3.887395330218429e-06, "epoch": 1.8359999999999999, "percentage": 61.45, "elapsed_time": "2:22:56", "remaining_time": "1:29:41"}
154
+ {"current_steps": 154, "total_steps": 249, "loss": 0.9395, "lr": 3.81914033422083e-06, "epoch": 1.8479999999999999, "percentage": 61.85, "elapsed_time": "2:24:00", "remaining_time": "1:28:50"}
155
+ {"current_steps": 155, "total_steps": 249, "loss": 0.7526, "lr": 3.751117609163865e-06, "epoch": 1.8599999999999999, "percentage": 62.25, "elapsed_time": "2:24:49", "remaining_time": "1:27:49"}
156
+ {"current_steps": 156, "total_steps": 249, "loss": 0.8781, "lr": 3.683340534878176e-06, "epoch": 1.8719999999999999, "percentage": 62.65, "elapsed_time": "2:25:50", "remaining_time": "1:26:56"}
157
+ {"current_steps": 157, "total_steps": 249, "loss": 0.8064, "lr": 3.6158224428757538e-06, "epoch": 1.884, "percentage": 63.05, "elapsed_time": "2:26:44", "remaining_time": "1:25:59"}
158
+ {"current_steps": 158, "total_steps": 249, "loss": 0.8821, "lr": 3.5485766137276894e-06, "epoch": 1.896, "percentage": 63.45, "elapsed_time": "2:27:40", "remaining_time": "1:25:03"}
159
+ {"current_steps": 159, "total_steps": 249, "loss": 0.8881, "lr": 3.4816162744519266e-06, "epoch": 1.908, "percentage": 63.86, "elapsed_time": "2:28:34", "remaining_time": "1:24:06"}
160
+ {"current_steps": 160, "total_steps": 249, "loss": 0.8135, "lr": 3.4149545959115604e-06, "epoch": 1.92, "percentage": 64.26, "elapsed_time": "2:29:23", "remaining_time": "1:23:05"}
161
+ {"current_steps": 161, "total_steps": 249, "loss": 0.7966, "lr": 3.3486046902241663e-06, "epoch": 1.932, "percentage": 64.66, "elapsed_time": "2:30:25", "remaining_time": "1:22:13"}
162
+ {"current_steps": 162, "total_steps": 249, "loss": 0.8471, "lr": 3.2825796081826943e-06, "epoch": 1.944, "percentage": 65.06, "elapsed_time": "2:31:17", "remaining_time": "1:21:14"}
163
+ {"current_steps": 163, "total_steps": 249, "loss": 0.8319, "lr": 3.216892336688435e-06, "epoch": 1.956, "percentage": 65.46, "elapsed_time": "2:32:17", "remaining_time": "1:20:20"}
164
+ {"current_steps": 164, "total_steps": 249, "loss": 0.9108, "lr": 3.1515557961965254e-06, "epoch": 1.968, "percentage": 65.86, "elapsed_time": "2:33:19", "remaining_time": "1:19:28"}
165
+ {"current_steps": 165, "total_steps": 249, "loss": 0.8201, "lr": 3.0865828381745515e-06, "epoch": 1.98, "percentage": 66.27, "elapsed_time": "2:34:15", "remaining_time": "1:18:31"}
166
+ {"current_steps": 166, "total_steps": 249, "loss": 0.8368, "lr": 3.021986242574707e-06, "epoch": 1.992, "percentage": 66.67, "elapsed_time": "2:35:09", "remaining_time": "1:17:34"}
167
+ {"current_steps": 167, "total_steps": 249, "loss": 1.361, "lr": 2.95777871532002e-06, "epoch": 2.004, "percentage": 67.07, "elapsed_time": "2:37:08", "remaining_time": "1:17:09"}
168
+ {"current_steps": 168, "total_steps": 249, "loss": 0.7807, "lr": 2.893972885805148e-06, "epoch": 2.016, "percentage": 67.47, "elapsed_time": "2:37:56", "remaining_time": "1:16:09"}
169
+ {"current_steps": 169, "total_steps": 249, "loss": 0.7914, "lr": 2.83058130441221e-06, "epoch": 2.028, "percentage": 67.87, "elapsed_time": "2:39:00", "remaining_time": "1:15:16"}