error577 commited on
Commit
4f17aca
·
verified ·
1 Parent(s): 56792f4

Training in progress, step 40, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8b1531d957d0a56511b821d5a6aacb3aac6ba6ab36a3763cb2d4f6efb143b6d
3
  size 639691872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16f0d7332dfed6a4a63efcdfd07c10cf2197b210dee61d9ad4914ec6596dbbed
3
  size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55fc13f1d6ea075e0a96a6f1f3f3a01add3e27d169d777b14af7e8b85c9c104b
3
  size 1279647314
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3242aad940a9c39b3529ce253f008669cb0597966c8ed6307a2ce6975a64142b
3
  size 1279647314
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:036c6d1ced8df685be210cfe4df2afa6b83c3adf7f10c172c0f285238a3beda8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f2026c5de2a90763e8e8281319b30d28dd1bb90121002f251649da42d60e056
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b1ece0846e2afe71b4b44981fd9a82ee7e8be48b72c44e6ff1c690617d2b724
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e5fc52ecbc811cf8c3969c1073ba49c5ec4288905574552319850bcb54c0249
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.6746580600738525,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-20",
4
- "epoch": 0.006333498268184067,
5
  "eval_steps": 20,
6
- "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -163,6 +163,154 @@
163
  "eval_samples_per_second": 2.653,
164
  "eval_steps_per_second": 2.653,
165
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  }
167
  ],
168
  "logging_steps": 1,
@@ -191,7 +339,7 @@
191
  "attributes": {}
192
  }
193
  },
194
- "total_flos": 2.756620932297523e+16,
195
  "train_batch_size": 1,
196
  "trial_name": null,
197
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.529219627380371,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-40",
4
+ "epoch": 0.012666996536368134,
5
  "eval_steps": 20,
6
+ "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
163
  "eval_samples_per_second": 2.653,
164
  "eval_steps_per_second": 2.653,
165
  "step": 20
166
+ },
167
+ {
168
+ "epoch": 0.0066501731815932705,
169
+ "grad_norm": 0.7943449020385742,
170
+ "learning_rate": 4.2e-05,
171
+ "loss": 2.6219,
172
+ "step": 21
173
+ },
174
+ {
175
+ "epoch": 0.006966848095002474,
176
+ "grad_norm": 0.7790957689285278,
177
+ "learning_rate": 4.4000000000000006e-05,
178
+ "loss": 2.5103,
179
+ "step": 22
180
+ },
181
+ {
182
+ "epoch": 0.007283523008411678,
183
+ "grad_norm": 0.7326317429542542,
184
+ "learning_rate": 4.600000000000001e-05,
185
+ "loss": 2.4827,
186
+ "step": 23
187
+ },
188
+ {
189
+ "epoch": 0.00760019792182088,
190
+ "grad_norm": 0.7278855443000793,
191
+ "learning_rate": 4.8e-05,
192
+ "loss": 2.4126,
193
+ "step": 24
194
+ },
195
+ {
196
+ "epoch": 0.007916872835230085,
197
+ "grad_norm": 0.7726553678512573,
198
+ "learning_rate": 5e-05,
199
+ "loss": 2.4046,
200
+ "step": 25
201
+ },
202
+ {
203
+ "epoch": 0.008233547748639287,
204
+ "grad_norm": 0.7696128487586975,
205
+ "learning_rate": 5.2000000000000004e-05,
206
+ "loss": 2.5753,
207
+ "step": 26
208
+ },
209
+ {
210
+ "epoch": 0.00855022266204849,
211
+ "grad_norm": 0.8221985697746277,
212
+ "learning_rate": 5.4000000000000005e-05,
213
+ "loss": 2.7203,
214
+ "step": 27
215
+ },
216
+ {
217
+ "epoch": 0.008866897575457694,
218
+ "grad_norm": 0.8378767371177673,
219
+ "learning_rate": 5.6000000000000006e-05,
220
+ "loss": 2.6197,
221
+ "step": 28
222
+ },
223
+ {
224
+ "epoch": 0.009183572488866898,
225
+ "grad_norm": 0.8172261118888855,
226
+ "learning_rate": 5.8e-05,
227
+ "loss": 2.4763,
228
+ "step": 29
229
+ },
230
+ {
231
+ "epoch": 0.009500247402276101,
232
+ "grad_norm": 0.766254723072052,
233
+ "learning_rate": 6e-05,
234
+ "loss": 2.5404,
235
+ "step": 30
236
+ },
237
+ {
238
+ "epoch": 0.009816922315685305,
239
+ "grad_norm": 0.8601265549659729,
240
+ "learning_rate": 6.2e-05,
241
+ "loss": 2.7159,
242
+ "step": 31
243
+ },
244
+ {
245
+ "epoch": 0.010133597229094508,
246
+ "grad_norm": 0.862657904624939,
247
+ "learning_rate": 6.400000000000001e-05,
248
+ "loss": 2.4799,
249
+ "step": 32
250
+ },
251
+ {
252
+ "epoch": 0.01045027214250371,
253
+ "grad_norm": 0.8857929706573486,
254
+ "learning_rate": 6.6e-05,
255
+ "loss": 2.5324,
256
+ "step": 33
257
+ },
258
+ {
259
+ "epoch": 0.010766947055912914,
260
+ "grad_norm": 0.916630744934082,
261
+ "learning_rate": 6.800000000000001e-05,
262
+ "loss": 2.6486,
263
+ "step": 34
264
+ },
265
+ {
266
+ "epoch": 0.011083621969322117,
267
+ "grad_norm": 0.9253357648849487,
268
+ "learning_rate": 7e-05,
269
+ "loss": 2.5913,
270
+ "step": 35
271
+ },
272
+ {
273
+ "epoch": 0.011400296882731321,
274
+ "grad_norm": 0.9131091237068176,
275
+ "learning_rate": 7.2e-05,
276
+ "loss": 2.3062,
277
+ "step": 36
278
+ },
279
+ {
280
+ "epoch": 0.011716971796140525,
281
+ "grad_norm": 0.908059298992157,
282
+ "learning_rate": 7.4e-05,
283
+ "loss": 2.4921,
284
+ "step": 37
285
+ },
286
+ {
287
+ "epoch": 0.012033646709549728,
288
+ "grad_norm": 0.9732745885848999,
289
+ "learning_rate": 7.6e-05,
290
+ "loss": 2.6578,
291
+ "step": 38
292
+ },
293
+ {
294
+ "epoch": 0.012350321622958932,
295
+ "grad_norm": 0.8774698972702026,
296
+ "learning_rate": 7.800000000000001e-05,
297
+ "loss": 2.5082,
298
+ "step": 39
299
+ },
300
+ {
301
+ "epoch": 0.012666996536368134,
302
+ "grad_norm": 1.0928329229354858,
303
+ "learning_rate": 8e-05,
304
+ "loss": 2.6549,
305
+ "step": 40
306
+ },
307
+ {
308
+ "epoch": 0.012666996536368134,
309
+ "eval_loss": 2.529219627380371,
310
+ "eval_runtime": 191.2367,
311
+ "eval_samples_per_second": 2.656,
312
+ "eval_steps_per_second": 2.656,
313
+ "step": 40
314
  }
315
  ],
316
  "logging_steps": 1,
 
339
  "attributes": {}
340
  }
341
  },
342
+ "total_flos": 5.417612480820019e+16,
343
  "train_batch_size": 1,
344
  "trial_name": null,
345
  "trial_params": null