CocoRoF commited on
Commit
c6b6d06
·
verified ·
1 Parent(s): 513991a

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b25892eb513588bbcf11bce9a0a3e83e12594fe856f5f0d8f5b1d034d451ba78
3
  size 368988278
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65b28a24cf96f633c1e0849160faa6f1c242644bdfeb8cbace24b219b5fa97c2
3
  size 368988278
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84234f0e7fa45b116430e500da89d6e9f1fa17aeb1060cb136714fd526bf6880
3
  size 1107079290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8943d3cde63c6018dd234f7968f8ed57a347eb3284892f8cf4de33d873d8af6
3
  size 1107079290
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78d3f197f6c6558fa8056324f1563ab9e957255f5a1a959362aa4eed7a9545db
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74386f26f36ed67f56395205881e5db2d0c28ffcbeed50dd95b28771d2dac588
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c1a9c65c2869356282cad6b4a0f7dff7f4dd68ab3d9d216c72b7d6cb524f860
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41c88f9de084200454883a13c3717941ea3fd433e2f8735507fc30611f9c5501
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:896febe768e17bae5022a95960c041f6425783774ec8859d99d3b149063b1bf9
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:965b00d4cb4710ebab57c8787b9925bb3f77b8eeba94a186ec4bc1c2f326ef3f
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eac482d57e966585467c8ef44dae2869bf7e5d92886f69c11ed7bccc34c07efe
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5dc374b8b9a4c45c950f9d136feab85a767081fa59f0c7d68ed3a62060c4949
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1f27d227a20dc320ac283e0938fb2f6e5b475829a583f8c44d1a16a8c828307
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c7c212fb779217f1edac0baf44f67b608eefc1e0e4e3f5a9dd7eb557032c1bc
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d05a7106aaeaec4b81704e3f4a998b5123cf9342a6733bd9fd2d578e99108c3b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86e1effd626ce1e95dd68a0c8089fe19218f2b24dfe9e45ed2cab1c0ebc10ba1
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b94120d8d88502ec8d8b623ec7550315caca003b44fcffbb5767ab0de91baefe
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:799cc83f60dfc1c4243cfd6403592112414a2eec494e6832f10221c96ff62c20
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:332e4d901be380f740b5d8578f7b80ef1865c7fba83bc288c8a35852205cc668
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:586777c398770c3255d3a1f48c7fef44ea9d89117c627c9ea490e16bfd9a49ba
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51052504c8409fe520bbd0e2195cde8473df8898cb97356bd375976fa6c620f2
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8c36296a53c18048f4ca70fd61feb41492ed14663bc9aeac9ca5e6261898e1a
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4904465106774292,
5
  "eval_steps": 300,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -225,6 +225,224 @@
225
  "eval_samples_per_second": 720.766,
226
  "eval_steps_per_second": 22.528,
227
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  }
229
  ],
230
  "logging_steps": 10,
@@ -244,7 +462,7 @@
244
  "attributes": {}
245
  }
246
  },
247
- "total_flos": 8.283349733238374e+17,
248
  "train_batch_size": 4,
249
  "trial_name": null,
250
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9808930213548585,
5
  "eval_steps": 300,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
225
  "eval_samples_per_second": 720.766,
226
  "eval_steps_per_second": 22.528,
227
  "step": 300
228
+ },
229
+ {
230
+ "epoch": 0.5067947277000102,
231
+ "grad_norm": 16.25,
232
+ "learning_rate": 9.990101823499548e-07,
233
+ "loss": 81.1266,
234
+ "step": 310
235
+ },
236
+ {
237
+ "epoch": 0.5231429447225912,
238
+ "grad_norm": 17.6875,
239
+ "learning_rate": 9.989782527483405e-07,
240
+ "loss": 81.1453,
241
+ "step": 320
242
+ },
243
+ {
244
+ "epoch": 0.5394911617451722,
245
+ "grad_norm": 19.375,
246
+ "learning_rate": 9.98946323146726e-07,
247
+ "loss": 81.2379,
248
+ "step": 330
249
+ },
250
+ {
251
+ "epoch": 0.5558393787677531,
252
+ "grad_norm": 18.0625,
253
+ "learning_rate": 9.989143935451117e-07,
254
+ "loss": 80.8672,
255
+ "step": 340
256
+ },
257
+ {
258
+ "epoch": 0.5721875957903341,
259
+ "grad_norm": 17.21875,
260
+ "learning_rate": 9.988824639434972e-07,
261
+ "loss": 81.0685,
262
+ "step": 350
263
+ },
264
+ {
265
+ "epoch": 0.5885358128129151,
266
+ "grad_norm": 17.0625,
267
+ "learning_rate": 9.988505343418828e-07,
268
+ "loss": 80.8454,
269
+ "step": 360
270
+ },
271
+ {
272
+ "epoch": 0.604884029835496,
273
+ "grad_norm": 20.671875,
274
+ "learning_rate": 9.988186047402685e-07,
275
+ "loss": 81.0763,
276
+ "step": 370
277
+ },
278
+ {
279
+ "epoch": 0.621232246858077,
280
+ "grad_norm": 18.359375,
281
+ "learning_rate": 9.987866751386541e-07,
282
+ "loss": 81.0897,
283
+ "step": 380
284
+ },
285
+ {
286
+ "epoch": 0.637580463880658,
287
+ "grad_norm": 16.8125,
288
+ "learning_rate": 9.987547455370398e-07,
289
+ "loss": 81.1589,
290
+ "step": 390
291
+ },
292
+ {
293
+ "epoch": 0.653928680903239,
294
+ "grad_norm": 16.171875,
295
+ "learning_rate": 9.987228159354254e-07,
296
+ "loss": 81.115,
297
+ "step": 400
298
+ },
299
+ {
300
+ "epoch": 0.67027689792582,
301
+ "grad_norm": 17.484375,
302
+ "learning_rate": 9.98690886333811e-07,
303
+ "loss": 80.6342,
304
+ "step": 410
305
+ },
306
+ {
307
+ "epoch": 0.686625114948401,
308
+ "grad_norm": 18.984375,
309
+ "learning_rate": 9.986589567321967e-07,
310
+ "loss": 80.8112,
311
+ "step": 420
312
+ },
313
+ {
314
+ "epoch": 0.7029733319709819,
315
+ "grad_norm": 20.5,
316
+ "learning_rate": 9.986270271305824e-07,
317
+ "loss": 80.7997,
318
+ "step": 430
319
+ },
320
+ {
321
+ "epoch": 0.7193215489935629,
322
+ "grad_norm": 18.515625,
323
+ "learning_rate": 9.98595097528968e-07,
324
+ "loss": 80.9999,
325
+ "step": 440
326
+ },
327
+ {
328
+ "epoch": 0.7356697660161439,
329
+ "grad_norm": 17.15625,
330
+ "learning_rate": 9.985631679273537e-07,
331
+ "loss": 81.0371,
332
+ "step": 450
333
+ },
334
+ {
335
+ "epoch": 0.7520179830387248,
336
+ "grad_norm": 16.28125,
337
+ "learning_rate": 9.985312383257393e-07,
338
+ "loss": 81.1055,
339
+ "step": 460
340
+ },
341
+ {
342
+ "epoch": 0.7683662000613058,
343
+ "grad_norm": 17.453125,
344
+ "learning_rate": 9.98499308724125e-07,
345
+ "loss": 80.7224,
346
+ "step": 470
347
+ },
348
+ {
349
+ "epoch": 0.7847144170838868,
350
+ "grad_norm": 17.015625,
351
+ "learning_rate": 9.984673791225106e-07,
352
+ "loss": 80.7431,
353
+ "step": 480
354
+ },
355
+ {
356
+ "epoch": 0.8010626341064677,
357
+ "grad_norm": 17.15625,
358
+ "learning_rate": 9.984354495208962e-07,
359
+ "loss": 80.8602,
360
+ "step": 490
361
+ },
362
+ {
363
+ "epoch": 0.8174108511290488,
364
+ "grad_norm": 17.46875,
365
+ "learning_rate": 9.984035199192819e-07,
366
+ "loss": 80.7878,
367
+ "step": 500
368
+ },
369
+ {
370
+ "epoch": 0.8337590681516297,
371
+ "grad_norm": 18.4375,
372
+ "learning_rate": 9.983715903176675e-07,
373
+ "loss": 80.8012,
374
+ "step": 510
375
+ },
376
+ {
377
+ "epoch": 0.8501072851742106,
378
+ "grad_norm": 18.609375,
379
+ "learning_rate": 9.983396607160532e-07,
380
+ "loss": 81.2591,
381
+ "step": 520
382
+ },
383
+ {
384
+ "epoch": 0.8664555021967917,
385
+ "grad_norm": 16.5625,
386
+ "learning_rate": 9.983077311144388e-07,
387
+ "loss": 80.7674,
388
+ "step": 530
389
+ },
390
+ {
391
+ "epoch": 0.8828037192193726,
392
+ "grad_norm": 17.171875,
393
+ "learning_rate": 9.982758015128245e-07,
394
+ "loss": 80.5839,
395
+ "step": 540
396
+ },
397
+ {
398
+ "epoch": 0.8991519362419537,
399
+ "grad_norm": 16.8125,
400
+ "learning_rate": 9.982438719112101e-07,
401
+ "loss": 80.9023,
402
+ "step": 550
403
+ },
404
+ {
405
+ "epoch": 0.9155001532645346,
406
+ "grad_norm": 16.375,
407
+ "learning_rate": 9.982119423095958e-07,
408
+ "loss": 80.4215,
409
+ "step": 560
410
+ },
411
+ {
412
+ "epoch": 0.9318483702871155,
413
+ "grad_norm": 16.484375,
414
+ "learning_rate": 9.981800127079814e-07,
415
+ "loss": 80.5366,
416
+ "step": 570
417
+ },
418
+ {
419
+ "epoch": 0.9481965873096966,
420
+ "grad_norm": 16.40625,
421
+ "learning_rate": 9.98148083106367e-07,
422
+ "loss": 80.6862,
423
+ "step": 580
424
+ },
425
+ {
426
+ "epoch": 0.9645448043322775,
427
+ "grad_norm": 17.28125,
428
+ "learning_rate": 9.981161535047527e-07,
429
+ "loss": 81.2897,
430
+ "step": 590
431
+ },
432
+ {
433
+ "epoch": 0.9808930213548585,
434
+ "grad_norm": 17.375,
435
+ "learning_rate": 9.980842239031384e-07,
436
+ "loss": 80.4117,
437
+ "step": 600
438
+ },
439
+ {
440
+ "epoch": 0.9808930213548585,
441
+ "eval_loss": 1.2649548053741455,
442
+ "eval_runtime": 44.4765,
443
+ "eval_samples_per_second": 722.224,
444
+ "eval_steps_per_second": 22.574,
445
+ "step": 600
446
  }
447
  ],
448
  "logging_steps": 10,
 
462
  "attributes": {}
463
  }
464
  },
465
+ "total_flos": 1.656669946647675e+18,
466
  "train_batch_size": 4,
467
  "trial_name": null,
468
  "trial_params": null