YeBhoneLin10 commited on
Commit
9bcf96f
·
verified ·
1 Parent(s): 08a6a4b

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10b008b099f89500ea9ac9bd21d1200a78e3799c3a3672db7fdaf92a24f7e4e3
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a50db5afb668918b5a510a75a56f0b31a7cb9a8ce9e4dee23fdaa3904406a8
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b11fa9de35167ee282200f1f5e1e213e18e19fe2611450cd5e182626d29c8bbd
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ba2b3ca702866bbc222c3236dc3ba9d51a358efb4c14c312769dc6f3affdf07
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:298c3bc45193d5f9e013179fcd36fc29bded406f371da8183cdaa90d25f6d218
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43b75653b0fdbe64a4fca584309d2f205ea445c6dd839e1b14f50ed746f8d472
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5101d8c1f86d6f48167e50b1164b9ba363ab76694ff2d5c1e326e3d5f94ecaef
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b80a609c64a12b4db2f38941ea479b9a30f9351b7aac74f4956e8686dc338317
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 57.21605845920777,
3
- "best_model_checkpoint": "./checkpoint-1000",
4
- "epoch": 2.0555,
5
  "eval_steps": 1000,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -296,6 +296,295 @@
296
  "eval_steps_per_second": 0.119,
297
  "eval_wer": 57.21605845920777,
298
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  }
300
  ],
301
  "logging_steps": 25,
@@ -315,7 +604,7 @@
315
  "attributes": {}
316
  }
317
  },
318
- "total_flos": 4.61217186791424e+18,
319
  "train_batch_size": 16,
320
  "trial_name": null,
321
  "trial_params": null
 
1
  {
2
+ "best_metric": 53.553446508933746,
3
+ "best_model_checkpoint": "./checkpoint-2000",
4
+ "epoch": 5.01375,
5
  "eval_steps": 1000,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
296
  "eval_steps_per_second": 0.119,
297
  "eval_wer": 57.21605845920777,
298
  "step": 1000
299
+ },
300
+ {
301
+ "epoch": 2.06175,
302
+ "grad_norm": 3.215674638748169,
303
+ "learning_rate": 8.50857142857143e-06,
304
+ "loss": 0.0907,
305
+ "step": 1025
306
+ },
307
+ {
308
+ "epoch": 2.068,
309
+ "grad_norm": 2.3883891105651855,
310
+ "learning_rate": 8.437142857142859e-06,
311
+ "loss": 0.0876,
312
+ "step": 1050
313
+ },
314
+ {
315
+ "epoch": 2.07425,
316
+ "grad_norm": 1.890366792678833,
317
+ "learning_rate": 8.365714285714286e-06,
318
+ "loss": 0.0724,
319
+ "step": 1075
320
+ },
321
+ {
322
+ "epoch": 2.0805,
323
+ "grad_norm": 2.6750569343566895,
324
+ "learning_rate": 8.294285714285715e-06,
325
+ "loss": 0.0702,
326
+ "step": 1100
327
+ },
328
+ {
329
+ "epoch": 2.08675,
330
+ "grad_norm": 2.3613970279693604,
331
+ "learning_rate": 8.222857142857144e-06,
332
+ "loss": 0.0768,
333
+ "step": 1125
334
+ },
335
+ {
336
+ "epoch": 2.093,
337
+ "grad_norm": 2.880221128463745,
338
+ "learning_rate": 8.151428571428572e-06,
339
+ "loss": 0.0653,
340
+ "step": 1150
341
+ },
342
+ {
343
+ "epoch": 3.002,
344
+ "grad_norm": 2.5774643421173096,
345
+ "learning_rate": 8.08e-06,
346
+ "loss": 0.0863,
347
+ "step": 1175
348
+ },
349
+ {
350
+ "epoch": 3.00825,
351
+ "grad_norm": 2.212843894958496,
352
+ "learning_rate": 8.00857142857143e-06,
353
+ "loss": 0.0786,
354
+ "step": 1200
355
+ },
356
+ {
357
+ "epoch": 3.0145,
358
+ "grad_norm": 1.7202649116516113,
359
+ "learning_rate": 7.937142857142857e-06,
360
+ "loss": 0.0695,
361
+ "step": 1225
362
+ },
363
+ {
364
+ "epoch": 3.02075,
365
+ "grad_norm": 1.4540152549743652,
366
+ "learning_rate": 7.865714285714286e-06,
367
+ "loss": 0.062,
368
+ "step": 1250
369
+ },
370
+ {
371
+ "epoch": 3.027,
372
+ "grad_norm": 1.4593051671981812,
373
+ "learning_rate": 7.794285714285715e-06,
374
+ "loss": 0.0586,
375
+ "step": 1275
376
+ },
377
+ {
378
+ "epoch": 3.03325,
379
+ "grad_norm": 1.5141732692718506,
380
+ "learning_rate": 7.722857142857142e-06,
381
+ "loss": 0.0555,
382
+ "step": 1300
383
+ },
384
+ {
385
+ "epoch": 3.0395,
386
+ "grad_norm": 1.751680850982666,
387
+ "learning_rate": 7.651428571428571e-06,
388
+ "loss": 0.0572,
389
+ "step": 1325
390
+ },
391
+ {
392
+ "epoch": 3.04575,
393
+ "grad_norm": 1.7433584928512573,
394
+ "learning_rate": 7.58e-06,
395
+ "loss": 0.0514,
396
+ "step": 1350
397
+ },
398
+ {
399
+ "epoch": 3.052,
400
+ "grad_norm": 1.5257874727249146,
401
+ "learning_rate": 7.508571428571429e-06,
402
+ "loss": 0.064,
403
+ "step": 1375
404
+ },
405
+ {
406
+ "epoch": 3.05825,
407
+ "grad_norm": 1.4389644861221313,
408
+ "learning_rate": 7.4371428571428575e-06,
409
+ "loss": 0.0633,
410
+ "step": 1400
411
+ },
412
+ {
413
+ "epoch": 3.0645,
414
+ "grad_norm": 2.6773693561553955,
415
+ "learning_rate": 7.365714285714286e-06,
416
+ "loss": 0.0529,
417
+ "step": 1425
418
+ },
419
+ {
420
+ "epoch": 3.07075,
421
+ "grad_norm": 2.163686752319336,
422
+ "learning_rate": 7.294285714285715e-06,
423
+ "loss": 0.0567,
424
+ "step": 1450
425
+ },
426
+ {
427
+ "epoch": 3.077,
428
+ "grad_norm": 2.352548599243164,
429
+ "learning_rate": 7.222857142857144e-06,
430
+ "loss": 0.0378,
431
+ "step": 1475
432
+ },
433
+ {
434
+ "epoch": 3.08325,
435
+ "grad_norm": 2.7124457359313965,
436
+ "learning_rate": 7.151428571428573e-06,
437
+ "loss": 0.0468,
438
+ "step": 1500
439
+ },
440
+ {
441
+ "epoch": 3.0895,
442
+ "grad_norm": 2.316270112991333,
443
+ "learning_rate": 7.08e-06,
444
+ "loss": 0.047,
445
+ "step": 1525
446
+ },
447
+ {
448
+ "epoch": 3.09575,
449
+ "grad_norm": 4.7504963874816895,
450
+ "learning_rate": 7.008571428571429e-06,
451
+ "loss": 0.041,
452
+ "step": 1550
453
+ },
454
+ {
455
+ "epoch": 4.00475,
456
+ "grad_norm": 1.6336088180541992,
457
+ "learning_rate": 6.937142857142858e-06,
458
+ "loss": 0.061,
459
+ "step": 1575
460
+ },
461
+ {
462
+ "epoch": 4.011,
463
+ "grad_norm": 1.1898696422576904,
464
+ "learning_rate": 6.865714285714287e-06,
465
+ "loss": 0.0457,
466
+ "step": 1600
467
+ },
468
+ {
469
+ "epoch": 4.01725,
470
+ "grad_norm": 1.5887372493743896,
471
+ "learning_rate": 6.794285714285714e-06,
472
+ "loss": 0.0411,
473
+ "step": 1625
474
+ },
475
+ {
476
+ "epoch": 4.0235,
477
+ "grad_norm": 1.7909950017929077,
478
+ "learning_rate": 6.722857142857143e-06,
479
+ "loss": 0.0373,
480
+ "step": 1650
481
+ },
482
+ {
483
+ "epoch": 4.02975,
484
+ "grad_norm": 1.1431634426116943,
485
+ "learning_rate": 6.651428571428572e-06,
486
+ "loss": 0.0343,
487
+ "step": 1675
488
+ },
489
+ {
490
+ "epoch": 4.036,
491
+ "grad_norm": 1.5267891883850098,
492
+ "learning_rate": 6.5800000000000005e-06,
493
+ "loss": 0.035,
494
+ "step": 1700
495
+ },
496
+ {
497
+ "epoch": 4.04225,
498
+ "grad_norm": 1.8040025234222412,
499
+ "learning_rate": 6.5085714285714295e-06,
500
+ "loss": 0.0312,
501
+ "step": 1725
502
+ },
503
+ {
504
+ "epoch": 4.0485,
505
+ "grad_norm": 1.672837257385254,
506
+ "learning_rate": 6.437142857142858e-06,
507
+ "loss": 0.034,
508
+ "step": 1750
509
+ },
510
+ {
511
+ "epoch": 4.05475,
512
+ "grad_norm": 1.5159598588943481,
513
+ "learning_rate": 6.365714285714286e-06,
514
+ "loss": 0.0431,
515
+ "step": 1775
516
+ },
517
+ {
518
+ "epoch": 4.061,
519
+ "grad_norm": 2.1531107425689697,
520
+ "learning_rate": 6.294285714285715e-06,
521
+ "loss": 0.0357,
522
+ "step": 1800
523
+ },
524
+ {
525
+ "epoch": 4.06725,
526
+ "grad_norm": 3.2716612815856934,
527
+ "learning_rate": 6.222857142857144e-06,
528
+ "loss": 0.0341,
529
+ "step": 1825
530
+ },
531
+ {
532
+ "epoch": 4.0735,
533
+ "grad_norm": 1.041003942489624,
534
+ "learning_rate": 6.151428571428571e-06,
535
+ "loss": 0.0302,
536
+ "step": 1850
537
+ },
538
+ {
539
+ "epoch": 4.07975,
540
+ "grad_norm": 2.5585858821868896,
541
+ "learning_rate": 6.08e-06,
542
+ "loss": 0.0269,
543
+ "step": 1875
544
+ },
545
+ {
546
+ "epoch": 4.086,
547
+ "grad_norm": 2.4382855892181396,
548
+ "learning_rate": 6.008571428571429e-06,
549
+ "loss": 0.0291,
550
+ "step": 1900
551
+ },
552
+ {
553
+ "epoch": 4.09225,
554
+ "grad_norm": 3.0103349685668945,
555
+ "learning_rate": 5.937142857142858e-06,
556
+ "loss": 0.0228,
557
+ "step": 1925
558
+ },
559
+ {
560
+ "epoch": 5.00125,
561
+ "grad_norm": 1.7492283582687378,
562
+ "learning_rate": 5.865714285714286e-06,
563
+ "loss": 0.0331,
564
+ "step": 1950
565
+ },
566
+ {
567
+ "epoch": 5.0075,
568
+ "grad_norm": 1.3919484615325928,
569
+ "learning_rate": 5.794285714285715e-06,
570
+ "loss": 0.0384,
571
+ "step": 1975
572
+ },
573
+ {
574
+ "epoch": 5.01375,
575
+ "grad_norm": 1.131212592124939,
576
+ "learning_rate": 5.722857142857144e-06,
577
+ "loss": 0.0277,
578
+ "step": 2000
579
+ },
580
+ {
581
+ "epoch": 5.01375,
582
+ "eval_loss": 0.12749460339546204,
583
+ "eval_runtime": 670.7043,
584
+ "eval_samples_per_second": 0.95,
585
+ "eval_steps_per_second": 0.119,
586
+ "eval_wer": 53.553446508933746,
587
+ "step": 2000
588
  }
589
  ],
590
  "logging_steps": 25,
 
604
  "attributes": {}
605
  }
606
  },
607
+ "total_flos": 9.2217464672256e+18,
608
  "train_batch_size": 16,
609
  "trial_name": null,
610
  "trial_params": null