dq158 commited on
Commit
1d33f7b
·
1 Parent(s): 6fe89b6

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3185710f9ed48293a778de595fcd38098844825122d64b9829b1b93f6412f403
3
  size 1980860410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de02ef88a9c06d1c4c39ca3c4c2f1202553b57c8ad4795abcd030e9f5ec610f8
3
  size 1980860410
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6d9360daf7120c5d39d8ef5625d204b3b6837a8189e798928f762e83a610468
3
  size 990409330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef45bf07e4aa6ce72fbbd0a19258dbb992063d4bf6b046ce62330fa93b06089d
3
  size 990409330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbddb6a0f6f63e14b35b515dcd6478e86f1cce79a693afe0ccaec7cdbe6f4fcc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c1fd7d1cea75e4e374c54e815a21a8bbeecb72f564c49061658d53b541359df
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:727027b6d6031aa8f34a43940937c6f906ae36c7156fab65320547a0006b7c81
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dde9ec5a9c5afe2585b7bb2f95d066fbcef5ffc592770aca32d5e4130db08ddc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.835176944732666,
3
- "best_model_checkpoint": "dq158/morbius/checkpoint-9568",
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 28704,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -406,13 +406,146 @@
406
  "eval_steps_per_second": 1.315,
407
  "eval_translation_length": 107465,
408
  "step": 28704
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
  }
410
  ],
411
  "logging_steps": 500,
412
  "max_steps": 47840,
413
  "num_train_epochs": 5,
414
  "save_steps": 500,
415
- "total_flos": 2.3584275781543526e+17,
416
  "trial_name": null,
417
  "trial_params": null
418
  }
 
1
  {
2
+ "best_metric": 1.8330533504486084,
3
+ "best_model_checkpoint": "dq158/morbius/checkpoint-38272",
4
+ "epoch": 4.0,
5
  "eval_steps": 500,
6
+ "global_step": 38272,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
406
  "eval_steps_per_second": 1.315,
407
  "eval_translation_length": 107465,
408
  "step": 28704
409
+ },
410
+ {
411
+ "epoch": 3.03,
412
+ "learning_rate": 1.9690635451505017e-05,
413
+ "loss": 1.9465,
414
+ "step": 29000
415
+ },
416
+ {
417
+ "epoch": 3.08,
418
+ "learning_rate": 1.9168060200668896e-05,
419
+ "loss": 1.9071,
420
+ "step": 29500
421
+ },
422
+ {
423
+ "epoch": 3.14,
424
+ "learning_rate": 1.8645484949832775e-05,
425
+ "loss": 1.893,
426
+ "step": 30000
427
+ },
428
+ {
429
+ "epoch": 3.19,
430
+ "learning_rate": 1.8122909698996657e-05,
431
+ "loss": 1.8895,
432
+ "step": 30500
433
+ },
434
+ {
435
+ "epoch": 3.24,
436
+ "learning_rate": 1.7600334448160536e-05,
437
+ "loss": 1.8914,
438
+ "step": 31000
439
+ },
440
+ {
441
+ "epoch": 3.29,
442
+ "learning_rate": 1.7077759197324418e-05,
443
+ "loss": 1.9214,
444
+ "step": 31500
445
+ },
446
+ {
447
+ "epoch": 3.34,
448
+ "learning_rate": 1.6555183946488294e-05,
449
+ "loss": 1.8911,
450
+ "step": 32000
451
+ },
452
+ {
453
+ "epoch": 3.4,
454
+ "learning_rate": 1.6032608695652173e-05,
455
+ "loss": 1.9149,
456
+ "step": 32500
457
+ },
458
+ {
459
+ "epoch": 3.45,
460
+ "learning_rate": 1.5510033444816055e-05,
461
+ "loss": 1.902,
462
+ "step": 33000
463
+ },
464
+ {
465
+ "epoch": 3.5,
466
+ "learning_rate": 1.4987458193979934e-05,
467
+ "loss": 1.9066,
468
+ "step": 33500
469
+ },
470
+ {
471
+ "epoch": 3.55,
472
+ "learning_rate": 1.4464882943143812e-05,
473
+ "loss": 1.9027,
474
+ "step": 34000
475
+ },
476
+ {
477
+ "epoch": 3.61,
478
+ "learning_rate": 1.3942307692307693e-05,
479
+ "loss": 1.9232,
480
+ "step": 34500
481
+ },
482
+ {
483
+ "epoch": 3.66,
484
+ "learning_rate": 1.3419732441471572e-05,
485
+ "loss": 1.926,
486
+ "step": 35000
487
+ },
488
+ {
489
+ "epoch": 3.71,
490
+ "learning_rate": 1.2897157190635452e-05,
491
+ "loss": 1.87,
492
+ "step": 35500
493
+ },
494
+ {
495
+ "epoch": 3.76,
496
+ "learning_rate": 1.2374581939799331e-05,
497
+ "loss": 1.9055,
498
+ "step": 36000
499
+ },
500
+ {
501
+ "epoch": 3.81,
502
+ "learning_rate": 1.1852006688963212e-05,
503
+ "loss": 1.952,
504
+ "step": 36500
505
+ },
506
+ {
507
+ "epoch": 3.87,
508
+ "learning_rate": 1.132943143812709e-05,
509
+ "loss": 1.8896,
510
+ "step": 37000
511
+ },
512
+ {
513
+ "epoch": 3.92,
514
+ "learning_rate": 1.080685618729097e-05,
515
+ "loss": 1.9144,
516
+ "step": 37500
517
+ },
518
+ {
519
+ "epoch": 3.97,
520
+ "learning_rate": 1.028428093645485e-05,
521
+ "loss": 1.9447,
522
+ "step": 38000
523
+ },
524
+ {
525
+ "epoch": 4.0,
526
+ "eval_bleu": 0.08669705678202416,
527
+ "eval_brevity_penalty": 0.7634478532624474,
528
+ "eval_length_ratio": 0.7874570959558275,
529
+ "eval_loss": 1.8330533504486084,
530
+ "eval_precisions": [
531
+ 0.1886293646657507,
532
+ 0.11698894134385307,
533
+ 0.08828452928243054,
534
+ 0.08536133232489508
535
+ ],
536
+ "eval_reference_length": 134020,
537
+ "eval_runtime": 811.3896,
538
+ "eval_samples_per_second": 15.722,
539
+ "eval_steps_per_second": 1.311,
540
+ "eval_translation_length": 105535,
541
+ "step": 38272
542
  }
543
  ],
544
  "logging_steps": 500,
545
  "max_steps": 47840,
546
  "num_train_epochs": 5,
547
  "save_steps": 500,
548
+ "total_flos": 3.1445701042058035e+17,
549
  "trial_name": null,
550
  "trial_params": null
551
  }