Nadav committed on
Commit
4ef9c28
·
1 Parent(s): 21e32f8

Training in progress, step 30000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0d203a8c7bacd9049d0a9a6ba66771bad7db3da1b0e849bcf26a2083ccab635
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce32d22625705c0286f9e6d65f3739a225afd225bf42450e442c235926dca0bd
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83bcb89c6daff571003c7df9aae49dc4a313a7404638fcb7d95f82b950d2e5af
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecaaf33fb01c1c5987f8079721db31eb104bfbf4bf240e01f2563f77a752545b
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3271ed48861b853ff2a93ab2d113124282a36f76af112eacd53eeaa11994564a
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:267980fc8ce2bc5d8b7d17111acb61434edc89327d5100338b6bb7b7ef476513
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:865ba51ccd1f35f320c7110ccb893ffd337376d4652e722731792c01668ba190
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7533fc854a01a1c19e7dd354294c0cb875567534d008f89ea504ec2c31e7aad3
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3041a56ac9f847b3d8ba49ecd2e74fdd80acf3c5d07444653f5e498839336c44
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab97dfb3ccb8bc19754256c974ba1c934a3de0fa701671d85dd79f589e39ddfb
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.1265736645117386,
5
- "global_step": 25000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -506,11 +506,111 @@
506
  "eval_samples_per_second": 30.294,
507
  "eval_steps_per_second": 0.969,
508
  "step": 25000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
509
  }
510
  ],
511
  "max_steps": 1000000,
512
  "num_train_epochs": 86,
513
- "total_flos": 1.1500501101744764e+21,
514
  "trial_name": null,
515
  "trial_params": null
516
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.5518883974140865,
5
+ "global_step": 30000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
506
  "eval_samples_per_second": 30.294,
507
  "eval_steps_per_second": 0.969,
508
  "step": 25000
509
+ },
510
+ {
511
+ "epoch": 2.17,
512
+ "learning_rate": 9.999999999999999e-06,
513
+ "loss": 0.4271,
514
+ "step": 25500
515
+ },
516
+ {
517
+ "epoch": 2.21,
518
+ "learning_rate": 9.999999999999999e-06,
519
+ "loss": 0.4252,
520
+ "step": 26000
521
+ },
522
+ {
523
+ "epoch": 2.21,
524
+ "eval_loss": 0.3965121805667877,
525
+ "eval_runtime": 16.1623,
526
+ "eval_samples_per_second": 30.936,
527
+ "eval_steps_per_second": 0.99,
528
+ "step": 26000
529
+ },
530
+ {
531
+ "epoch": 2.25,
532
+ "learning_rate": 9.999999999999999e-06,
533
+ "loss": 0.4255,
534
+ "step": 26500
535
+ },
536
+ {
537
+ "epoch": 2.3,
538
+ "learning_rate": 9.999999999999999e-06,
539
+ "loss": 0.425,
540
+ "step": 27000
541
+ },
542
+ {
543
+ "epoch": 2.3,
544
+ "eval_loss": 0.39477214217185974,
545
+ "eval_runtime": 15.7512,
546
+ "eval_samples_per_second": 31.744,
547
+ "eval_steps_per_second": 1.016,
548
+ "step": 27000
549
+ },
550
+ {
551
+ "epoch": 2.34,
552
+ "learning_rate": 9.999999999999999e-06,
553
+ "loss": 0.4248,
554
+ "step": 27500
555
+ },
556
+ {
557
+ "epoch": 2.38,
558
+ "learning_rate": 9.999999999999999e-06,
559
+ "loss": 0.4248,
560
+ "step": 28000
561
+ },
562
+ {
563
+ "epoch": 2.38,
564
+ "eval_loss": 0.395481139421463,
565
+ "eval_runtime": 15.4129,
566
+ "eval_samples_per_second": 32.44,
567
+ "eval_steps_per_second": 1.038,
568
+ "step": 28000
569
+ },
570
+ {
571
+ "epoch": 2.42,
572
+ "learning_rate": 9.999999999999999e-06,
573
+ "loss": 0.4246,
574
+ "step": 28500
575
+ },
576
+ {
577
+ "epoch": 2.47,
578
+ "learning_rate": 9.999999999999999e-06,
579
+ "loss": 0.424,
580
+ "step": 29000
581
+ },
582
+ {
583
+ "epoch": 2.47,
584
+ "eval_loss": 0.3951389193534851,
585
+ "eval_runtime": 15.7676,
586
+ "eval_samples_per_second": 31.711,
587
+ "eval_steps_per_second": 1.015,
588
+ "step": 29000
589
+ },
590
+ {
591
+ "epoch": 2.51,
592
+ "learning_rate": 9.999999999999999e-06,
593
+ "loss": 0.4241,
594
+ "step": 29500
595
+ },
596
+ {
597
+ "epoch": 2.55,
598
+ "learning_rate": 9.999999999999999e-06,
599
+ "loss": 0.4234,
600
+ "step": 30000
601
+ },
602
+ {
603
+ "epoch": 2.55,
604
+ "eval_loss": 0.3956534266471863,
605
+ "eval_runtime": 15.8104,
606
+ "eval_samples_per_second": 31.625,
607
+ "eval_steps_per_second": 1.012,
608
+ "step": 30000
609
  }
610
  ],
611
  "max_steps": 1000000,
612
  "num_train_epochs": 86,
613
+ "total_flos": 1.3800665945772668e+21,
614
  "trial_name": null,
615
  "trial_params": null
616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83bcb89c6daff571003c7df9aae49dc4a313a7404638fcb7d95f82b950d2e5af
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecaaf33fb01c1c5987f8079721db31eb104bfbf4bf240e01f2563f77a752545b
3
  size 449471589