guyhadad01 commited on
Commit
2f241ea
·
verified ·
1 Parent(s): 42eeccb

Training in progress, step 9000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cbfbd0f6a07b835e4eb96ae97dd49eff870ad88d6be40d3bd1e0357be8624f8
3
  size 471641972
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beb9560a8c2fe423ef88591ac0d75f94884fe5582f9b6b783282d22b1370fc23
3
  size 471641972
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2245879460433ceb7eb7c5b19fed96d125ec36ce0d53f2cd06f9d87c1a467ab
3
  size 943408715
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e07c0e371e8347d7031e7a77e1760ae234fe283d2998e8890cb9f2acf682bd6f
3
  size 943408715
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04ed82d1fdf30f41ed12aad05e55898e36a9e89c5860f5a9745500b3b0bce109
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d02bf5604d96d55b809dd1552690250e2164168d4b80ec847e5101bd7d910b5f
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4da13d0d238bcfbcaf7c2d149fc9e058944471070eba242db234d5d3e151f20
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a8a70b54b4c634942b08249f8cc6e6889c33d7c513637bc49b9d2b7a00426ce
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acfed49c8551f776696d2cfe4ff48e27d2dd05686444ac96aaac243eb30eaf75
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7cd3679656ae98477cc816a6c7c865253dbb97497498a91f08c26ecf155e5b3
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.812162127864775,
6
  "eval_steps": 500,
7
- "global_step": 8000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -575,6 +575,76 @@
575
  "learning_rate": 1.9790553537080574e-05,
576
  "loss": 1.967,
577
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
578
  }
579
  ],
580
  "logging_steps": 100,
@@ -594,7 +664,7 @@
594
  "attributes": {}
595
  }
596
  },
597
- "total_flos": 4349680792659456.0,
598
  "train_batch_size": 32,
599
  "trial_name": null,
600
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.9136823938478719,
6
  "eval_steps": 500,
7
+ "global_step": 9000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
575
  "learning_rate": 1.9790553537080574e-05,
576
  "loss": 1.967,
577
  "step": 8000
578
+ },
579
+ {
580
+ "epoch": 0.8223141544630846,
581
+ "grad_norm": 6.654317378997803,
582
+ "learning_rate": 1.8721949134430436e-05,
583
+ "loss": 1.9463,
584
+ "step": 8100
585
+ },
586
+ {
587
+ "epoch": 0.8324661810613944,
588
+ "grad_norm": 6.44837760925293,
589
+ "learning_rate": 1.7653344731780298e-05,
590
+ "loss": 1.9443,
591
+ "step": 8200
592
+ },
593
+ {
594
+ "epoch": 0.8426182076597041,
595
+ "grad_norm": 6.596092224121094,
596
+ "learning_rate": 1.658474032913016e-05,
597
+ "loss": 1.9514,
598
+ "step": 8300
599
+ },
600
+ {
601
+ "epoch": 0.8527702342580138,
602
+ "grad_norm": 6.20723295211792,
603
+ "learning_rate": 1.5516135926480017e-05,
604
+ "loss": 1.9623,
605
+ "step": 8400
606
+ },
607
+ {
608
+ "epoch": 0.8629222608563234,
609
+ "grad_norm": 6.767848491668701,
610
+ "learning_rate": 1.4447531523829879e-05,
611
+ "loss": 1.959,
612
+ "step": 8500
613
+ },
614
+ {
615
+ "epoch": 0.8730742874546331,
616
+ "grad_norm": 7.135315418243408,
617
+ "learning_rate": 1.337892712117974e-05,
618
+ "loss": 1.9661,
619
+ "step": 8600
620
+ },
621
+ {
622
+ "epoch": 0.8832263140529428,
623
+ "grad_norm": 6.851596832275391,
624
+ "learning_rate": 1.2310322718529602e-05,
625
+ "loss": 1.8859,
626
+ "step": 8700
627
+ },
628
+ {
629
+ "epoch": 0.8933783406512525,
630
+ "grad_norm": 5.696228504180908,
631
+ "learning_rate": 1.1241718315879462e-05,
632
+ "loss": 1.9249,
633
+ "step": 8800
634
+ },
635
+ {
636
+ "epoch": 0.9035303672495621,
637
+ "grad_norm": 7.561517238616943,
638
+ "learning_rate": 1.0173113913229324e-05,
639
+ "loss": 1.8969,
640
+ "step": 8900
641
+ },
642
+ {
643
+ "epoch": 0.9136823938478719,
644
+ "grad_norm": 5.516517639160156,
645
+ "learning_rate": 9.104509510579184e-06,
646
+ "loss": 1.952,
647
+ "step": 9000
648
  }
649
  ],
650
  "logging_steps": 100,
 
664
  "attributes": {}
665
  }
666
  },
667
+ "total_flos": 4893769911505920.0,
668
  "train_batch_size": 32,
669
  "trial_name": null,
670
  "trial_params": null