Fanucci committed on
Commit
b9ebfaa
·
verified ·
1 Parent(s): a4374e9

Training in progress, step 850, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b38eab253dba5b0deb684b9be7be55b35fccfa203ed22bec471b3d2aa494879
3
  size 4995335576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da079b9d8ca9edec5c19d40185ac34a5d967b988536e67ab8c54c5fc8c3731b0
3
  size 4995335576
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2706762d78364c391d070350d7079727bc3b17acaff24aa2d513f2e945c300f
3
  size 1857639032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d536b0da6ed2281687c34137ae7211c81201040fd7f921085d915c4552739c2
3
  size 1857639032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a211653fa38f3820ad08baeef7bf2217831b7fa32c8abb08f12aef6c9bf3c4f
3
  size 13706103974
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69b86e924bddef11293e860631f6e1e1350720b1f64e7f8977d7267e36a24ee7
3
  size 13706103974
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9213080fe2b45399b87036ca9ff9164533abe6b368e5c828136ee184486749d4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86a2aa7bca84d2b8e7dd4e04a286714ba9169af11c46c739950a52df4c45259f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:462c3770d14e466903ac3cbb8d02a07b05bb99c1e78e9ab65cd1a8165b933c02
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9109740607103fa260f8765f6910df83c4ef7244fe0858cce09056260804c468
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1,
3
  "best_metric": 1.4945952892303467,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.04996876951905059,
6
  "eval_steps": 50,
7
- "global_step": 800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -704,6 +704,49 @@
704
  "eval_samples_per_second": 15.331,
705
  "eval_steps_per_second": 15.331,
706
  "step": 800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
707
  }
708
  ],
709
  "logging_steps": 10,
@@ -718,7 +761,7 @@
718
  "early_stopping_threshold": 0.0
719
  },
720
  "attributes": {
721
- "early_stopping_patience_counter": 16
722
  }
723
  },
724
  "TrainerControl": {
@@ -732,7 +775,7 @@
732
  "attributes": {}
733
  }
734
  },
735
- "total_flos": 3.267697311744e+16,
736
  "train_batch_size": 1,
737
  "trial_name": null,
738
  "trial_params": null
 
2
  "best_global_step": 1,
3
  "best_metric": 1.4945952892303467,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.05309181761399125,
6
  "eval_steps": 50,
7
+ "global_step": 850,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
704
  "eval_samples_per_second": 15.331,
705
  "eval_steps_per_second": 15.331,
706
  "step": 800
707
+ },
708
+ {
709
+ "epoch": 0.05059337913803873,
710
+ "grad_norm": 1072.0,
711
+ "learning_rate": 0.004453175712928476,
712
+ "loss": 8.6992,
713
+ "step": 810
714
+ },
715
+ {
716
+ "epoch": 0.05121798875702686,
717
+ "grad_norm": 3584.0,
718
+ "learning_rate": 0.004011658237794877,
719
+ "loss": 7.6904,
720
+ "step": 820
721
+ },
722
+ {
723
+ "epoch": 0.051842598376014994,
724
+ "grad_norm": 121.5,
725
+ "learning_rate": 0.003591274243710277,
726
+ "loss": 8.3538,
727
+ "step": 830
728
+ },
729
+ {
730
+ "epoch": 0.05246720799500312,
731
+ "grad_norm": 524.0,
732
+ "learning_rate": 0.0031924470217222834,
733
+ "loss": 7.6898,
734
+ "step": 840
735
+ },
736
+ {
737
+ "epoch": 0.05309181761399125,
738
+ "grad_norm": 516.0,
739
+ "learning_rate": 0.002815578157036303,
740
+ "loss": 8.4977,
741
+ "step": 850
742
+ },
743
+ {
744
+ "epoch": 0.05309181761399125,
745
+ "eval_loss": 8.241488456726074,
746
+ "eval_runtime": 55.3183,
747
+ "eval_samples_per_second": 15.239,
748
+ "eval_steps_per_second": 15.239,
749
+ "step": 850
750
  }
751
  ],
752
  "logging_steps": 10,
 
761
  "early_stopping_threshold": 0.0
762
  },
763
  "attributes": {
764
+ "early_stopping_patience_counter": 17
765
  }
766
  },
767
  "TrainerControl": {
 
775
  "attributes": {}
776
  }
777
  },
778
+ "total_flos": 3.471928393728e+16,
779
  "train_batch_size": 1,
780
  "trial_name": null,
781
  "trial_params": null