Nadav commited on
Commit
be70992
·
1 Parent(s): 7182479

Training in progress, step 50000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:974a1e790d3ca33c068dbaca3cc4297bfcbca347437b8b9cb62025728f09e96e
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b95ce7bf4a9a70df03a869bfd8710fab6f0ef7693259e8c38b3126024cbebcc3
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5a9d277649a19c26574f85a21d72156907a1fa49083c31e96d8eb40d2455fc4
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3f8697a7005b7dc484714b01f5a75ae147ff94239fd18844849065bd12dec3d
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe764e4b14b1a41f602255c5ad661b82e79519ef62fb0a01c7236478ec943d57
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea06a30ae0f68fb2b00ec4bd1c46fefdb57836276b2fa0fd9ad846aef24b1782
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3c1e2756d4a4685b9df2f62aad4f5988b6c0b032f0fc7cb98d4e77d5c23a8e8
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57478d8d6cf2ed1954106db902717d4bdc70029fbc5494696ebfea40753450a2
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:584cc9786ad375313cf5cd3cfaf9ab2fa5956cf7f817f1132bf8fc5dbd46f871
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ae6cc6da9b6b4868c887a3704ca6ff92699322e28d657fc719a89a0a38b7bb6
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.8278325961211297,
5
- "global_step": 45000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -906,11 +906,111 @@
906
  "eval_samples_per_second": 18.281,
907
  "eval_steps_per_second": 0.585,
908
  "step": 45000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
909
  }
910
  ],
911
  "max_steps": 1000000,
912
  "num_train_epochs": 86,
913
- "total_flos": 2.0700998918659003e+21,
914
  "trial_name": null,
915
  "trial_params": null
916
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.253147329023477,
5
+ "global_step": 50000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
906
  "eval_samples_per_second": 18.281,
907
  "eval_steps_per_second": 0.585,
908
  "step": 45000
909
+ },
910
+ {
911
+ "epoch": 3.87,
912
+ "learning_rate": 9.999999999999999e-06,
913
+ "loss": 0.4147,
914
+ "step": 45500
915
+ },
916
+ {
917
+ "epoch": 3.91,
918
+ "learning_rate": 9.999999999999999e-06,
919
+ "loss": 0.4145,
920
+ "step": 46000
921
+ },
922
+ {
923
+ "epoch": 3.91,
924
+ "eval_loss": 0.3854221701622009,
925
+ "eval_runtime": 27.205,
926
+ "eval_samples_per_second": 18.379,
927
+ "eval_steps_per_second": 0.588,
928
+ "step": 46000
929
+ },
930
+ {
931
+ "epoch": 3.96,
932
+ "learning_rate": 9.999999999999999e-06,
933
+ "loss": 0.4149,
934
+ "step": 46500
935
+ },
936
+ {
937
+ "epoch": 4.0,
938
+ "learning_rate": 9.999999999999999e-06,
939
+ "loss": 0.4143,
940
+ "step": 47000
941
+ },
942
+ {
943
+ "epoch": 4.0,
944
+ "eval_loss": 0.38265106081962585,
945
+ "eval_runtime": 26.169,
946
+ "eval_samples_per_second": 19.107,
947
+ "eval_steps_per_second": 0.611,
948
+ "step": 47000
949
+ },
950
+ {
951
+ "epoch": 4.04,
952
+ "learning_rate": 9.999999999999999e-06,
953
+ "loss": 0.4129,
954
+ "step": 47500
955
+ },
956
+ {
957
+ "epoch": 4.08,
958
+ "learning_rate": 9.999999999999999e-06,
959
+ "loss": 0.4144,
960
+ "step": 48000
961
+ },
962
+ {
963
+ "epoch": 4.08,
964
+ "eval_loss": 0.382869690656662,
965
+ "eval_runtime": 25.2103,
966
+ "eval_samples_per_second": 19.833,
967
+ "eval_steps_per_second": 0.635,
968
+ "step": 48000
969
+ },
970
+ {
971
+ "epoch": 4.13,
972
+ "learning_rate": 9.999999999999999e-06,
973
+ "loss": 0.4131,
974
+ "step": 48500
975
+ },
976
+ {
977
+ "epoch": 4.17,
978
+ "learning_rate": 9.999999999999999e-06,
979
+ "loss": 0.4147,
980
+ "step": 49000
981
+ },
982
+ {
983
+ "epoch": 4.17,
984
+ "eval_loss": 0.38291990756988525,
985
+ "eval_runtime": 36.6033,
986
+ "eval_samples_per_second": 13.66,
987
+ "eval_steps_per_second": 0.437,
988
+ "step": 49000
989
+ },
990
+ {
991
+ "epoch": 4.21,
992
+ "learning_rate": 9.999999999999999e-06,
993
+ "loss": 0.4125,
994
+ "step": 49500
995
+ },
996
+ {
997
+ "epoch": 4.25,
998
+ "learning_rate": 9.999999999999999e-06,
999
+ "loss": 0.4143,
1000
+ "step": 50000
1001
+ },
1002
+ {
1003
+ "epoch": 4.25,
1004
+ "eval_loss": 0.3828723728656769,
1005
+ "eval_runtime": 27.6434,
1006
+ "eval_samples_per_second": 18.088,
1007
+ "eval_steps_per_second": 0.579,
1008
+ "step": 50000
1009
  }
1010
  ],
1011
  "max_steps": 1000000,
1012
  "num_train_epochs": 86,
1013
+ "total_flos": 2.3001002203489527e+21,
1014
  "trial_name": null,
1015
  "trial_params": null
1016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5a9d277649a19c26574f85a21d72156907a1fa49083c31e96d8eb40d2455fc4
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3f8697a7005b7dc484714b01f5a75ae147ff94239fd18844849065bd12dec3d
3
  size 449471589