Rakhman16 committed on
Commit
6deb1ea
·
verified ·
1 Parent(s): 7a8626e

Training in progress, step 4500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:315dd3d1be5cd5aad93e16d6cfd64f1bb9fe3d28b4bf1a28890a1a22e06b4268
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0673723b6fea73b97632e58ff883b0fcedfe7c681e3b064768751625f89426b0
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2aded1500c0a4e0686d69b68e13b1d801287a27d997750ae1545e6654570347
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c6a5cfafd84b1b27fd2b7eb7cde6b07d68bac4292ccc06119a3b5fcbffec4d1
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:448b3e60abf19a367f627475ea9fd93123102153c10bf14e51ba3e6e1e24bd8e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b65d52be0a01d1387b6ab5b94c8d85e39f197ebf9d2ff39d8953e7c1331a8faf
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11165328e3ed53c81315a2b3c898e1767b47d1d7722e15ceb97f24911d09fdce
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2af37ede7aaba01e654492d7c7cd23480899eab3274bf26dc98e9ba5f8aeadfb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.20224925875663757,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-4000",
4
- "epoch": 2.8099754127151386,
5
  "eval_steps": 100,
6
- "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -887,6 +887,116 @@
887
  "eval_samples_per_second": 66.71,
888
  "eval_steps_per_second": 2.094,
889
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
890
  }
891
  ],
892
  "logging_steps": 50,
@@ -906,7 +1016,7 @@
906
  "attributes": {}
907
  }
908
  },
909
- "total_flos": 3.89687378116608e+16,
910
  "train_batch_size": 32,
911
  "trial_name": null,
912
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.20200392603874207,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-4500",
4
+ "epoch": 3.161222339304531,
5
  "eval_steps": 100,
6
+ "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
887
  "eval_samples_per_second": 66.71,
888
  "eval_steps_per_second": 2.094,
889
  "step": 4000
890
+ },
891
+ {
892
+ "epoch": 2.845100105374078,
893
+ "grad_norm": 60070.58984375,
894
+ "learning_rate": 8.65425158116655e-06,
895
+ "loss": 0.1969,
896
+ "step": 4050
897
+ },
898
+ {
899
+ "epoch": 2.880224798033017,
900
+ "grad_norm": 20594.654296875,
901
+ "learning_rate": 8.390723822909348e-06,
902
+ "loss": 0.2009,
903
+ "step": 4100
904
+ },
905
+ {
906
+ "epoch": 2.880224798033017,
907
+ "eval_loss": 0.20173698663711548,
908
+ "eval_runtime": 66.8679,
909
+ "eval_samples_per_second": 66.699,
910
+ "eval_steps_per_second": 2.094,
911
+ "step": 4100
912
+ },
913
+ {
914
+ "epoch": 2.9153494906919564,
915
+ "grad_norm": 22764.1640625,
916
+ "learning_rate": 8.127196064652143e-06,
917
+ "loss": 0.1939,
918
+ "step": 4150
919
+ },
920
+ {
921
+ "epoch": 2.9504741833508956,
922
+ "grad_norm": 22604.9375,
923
+ "learning_rate": 7.86366830639494e-06,
924
+ "loss": 0.1991,
925
+ "step": 4200
926
+ },
927
+ {
928
+ "epoch": 2.9504741833508956,
929
+ "eval_loss": 0.20178209245204926,
930
+ "eval_runtime": 67.157,
931
+ "eval_samples_per_second": 66.412,
932
+ "eval_steps_per_second": 2.085,
933
+ "step": 4200
934
+ },
935
+ {
936
+ "epoch": 2.985598876009835,
937
+ "grad_norm": 23427.0,
938
+ "learning_rate": 7.600140548137737e-06,
939
+ "loss": 0.1982,
940
+ "step": 4250
941
+ },
942
+ {
943
+ "epoch": 3.020723568668774,
944
+ "grad_norm": 22872.943359375,
945
+ "learning_rate": 7.336612789880535e-06,
946
+ "loss": 0.1905,
947
+ "step": 4300
948
+ },
949
+ {
950
+ "epoch": 3.020723568668774,
951
+ "eval_loss": 0.20212285220623016,
952
+ "eval_runtime": 66.9569,
953
+ "eval_samples_per_second": 66.61,
954
+ "eval_steps_per_second": 2.091,
955
+ "step": 4300
956
+ },
957
+ {
958
+ "epoch": 3.0558482613277134,
959
+ "grad_norm": 20360.029296875,
960
+ "learning_rate": 7.073085031623331e-06,
961
+ "loss": 0.2011,
962
+ "step": 4350
963
+ },
964
+ {
965
+ "epoch": 3.0909729539866526,
966
+ "grad_norm": 26769.02734375,
967
+ "learning_rate": 6.809557273366128e-06,
968
+ "loss": 0.1939,
969
+ "step": 4400
970
+ },
971
+ {
972
+ "epoch": 3.0909729539866526,
973
+ "eval_loss": 0.20202863216400146,
974
+ "eval_runtime": 66.9701,
975
+ "eval_samples_per_second": 66.597,
976
+ "eval_steps_per_second": 2.09,
977
+ "step": 4400
978
+ },
979
+ {
980
+ "epoch": 3.126097646645592,
981
+ "grad_norm": 34976.171875,
982
+ "learning_rate": 6.546029515108924e-06,
983
+ "loss": 0.1912,
984
+ "step": 4450
985
+ },
986
+ {
987
+ "epoch": 3.161222339304531,
988
+ "grad_norm": 50123.8671875,
989
+ "learning_rate": 6.282501756851722e-06,
990
+ "loss": 0.1934,
991
+ "step": 4500
992
+ },
993
+ {
994
+ "epoch": 3.161222339304531,
995
+ "eval_loss": 0.20200392603874207,
996
+ "eval_runtime": 66.9822,
997
+ "eval_samples_per_second": 66.585,
998
+ "eval_steps_per_second": 2.09,
999
+ "step": 4500
1000
  }
1001
  ],
1002
  "logging_steps": 50,
 
1016
  "attributes": {}
1017
  }
1018
  },
1019
+ "total_flos": 4.38381173440512e+16,
1020
  "train_batch_size": 32,
1021
  "trial_name": null,
1022
  "trial_params": null