NairaRahim committed on
Commit
a76ba4d
·
verified ·
1 Parent(s): 5c20bf9

Training in progress, epoch 10, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43bc4ea709fdacfcad21a864c7ef0120d87fb2ee3ef66e8da659ffa444583421
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b0578188a7562ebb3f653a9c172c7aab35806f8a64c735c6b610d5e2438e16f
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e223992d46bbe9bb512da1c3a6b7b0881dd041a1208b6f386d718e1eec40e519
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bd28c218e06e0ddb714109b224f0d2d6ff0943a81c87b19c895d46869fe043e
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e454a17a07c6bd0c2c4504c81830e5f67d059b3b8b8072bf0bd70406421b40e4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e31bad291fd392e01d851c04b44cf7cac0f5f8b28830534382ca16e10c847e7a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8666902435ebb9a3dd0fa595302755da0514dc6e613fbd4ffe9dc07747cb90f1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac3125827e91c83a2b02ffbd5e22748b751677850854e358e914d72d2a70c5e5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 34.75983428955078,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-11745",
4
- "epoch": 9.0,
5
  "eval_steps": 500,
6
- "global_step": 11745,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -898,6 +898,105 @@
898
  "eval_samples_per_second": 26.446,
899
  "eval_steps_per_second": 3.323,
900
  "step": 11745
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
901
  }
902
  ],
903
  "logging_steps": 100,
@@ -926,7 +1025,7 @@
926
  "attributes": {}
927
  }
928
  },
929
- "total_flos": 1.2665798127424512e+16,
930
  "train_batch_size": 8,
931
  "trial_name": null,
932
  "trial_params": null
 
1
  {
2
+ "best_metric": 34.700294494628906,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
4
+ "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 13050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
898
  "eval_samples_per_second": 26.446,
899
  "eval_steps_per_second": 3.323,
900
  "step": 11745
901
+ },
902
+ {
903
+ "epoch": 9.042145593869732,
904
+ "grad_norm": 6.431031703948975,
905
+ "learning_rate": 4.4351053639846745e-05,
906
+ "loss": 33.6431,
907
+ "step": 11800
908
+ },
909
+ {
910
+ "epoch": 9.118773946360154,
911
+ "grad_norm": 3.262486457824707,
912
+ "learning_rate": 4.430316091954023e-05,
913
+ "loss": 32.9398,
914
+ "step": 11900
915
+ },
916
+ {
917
+ "epoch": 9.195402298850574,
918
+ "grad_norm": 1.945741057395935,
919
+ "learning_rate": 4.425526819923372e-05,
920
+ "loss": 32.7256,
921
+ "step": 12000
922
+ },
923
+ {
924
+ "epoch": 9.272030651340996,
925
+ "grad_norm": 5.09276008605957,
926
+ "learning_rate": 4.4207375478927205e-05,
927
+ "loss": 33.9015,
928
+ "step": 12100
929
+ },
930
+ {
931
+ "epoch": 9.348659003831418,
932
+ "grad_norm": 3.785059928894043,
933
+ "learning_rate": 4.415948275862069e-05,
934
+ "loss": 33.6765,
935
+ "step": 12200
936
+ },
937
+ {
938
+ "epoch": 9.425287356321839,
939
+ "grad_norm": 2.4255340099334717,
940
+ "learning_rate": 4.411159003831418e-05,
941
+ "loss": 33.1262,
942
+ "step": 12300
943
+ },
944
+ {
945
+ "epoch": 9.50191570881226,
946
+ "grad_norm": 5.869349479675293,
947
+ "learning_rate": 4.4063697318007666e-05,
948
+ "loss": 33.2205,
949
+ "step": 12400
950
+ },
951
+ {
952
+ "epoch": 9.578544061302683,
953
+ "grad_norm": 2.361865997314453,
954
+ "learning_rate": 4.4015804597701146e-05,
955
+ "loss": 34.0441,
956
+ "step": 12500
957
+ },
958
+ {
959
+ "epoch": 9.655172413793103,
960
+ "grad_norm": 2.6989896297454834,
961
+ "learning_rate": 4.396791187739464e-05,
962
+ "loss": 33.6812,
963
+ "step": 12600
964
+ },
965
+ {
966
+ "epoch": 9.731800766283525,
967
+ "grad_norm": 2.6094741821289062,
968
+ "learning_rate": 4.3920019157088127e-05,
969
+ "loss": 33.9178,
970
+ "step": 12700
971
+ },
972
+ {
973
+ "epoch": 9.808429118773946,
974
+ "grad_norm": 2.4616310596466064,
975
+ "learning_rate": 4.3872126436781613e-05,
976
+ "loss": 34.5233,
977
+ "step": 12800
978
+ },
979
+ {
980
+ "epoch": 9.885057471264368,
981
+ "grad_norm": 2.7729408740997314,
982
+ "learning_rate": 4.38242337164751e-05,
983
+ "loss": 33.378,
984
+ "step": 12900
985
+ },
986
+ {
987
+ "epoch": 9.96168582375479,
988
+ "grad_norm": 2.5230519771575928,
989
+ "learning_rate": 4.377634099616859e-05,
990
+ "loss": 33.442,
991
+ "step": 13000
992
+ },
993
+ {
994
+ "epoch": 10.0,
995
+ "eval_loss": 34.700294494628906,
996
+ "eval_runtime": 49.2926,
997
+ "eval_samples_per_second": 26.475,
998
+ "eval_steps_per_second": 3.327,
999
+ "step": 13050
1000
  }
1001
  ],
1002
  "logging_steps": 100,
 
1025
  "attributes": {}
1026
  }
1027
  },
1028
+ "total_flos": 1.407310903047168e+16,
1029
  "train_batch_size": 8,
1030
  "trial_name": null,
1031
  "trial_params": null