irishprancer commited on
Commit
1375d4d
·
verified ·
1 Parent(s): 64af500

Training in progress, step 1050, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a27a7c2ec6b774731f6aa003dbe657cf0f987fe25d734da3afab88baa02c39d
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0e8da576de1a533d44f53c0812dae8d5a26fc059f8c242d522e7ff65d9cf742
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:878dd48bd56764e8178656b692092a26f8627ce77d11d5f9b0b03561a5b8d514
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0793bdd731f1c36630008341e378831fc838bd98c826a69296d2e308eb19cec9
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5535f299045fe8f5c903e1fe89ea07c865852a623fd1578e00cbe2ef4b1aa5e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eed7e63646e60ae2bd56a0754378b43da25eff8bd39e1edda0ec4d07c731eeb8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6933cf40adb222251144f8b6059d0312a60442b27edead7b2d2d4edd50c35f13
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e372b046187e477eabb1e557ed274ea206c76e12dfed9a7f7bcfa7ddabfb242a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7167752981185913,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 39.130434782608695,
5
  "eval_steps": 150,
6
- "global_step": 900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -877,6 +877,151 @@
877
  "eval_samples_per_second": 25.341,
878
  "eval_steps_per_second": 25.341,
879
  "step": 900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
880
  }
881
  ],
882
  "logging_steps": 10,
@@ -896,7 +1041,7 @@
896
  "attributes": {}
897
  }
898
  },
899
- "total_flos": 2.3078822838165504e+16,
900
  "train_batch_size": 4,
901
  "trial_name": null,
902
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7167752981185913,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 45.65217391304348,
5
  "eval_steps": 150,
6
+ "global_step": 1050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
877
  "eval_samples_per_second": 25.341,
878
  "eval_steps_per_second": 25.341,
879
  "step": 900
880
+ },
881
+ {
882
+ "epoch": 39.56521739130435,
883
+ "grad_norm": 1.403578758239746,
884
+ "learning_rate": 1.129275730490657e-06,
885
+ "loss": 0.4353,
886
+ "step": 910
887
+ },
888
+ {
889
+ "epoch": 40.0,
890
+ "grad_norm": 2.7664737701416016,
891
+ "learning_rate": 2.258551460981314e-06,
892
+ "loss": 0.4364,
893
+ "step": 920
894
+ },
895
+ {
896
+ "epoch": 40.43478260869565,
897
+ "grad_norm": 1.4606038331985474,
898
+ "learning_rate": 3.3878271914719706e-06,
899
+ "loss": 0.5149,
900
+ "step": 930
901
+ },
902
+ {
903
+ "epoch": 40.869565217391305,
904
+ "grad_norm": 1.4345122575759888,
905
+ "learning_rate": 4.517102921962628e-06,
906
+ "loss": 0.3915,
907
+ "step": 940
908
+ },
909
+ {
910
+ "epoch": 41.30434782608695,
911
+ "grad_norm": 1.9893536567687988,
912
+ "learning_rate": 5.6463786524532845e-06,
913
+ "loss": 0.4036,
914
+ "step": 950
915
+ },
916
+ {
917
+ "epoch": 41.73913043478261,
918
+ "grad_norm": 1.3536087274551392,
919
+ "learning_rate": 6.775654382943941e-06,
920
+ "loss": 0.4227,
921
+ "step": 960
922
+ },
923
+ {
924
+ "epoch": 42.17391304347826,
925
+ "grad_norm": 1.3940178155899048,
926
+ "learning_rate": 7.904930113434598e-06,
927
+ "loss": 0.5038,
928
+ "step": 970
929
+ },
930
+ {
931
+ "epoch": 42.608695652173914,
932
+ "grad_norm": 1.3348439931869507,
933
+ "learning_rate": 9.034205843925256e-06,
934
+ "loss": 0.4424,
935
+ "step": 980
936
+ },
937
+ {
938
+ "epoch": 43.04347826086956,
939
+ "grad_norm": 1.4961334466934204,
940
+ "learning_rate": 1.0163481574415913e-05,
941
+ "loss": 0.4336,
942
+ "step": 990
943
+ },
944
+ {
945
+ "epoch": 43.47826086956522,
946
+ "grad_norm": 1.2589102983474731,
947
+ "learning_rate": 1.1292757304906569e-05,
948
+ "loss": 0.4641,
949
+ "step": 1000
950
+ },
951
+ {
952
+ "epoch": 43.91304347826087,
953
+ "grad_norm": 1.1131060123443604,
954
+ "learning_rate": 1.1292756185884522e-05,
955
+ "loss": 0.4143,
956
+ "step": 1010
957
+ },
958
+ {
959
+ "epoch": 44.34782608695652,
960
+ "grad_norm": 1.8933826684951782,
961
+ "learning_rate": 1.1292752828818823e-05,
962
+ "loss": 0.4697,
963
+ "step": 1020
964
+ },
965
+ {
966
+ "epoch": 44.78260869565217,
967
+ "grad_norm": 1.4995206594467163,
968
+ "learning_rate": 1.1292747233710805e-05,
969
+ "loss": 0.4582,
970
+ "step": 1030
971
+ },
972
+ {
973
+ "epoch": 45.21739130434783,
974
+ "grad_norm": 1.1720516681671143,
975
+ "learning_rate": 1.1292739400562683e-05,
976
+ "loss": 0.3298,
977
+ "step": 1040
978
+ },
979
+ {
980
+ "epoch": 45.65217391304348,
981
+ "grad_norm": 1.5783675909042358,
982
+ "learning_rate": 1.1292729329377566e-05,
983
+ "loss": 0.4415,
984
+ "step": 1050
985
+ },
986
+ {
987
+ "epoch": 45.65217391304348,
988
+ "eval_loss": 0.7320327758789062,
989
+ "eval_runtime": 0.5007,
990
+ "eval_samples_per_second": 19.973,
991
+ "eval_steps_per_second": 19.973,
992
+ "step": 1050
993
+ },
994
+ {
995
+ "epoch": 45.65217391304348,
996
+ "eval_loss": 0.8609212040901184,
997
+ "eval_runtime": 0.425,
998
+ "eval_samples_per_second": 23.532,
999
+ "eval_steps_per_second": 23.532,
1000
+ "step": 1050
1001
+ },
1002
+ {
1003
+ "epoch": 45.65217391304348,
1004
+ "eval_loss": 0.7320327758789062,
1005
+ "eval_runtime": 0.4019,
1006
+ "eval_samples_per_second": 24.883,
1007
+ "eval_steps_per_second": 24.883,
1008
+ "step": 1050
1009
+ },
1010
+ {
1011
+ "epoch": 45.65217391304348,
1012
+ "eval_loss": 0.7223752737045288,
1013
+ "eval_runtime": 0.4083,
1014
+ "eval_samples_per_second": 24.492,
1015
+ "eval_steps_per_second": 24.492,
1016
+ "step": 1050
1017
+ },
1018
+ {
1019
+ "epoch": 45.65217391304348,
1020
+ "eval_loss": 0.8609784245491028,
1021
+ "eval_runtime": 0.4101,
1022
+ "eval_samples_per_second": 24.385,
1023
+ "eval_steps_per_second": 24.385,
1024
+ "step": 1050
1025
  }
1026
  ],
1027
  "logging_steps": 10,
 
1041
  "attributes": {}
1042
  }
1043
  },
1044
+ "total_flos": 2.696323352857805e+16,
1045
  "train_batch_size": 4,
1046
  "trial_name": null,
1047
  "trial_params": null