irishprancer commited on
Commit
cec3d84
·
verified ·
1 Parent(s): e17d9c4

Training in progress, step 1350, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a6dd8eb15d303eb3b236d8f68cc9d45fec3651e65ec39b9d552d49c0ad3e89e
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffa55cd66a91334e326e07df94597fc69e79e4b574e9fd7db3b180e03bf34a5a
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bdb1caf162c588fbb79a826fe5e343b59d9db2b4bcedf59ac4a5cc0d94edc2e
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10ea9539f99ad90d4237f78923fd6eabfa6be63c3f014ac341847df0a0c27d26
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfd83aa19eaa65ffd8facfc7a6e6b5ccb62a4255eff28971986e07fccd1c0b48
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa041c836419a6bfa571598f365167fc714355f663c100bd675c5722ac5b2b43
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87220f7564c527fa546ba53e49f1fc40170b9568e84927eecccc6abfeef8f191
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50a64c967527f5f080ab65c7a91bddbd4b12cb03e94ccfc2d01741bebae5f6a9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7167766094207764,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 52.17391304347826,
5
  "eval_steps": 150,
6
- "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -959,6 +959,143 @@
959
  "eval_samples_per_second": 22.998,
960
  "eval_steps_per_second": 22.998,
961
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
962
  }
963
  ],
964
  "logging_steps": 10,
@@ -978,7 +1115,7 @@
978
  "attributes": {}
979
  }
980
  },
981
- "total_flos": 3.076671992345395e+16,
982
  "train_batch_size": 4,
983
  "trial_name": null,
984
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7167766094207764,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 58.69565217391305,
5
  "eval_steps": 150,
6
+ "global_step": 1350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
959
  "eval_samples_per_second": 22.998,
960
  "eval_steps_per_second": 22.998,
961
  "step": 1200
962
+ },
963
+ {
964
+ "epoch": 52.608695652173914,
965
+ "grad_norm": 1.362567663192749,
966
+ "learning_rate": 1.4986535955022992e-05,
967
+ "loss": 0.3321,
968
+ "step": 1210
969
+ },
970
+ {
971
+ "epoch": 53.04347826086956,
972
+ "grad_norm": 1.6412482261657715,
973
+ "learning_rate": 1.4986516649409596e-05,
974
+ "loss": 0.4151,
975
+ "step": 1220
976
+ },
977
+ {
978
+ "epoch": 53.47826086956522,
979
+ "grad_norm": 1.216570258140564,
980
+ "learning_rate": 1.498649437372243e-05,
981
+ "loss": 0.3162,
982
+ "step": 1230
983
+ },
984
+ {
985
+ "epoch": 53.91304347826087,
986
+ "grad_norm": 1.761721134185791,
987
+ "learning_rate": 1.4986469127970329e-05,
988
+ "loss": 0.3817,
989
+ "step": 1240
990
+ },
991
+ {
992
+ "epoch": 54.34782608695652,
993
+ "grad_norm": 1.6624178886413574,
994
+ "learning_rate": 1.4986440912163295e-05,
995
+ "loss": 0.4377,
996
+ "step": 1250
997
+ },
998
+ {
999
+ "epoch": 54.78260869565217,
1000
+ "grad_norm": 1.047031044960022,
1001
+ "learning_rate": 1.4986409726312515e-05,
1002
+ "loss": 0.3297,
1003
+ "step": 1260
1004
+ },
1005
+ {
1006
+ "epoch": 55.21739130434783,
1007
+ "grad_norm": 1.37089204788208,
1008
+ "learning_rate": 1.498637557043035e-05,
1009
+ "loss": 0.3577,
1010
+ "step": 1270
1011
+ },
1012
+ {
1013
+ "epoch": 55.65217391304348,
1014
+ "grad_norm": 1.563143014907837,
1015
+ "learning_rate": 1.4986338444530336e-05,
1016
+ "loss": 0.4021,
1017
+ "step": 1280
1018
+ },
1019
+ {
1020
+ "epoch": 56.08695652173913,
1021
+ "grad_norm": 2.0261929035186768,
1022
+ "learning_rate": 1.498629834862719e-05,
1023
+ "loss": 0.3311,
1024
+ "step": 1290
1025
+ },
1026
+ {
1027
+ "epoch": 56.52173913043478,
1028
+ "grad_norm": 1.29714834690094,
1029
+ "learning_rate": 1.4986255282736802e-05,
1030
+ "loss": 0.3869,
1031
+ "step": 1300
1032
+ },
1033
+ {
1034
+ "epoch": 56.95652173913044,
1035
+ "grad_norm": 1.6435688734054565,
1036
+ "learning_rate": 1.4986209246876247e-05,
1037
+ "loss": 0.3798,
1038
+ "step": 1310
1039
+ },
1040
+ {
1041
+ "epoch": 57.391304347826086,
1042
+ "grad_norm": 1.2977672815322876,
1043
+ "learning_rate": 1.498616024106377e-05,
1044
+ "loss": 0.3622,
1045
+ "step": 1320
1046
+ },
1047
+ {
1048
+ "epoch": 57.82608695652174,
1049
+ "grad_norm": 2.4099888801574707,
1050
+ "learning_rate": 1.4986108265318795e-05,
1051
+ "loss": 0.3856,
1052
+ "step": 1330
1053
+ },
1054
+ {
1055
+ "epoch": 58.26086956521739,
1056
+ "grad_norm": 1.3188731670379639,
1057
+ "learning_rate": 1.4986053319661923e-05,
1058
+ "loss": 0.3783,
1059
+ "step": 1340
1060
+ },
1061
+ {
1062
+ "epoch": 58.69565217391305,
1063
+ "grad_norm": 1.5936689376831055,
1064
+ "learning_rate": 1.4985995404114931e-05,
1065
+ "loss": 0.3569,
1066
+ "step": 1350
1067
+ },
1068
+ {
1069
+ "epoch": 58.69565217391305,
1070
+ "eval_loss": 0.769973635673523,
1071
+ "eval_runtime": 0.4063,
1072
+ "eval_samples_per_second": 24.611,
1073
+ "eval_steps_per_second": 24.611,
1074
+ "step": 1350
1075
+ },
1076
+ {
1077
+ "epoch": 58.69565217391305,
1078
+ "eval_loss": 0.7558861970901489,
1079
+ "eval_runtime": 0.4014,
1080
+ "eval_samples_per_second": 24.915,
1081
+ "eval_steps_per_second": 24.915,
1082
+ "step": 1350
1083
+ },
1084
+ {
1085
+ "epoch": 58.69565217391305,
1086
+ "eval_loss": 0.7593681216239929,
1087
+ "eval_runtime": 0.3975,
1088
+ "eval_samples_per_second": 25.16,
1089
+ "eval_steps_per_second": 25.16,
1090
+ "step": 1350
1091
+ },
1092
+ {
1093
+ "epoch": 58.69565217391305,
1094
+ "eval_loss": 0.7557762861251831,
1095
+ "eval_runtime": 0.3974,
1096
+ "eval_samples_per_second": 25.162,
1097
+ "eval_steps_per_second": 25.162,
1098
+ "step": 1350
1099
  }
1100
  ],
1101
  "logging_steps": 10,
 
1115
  "attributes": {}
1116
  }
1117
  },
1118
+ "total_flos": 3.460993413931008e+16,
1119
  "train_batch_size": 4,
1120
  "trial_name": null,
1121
  "trial_params": null