irishprancer commited on
Commit
d6ada74
·
verified ·
1 Parent(s): 0cb0cac

Training in progress, step 1200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca76f47f61e6294ad679314dbfbcce80d0fe37e1c0461e75d714ddb535f5da79
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84029e8f70d12b2c7137d5b303188195f30521bd1c82299d6d37b24c0269a65a
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa76e92f4b693debfd6dc410477c70a9606cc7c9bbe0490fcd9f2d6361e7ead5
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d1059ec2c30fa5b1aacfd6d9895b6233f1694495648c48fedab4d2b8e820425
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eed7e63646e60ae2bd56a0754378b43da25eff8bd39e1edda0ec4d07c731eeb8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7622d316fe354db40f60bc22ab635af3869b60bf5a6c816cb74ee6598c94be27
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6548eb58d62e7512c294251f1e8c024e396ed51c9a6b80ba70928a1cdaee6c7f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:351fd5edffc48c8b46106c61b298184039dcb3c5ee48faa68a22154873155edd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7168284058570862,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 45.65217391304348,
5
  "eval_steps": 150,
6
- "global_step": 1050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1022,6 +1022,151 @@
1022
  "eval_samples_per_second": 25.244,
1023
  "eval_steps_per_second": 25.244,
1024
  "step": 1050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1025
  }
1026
  ],
1027
  "logging_steps": 10,
@@ -1041,7 +1186,7 @@
1041
  "attributes": {}
1042
  }
1043
  },
1044
- "total_flos": 2.696323352857805e+16,
1045
  "train_batch_size": 4,
1046
  "trial_name": null,
1047
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7168284058570862,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 52.17391304347826,
5
  "eval_steps": 150,
6
+ "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1022
  "eval_samples_per_second": 25.244,
1023
  "eval_steps_per_second": 25.244,
1024
  "step": 1050
1025
+ },
1026
+ {
1027
+ "epoch": 46.08695652173913,
1028
+ "grad_norm": 1.668047308921814,
1029
+ "learning_rate": 1.130734941385923e-05,
1030
+ "loss": 0.4905,
1031
+ "step": 1060
1032
+ },
1033
+ {
1034
+ "epoch": 46.52173913043478,
1035
+ "grad_norm": 1.8213101625442505,
1036
+ "learning_rate": 1.1307334847763571e-05,
1037
+ "loss": 0.4654,
1038
+ "step": 1070
1039
+ },
1040
+ {
1041
+ "epoch": 46.95652173913044,
1042
+ "grad_norm": 1.7041969299316406,
1043
+ "learning_rate": 1.1307318040745661e-05,
1044
+ "loss": 0.4089,
1045
+ "step": 1080
1046
+ },
1047
+ {
1048
+ "epoch": 47.391304347826086,
1049
+ "grad_norm": 1.9170663356781006,
1050
+ "learning_rate": 1.1307298992812163e-05,
1051
+ "loss": 0.4169,
1052
+ "step": 1090
1053
+ },
1054
+ {
1055
+ "epoch": 47.82608695652174,
1056
+ "grad_norm": 1.3400579690933228,
1057
+ "learning_rate": 1.1307277703970627e-05,
1058
+ "loss": 0.4451,
1059
+ "step": 1100
1060
+ },
1061
+ {
1062
+ "epoch": 48.26086956521739,
1063
+ "grad_norm": 1.8620245456695557,
1064
+ "learning_rate": 1.1307254174229492e-05,
1065
+ "loss": 0.4606,
1066
+ "step": 1110
1067
+ },
1068
+ {
1069
+ "epoch": 48.69565217391305,
1070
+ "grad_norm": 1.4493643045425415,
1071
+ "learning_rate": 1.1307228403598083e-05,
1072
+ "loss": 0.3842,
1073
+ "step": 1120
1074
+ },
1075
+ {
1076
+ "epoch": 49.130434782608695,
1077
+ "grad_norm": 1.5963612794876099,
1078
+ "learning_rate": 1.1307200392086617e-05,
1079
+ "loss": 0.5088,
1080
+ "step": 1130
1081
+ },
1082
+ {
1083
+ "epoch": 49.56521739130435,
1084
+ "grad_norm": 1.0860666036605835,
1085
+ "learning_rate": 1.1307170139706193e-05,
1086
+ "loss": 0.4661,
1087
+ "step": 1140
1088
+ },
1089
+ {
1090
+ "epoch": 50.0,
1091
+ "grad_norm": 2.400817632675171,
1092
+ "learning_rate": 1.1307137646468805e-05,
1093
+ "loss": 0.3732,
1094
+ "step": 1150
1095
+ },
1096
+ {
1097
+ "epoch": 50.43478260869565,
1098
+ "grad_norm": 1.2243698835372925,
1099
+ "learning_rate": 1.130710291238733e-05,
1100
+ "loss": 0.3852,
1101
+ "step": 1160
1102
+ },
1103
+ {
1104
+ "epoch": 50.869565217391305,
1105
+ "grad_norm": 1.5243916511535645,
1106
+ "learning_rate": 1.130706593747554e-05,
1107
+ "loss": 0.4324,
1108
+ "step": 1170
1109
+ },
1110
+ {
1111
+ "epoch": 51.30434782608695,
1112
+ "grad_norm": 1.778385877609253,
1113
+ "learning_rate": 1.1307026721748087e-05,
1114
+ "loss": 0.45,
1115
+ "step": 1180
1116
+ },
1117
+ {
1118
+ "epoch": 51.73913043478261,
1119
+ "grad_norm": 1.0479800701141357,
1120
+ "learning_rate": 1.1306985265220515e-05,
1121
+ "loss": 0.3661,
1122
+ "step": 1190
1123
+ },
1124
+ {
1125
+ "epoch": 52.17391304347826,
1126
+ "grad_norm": 2.22280216217041,
1127
+ "learning_rate": 1.1306941567909254e-05,
1128
+ "loss": 0.518,
1129
+ "step": 1200
1130
+ },
1131
+ {
1132
+ "epoch": 52.17391304347826,
1133
+ "eval_loss": 0.7372099161148071,
1134
+ "eval_runtime": 0.5807,
1135
+ "eval_samples_per_second": 17.222,
1136
+ "eval_steps_per_second": 17.222,
1137
+ "step": 1200
1138
+ },
1139
+ {
1140
+ "epoch": 52.17391304347826,
1141
+ "eval_loss": 0.8616224527359009,
1142
+ "eval_runtime": 0.4577,
1143
+ "eval_samples_per_second": 21.848,
1144
+ "eval_steps_per_second": 21.848,
1145
+ "step": 1200
1146
+ },
1147
+ {
1148
+ "epoch": 52.17391304347826,
1149
+ "eval_loss": 0.7372099161148071,
1150
+ "eval_runtime": 0.4574,
1151
+ "eval_samples_per_second": 21.862,
1152
+ "eval_steps_per_second": 21.862,
1153
+ "step": 1200
1154
+ },
1155
+ {
1156
+ "epoch": 52.17391304347826,
1157
+ "eval_loss": 0.7205449938774109,
1158
+ "eval_runtime": 0.4458,
1159
+ "eval_samples_per_second": 22.433,
1160
+ "eval_steps_per_second": 22.433,
1161
+ "step": 1200
1162
+ },
1163
+ {
1164
+ "epoch": 52.17391304347826,
1165
+ "eval_loss": 0.8604005575180054,
1166
+ "eval_runtime": 0.4457,
1167
+ "eval_samples_per_second": 22.437,
1168
+ "eval_steps_per_second": 22.437,
1169
+ "step": 1200
1170
  }
1171
  ],
1172
  "logging_steps": 10,
 
1186
  "attributes": {}
1187
  }
1188
  },
1189
+ "total_flos": 3.076671992345395e+16,
1190
  "train_batch_size": 4,
1191
  "trial_name": null,
1192
  "trial_params": null