mgh6 commited on
Commit
eb8b894
·
verified ·
1 Parent(s): 5ae3713

Training in progress, epoch 7, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94b65f9945fefbaf8be853961d82c7339324704274beb41015a71638438df6ca
3
  size 2695611744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44d6797e54ea1e42bcae33e74458544cb12bc9b0ff95d94ff72611fcb34e0783
3
  size 2695611744
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eddaae0b197ba351206c079ac22e951ce1b9b3cda33c4dfc2277b9cd13f08127
3
  size 26261260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5441c765de6e99ecee4cf9c3a075adb77f3d306fb63d2d02f10461c9d056e5c3
3
  size 26261260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ed2dc48e727c7083f014c752797fb847062faae5cd71cadcb7aba9e7e6b933c
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d5eb5e2eac86bd2c1cd20be57b68211f8585b0779e48efc32499f94581cd3d8
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ebd3960ec1614497d0e6e97cebd857464e618edc4df8a50eee43da5ac2ba348
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1318a37534b462ff16790cf62c003bcdfdf0493594d4a0b3e928e4d2f7999ab
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.0,
5
  "eval_steps": 10,
6
- "global_step": 774,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1085,6 +1085,188 @@
1085
  "eval_samples_per_second": 22.011,
1086
  "eval_steps_per_second": 5.503,
1087
  "step": 770
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1088
  }
1089
  ],
1090
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.0,
5
  "eval_steps": 10,
6
+ "global_step": 903,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1085
  "eval_samples_per_second": 22.011,
1086
  "eval_steps_per_second": 5.503,
1087
  "step": 770
1088
+ },
1089
+ {
1090
+ "epoch": 6.046852122986823,
1091
+ "grad_norm": 468637.6875,
1092
+ "learning_rate": 3.90625e-05,
1093
+ "loss": 621.1816,
1094
+ "step": 780
1095
+ },
1096
+ {
1097
+ "epoch": 6.046852122986823,
1098
+ "eval_runtime": 19.6074,
1099
+ "eval_samples_per_second": 22.032,
1100
+ "eval_steps_per_second": 5.508,
1101
+ "step": 780
1102
+ },
1103
+ {
1104
+ "epoch": 6.124938994631528,
1105
+ "grad_norm": 374385.40625,
1106
+ "learning_rate": 3.828125e-05,
1107
+ "loss": 914.6565,
1108
+ "step": 790
1109
+ },
1110
+ {
1111
+ "epoch": 6.124938994631528,
1112
+ "eval_runtime": 19.626,
1113
+ "eval_samples_per_second": 22.012,
1114
+ "eval_steps_per_second": 5.503,
1115
+ "step": 790
1116
+ },
1117
+ {
1118
+ "epoch": 6.203025866276232,
1119
+ "grad_norm": 283100.3125,
1120
+ "learning_rate": 3.7500000000000003e-05,
1121
+ "loss": 830.0896,
1122
+ "step": 800
1123
+ },
1124
+ {
1125
+ "epoch": 6.203025866276232,
1126
+ "eval_runtime": 19.6195,
1127
+ "eval_samples_per_second": 22.019,
1128
+ "eval_steps_per_second": 5.505,
1129
+ "step": 800
1130
+ },
1131
+ {
1132
+ "epoch": 6.281112737920937,
1133
+ "grad_norm": 186444.921875,
1134
+ "learning_rate": 3.671875e-05,
1135
+ "loss": 822.068,
1136
+ "step": 810
1137
+ },
1138
+ {
1139
+ "epoch": 6.281112737920937,
1140
+ "eval_runtime": 19.6377,
1141
+ "eval_samples_per_second": 21.998,
1142
+ "eval_steps_per_second": 5.5,
1143
+ "step": 810
1144
+ },
1145
+ {
1146
+ "epoch": 6.359199609565642,
1147
+ "grad_norm": 431370.5,
1148
+ "learning_rate": 3.59375e-05,
1149
+ "loss": 690.5968,
1150
+ "step": 820
1151
+ },
1152
+ {
1153
+ "epoch": 6.359199609565642,
1154
+ "eval_runtime": 19.6147,
1155
+ "eval_samples_per_second": 22.024,
1156
+ "eval_steps_per_second": 5.506,
1157
+ "step": 820
1158
+ },
1159
+ {
1160
+ "epoch": 6.4372864812103465,
1161
+ "grad_norm": 435885.03125,
1162
+ "learning_rate": 3.5156250000000004e-05,
1163
+ "loss": 1016.4219,
1164
+ "step": 830
1165
+ },
1166
+ {
1167
+ "epoch": 6.4372864812103465,
1168
+ "eval_runtime": 19.6089,
1169
+ "eval_samples_per_second": 22.031,
1170
+ "eval_steps_per_second": 5.508,
1171
+ "step": 830
1172
+ },
1173
+ {
1174
+ "epoch": 6.515373352855051,
1175
+ "grad_norm": 473700.9375,
1176
+ "learning_rate": 3.4375e-05,
1177
+ "loss": 806.1664,
1178
+ "step": 840
1179
+ },
1180
+ {
1181
+ "epoch": 6.515373352855051,
1182
+ "eval_runtime": 19.6159,
1183
+ "eval_samples_per_second": 22.023,
1184
+ "eval_steps_per_second": 5.506,
1185
+ "step": 840
1186
+ },
1187
+ {
1188
+ "epoch": 6.593460224499756,
1189
+ "grad_norm": 398720.625,
1190
+ "learning_rate": 3.359375e-05,
1191
+ "loss": 668.8824,
1192
+ "step": 850
1193
+ },
1194
+ {
1195
+ "epoch": 6.593460224499756,
1196
+ "eval_runtime": 19.6281,
1197
+ "eval_samples_per_second": 22.009,
1198
+ "eval_steps_per_second": 5.502,
1199
+ "step": 850
1200
+ },
1201
+ {
1202
+ "epoch": 6.671547096144461,
1203
+ "grad_norm": 305606.28125,
1204
+ "learning_rate": 3.2812500000000005e-05,
1205
+ "loss": 522.7901,
1206
+ "step": 860
1207
+ },
1208
+ {
1209
+ "epoch": 6.671547096144461,
1210
+ "eval_runtime": 19.6214,
1211
+ "eval_samples_per_second": 22.017,
1212
+ "eval_steps_per_second": 5.504,
1213
+ "step": 860
1214
+ },
1215
+ {
1216
+ "epoch": 6.7496339677891655,
1217
+ "grad_norm": 263199.21875,
1218
+ "learning_rate": 3.203125e-05,
1219
+ "loss": 559.4416,
1220
+ "step": 870
1221
+ },
1222
+ {
1223
+ "epoch": 6.7496339677891655,
1224
+ "eval_runtime": 19.6359,
1225
+ "eval_samples_per_second": 22.0,
1226
+ "eval_steps_per_second": 5.5,
1227
+ "step": 870
1228
+ },
1229
+ {
1230
+ "epoch": 6.82772083943387,
1231
+ "grad_norm": 203541.921875,
1232
+ "learning_rate": 3.125e-05,
1233
+ "loss": 602.3201,
1234
+ "step": 880
1235
+ },
1236
+ {
1237
+ "epoch": 6.82772083943387,
1238
+ "eval_runtime": 19.6292,
1239
+ "eval_samples_per_second": 22.008,
1240
+ "eval_steps_per_second": 5.502,
1241
+ "step": 880
1242
+ },
1243
+ {
1244
+ "epoch": 6.905807711078575,
1245
+ "grad_norm": 109014.71875,
1246
+ "learning_rate": 3.0468750000000002e-05,
1247
+ "loss": 527.6462,
1248
+ "step": 890
1249
+ },
1250
+ {
1251
+ "epoch": 6.905807711078575,
1252
+ "eval_runtime": 19.615,
1253
+ "eval_samples_per_second": 22.024,
1254
+ "eval_steps_per_second": 5.506,
1255
+ "step": 890
1256
+ },
1257
+ {
1258
+ "epoch": 6.98389458272328,
1259
+ "grad_norm": 173269.515625,
1260
+ "learning_rate": 2.96875e-05,
1261
+ "loss": 509.9485,
1262
+ "step": 900
1263
+ },
1264
+ {
1265
+ "epoch": 6.98389458272328,
1266
+ "eval_runtime": 19.6287,
1267
+ "eval_samples_per_second": 22.009,
1268
+ "eval_steps_per_second": 5.502,
1269
+ "step": 900
1270
  }
1271
  ],
1272
  "logging_steps": 10,