NairaRahim commited on
Commit
b640836
·
verified ·
1 Parent(s): e1ee5cb

Training in progress, epoch 13, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d087532161fc3d3113f958d4327ca8ab76fb93d1b9005d7b72d8341648a7f95e
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:661988750c1636a8230782954534697c2c428758d1ffff311560504559c94f4d
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d359385b3376fb641197873abbd6f199bc67d84ad37382d398095c1f51b664a9
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78153e1deab7d57bbdde6b48f09655dcca7643e4183944f8a976272944fcf355
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c4d71a933e8a99a1b5e03ca178837d4af39c5cb9255b1959f57ce6925e566d0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286894daea5f57f8b75686aeb16b9768a256561b0f4aac865dac3818faf004c7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f18daae1b94bcadba9e921cdd5d160fa2fe3e4c34c14e032eed270d5a8a3cca
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a19fb42510963d95f267fb826c867f003e2775662c12b848554d94e9087c80
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 34.700294494628906,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
4
- "epoch": 12.0,
5
  "eval_steps": 500,
6
- "global_step": 15660,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1195,6 +1195,105 @@
1195
  "eval_samples_per_second": 26.495,
1196
  "eval_steps_per_second": 3.33,
1197
  "step": 15660
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1198
  }
1199
  ],
1200
  "logging_steps": 100,
@@ -1209,7 +1308,7 @@
1209
  "early_stopping_threshold": 0.0
1210
  },
1211
  "attributes": {
1212
- "early_stopping_patience_counter": 2
1213
  }
1214
  },
1215
  "TrainerControl": {
@@ -1223,7 +1322,7 @@
1223
  "attributes": {}
1224
  }
1225
  },
1226
- "total_flos": 1.6887730836566016e+16,
1227
  "train_batch_size": 8,
1228
  "trial_name": null,
1229
  "trial_params": null
 
1
  {
2
  "best_metric": 34.700294494628906,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
4
+ "epoch": 13.0,
5
  "eval_steps": 500,
6
+ "global_step": 16965,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1195
  "eval_samples_per_second": 26.495,
1196
  "eval_steps_per_second": 3.33,
1197
  "step": 15660
1198
+ },
1199
+ {
1200
+ "epoch": 12.030651340996169,
1201
+ "grad_norm": 2.331453800201416,
1202
+ "learning_rate": 4.2484195402298856e-05,
1203
+ "loss": 32.9794,
1204
+ "step": 15700
1205
+ },
1206
+ {
1207
+ "epoch": 12.10727969348659,
1208
+ "grad_norm": 2.2127463817596436,
1209
+ "learning_rate": 4.243630268199234e-05,
1210
+ "loss": 33.6367,
1211
+ "step": 15800
1212
+ },
1213
+ {
1214
+ "epoch": 12.183908045977011,
1215
+ "grad_norm": 3.1127703189849854,
1216
+ "learning_rate": 4.238840996168583e-05,
1217
+ "loss": 32.7221,
1218
+ "step": 15900
1219
+ },
1220
+ {
1221
+ "epoch": 12.260536398467433,
1222
+ "grad_norm": 2.5665576457977295,
1223
+ "learning_rate": 4.234051724137931e-05,
1224
+ "loss": 33.7796,
1225
+ "step": 16000
1226
+ },
1227
+ {
1228
+ "epoch": 12.337164750957854,
1229
+ "grad_norm": 2.995265245437622,
1230
+ "learning_rate": 4.22926245210728e-05,
1231
+ "loss": 32.8062,
1232
+ "step": 16100
1233
+ },
1234
+ {
1235
+ "epoch": 12.413793103448276,
1236
+ "grad_norm": 3.4698216915130615,
1237
+ "learning_rate": 4.2244731800766284e-05,
1238
+ "loss": 33.5182,
1239
+ "step": 16200
1240
+ },
1241
+ {
1242
+ "epoch": 12.490421455938698,
1243
+ "grad_norm": 4.030599117279053,
1244
+ "learning_rate": 4.219683908045977e-05,
1245
+ "loss": 33.7621,
1246
+ "step": 16300
1247
+ },
1248
+ {
1249
+ "epoch": 12.567049808429118,
1250
+ "grad_norm": 2.277189254760742,
1251
+ "learning_rate": 4.214894636015326e-05,
1252
+ "loss": 33.7926,
1253
+ "step": 16400
1254
+ },
1255
+ {
1256
+ "epoch": 12.64367816091954,
1257
+ "grad_norm": 2.3156633377075195,
1258
+ "learning_rate": 4.2101053639846744e-05,
1259
+ "loss": 33.869,
1260
+ "step": 16500
1261
+ },
1262
+ {
1263
+ "epoch": 12.720306513409962,
1264
+ "grad_norm": 3.5089361667633057,
1265
+ "learning_rate": 4.205316091954023e-05,
1266
+ "loss": 33.6732,
1267
+ "step": 16600
1268
+ },
1269
+ {
1270
+ "epoch": 12.796934865900383,
1271
+ "grad_norm": 2.5379600524902344,
1272
+ "learning_rate": 4.200526819923372e-05,
1273
+ "loss": 33.5854,
1274
+ "step": 16700
1275
+ },
1276
+ {
1277
+ "epoch": 12.873563218390805,
1278
+ "grad_norm": 2.5784411430358887,
1279
+ "learning_rate": 4.1957375478927205e-05,
1280
+ "loss": 33.2835,
1281
+ "step": 16800
1282
+ },
1283
+ {
1284
+ "epoch": 12.950191570881227,
1285
+ "grad_norm": 2.574859380722046,
1286
+ "learning_rate": 4.190948275862069e-05,
1287
+ "loss": 33.8945,
1288
+ "step": 16900
1289
+ },
1290
+ {
1291
+ "epoch": 13.0,
1292
+ "eval_loss": 34.72227478027344,
1293
+ "eval_runtime": 49.2549,
1294
+ "eval_samples_per_second": 26.495,
1295
+ "eval_steps_per_second": 3.33,
1296
+ "step": 16965
1297
  }
1298
  ],
1299
  "logging_steps": 100,
 
1308
  "early_stopping_threshold": 0.0
1309
  },
1310
  "attributes": {
1311
+ "early_stopping_patience_counter": 3
1312
  }
1313
  },
1314
  "TrainerControl": {
 
1322
  "attributes": {}
1323
  }
1324
  },
1325
+ "total_flos": 1.8295041739613184e+16,
1326
  "train_batch_size": 8,
1327
  "trial_name": null,
1328
  "trial_params": null