NairaRahim commited on
Commit
8c099c5
·
verified ·
1 Parent(s): 4b495c1

Training in progress, epoch 14, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:661988750c1636a8230782954534697c2c428758d1ffff311560504559c94f4d
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3dc2253f6a705d4581a259cf35a73237854fffb23cfd63c819297d468b440f4
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78153e1deab7d57bbdde6b48f09655dcca7643e4183944f8a976272944fcf355
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:834b487b2e27a2a0a519304d2774ed6f8a3bc0bf00c5ec3f97eb6a666a2f12df
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:286894daea5f57f8b75686aeb16b9768a256561b0f4aac865dac3818faf004c7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b337fd52d1f50e0cc3a5fd860e091b39657ac975bbe77a94898d8354c966157f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4a19fb42510963d95f267fb826c867f003e2775662c12b848554d94e9087c80
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:099df6d11185a7212be93e879fb321f1e41d7aa7efedf5f52a9ceeb4b4dffb4b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 34.700294494628906,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
4
- "epoch": 13.0,
5
  "eval_steps": 500,
6
- "global_step": 16965,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1294,6 +1294,105 @@
1294
  "eval_samples_per_second": 26.495,
1295
  "eval_steps_per_second": 3.33,
1296
  "step": 16965
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1297
  }
1298
  ],
1299
  "logging_steps": 100,
@@ -1308,7 +1407,7 @@
1308
  "early_stopping_threshold": 0.0
1309
  },
1310
  "attributes": {
1311
- "early_stopping_patience_counter": 3
1312
  }
1313
  },
1314
  "TrainerControl": {
@@ -1322,7 +1421,7 @@
1322
  "attributes": {}
1323
  }
1324
  },
1325
- "total_flos": 1.8295041739613184e+16,
1326
  "train_batch_size": 8,
1327
  "trial_name": null,
1328
  "trial_params": null
 
1
  {
2
  "best_metric": 34.700294494628906,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
4
+ "epoch": 14.0,
5
  "eval_steps": 500,
6
+ "global_step": 18270,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1294
  "eval_samples_per_second": 26.495,
1295
  "eval_steps_per_second": 3.33,
1296
  "step": 16965
1297
+ },
1298
+ {
1299
+ "epoch": 13.026819923371647,
1300
+ "grad_norm": 3.8546385765075684,
1301
+ "learning_rate": 4.186159003831418e-05,
1302
+ "loss": 33.1455,
1303
+ "step": 17000
1304
+ },
1305
+ {
1306
+ "epoch": 13.10344827586207,
1307
+ "grad_norm": 3.751404047012329,
1308
+ "learning_rate": 4.1813697318007665e-05,
1309
+ "loss": 33.7843,
1310
+ "step": 17100
1311
+ },
1312
+ {
1313
+ "epoch": 13.18007662835249,
1314
+ "grad_norm": 3.0844898223876953,
1315
+ "learning_rate": 4.176580459770115e-05,
1316
+ "loss": 32.8163,
1317
+ "step": 17200
1318
+ },
1319
+ {
1320
+ "epoch": 13.256704980842912,
1321
+ "grad_norm": 1.7570416927337646,
1322
+ "learning_rate": 4.1718390804597704e-05,
1323
+ "loss": 33.3296,
1324
+ "step": 17300
1325
+ },
1326
+ {
1327
+ "epoch": 13.333333333333334,
1328
+ "grad_norm": 2.5809695720672607,
1329
+ "learning_rate": 4.1670498084291184e-05,
1330
+ "loss": 34.1621,
1331
+ "step": 17400
1332
+ },
1333
+ {
1334
+ "epoch": 13.409961685823754,
1335
+ "grad_norm": 2.564545154571533,
1336
+ "learning_rate": 4.162260536398467e-05,
1337
+ "loss": 33.4641,
1338
+ "step": 17500
1339
+ },
1340
+ {
1341
+ "epoch": 13.486590038314176,
1342
+ "grad_norm": 3.2340521812438965,
1343
+ "learning_rate": 4.1574712643678165e-05,
1344
+ "loss": 33.5958,
1345
+ "step": 17600
1346
+ },
1347
+ {
1348
+ "epoch": 13.563218390804598,
1349
+ "grad_norm": 4.329983711242676,
1350
+ "learning_rate": 4.152681992337165e-05,
1351
+ "loss": 33.53,
1352
+ "step": 17700
1353
+ },
1354
+ {
1355
+ "epoch": 13.639846743295019,
1356
+ "grad_norm": 2.3342621326446533,
1357
+ "learning_rate": 4.147892720306514e-05,
1358
+ "loss": 33.7702,
1359
+ "step": 17800
1360
+ },
1361
+ {
1362
+ "epoch": 13.71647509578544,
1363
+ "grad_norm": 2.6764466762542725,
1364
+ "learning_rate": 4.1431034482758625e-05,
1365
+ "loss": 33.6024,
1366
+ "step": 17900
1367
+ },
1368
+ {
1369
+ "epoch": 13.793103448275861,
1370
+ "grad_norm": 5.089807033538818,
1371
+ "learning_rate": 4.138314176245211e-05,
1372
+ "loss": 32.9291,
1373
+ "step": 18000
1374
+ },
1375
+ {
1376
+ "epoch": 13.869731800766283,
1377
+ "grad_norm": 2.4803364276885986,
1378
+ "learning_rate": 4.13352490421456e-05,
1379
+ "loss": 33.2098,
1380
+ "step": 18100
1381
+ },
1382
+ {
1383
+ "epoch": 13.946360153256705,
1384
+ "grad_norm": 3.0112080574035645,
1385
+ "learning_rate": 4.128735632183908e-05,
1386
+ "loss": 33.7988,
1387
+ "step": 18200
1388
+ },
1389
+ {
1390
+ "epoch": 14.0,
1391
+ "eval_loss": 34.82696533203125,
1392
+ "eval_runtime": 49.261,
1393
+ "eval_samples_per_second": 26.492,
1394
+ "eval_steps_per_second": 3.329,
1395
+ "step": 18270
1396
  }
1397
  ],
1398
  "logging_steps": 100,
 
1407
  "early_stopping_threshold": 0.0
1408
  },
1409
  "attributes": {
1410
+ "early_stopping_patience_counter": 4
1411
  }
1412
  },
1413
  "TrainerControl": {
 
1421
  "attributes": {}
1422
  }
1423
  },
1424
+ "total_flos": 1.970235264266035e+16,
1425
  "train_batch_size": 8,
1426
  "trial_name": null,
1427
  "trial_params": null