mgh6 commited on
Commit
49b30de
·
verified ·
1 Parent(s): f5b1d33

Training in progress, epoch 8, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44d6797e54ea1e42bcae33e74458544cb12bc9b0ff95d94ff72611fcb34e0783
3
  size 2695611744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a2175ec71e87f793ff0f1b56c0e4d67ecb39ba87b782cb059bb94ad285c2f53
3
  size 2695611744
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5441c765de6e99ecee4cf9c3a075adb77f3d306fb63d2d02f10461c9d056e5c3
3
  size 26261260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:042e4a5ad63d06a910df2e74958ddf6def3d06490a31ec05e030713646e66aae
3
  size 26261260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d5eb5e2eac86bd2c1cd20be57b68211f8585b0779e48efc32499f94581cd3d8
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd92451b020a5e32fd17fed687a0bd65992ebcaaa07504fb89486fb3090b6e0b
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1318a37534b462ff16790cf62c003bcdfdf0493594d4a0b3e928e4d2f7999ab
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98b9a9b66ad7c2c92d26ff031cce36ce8f9c2ebdb6be883784d837b0e0137568
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.0,
5
  "eval_steps": 10,
6
- "global_step": 903,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1267,6 +1267,188 @@
1267
  "eval_samples_per_second": 22.009,
1268
  "eval_steps_per_second": 5.502,
1269
  "step": 900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1270
  }
1271
  ],
1272
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.0,
5
  "eval_steps": 10,
6
+ "global_step": 1032,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1267
  "eval_samples_per_second": 22.009,
1268
  "eval_steps_per_second": 5.502,
1269
  "step": 900
1270
+ },
1271
+ {
1272
+ "epoch": 7.054660810151294,
1273
+ "grad_norm": 89940.78125,
1274
+ "learning_rate": 2.890625e-05,
1275
+ "loss": 514.7005,
1276
+ "step": 910
1277
+ },
1278
+ {
1279
+ "epoch": 7.054660810151294,
1280
+ "eval_runtime": 19.6378,
1281
+ "eval_samples_per_second": 21.998,
1282
+ "eval_steps_per_second": 5.5,
1283
+ "step": 910
1284
+ },
1285
+ {
1286
+ "epoch": 7.132747681795998,
1287
+ "grad_norm": 64957.203125,
1288
+ "learning_rate": 2.8125000000000003e-05,
1289
+ "loss": 516.4828,
1290
+ "step": 920
1291
+ },
1292
+ {
1293
+ "epoch": 7.132747681795998,
1294
+ "eval_runtime": 19.6159,
1295
+ "eval_samples_per_second": 22.023,
1296
+ "eval_steps_per_second": 5.506,
1297
+ "step": 920
1298
+ },
1299
+ {
1300
+ "epoch": 7.210834553440703,
1301
+ "grad_norm": 191568.875,
1302
+ "learning_rate": 2.734375e-05,
1303
+ "loss": 469.4625,
1304
+ "step": 930
1305
+ },
1306
+ {
1307
+ "epoch": 7.210834553440703,
1308
+ "eval_runtime": 19.6149,
1309
+ "eval_samples_per_second": 22.024,
1310
+ "eval_steps_per_second": 5.506,
1311
+ "step": 930
1312
+ },
1313
+ {
1314
+ "epoch": 7.288921425085407,
1315
+ "grad_norm": 180467.84375,
1316
+ "learning_rate": 2.6562500000000002e-05,
1317
+ "loss": 627.1263,
1318
+ "step": 940
1319
+ },
1320
+ {
1321
+ "epoch": 7.288921425085407,
1322
+ "eval_runtime": 19.6273,
1323
+ "eval_samples_per_second": 22.01,
1324
+ "eval_steps_per_second": 5.503,
1325
+ "step": 940
1326
+ },
1327
+ {
1328
+ "epoch": 7.367008296730113,
1329
+ "grad_norm": 175262.3125,
1330
+ "learning_rate": 2.578125e-05,
1331
+ "loss": 497.1456,
1332
+ "step": 950
1333
+ },
1334
+ {
1335
+ "epoch": 7.367008296730113,
1336
+ "eval_runtime": 19.6298,
1337
+ "eval_samples_per_second": 22.007,
1338
+ "eval_steps_per_second": 5.502,
1339
+ "step": 950
1340
+ },
1341
+ {
1342
+ "epoch": 7.4450951683748166,
1343
+ "grad_norm": 194304.703125,
1344
+ "learning_rate": 2.5e-05,
1345
+ "loss": 651.0766,
1346
+ "step": 960
1347
+ },
1348
+ {
1349
+ "epoch": 7.4450951683748166,
1350
+ "eval_runtime": 19.6413,
1351
+ "eval_samples_per_second": 21.994,
1352
+ "eval_steps_per_second": 5.499,
1353
+ "step": 960
1354
+ },
1355
+ {
1356
+ "epoch": 7.523182040019522,
1357
+ "grad_norm": 221815.171875,
1358
+ "learning_rate": 2.4218750000000003e-05,
1359
+ "loss": 419.7869,
1360
+ "step": 970
1361
+ },
1362
+ {
1363
+ "epoch": 7.523182040019522,
1364
+ "eval_runtime": 19.6255,
1365
+ "eval_samples_per_second": 22.012,
1366
+ "eval_steps_per_second": 5.503,
1367
+ "step": 970
1368
+ },
1369
+ {
1370
+ "epoch": 7.601268911664226,
1371
+ "grad_norm": 330153.84375,
1372
+ "learning_rate": 2.34375e-05,
1373
+ "loss": 606.2052,
1374
+ "step": 980
1375
+ },
1376
+ {
1377
+ "epoch": 7.601268911664226,
1378
+ "eval_runtime": 19.6246,
1379
+ "eval_samples_per_second": 22.013,
1380
+ "eval_steps_per_second": 5.503,
1381
+ "step": 980
1382
+ },
1383
+ {
1384
+ "epoch": 7.679355783308932,
1385
+ "grad_norm": 134138.4375,
1386
+ "learning_rate": 2.2656250000000002e-05,
1387
+ "loss": 636.721,
1388
+ "step": 990
1389
+ },
1390
+ {
1391
+ "epoch": 7.679355783308932,
1392
+ "eval_runtime": 19.622,
1393
+ "eval_samples_per_second": 22.016,
1394
+ "eval_steps_per_second": 5.504,
1395
+ "step": 990
1396
+ },
1397
+ {
1398
+ "epoch": 7.7574426549536355,
1399
+ "grad_norm": 86497.265625,
1400
+ "learning_rate": 2.1875e-05,
1401
+ "loss": 565.6015,
1402
+ "step": 1000
1403
+ },
1404
+ {
1405
+ "epoch": 7.7574426549536355,
1406
+ "eval_runtime": 19.6167,
1407
+ "eval_samples_per_second": 22.022,
1408
+ "eval_steps_per_second": 5.506,
1409
+ "step": 1000
1410
+ },
1411
+ {
1412
+ "epoch": 7.835529526598341,
1413
+ "grad_norm": 256321.390625,
1414
+ "learning_rate": 2.109375e-05,
1415
+ "loss": 383.8543,
1416
+ "step": 1010
1417
+ },
1418
+ {
1419
+ "epoch": 7.835529526598341,
1420
+ "eval_runtime": 19.6312,
1421
+ "eval_samples_per_second": 22.006,
1422
+ "eval_steps_per_second": 5.501,
1423
+ "step": 1010
1424
+ },
1425
+ {
1426
+ "epoch": 7.913616398243045,
1427
+ "grad_norm": 300360.125,
1428
+ "learning_rate": 2.0312500000000002e-05,
1429
+ "loss": 386.0633,
1430
+ "step": 1020
1431
+ },
1432
+ {
1433
+ "epoch": 7.913616398243045,
1434
+ "eval_runtime": 19.6333,
1435
+ "eval_samples_per_second": 22.003,
1436
+ "eval_steps_per_second": 5.501,
1437
+ "step": 1020
1438
+ },
1439
+ {
1440
+ "epoch": 7.991703269887751,
1441
+ "grad_norm": 65186.7578125,
1442
+ "learning_rate": 1.953125e-05,
1443
+ "loss": 379.998,
1444
+ "step": 1030
1445
+ },
1446
+ {
1447
+ "epoch": 7.991703269887751,
1448
+ "eval_runtime": 19.6079,
1449
+ "eval_samples_per_second": 22.032,
1450
+ "eval_steps_per_second": 5.508,
1451
+ "step": 1030
1452
  }
1453
  ],
1454
  "logging_steps": 10,