ljcamargo commited on
Commit
0b42e3f
·
verified ·
1 Parent(s): 7926c90

Training in progress, step 2100, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72b8d7d88a2599f3d0270503aadee3e06f3c9b6208ea859e85195d0628820e39
3
  size 2558403928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a11ea4246169d4ba1b9cdfd1ec7ef840b6b697d514813efd0057d57657a9241f
3
  size 2558403928
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26b63f4c5630f0eb1aa59baba3bed13a06e5c96c5aaf91324d7334f99b4a5840
3
  size 1313638993
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bff2dd91c1612009877ee3b54bb35e744dcc95587bcbfa8944d30b0c5da6f76
3
  size 1313638993
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c361866728e06bd0e2be39ee57f57e760047eda537fd1611fa8102f0d74137f
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f78d4bac68df9071c97615b2e87cf8d0471f01b17ef6d500cfcd39134f131824
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df14d7a36e37f366debe04e21edb22ea251f3e688d5c13d68e7aade0c9008374
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc99556bf27209385963813e3570510732839e6002d61d657266050e280a33eb
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4433ec275386d9bd6536dd57fdfec5f80c2cb4e78ff122cfbe81518b1380578
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d4ff6faba9116c06367ce60af1ab0ce82d05c5571557409e66e7e9ab509c55a
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7484,
6
  "eval_steps": 500,
7
- "global_step": 1871,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1324,6 +1324,167 @@
1324
  "learning_rate": 7.746618656487748e-06,
1325
  "loss": 3.1418,
1326
  "step": 1870
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1327
  }
1328
  ],
1329
  "logging_steps": 10,
@@ -1343,7 +1504,7 @@
1343
  "attributes": {}
1344
  }
1345
  },
1346
- "total_flos": 8.796307512390451e+19,
1347
  "train_batch_size": 8,
1348
  "trial_name": null,
1349
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.84,
6
  "eval_steps": 500,
7
+ "global_step": 2100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1324
  "learning_rate": 7.746618656487748e-06,
1325
  "loss": 3.1418,
1326
  "step": 1870
1327
+ },
1328
+ {
1329
+ "epoch": 0.752,
1330
+ "grad_norm": 13.42601490020752,
1331
+ "learning_rate": 7.516052717806346e-06,
1332
+ "loss": 3.0495,
1333
+ "step": 1880
1334
+ },
1335
+ {
1336
+ "epoch": 0.756,
1337
+ "grad_norm": 10.328361511230469,
1338
+ "learning_rate": 7.288361535953472e-06,
1339
+ "loss": 3.2537,
1340
+ "step": 1890
1341
+ },
1342
+ {
1343
+ "epoch": 0.76,
1344
+ "grad_norm": 8.320837020874023,
1345
+ "learning_rate": 7.06358254851513e-06,
1346
+ "loss": 3.2002,
1347
+ "step": 1900
1348
+ },
1349
+ {
1350
+ "epoch": 0.764,
1351
+ "grad_norm": 12.367525100708008,
1352
+ "learning_rate": 6.841752714246588e-06,
1353
+ "loss": 3.415,
1354
+ "step": 1910
1355
+ },
1356
+ {
1357
+ "epoch": 0.768,
1358
+ "grad_norm": 8.72415828704834,
1359
+ "learning_rate": 6.622908506995581e-06,
1360
+ "loss": 2.7481,
1361
+ "step": 1920
1362
+ },
1363
+ {
1364
+ "epoch": 0.772,
1365
+ "grad_norm": 9.888436317443848,
1366
+ "learning_rate": 6.407085909705157e-06,
1367
+ "loss": 3.4815,
1368
+ "step": 1930
1369
+ },
1370
+ {
1371
+ "epoch": 0.776,
1372
+ "grad_norm": 7.541075706481934,
1373
+ "learning_rate": 6.194320408497245e-06,
1374
+ "loss": 3.4048,
1375
+ "step": 1940
1376
+ },
1377
+ {
1378
+ "epoch": 0.78,
1379
+ "grad_norm": 11.171248435974121,
1380
+ "learning_rate": 5.98464698683798e-06,
1381
+ "loss": 3.5409,
1382
+ "step": 1950
1383
+ },
1384
+ {
1385
+ "epoch": 0.784,
1386
+ "grad_norm": 9.28205394744873,
1387
+ "learning_rate": 5.778100119785587e-06,
1388
+ "loss": 3.1082,
1389
+ "step": 1960
1390
+ },
1391
+ {
1392
+ "epoch": 0.788,
1393
+ "grad_norm": 8.433388710021973,
1394
+ "learning_rate": 5.5747137683219404e-06,
1395
+ "loss": 2.9565,
1396
+ "step": 1970
1397
+ },
1398
+ {
1399
+ "epoch": 0.792,
1400
+ "grad_norm": 14.938470840454102,
1401
+ "learning_rate": 5.374521373768549e-06,
1402
+ "loss": 3.2282,
1403
+ "step": 1980
1404
+ },
1405
+ {
1406
+ "epoch": 0.796,
1407
+ "grad_norm": 9.903738975524902,
1408
+ "learning_rate": 5.177555852288119e-06,
1409
+ "loss": 2.9652,
1410
+ "step": 1990
1411
+ },
1412
+ {
1413
+ "epoch": 0.8,
1414
+ "grad_norm": 13.002461433410645,
1415
+ "learning_rate": 4.983849589472348e-06,
1416
+ "loss": 3.221,
1417
+ "step": 2000
1418
+ },
1419
+ {
1420
+ "epoch": 0.804,
1421
+ "grad_norm": 12.107378005981445,
1422
+ "learning_rate": 4.793434435016986e-06,
1423
+ "loss": 3.1341,
1424
+ "step": 2010
1425
+ },
1426
+ {
1427
+ "epoch": 0.808,
1428
+ "grad_norm": 11.94257640838623,
1429
+ "learning_rate": 4.606341697485087e-06,
1430
+ "loss": 3.318,
1431
+ "step": 2020
1432
+ },
1433
+ {
1434
+ "epoch": 0.812,
1435
+ "grad_norm": 10.116772651672363,
1436
+ "learning_rate": 4.422602139159091e-06,
1437
+ "loss": 3.2286,
1438
+ "step": 2030
1439
+ },
1440
+ {
1441
+ "epoch": 0.816,
1442
+ "grad_norm": 10.068933486938477,
1443
+ "learning_rate": 4.242245970982883e-06,
1444
+ "loss": 3.306,
1445
+ "step": 2040
1446
+ },
1447
+ {
1448
+ "epoch": 0.82,
1449
+ "grad_norm": 10.280326843261719,
1450
+ "learning_rate": 4.065302847594369e-06,
1451
+ "loss": 3.005,
1452
+ "step": 2050
1453
+ },
1454
+ {
1455
+ "epoch": 0.824,
1456
+ "grad_norm": 10.214073181152344,
1457
+ "learning_rate": 3.891801862449629e-06,
1458
+ "loss": 2.9953,
1459
+ "step": 2060
1460
+ },
1461
+ {
1462
+ "epoch": 0.828,
1463
+ "grad_norm": 12.787151336669922,
1464
+ "learning_rate": 3.721771543039254e-06,
1465
+ "loss": 2.9877,
1466
+ "step": 2070
1467
+ },
1468
+ {
1469
+ "epoch": 0.832,
1470
+ "grad_norm": 7.119079113006592,
1471
+ "learning_rate": 3.5552398461978277e-06,
1472
+ "loss": 3.0851,
1473
+ "step": 2080
1474
+ },
1475
+ {
1476
+ "epoch": 0.836,
1477
+ "grad_norm": 6.1061177253723145,
1478
+ "learning_rate": 3.3922341535071483e-06,
1479
+ "loss": 2.9198,
1480
+ "step": 2090
1481
+ },
1482
+ {
1483
+ "epoch": 0.84,
1484
+ "grad_norm": 9.866963386535645,
1485
+ "learning_rate": 3.23278126679408e-06,
1486
+ "loss": 2.9846,
1487
+ "step": 2100
1488
  }
1489
  ],
1490
  "logging_steps": 10,
 
1504
  "attributes": {}
1505
  }
1506
  },
1507
+ "total_flos": 9.87292665741312e+19,
1508
  "train_batch_size": 8,
1509
  "trial_name": null,
1510
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:080c92b0b23413891b2d4ebcb1839000b2c36abcd2fdfcebc49b922f27c9f1dd
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05c569cc64e00d104bb20e5faf64d881762c6e72e491347da68f267c7d4d9dc2
3
  size 5841