ljcamargo commited on
Commit
4ef6a3a
·
verified ·
1 Parent(s): 76f7c84

Training in progress, step 2250, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:114b891a762a51a9adf99795ed8a1397abbd097711bfd2ff3927ad599e912fbe
3
  size 3809184360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c62db0277afdc3f2ad6dbafa0dd57f53ea9debb5ee9712f0b1547cf8523f1070
3
  size 3809184360
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0467314eaf329ce64fa294c5b636d1d0a5db236ce1684099429a56bad1f1c530
3
  size 2458291491
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:994a3b059f463b00db236586003b2652100023cbd4f39b1b1ac679076c611649
3
  size 2458291491
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0367ed1b35f65855ad993f74c56f185b353ad034ccb1dbb7df8ac313fc044216
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:376b730bb310b4f7540caf50ba2d9485c55172240b565241043b8847f1833fe8
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5daf118c104c253ac47840aed00a104c21470bc6d0bd2a07133bec544d92037c
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:444dae11008b250d18996da8350dc235efbc33e7070670e4ec0778a449b281a5
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8,
6
  "eval_steps": 500,
7
- "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1408,6 +1408,181 @@
1408
  "learning_rate": 1.0181451612903227e-05,
1409
  "loss": 0.2809,
1410
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1411
  }
1412
  ],
1413
  "logging_steps": 10,
@@ -1427,7 +1602,7 @@
1427
  "attributes": {}
1428
  }
1429
  },
1430
- "total_flos": 3.61046831887872e+16,
1431
  "train_batch_size": 2,
1432
  "trial_name": null,
1433
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.9,
6
  "eval_steps": 500,
7
+ "global_step": 2250,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1408
  "learning_rate": 1.0181451612903227e-05,
1409
  "loss": 0.2809,
1410
  "step": 2000
1411
+ },
1412
+ {
1413
+ "epoch": 0.804,
1414
+ "grad_norm": 7.253849029541016,
1415
+ "learning_rate": 9.97983870967742e-06,
1416
+ "loss": 0.2885,
1417
+ "step": 2010
1418
+ },
1419
+ {
1420
+ "epoch": 0.808,
1421
+ "grad_norm": 12.506342887878418,
1422
+ "learning_rate": 9.778225806451613e-06,
1423
+ "loss": 0.3105,
1424
+ "step": 2020
1425
+ },
1426
+ {
1427
+ "epoch": 0.812,
1428
+ "grad_norm": 6.241955757141113,
1429
+ "learning_rate": 9.576612903225806e-06,
1430
+ "loss": 0.3399,
1431
+ "step": 2030
1432
+ },
1433
+ {
1434
+ "epoch": 0.816,
1435
+ "grad_norm": 10.079781532287598,
1436
+ "learning_rate": 9.375000000000001e-06,
1437
+ "loss": 0.2337,
1438
+ "step": 2040
1439
+ },
1440
+ {
1441
+ "epoch": 0.82,
1442
+ "grad_norm": 5.7255377769470215,
1443
+ "learning_rate": 9.173387096774194e-06,
1444
+ "loss": 0.3242,
1445
+ "step": 2050
1446
+ },
1447
+ {
1448
+ "epoch": 0.824,
1449
+ "grad_norm": 8.949894905090332,
1450
+ "learning_rate": 8.971774193548389e-06,
1451
+ "loss": 0.3267,
1452
+ "step": 2060
1453
+ },
1454
+ {
1455
+ "epoch": 0.828,
1456
+ "grad_norm": 9.275047302246094,
1457
+ "learning_rate": 8.770161290322582e-06,
1458
+ "loss": 0.3194,
1459
+ "step": 2070
1460
+ },
1461
+ {
1462
+ "epoch": 0.832,
1463
+ "grad_norm": 7.940761089324951,
1464
+ "learning_rate": 8.568548387096773e-06,
1465
+ "loss": 0.3428,
1466
+ "step": 2080
1467
+ },
1468
+ {
1469
+ "epoch": 0.836,
1470
+ "grad_norm": 9.835103988647461,
1471
+ "learning_rate": 8.366935483870968e-06,
1472
+ "loss": 0.5326,
1473
+ "step": 2090
1474
+ },
1475
+ {
1476
+ "epoch": 0.84,
1477
+ "grad_norm": 4.066821098327637,
1478
+ "learning_rate": 8.165322580645161e-06,
1479
+ "loss": 0.3065,
1480
+ "step": 2100
1481
+ },
1482
+ {
1483
+ "epoch": 0.844,
1484
+ "grad_norm": 12.258496284484863,
1485
+ "learning_rate": 7.963709677419356e-06,
1486
+ "loss": 0.3009,
1487
+ "step": 2110
1488
+ },
1489
+ {
1490
+ "epoch": 0.848,
1491
+ "grad_norm": 5.6699700355529785,
1492
+ "learning_rate": 7.762096774193549e-06,
1493
+ "loss": 0.265,
1494
+ "step": 2120
1495
+ },
1496
+ {
1497
+ "epoch": 0.852,
1498
+ "grad_norm": 12.048685073852539,
1499
+ "learning_rate": 7.560483870967743e-06,
1500
+ "loss": 0.3388,
1501
+ "step": 2130
1502
+ },
1503
+ {
1504
+ "epoch": 0.856,
1505
+ "grad_norm": 4.888071060180664,
1506
+ "learning_rate": 7.358870967741936e-06,
1507
+ "loss": 0.3849,
1508
+ "step": 2140
1509
+ },
1510
+ {
1511
+ "epoch": 0.86,
1512
+ "grad_norm": 9.446249961853027,
1513
+ "learning_rate": 7.15725806451613e-06,
1514
+ "loss": 0.2319,
1515
+ "step": 2150
1516
+ },
1517
+ {
1518
+ "epoch": 0.864,
1519
+ "grad_norm": 40.13935852050781,
1520
+ "learning_rate": 6.955645161290322e-06,
1521
+ "loss": 0.3925,
1522
+ "step": 2160
1523
+ },
1524
+ {
1525
+ "epoch": 0.868,
1526
+ "grad_norm": 9.776718139648438,
1527
+ "learning_rate": 6.754032258064516e-06,
1528
+ "loss": 0.2317,
1529
+ "step": 2170
1530
+ },
1531
+ {
1532
+ "epoch": 0.872,
1533
+ "grad_norm": 11.500304222106934,
1534
+ "learning_rate": 6.55241935483871e-06,
1535
+ "loss": 0.3169,
1536
+ "step": 2180
1537
+ },
1538
+ {
1539
+ "epoch": 0.876,
1540
+ "grad_norm": 13.514867782592773,
1541
+ "learning_rate": 6.350806451612904e-06,
1542
+ "loss": 0.3805,
1543
+ "step": 2190
1544
+ },
1545
+ {
1546
+ "epoch": 0.88,
1547
+ "grad_norm": 6.3404765129089355,
1548
+ "learning_rate": 6.149193548387097e-06,
1549
+ "loss": 0.2765,
1550
+ "step": 2200
1551
+ },
1552
+ {
1553
+ "epoch": 0.884,
1554
+ "grad_norm": 13.954924583435059,
1555
+ "learning_rate": 5.947580645161291e-06,
1556
+ "loss": 0.2185,
1557
+ "step": 2210
1558
+ },
1559
+ {
1560
+ "epoch": 0.888,
1561
+ "grad_norm": 17.032840728759766,
1562
+ "learning_rate": 5.745967741935484e-06,
1563
+ "loss": 0.4505,
1564
+ "step": 2220
1565
+ },
1566
+ {
1567
+ "epoch": 0.892,
1568
+ "grad_norm": 6.66541862487793,
1569
+ "learning_rate": 5.544354838709678e-06,
1570
+ "loss": 0.3105,
1571
+ "step": 2230
1572
+ },
1573
+ {
1574
+ "epoch": 0.896,
1575
+ "grad_norm": 18.18755531311035,
1576
+ "learning_rate": 5.342741935483872e-06,
1577
+ "loss": 0.3225,
1578
+ "step": 2240
1579
+ },
1580
+ {
1581
+ "epoch": 0.9,
1582
+ "grad_norm": 7.183579444885254,
1583
+ "learning_rate": 5.141129032258065e-06,
1584
+ "loss": 0.2152,
1585
+ "step": 2250
1586
  }
1587
  ],
1588
  "logging_steps": 10,
 
1602
  "attributes": {}
1603
  }
1604
  },
1605
+ "total_flos": 4.06788487884288e+16,
1606
  "train_batch_size": 2,
1607
  "trial_name": null,
1608
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20bf013736b049630a8dcb1c77b612997cdb572d3055f50be6f6d59b1bf9eaaa
3
  size 6289
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffb5eae08d182d38c5142a692457f7672389d2a610d34241a5f8c8d5e6b351af
3
  size 6289