ljcamargo commited on
Commit
3406fc7
·
verified ·
1 Parent(s): be08963

Training in progress, step 2100, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30cf4ddc8138dc0b63c04cf5856ccaefc44f54d57161548a2bcf67587713dfed
3
  size 3237818848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f413611c4d62c42b34cb3c35bf307f1f004d85a195b49625f53cb045532d3640
3
  size 3237818848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c88d4612f6436cb0270beb0bb2ab7cbb57317eafb7b87764e12d36ec083c260
3
  size 2062251569
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3f4fe7007d84dd8d6957cbfe09cbddc20609770c2a83ca6a81bf721da4dd27d
3
  size 2062251569
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3b789b883f13ca849e56997deda5a819a4b325b5d103e882990a667f22165d3
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e677751a73856bc26d9944f5cb47e64e748585d8eec5698550c17847fccdcc5
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1547aae10ac7691e1716f567b08e3b4d274fa923879a48af8c2bb55c815a28a2
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f13dd54935d4d1876d05824ed5aab8e787b691f2aec583b5a7e328fd2bead633
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f5a59feb5a16bc7cf6785205b16a58a4ce06c6d1cd586567a10fcc2307ab6fc
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecb7b6032736995012c1953174826b2661afb8ec0cbb267bc342afa03a626fe2
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8,
6
  "eval_steps": 300,
7
- "global_step": 1800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1280,6 +1280,216 @@
1280
  "learning_rate": 2.0183134739587807e-05,
1281
  "loss": 0.9083,
1282
  "step": 1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1283
  }
1284
  ],
1285
  "logging_steps": 10,
@@ -1299,7 +1509,7 @@
1299
  "attributes": {}
1300
  }
1301
  },
1302
- "total_flos": 4.9102593196032e+19,
1303
  "train_batch_size": 4,
1304
  "trial_name": null,
1305
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.9333333333333333,
6
  "eval_steps": 300,
7
+ "global_step": 2100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1280
  "learning_rate": 2.0183134739587807e-05,
1281
  "loss": 0.9083,
1282
  "step": 1800
1283
+ },
1284
+ {
1285
+ "epoch": 0.8044444444444444,
1286
+ "grad_norm": 6.6202898025512695,
1287
+ "learning_rate": 1.9333323407135652e-05,
1288
+ "loss": 0.9497,
1289
+ "step": 1810
1290
+ },
1291
+ {
1292
+ "epoch": 0.8088888888888889,
1293
+ "grad_norm": 8.43086051940918,
1294
+ "learning_rate": 1.8499871782914823e-05,
1295
+ "loss": 0.8944,
1296
+ "step": 1820
1297
+ },
1298
+ {
1299
+ "epoch": 0.8133333333333334,
1300
+ "grad_norm": 7.180028915405273,
1301
+ "learning_rate": 1.7682948896145037e-05,
1302
+ "loss": 0.9319,
1303
+ "step": 1830
1304
+ },
1305
+ {
1306
+ "epoch": 0.8177777777777778,
1307
+ "grad_norm": 9.3683500289917,
1308
+ "learning_rate": 1.688272042391421e-05,
1309
+ "loss": 0.9467,
1310
+ "step": 1840
1311
+ },
1312
+ {
1313
+ "epoch": 0.8222222222222222,
1314
+ "grad_norm": 7.685975551605225,
1315
+ "learning_rate": 1.609934865757835e-05,
1316
+ "loss": 0.8837,
1317
+ "step": 1850
1318
+ },
1319
+ {
1320
+ "epoch": 0.8266666666666667,
1321
+ "grad_norm": 6.820009231567383,
1322
+ "learning_rate": 1.5332992469847595e-05,
1323
+ "loss": 0.8969,
1324
+ "step": 1860
1325
+ },
1326
+ {
1327
+ "epoch": 0.8311111111111111,
1328
+ "grad_norm": 6.4122161865234375,
1329
+ "learning_rate": 1.4583807282566109e-05,
1330
+ "loss": 0.892,
1331
+ "step": 1870
1332
+ },
1333
+ {
1334
+ "epoch": 0.8355555555555556,
1335
+ "grad_norm": 11.386307716369629,
1336
+ "learning_rate": 1.3851945035191271e-05,
1337
+ "loss": 0.9494,
1338
+ "step": 1880
1339
+ },
1340
+ {
1341
+ "epoch": 0.84,
1342
+ "grad_norm": 7.07219934463501,
1343
+ "learning_rate": 1.3137554153979648e-05,
1344
+ "loss": 0.9254,
1345
+ "step": 1890
1346
+ },
1347
+ {
1348
+ "epoch": 0.8444444444444444,
1349
+ "grad_norm": 8.010754585266113,
1350
+ "learning_rate": 1.2440779521885026e-05,
1351
+ "loss": 0.8829,
1352
+ "step": 1900
1353
+ },
1354
+ {
1355
+ "epoch": 0.8488888888888889,
1356
+ "grad_norm": 7.019892692565918,
1357
+ "learning_rate": 1.1761762449175362e-05,
1358
+ "loss": 0.9155,
1359
+ "step": 1910
1360
+ },
1361
+ {
1362
+ "epoch": 0.8533333333333334,
1363
+ "grad_norm": 9.830785751342773,
1364
+ "learning_rate": 1.1100640644774174e-05,
1365
+ "loss": 0.9572,
1366
+ "step": 1920
1367
+ },
1368
+ {
1369
+ "epoch": 0.8577777777777778,
1370
+ "grad_norm": 8.173066139221191,
1371
+ "learning_rate": 1.0457548188332156e-05,
1372
+ "loss": 0.9465,
1373
+ "step": 1930
1374
+ },
1375
+ {
1376
+ "epoch": 0.8622222222222222,
1377
+ "grad_norm": 7.015248775482178,
1378
+ "learning_rate": 9.83261550303518e-06,
1379
+ "loss": 0.8799,
1380
+ "step": 1940
1381
+ },
1382
+ {
1383
+ "epoch": 0.8666666666666667,
1384
+ "grad_norm": 7.390592575073242,
1385
+ "learning_rate": 9.225969329153572e-06,
1386
+ "loss": 0.9065,
1387
+ "step": 1950
1388
+ },
1389
+ {
1390
+ "epoch": 0.8711111111111111,
1391
+ "grad_norm": 8.248661994934082,
1392
+ "learning_rate": 8.637732698338353e-06,
1393
+ "loss": 0.8821,
1394
+ "step": 1960
1395
+ },
1396
+ {
1397
+ "epoch": 0.8755555555555555,
1398
+ "grad_norm": 7.508656024932861,
1399
+ "learning_rate": 8.068024908669658e-06,
1400
+ "loss": 0.9156,
1401
+ "step": 1970
1402
+ },
1403
+ {
1404
+ "epoch": 0.88,
1405
+ "grad_norm": 11.526093482971191,
1406
+ "learning_rate": 7.516961500462438e-06,
1407
+ "loss": 0.9091,
1408
+ "step": 1980
1409
+ },
1410
+ {
1411
+ "epoch": 0.8844444444444445,
1412
+ "grad_norm": 7.947896480560303,
1413
+ "learning_rate": 6.984654232833998e-06,
1414
+ "loss": 0.9023,
1415
+ "step": 1990
1416
+ },
1417
+ {
1418
+ "epoch": 0.8888888888888888,
1419
+ "grad_norm": 8.093976974487305,
1420
+ "learning_rate": 6.471211061038695e-06,
1421
+ "loss": 0.8878,
1422
+ "step": 2000
1423
+ },
1424
+ {
1425
+ "epoch": 0.8933333333333333,
1426
+ "grad_norm": 11.323365211486816,
1427
+ "learning_rate": 5.976736114573867e-06,
1428
+ "loss": 0.9262,
1429
+ "step": 2010
1430
+ },
1431
+ {
1432
+ "epoch": 0.8977777777777778,
1433
+ "grad_norm": 8.999126434326172,
1434
+ "learning_rate": 5.501329676061662e-06,
1435
+ "loss": 0.878,
1436
+ "step": 2020
1437
+ },
1438
+ {
1439
+ "epoch": 0.9022222222222223,
1440
+ "grad_norm": 8.701828956604004,
1441
+ "learning_rate": 5.045088160911227e-06,
1442
+ "loss": 0.8847,
1443
+ "step": 2030
1444
+ },
1445
+ {
1446
+ "epoch": 0.9066666666666666,
1447
+ "grad_norm": 6.862762451171875,
1448
+ "learning_rate": 4.60810409776491e-06,
1449
+ "loss": 0.8999,
1450
+ "step": 2040
1451
+ },
1452
+ {
1453
+ "epoch": 0.9111111111111111,
1454
+ "grad_norm": 7.2732672691345215,
1455
+ "learning_rate": 4.190466109733004e-06,
1456
+ "loss": 0.9271,
1457
+ "step": 2050
1458
+ },
1459
+ {
1460
+ "epoch": 0.9155555555555556,
1461
+ "grad_norm": 6.5058979988098145,
1462
+ "learning_rate": 3.7922588964203533e-06,
1463
+ "loss": 0.8799,
1464
+ "step": 2060
1465
+ },
1466
+ {
1467
+ "epoch": 0.92,
1468
+ "grad_norm": 7.603868007659912,
1469
+ "learning_rate": 3.4135632167487274e-06,
1470
+ "loss": 0.9081,
1471
+ "step": 2070
1472
+ },
1473
+ {
1474
+ "epoch": 0.9244444444444444,
1475
+ "grad_norm": 7.810312271118164,
1476
+ "learning_rate": 3.054455872578421e-06,
1477
+ "loss": 0.9159,
1478
+ "step": 2080
1479
+ },
1480
+ {
1481
+ "epoch": 0.9288888888888889,
1482
+ "grad_norm": 8.112349510192871,
1483
+ "learning_rate": 2.71500969313242e-06,
1484
+ "loss": 0.8864,
1485
+ "step": 2090
1486
+ },
1487
+ {
1488
+ "epoch": 0.9333333333333333,
1489
+ "grad_norm": 8.802750587463379,
1490
+ "learning_rate": 2.3952935202260608e-06,
1491
+ "loss": 0.8618,
1492
+ "step": 2100
1493
  }
1494
  ],
1495
  "logging_steps": 10,
 
1509
  "attributes": {}
1510
  }
1511
  },
1512
+ "total_flos": 5.7286358728704e+19,
1513
  "train_batch_size": 4,
1514
  "trial_name": null,
1515
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9012ea34a655f218dd2bab2f2edc38d537cc1525f0e422dbcceaa6fa5a75517
3
  size 5969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24107160801c0c99920b56e5852c1fa1822488d662f9e024502c2d7c259cfa1c
3
  size 5969