ljcamargo commited on
Commit
b05f95b
·
verified ·
1 Parent(s): 9d4f0c3

Training in progress, step 2100, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e3ce014c4ffdeb3ed2d1bb4f167ab2f28f0e06615589c0b4b8532fa2bda557a
3
  size 2558403928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15f22026b9de091fea7c6677cc4018fa6aa370338d59d131114563939cf90b17
3
  size 2558403928
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6bbd954c177e56d9e20277125d9cf6754fff55e598bb9de9bdec4e2cbe1afd8
3
  size 1313638993
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d65b81af14a5a134ba9a873731d09629269da26fa8b87244dac0d9301cc842b1
3
  size 1313638993
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c51602986df284fcdbb1c3432b905ff86ef24e41fe8fb42533918ae0e10ec6f
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ac2c5f32acf6ee420ceadbd9ac0d52af52081eb0d8301506f11df8d08763b6e
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa0408efb69cab96d5bab9a1aaf44cedbc9fc8d34f4cef378d81605e5c026d5c
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc99556bf27209385963813e3570510732839e6002d61d657266050e280a33eb
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:020caa0fd59e7adabae53f0d39b914916a456345c07ea74ca27ee8ec408257e7
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9206e9d78f9c7162a065f53b3bc585a6b187e23b8f10b603eff8a1391fd60fea
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.44,
6
  "eval_steps": 500,
7
- "global_step": 1800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1275,6 +1275,216 @@
1275
  "learning_rate": 3.9349485794441395e-05,
1276
  "loss": 4.5401,
1277
  "step": 1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1278
  }
1279
  ],
1280
  "logging_steps": 10,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.6800000000000002,
6
  "eval_steps": 500,
7
+ "global_step": 2100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1275
  "learning_rate": 3.9349485794441395e-05,
1276
  "loss": 4.5401,
1277
  "step": 1800
1278
+ },
1279
+ {
1280
+ "epoch": 1.448,
1281
+ "grad_norm": 12.537016868591309,
1282
+ "learning_rate": 3.831223489967025e-05,
1283
+ "loss": 4.7768,
1284
+ "step": 1810
1285
+ },
1286
+ {
1287
+ "epoch": 1.456,
1288
+ "grad_norm": 6.526289939880371,
1289
+ "learning_rate": 3.728558920658868e-05,
1290
+ "loss": 4.4916,
1291
+ "step": 1820
1292
+ },
1293
+ {
1294
+ "epoch": 1.464,
1295
+ "grad_norm": 7.882378101348877,
1296
+ "learning_rate": 3.6269725213479846e-05,
1297
+ "loss": 4.5072,
1298
+ "step": 1830
1299
+ },
1300
+ {
1301
+ "epoch": 1.472,
1302
+ "grad_norm": 10.795116424560547,
1303
+ "learning_rate": 3.526481756506498e-05,
1304
+ "loss": 4.7454,
1305
+ "step": 1840
1306
+ },
1307
+ {
1308
+ "epoch": 1.48,
1309
+ "grad_norm": 9.107118606567383,
1310
+ "learning_rate": 3.4271039022478694e-05,
1311
+ "loss": 4.3667,
1312
+ "step": 1850
1313
+ },
1314
+ {
1315
+ "epoch": 1.488,
1316
+ "grad_norm": 8.725346565246582,
1317
+ "learning_rate": 3.328856043356837e-05,
1318
+ "loss": 4.8818,
1319
+ "step": 1860
1320
+ },
1321
+ {
1322
+ "epoch": 1.496,
1323
+ "grad_norm": 8.258055686950684,
1324
+ "learning_rate": 3.231755070352249e-05,
1325
+ "loss": 4.4017,
1326
+ "step": 1870
1327
+ },
1328
+ {
1329
+ "epoch": 1.504,
1330
+ "grad_norm": 9.932522773742676,
1331
+ "learning_rate": 3.1358176765832915e-05,
1332
+ "loss": 4.3575,
1333
+ "step": 1880
1334
+ },
1335
+ {
1336
+ "epoch": 1.512,
1337
+ "grad_norm": 7.825517654418945,
1338
+ "learning_rate": 3.041060355359594e-05,
1339
+ "loss": 4.2122,
1340
+ "step": 1890
1341
+ },
1342
+ {
1343
+ "epoch": 1.52,
1344
+ "grad_norm": 8.71921443939209,
1345
+ "learning_rate": 2.9474993971157605e-05,
1346
+ "loss": 4.4648,
1347
+ "step": 1900
1348
+ },
1349
+ {
1350
+ "epoch": 1.528,
1351
+ "grad_norm": 9.51145076751709,
1352
+ "learning_rate": 2.8551508866107514e-05,
1353
+ "loss": 4.4388,
1354
+ "step": 1910
1355
+ },
1356
+ {
1357
+ "epoch": 1.536,
1358
+ "grad_norm": 6.032364845275879,
1359
+ "learning_rate": 2.764030700162633e-05,
1360
+ "loss": 4.6544,
1361
+ "step": 1920
1362
+ },
1363
+ {
1364
+ "epoch": 1.544,
1365
+ "grad_norm": 6.442480564117432,
1366
+ "learning_rate": 2.6741545029191674e-05,
1367
+ "loss": 4.3281,
1368
+ "step": 1930
1369
+ },
1370
+ {
1371
+ "epoch": 1.552,
1372
+ "grad_norm": 9.142407417297363,
1373
+ "learning_rate": 2.5855377461646902e-05,
1374
+ "loss": 4.7258,
1375
+ "step": 1940
1376
+ },
1377
+ {
1378
+ "epoch": 1.56,
1379
+ "grad_norm": 13.672632217407227,
1380
+ "learning_rate": 2.4981956646637815e-05,
1381
+ "loss": 4.076,
1382
+ "step": 1950
1383
+ },
1384
+ {
1385
+ "epoch": 1.568,
1386
+ "grad_norm": 10.532658576965332,
1387
+ "learning_rate": 2.412143274042129e-05,
1388
+ "loss": 4.3765,
1389
+ "step": 1960
1390
+ },
1391
+ {
1392
+ "epoch": 1.576,
1393
+ "grad_norm": 6.73195743560791,
1394
+ "learning_rate": 2.327395368205084e-05,
1395
+ "loss": 4.4419,
1396
+ "step": 1970
1397
+ },
1398
+ {
1399
+ "epoch": 1.584,
1400
+ "grad_norm": 7.2371745109558105,
1401
+ "learning_rate": 2.243966516794338e-05,
1402
+ "loss": 4.5395,
1403
+ "step": 1980
1404
+ },
1405
+ {
1406
+ "epoch": 1.592,
1407
+ "grad_norm": 9.002269744873047,
1408
+ "learning_rate": 2.161871062683145e-05,
1409
+ "loss": 4.5577,
1410
+ "step": 1990
1411
+ },
1412
+ {
1413
+ "epoch": 1.6,
1414
+ "grad_norm": 10.184229850769043,
1415
+ "learning_rate": 2.0811231195105186e-05,
1416
+ "loss": 4.3631,
1417
+ "step": 2000
1418
+ },
1419
+ {
1420
+ "epoch": 1.608,
1421
+ "grad_norm": 7.962112903594971,
1422
+ "learning_rate": 2.0017365692548717e-05,
1423
+ "loss": 4.6822,
1424
+ "step": 2010
1425
+ },
1426
+ {
1427
+ "epoch": 1.616,
1428
+ "grad_norm": 7.3697190284729,
1429
+ "learning_rate": 1.9237250598474564e-05,
1430
+ "loss": 4.1389,
1431
+ "step": 2020
1432
+ },
1433
+ {
1434
+ "epoch": 1.624,
1435
+ "grad_norm": 8.01472282409668,
1436
+ "learning_rate": 1.8471020028260368e-05,
1437
+ "loss": 3.9598,
1438
+ "step": 2030
1439
+ },
1440
+ {
1441
+ "epoch": 1.6320000000000001,
1442
+ "grad_norm": 5.376262187957764,
1443
+ "learning_rate": 1.7718805710292208e-05,
1444
+ "loss": 4.3192,
1445
+ "step": 2040
1446
+ },
1447
+ {
1448
+ "epoch": 1.6400000000000001,
1449
+ "grad_norm": 10.927371978759766,
1450
+ "learning_rate": 1.6980736963318177e-05,
1451
+ "loss": 4.192,
1452
+ "step": 2050
1453
+ },
1454
+ {
1455
+ "epoch": 1.6480000000000001,
1456
+ "grad_norm": 6.14822244644165,
1457
+ "learning_rate": 1.625694067421626e-05,
1458
+ "loss": 4.4155,
1459
+ "step": 2060
1460
+ },
1461
+ {
1462
+ "epoch": 1.6560000000000001,
1463
+ "grad_norm": 8.27953815460205,
1464
+ "learning_rate": 1.554754127618019e-05,
1465
+ "loss": 4.5526,
1466
+ "step": 2070
1467
+ },
1468
+ {
1469
+ "epoch": 1.6640000000000001,
1470
+ "grad_norm": 6.160037517547607,
1471
+ "learning_rate": 1.4852660727327361e-05,
1472
+ "loss": 4.453,
1473
+ "step": 2080
1474
+ },
1475
+ {
1476
+ "epoch": 1.6720000000000002,
1477
+ "grad_norm": 7.098100662231445,
1478
+ "learning_rate": 1.4172418489731942e-05,
1479
+ "loss": 4.4741,
1480
+ "step": 2090
1481
+ },
1482
+ {
1483
+ "epoch": 1.6800000000000002,
1484
+ "grad_norm": 6.903653144836426,
1485
+ "learning_rate": 1.3506931508887333e-05,
1486
+ "loss": 4.0234,
1487
+ "step": 2100
1488
  }
1489
  ],
1490
  "logging_steps": 10,
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55ca17433b5f6660ff524e613cd483a70dd34c9a3734b5a0b15911c69ef6f917
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a17f7b1f07b16a135d4efceeee23f79e5bdc7bac1dd2a6c7b4c904467dcccf1
3
  size 5905