irishprancer commited on
Commit
88b5940
·
verified ·
1 Parent(s): 09a2f25

Training in progress, step 1950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bfdae32dcb0649e9b6181d28f56452309d6ccd5e9baa4277ab4f62d2c4d1d80
3
  size 457872944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8442e92131e813ad779f2e5ae1738b36bb12dfa25c65cc43cd254c7aa1ebfb5
3
  size 457872944
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b9fcea2908dd4a519879ba1fc9feed289c05701d2902b54237248ade2af5861
3
  size 906655482
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54db9d4a05e37e1ffabfb46252585a89e74384915ec6e7f08b4aa6f212520050
3
  size 906655482
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d8872471717f433968474073c4ebf4bc1b3a47efb0c26462b67fedcf37cefdb
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d028195f91f8f2da375afd5c8611226c3f441d4eab18dd3d9c58776f6e4ad297
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2760d17b439f8ea889c9a58cfe3005f72c6e199a9c6e96f0f7dd483d869f42ba
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1fd2596562ccd88417cda1ce7310479ebb8edf0f8c5a343fa492c5fa64814ee
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.529088020324707,
3
  "best_model_checkpoint": "./output/checkpoint-1050",
4
- "epoch": 5.09915014164306,
5
  "eval_steps": 150,
6
- "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1363,6 +1363,119 @@
1363
  "eval_samples_per_second": 9.733,
1364
  "eval_steps_per_second": 9.733,
1365
  "step": 1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1366
  }
1367
  ],
1368
  "logging_steps": 10,
@@ -1382,7 +1495,7 @@
1382
  "attributes": {}
1383
  }
1384
  },
1385
- "total_flos": 8.689547059086029e+16,
1386
  "train_batch_size": 8,
1387
  "trial_name": null,
1388
  "trial_params": null
 
1
  {
2
  "best_metric": 0.529088020324707,
3
  "best_model_checkpoint": "./output/checkpoint-1050",
4
+ "epoch": 5.524079320113314,
5
  "eval_steps": 150,
6
+ "global_step": 1950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1363
  "eval_samples_per_second": 9.733,
1364
  "eval_steps_per_second": 9.733,
1365
  "step": 1800
1366
+ },
1367
+ {
1368
+ "epoch": 5.127478753541077,
1369
+ "grad_norm": 6.307571887969971,
1370
+ "learning_rate": 3.094997749385071e-05,
1371
+ "loss": 0.0484,
1372
+ "step": 1810
1373
+ },
1374
+ {
1375
+ "epoch": 5.1558073654390935,
1376
+ "grad_norm": 4.0599684715271,
1377
+ "learning_rate": 3.0828615953126175e-05,
1378
+ "loss": 0.0364,
1379
+ "step": 1820
1380
+ },
1381
+ {
1382
+ "epoch": 5.18413597733711,
1383
+ "grad_norm": 1.6084905862808228,
1384
+ "learning_rate": 3.0706860448208514e-05,
1385
+ "loss": 0.0375,
1386
+ "step": 1830
1387
+ },
1388
+ {
1389
+ "epoch": 5.212464589235127,
1390
+ "grad_norm": 3.262603759765625,
1391
+ "learning_rate": 3.058471598398967e-05,
1392
+ "loss": 0.0508,
1393
+ "step": 1840
1394
+ },
1395
+ {
1396
+ "epoch": 5.240793201133145,
1397
+ "grad_norm": 2.902472972869873,
1398
+ "learning_rate": 3.0462187581350132e-05,
1399
+ "loss": 0.0685,
1400
+ "step": 1850
1401
+ },
1402
+ {
1403
+ "epoch": 5.269121813031162,
1404
+ "grad_norm": 1.6740124225616455,
1405
+ "learning_rate": 3.033928027695265e-05,
1406
+ "loss": 0.0568,
1407
+ "step": 1860
1408
+ },
1409
+ {
1410
+ "epoch": 5.2974504249291785,
1411
+ "grad_norm": 5.76546573638916,
1412
+ "learning_rate": 3.021599912303509e-05,
1413
+ "loss": 0.0706,
1414
+ "step": 1870
1415
+ },
1416
+ {
1417
+ "epoch": 5.325779036827195,
1418
+ "grad_norm": 4.644334316253662,
1419
+ "learning_rate": 3.009234918720284e-05,
1420
+ "loss": 0.062,
1421
+ "step": 1880
1422
+ },
1423
+ {
1424
+ "epoch": 5.354107648725212,
1425
+ "grad_norm": 2.3427531719207764,
1426
+ "learning_rate": 2.9968335552220465e-05,
1427
+ "loss": 0.0529,
1428
+ "step": 1890
1429
+ },
1430
+ {
1431
+ "epoch": 5.38243626062323,
1432
+ "grad_norm": 5.573881149291992,
1433
+ "learning_rate": 2.9843963315802763e-05,
1434
+ "loss": 0.065,
1435
+ "step": 1900
1436
+ },
1437
+ {
1438
+ "epoch": 5.410764872521247,
1439
+ "grad_norm": 1.00111985206604,
1440
+ "learning_rate": 2.9719237590405247e-05,
1441
+ "loss": 0.0458,
1442
+ "step": 1910
1443
+ },
1444
+ {
1445
+ "epoch": 5.4390934844192635,
1446
+ "grad_norm": 9.30931282043457,
1447
+ "learning_rate": 2.959416350301396e-05,
1448
+ "loss": 0.0665,
1449
+ "step": 1920
1450
+ },
1451
+ {
1452
+ "epoch": 5.46742209631728,
1453
+ "grad_norm": 2.9437053203582764,
1454
+ "learning_rate": 2.946874619493476e-05,
1455
+ "loss": 0.0539,
1456
+ "step": 1930
1457
+ },
1458
+ {
1459
+ "epoch": 5.495750708215297,
1460
+ "grad_norm": 6.750890254974365,
1461
+ "learning_rate": 2.9342990821581955e-05,
1462
+ "loss": 0.0574,
1463
+ "step": 1940
1464
+ },
1465
+ {
1466
+ "epoch": 5.524079320113314,
1467
+ "grad_norm": 3.0086891651153564,
1468
+ "learning_rate": 2.9216902552266397e-05,
1469
+ "loss": 0.0515,
1470
+ "step": 1950
1471
+ },
1472
+ {
1473
+ "epoch": 5.524079320113314,
1474
+ "eval_loss": 0.7158511281013489,
1475
+ "eval_runtime": 31.532,
1476
+ "eval_samples_per_second": 9.958,
1477
+ "eval_steps_per_second": 9.958,
1478
+ "step": 1950
1479
  }
1480
  ],
1481
  "logging_steps": 10,
 
1495
  "attributes": {}
1496
  }
1497
  },
1498
+ "total_flos": 9.410666091714048e+16,
1499
  "train_batch_size": 8,
1500
  "trial_name": null,
1501
  "trial_params": null