irishprancer commited on
Commit
4efbfeb
·
verified ·
1 Parent(s): e159959

Training in progress, step 1950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fef9f5e763df977a6d66200c2b774f79362d53395811821634923be4de05095f
3
  size 1252339768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a66f530074025ce9a868656712abe5b9851577acdf8c6996b5934299ab17d9df
3
  size 1252339768
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:889b331db9f86d0820197833669ee4f6f5be0751345502b526ea0c9385372e71
3
  size 922711674
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:602f73099bf923d15f70a8a148b96595915e9977d54842736c1dd52de11d9e6d
3
  size 922711674
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd84dc0618cff04de1d820f56790b53f09a83540b67a5913a4d65c1ec4d76a8d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:379a5e6efcfc2c2acbc05d6d635118ce7aff442a8e90d42cfe4f041d83d97567
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2045cf70d5b75a930f47730c11bef0eb78af4c3bdc39abfe9aa52f6177c6833
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7e3cde54edf92558be8268ded51d77ecba3dd577313ae619e684eff0af121d6
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.6718898415565491,
3
  "best_model_checkpoint": "./output/checkpoint-1050",
4
- "epoch": 1.6666666666666665,
5
  "eval_steps": 150,
6
- "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1363,6 +1363,119 @@
1363
  "eval_samples_per_second": 7.231,
1364
  "eval_steps_per_second": 7.231,
1365
  "step": 1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1366
  }
1367
  ],
1368
  "logging_steps": 10,
@@ -1382,7 +1495,7 @@
1382
  "attributes": {}
1383
  }
1384
  },
1385
- "total_flos": 4.3624807854073805e+17,
1386
  "train_batch_size": 4,
1387
  "trial_name": null,
1388
  "trial_params": null
 
1
  {
2
  "best_metric": 0.6718898415565491,
3
  "best_model_checkpoint": "./output/checkpoint-1050",
4
+ "epoch": 1.8055555555555556,
5
  "eval_steps": 150,
6
+ "global_step": 1950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1363
  "eval_samples_per_second": 7.231,
1364
  "eval_steps_per_second": 7.231,
1365
  "step": 1800
1366
+ },
1367
+ {
1368
+ "epoch": 1.675925925925926,
1369
+ "grad_norm": 8.68697452545166,
1370
+ "learning_rate": 1.6288016313047095e-05,
1371
+ "loss": 0.4179,
1372
+ "step": 1810
1373
+ },
1374
+ {
1375
+ "epoch": 1.6851851851851851,
1376
+ "grad_norm": 7.802534103393555,
1377
+ "learning_rate": 1.6224147486147602e-05,
1378
+ "loss": 0.3834,
1379
+ "step": 1820
1380
+ },
1381
+ {
1382
+ "epoch": 1.6944444444444444,
1383
+ "grad_norm": 9.18057918548584,
1384
+ "learning_rate": 1.616007132807298e-05,
1385
+ "loss": 0.4318,
1386
+ "step": 1830
1387
+ },
1388
+ {
1389
+ "epoch": 1.7037037037037037,
1390
+ "grad_norm": 11.710827827453613,
1391
+ "learning_rate": 1.6095790472743107e-05,
1392
+ "loss": 0.3901,
1393
+ "step": 1840
1394
+ },
1395
+ {
1396
+ "epoch": 1.7129629629629628,
1397
+ "grad_norm": 16.61398696899414,
1398
+ "learning_rate": 1.6031307562492174e-05,
1399
+ "loss": 0.4248,
1400
+ "step": 1850
1401
+ },
1402
+ {
1403
+ "epoch": 1.7222222222222223,
1404
+ "grad_norm": 9.394521713256836,
1405
+ "learning_rate": 1.5966625247960068e-05,
1406
+ "loss": 0.3785,
1407
+ "step": 1860
1408
+ },
1409
+ {
1410
+ "epoch": 1.7314814814814814,
1411
+ "grad_norm": 8.738484382629395,
1412
+ "learning_rate": 1.5901746187983387e-05,
1413
+ "loss": 0.4888,
1414
+ "step": 1870
1415
+ },
1416
+ {
1417
+ "epoch": 1.7407407407407407,
1418
+ "grad_norm": 9.013481140136719,
1419
+ "learning_rate": 1.5836673049486175e-05,
1420
+ "loss": 0.466,
1421
+ "step": 1880
1422
+ },
1423
+ {
1424
+ "epoch": 1.75,
1425
+ "grad_norm": 5.994925022125244,
1426
+ "learning_rate": 1.577140850737029e-05,
1427
+ "loss": 0.4301,
1428
+ "step": 1890
1429
+ },
1430
+ {
1431
+ "epoch": 1.7592592592592593,
1432
+ "grad_norm": 17.245548248291016,
1433
+ "learning_rate": 1.5705955244405423e-05,
1434
+ "loss": 0.4447,
1435
+ "step": 1900
1436
+ },
1437
+ {
1438
+ "epoch": 1.7685185185185186,
1439
+ "grad_norm": 13.091217041015625,
1440
+ "learning_rate": 1.564031595111886e-05,
1441
+ "loss": 0.4568,
1442
+ "step": 1910
1443
+ },
1444
+ {
1445
+ "epoch": 1.7777777777777777,
1446
+ "grad_norm": 9.169949531555176,
1447
+ "learning_rate": 1.557449332568485e-05,
1448
+ "loss": 0.4043,
1449
+ "step": 1920
1450
+ },
1451
+ {
1452
+ "epoch": 1.7870370370370372,
1453
+ "grad_norm": 7.6942644119262695,
1454
+ "learning_rate": 1.5508490073813722e-05,
1455
+ "loss": 0.4212,
1456
+ "step": 1930
1457
+ },
1458
+ {
1459
+ "epoch": 1.7962962962962963,
1460
+ "grad_norm": 11.494171142578125,
1461
+ "learning_rate": 1.5442308908640636e-05,
1462
+ "loss": 0.4773,
1463
+ "step": 1940
1464
+ },
1465
+ {
1466
+ "epoch": 1.8055555555555556,
1467
+ "grad_norm": 11.983560562133789,
1468
+ "learning_rate": 1.537595255061408e-05,
1469
+ "loss": 0.4704,
1470
+ "step": 1950
1471
+ },
1472
+ {
1473
+ "epoch": 1.8055555555555556,
1474
+ "eval_loss": 0.6845656037330627,
1475
+ "eval_runtime": 66.4105,
1476
+ "eval_samples_per_second": 7.228,
1477
+ "eval_steps_per_second": 7.228,
1478
+ "step": 1950
1479
  }
1480
  ],
1481
  "logging_steps": 10,
 
1495
  "attributes": {}
1496
  }
1497
  },
1498
+ "total_flos": 4.735843482878876e+17,
1499
  "train_batch_size": 4,
1500
  "trial_name": null,
1501
  "trial_params": null