FredericFan commited on
Commit
77ca0d4
·
verified ·
1 Parent(s): 4cdaa1c

Training in progress, step 9500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dfabab5bf5157607fcc2602e95b9ce10a5b071f3b4fe4e130b9741ece0fba0a
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79d246435ab5b40ad2ea43d8f5100d2b86ff4b2b6856057a71c7e027ed54a525
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0adc8bbd6eae86e2c260d1fab52c374eee35d382f517b5c8cbd8551e8f7a77a3
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64f09f59006a636b77641a43b73bf147e4d36c6b5a2f33fb4d1638706ccc710a
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d89768c598d4fd9b0ed22710bf5b3c13eec14bfaad44bcf482c9b7c5838dd420
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d34bc75d96aba8284db8d20e2294a4e554617b7623afa838e1b67103dc8d05a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:620c7bc458bafe9b83f6fadd9f5a5700bbd89b438e325546819b47236eeb8e3e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6239f6e8ed7e2039649ceba0353d677c59a2e919a67e2025fa635742b0397798
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.0838891863822937,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-9000",
4
- "epoch": 0.72,
5
  "eval_steps": 500,
6
- "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1411,6 +1411,84 @@
1411
  "eval_samples_per_second": 22.706,
1412
  "eval_steps_per_second": 5.676,
1413
  "step": 9000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1414
  }
1415
  ],
1416
  "logging_steps": 50,
@@ -1430,7 +1508,7 @@
1430
  "attributes": {}
1431
  }
1432
  },
1433
- "total_flos": 2.192248406016e+16,
1434
  "train_batch_size": 4,
1435
  "trial_name": null,
1436
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08371420204639435,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-9500",
4
+ "epoch": 0.76,
5
  "eval_steps": 500,
6
+ "global_step": 9500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1411
  "eval_samples_per_second": 22.706,
1412
  "eval_steps_per_second": 5.676,
1413
  "step": 9000
1414
+ },
1415
+ {
1416
+ "epoch": 0.724,
1417
+ "grad_norm": 0.152841717004776,
1418
+ "learning_rate": 1.91424e-05,
1419
+ "loss": 0.0611,
1420
+ "step": 9050
1421
+ },
1422
+ {
1423
+ "epoch": 0.728,
1424
+ "grad_norm": 0.16737787425518036,
1425
+ "learning_rate": 1.90824e-05,
1426
+ "loss": 0.0626,
1427
+ "step": 9100
1428
+ },
1429
+ {
1430
+ "epoch": 0.732,
1431
+ "grad_norm": 0.1456371396780014,
1432
+ "learning_rate": 1.90224e-05,
1433
+ "loss": 0.0599,
1434
+ "step": 9150
1435
+ },
1436
+ {
1437
+ "epoch": 0.736,
1438
+ "grad_norm": 0.1772635579109192,
1439
+ "learning_rate": 1.89624e-05,
1440
+ "loss": 0.0577,
1441
+ "step": 9200
1442
+ },
1443
+ {
1444
+ "epoch": 0.74,
1445
+ "grad_norm": 0.11802179366350174,
1446
+ "learning_rate": 1.89024e-05,
1447
+ "loss": 0.0645,
1448
+ "step": 9250
1449
+ },
1450
+ {
1451
+ "epoch": 0.744,
1452
+ "grad_norm": 0.1235092505812645,
1453
+ "learning_rate": 1.88424e-05,
1454
+ "loss": 0.0584,
1455
+ "step": 9300
1456
+ },
1457
+ {
1458
+ "epoch": 0.748,
1459
+ "grad_norm": 0.11901592463254929,
1460
+ "learning_rate": 1.8782399999999998e-05,
1461
+ "loss": 0.06,
1462
+ "step": 9350
1463
+ },
1464
+ {
1465
+ "epoch": 0.752,
1466
+ "grad_norm": 0.09477788209915161,
1467
+ "learning_rate": 1.87224e-05,
1468
+ "loss": 0.0637,
1469
+ "step": 9400
1470
+ },
1471
+ {
1472
+ "epoch": 0.756,
1473
+ "grad_norm": 0.12917232513427734,
1474
+ "learning_rate": 1.86624e-05,
1475
+ "loss": 0.0648,
1476
+ "step": 9450
1477
+ },
1478
+ {
1479
+ "epoch": 0.76,
1480
+ "grad_norm": 0.17290474474430084,
1481
+ "learning_rate": 1.86024e-05,
1482
+ "loss": 0.0638,
1483
+ "step": 9500
1484
+ },
1485
+ {
1486
+ "epoch": 0.76,
1487
+ "eval_loss": 0.08371420204639435,
1488
+ "eval_runtime": 88.0684,
1489
+ "eval_samples_per_second": 22.71,
1490
+ "eval_steps_per_second": 5.677,
1491
+ "step": 9500
1492
  }
1493
  ],
1494
  "logging_steps": 50,
 
1508
  "attributes": {}
1509
  }
1510
  },
1511
+ "total_flos": 2.314039984128e+16,
1512
  "train_batch_size": 4,
1513
  "trial_name": null,
1514
  "trial_params": null