FredericFan committed on
Commit
76822ef
·
verified ·
1 Parent(s): 77ca0d4

Training in progress, step 10000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79d246435ab5b40ad2ea43d8f5100d2b86ff4b2b6856057a71c7e027ed54a525
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d220ecc16a6dfc89422bca61f2e82fa5409c8e0025b733a0084094bebb3ad38
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64f09f59006a636b77641a43b73bf147e4d36c6b5a2f33fb4d1638706ccc710a
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:732697ac83fc07fe43a3d59c6ed5cfdde9a44eb04f5a28df82f1bae231ebecac
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d34bc75d96aba8284db8d20e2294a4e554617b7623afa838e1b67103dc8d05a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f0336fd2179ac86a5ce6aaf8830eebae152619763d875677000624b0d5df8f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6239f6e8ed7e2039649ceba0353d677c59a2e919a67e2025fa635742b0397798
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:661fa99b8052c174bd0009dc67982291a9f4a6f70e6b8d4ec974bec23de59a1d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08371420204639435,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-9500",
4
- "epoch": 0.76,
5
  "eval_steps": 500,
6
- "global_step": 9500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1489,6 +1489,84 @@
1489
  "eval_samples_per_second": 22.71,
1490
  "eval_steps_per_second": 5.677,
1491
  "step": 9500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1492
  }
1493
  ],
1494
  "logging_steps": 50,
@@ -1508,7 +1586,7 @@
1508
  "attributes": {}
1509
  }
1510
  },
1511
- "total_flos": 2.314039984128e+16,
1512
  "train_batch_size": 4,
1513
  "trial_name": null,
1514
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08371368050575256,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-10000",
4
+ "epoch": 0.8,
5
  "eval_steps": 500,
6
+ "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1489
  "eval_samples_per_second": 22.71,
1490
  "eval_steps_per_second": 5.677,
1491
  "step": 9500
1492
+ },
1493
+ {
1494
+ "epoch": 0.764,
1495
+ "grad_norm": 0.07568900287151337,
1496
+ "learning_rate": 1.85424e-05,
1497
+ "loss": 0.0648,
1498
+ "step": 9550
1499
+ },
1500
+ {
1501
+ "epoch": 0.768,
1502
+ "grad_norm": 0.11715036630630493,
1503
+ "learning_rate": 1.8482399999999998e-05,
1504
+ "loss": 0.0644,
1505
+ "step": 9600
1506
+ },
1507
+ {
1508
+ "epoch": 0.772,
1509
+ "grad_norm": 0.18466413021087646,
1510
+ "learning_rate": 1.84224e-05,
1511
+ "loss": 0.0641,
1512
+ "step": 9650
1513
+ },
1514
+ {
1515
+ "epoch": 0.776,
1516
+ "grad_norm": 0.15552838146686554,
1517
+ "learning_rate": 1.83624e-05,
1518
+ "loss": 0.0538,
1519
+ "step": 9700
1520
+ },
1521
+ {
1522
+ "epoch": 0.78,
1523
+ "grad_norm": 0.1408630758523941,
1524
+ "learning_rate": 1.83024e-05,
1525
+ "loss": 0.0589,
1526
+ "step": 9750
1527
+ },
1528
+ {
1529
+ "epoch": 0.784,
1530
+ "grad_norm": 0.1492939293384552,
1531
+ "learning_rate": 1.82424e-05,
1532
+ "loss": 0.0599,
1533
+ "step": 9800
1534
+ },
1535
+ {
1536
+ "epoch": 0.788,
1537
+ "grad_norm": 0.08888087421655655,
1538
+ "learning_rate": 1.81824e-05,
1539
+ "loss": 0.0591,
1540
+ "step": 9850
1541
+ },
1542
+ {
1543
+ "epoch": 0.792,
1544
+ "grad_norm": 0.11865179240703583,
1545
+ "learning_rate": 1.8122399999999998e-05,
1546
+ "loss": 0.0581,
1547
+ "step": 9900
1548
+ },
1549
+ {
1550
+ "epoch": 0.796,
1551
+ "grad_norm": 0.18105269968509674,
1552
+ "learning_rate": 1.80624e-05,
1553
+ "loss": 0.0592,
1554
+ "step": 9950
1555
+ },
1556
+ {
1557
+ "epoch": 0.8,
1558
+ "grad_norm": 0.11952503770589828,
1559
+ "learning_rate": 1.80024e-05,
1560
+ "loss": 0.0567,
1561
+ "step": 10000
1562
+ },
1563
+ {
1564
+ "epoch": 0.8,
1565
+ "eval_loss": 0.08371368050575256,
1566
+ "eval_runtime": 88.0598,
1567
+ "eval_samples_per_second": 22.712,
1568
+ "eval_steps_per_second": 5.678,
1569
+ "step": 10000
1570
  }
1571
  ],
1572
  "logging_steps": 50,
 
1586
  "attributes": {}
1587
  }
1588
  },
1589
+ "total_flos": 2.43583156224e+16,
1590
  "train_batch_size": 4,
1591
  "trial_name": null,
1592
  "trial_params": null