FormlessAI commited on
Commit
0fad8f9
·
verified ·
1 Parent(s): c241758

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f7a8d58b4c09ade6f079e7b6a46e65c4b32950956c4f0dfb9b6939df8a17f87
3
  size 69760376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0acbc6280d59d7c546d4019439dd15028d08b91d45d71b01e76d925d5b9aa047
3
  size 69760376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f03e4a75d05a3a69122dc2cde414860903f0552bb86a96ea0c3ca9b096fa4f0
3
  size 35674187
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9288679039f283abf554e10a50a709b5399da64be515ef088c22abd9c4696b5
3
  size 35674187
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:128263f9d56df8dfbebdf37b11d1574802ff808151ff8b1eaf516edf15d33663
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:613c246f7daefe145538132c58a45b1f05c81b12e71ac5e830125551ea6dba3f
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af755915271b882008c5dc7bb597fd721d504e6913687828b26b35921fe7eb3c
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9291f46bb0ea12256885a523949d99cb2662c99c6053a28fd3413aa853f7487e
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bace330f16736829343cc88653634357667c6ace471d122f10b8c2f916085bd
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf1ff5a38e6a8205b2cb133f2fe8a22c4a8fff158447854d82be8ecb3a5173b4
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d28958f4c0698d32eee35941b6e510c911f3e053aeeb69a91c16b23f28de1d2
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7fbccc240607f30e73da17be501b2e69611822ad5cbb5bf68c3113e547a764a
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cffc4dbac33d7f056f73816d3b2c381c4dec0851d36a6b2b5d0ea3711b36c12
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5544d99b47a50f9a0dfa3c93e360c08a1e2651081f44245ce601a2c2cc1689e5
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 1.5769575834274292,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.29896430223867315,
6
  "eval_steps": 50,
7
- "global_step": 4200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6560,6 +6560,162 @@
6560
  "eval_samples_per_second": 507.408,
6561
  "eval_steps_per_second": 15.857,
6562
  "step": 4200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6563
  }
6564
  ],
6565
  "logging_steps": 5,
@@ -6588,7 +6744,7 @@
6588
  "attributes": {}
6589
  }
6590
  },
6591
- "total_flos": 2.6306887513695846e+17,
6592
  "train_batch_size": 8,
6593
  "trial_name": null,
6594
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 1.5748103857040405,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.30608249991102254,
6
  "eval_steps": 50,
7
+ "global_step": 4300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6560
  "eval_samples_per_second": 507.408,
6561
  "eval_steps_per_second": 15.857,
6562
  "step": 4200
6563
+ },
6564
+ {
6565
+ "epoch": 0.29932021212229065,
6566
+ "grad_norm": 1.0814719200134277,
6567
+ "learning_rate": 0.00019736991588454418,
6568
+ "loss": 1.58,
6569
+ "step": 4205
6570
+ },
6571
+ {
6572
+ "epoch": 0.2996761220059081,
6573
+ "grad_norm": 1.125158429145813,
6574
+ "learning_rate": 0.00019736353172038974,
6575
+ "loss": 1.5156,
6576
+ "step": 4210
6577
+ },
6578
+ {
6579
+ "epoch": 0.3000320318895256,
6580
+ "grad_norm": 1.249143123626709,
6581
+ "learning_rate": 0.00019735713992079262,
6582
+ "loss": 1.5065,
6583
+ "step": 4215
6584
+ },
6585
+ {
6586
+ "epoch": 0.30038794177314304,
6587
+ "grad_norm": 1.0526578426361084,
6588
+ "learning_rate": 0.00019735074048625413,
6589
+ "loss": 1.4694,
6590
+ "step": 4220
6591
+ },
6592
+ {
6593
+ "epoch": 0.3007438516567605,
6594
+ "grad_norm": 0.9182873368263245,
6595
+ "learning_rate": 0.0001973443334172761,
6596
+ "loss": 1.4801,
6597
+ "step": 4225
6598
+ },
6599
+ {
6600
+ "epoch": 0.301099761540378,
6601
+ "grad_norm": 0.9803130626678467,
6602
+ "learning_rate": 0.000197337918714361,
6603
+ "loss": 1.5348,
6604
+ "step": 4230
6605
+ },
6606
+ {
6607
+ "epoch": 0.30145567142399543,
6608
+ "grad_norm": 1.5117913484573364,
6609
+ "learning_rate": 0.00019733149637801188,
6610
+ "loss": 1.6197,
6611
+ "step": 4235
6612
+ },
6613
+ {
6614
+ "epoch": 0.30181158130761293,
6615
+ "grad_norm": 0.9475013017654419,
6616
+ "learning_rate": 0.00019732506640873237,
6617
+ "loss": 1.5342,
6618
+ "step": 4240
6619
+ },
6620
+ {
6621
+ "epoch": 0.3021674911912304,
6622
+ "grad_norm": 0.7990288734436035,
6623
+ "learning_rate": 0.00019731862880702675,
6624
+ "loss": 1.4877,
6625
+ "step": 4245
6626
+ },
6627
+ {
6628
+ "epoch": 0.3025234010748479,
6629
+ "grad_norm": 0.9656383395195007,
6630
+ "learning_rate": 0.00019731218357339983,
6631
+ "loss": 1.5794,
6632
+ "step": 4250
6633
+ },
6634
+ {
6635
+ "epoch": 0.3025234010748479,
6636
+ "eval_loss": 1.5782647132873535,
6637
+ "eval_runtime": 197.0151,
6638
+ "eval_samples_per_second": 507.068,
6639
+ "eval_steps_per_second": 15.847,
6640
+ "step": 4250
6641
+ },
6642
+ {
6643
+ "epoch": 0.3028793109584653,
6644
+ "grad_norm": 0.9215989708900452,
6645
+ "learning_rate": 0.00019730573070835713,
6646
+ "loss": 1.5015,
6647
+ "step": 4255
6648
+ },
6649
+ {
6650
+ "epoch": 0.30323522084208276,
6651
+ "grad_norm": 0.9019184708595276,
6652
+ "learning_rate": 0.00019729927021240463,
6653
+ "loss": 1.4834,
6654
+ "step": 4260
6655
+ },
6656
+ {
6657
+ "epoch": 0.30359113072570026,
6658
+ "grad_norm": 1.0000808238983154,
6659
+ "learning_rate": 0.00019729280208604898,
6660
+ "loss": 1.6177,
6661
+ "step": 4265
6662
+ },
6663
+ {
6664
+ "epoch": 0.3039470406093177,
6665
+ "grad_norm": 1.110743522644043,
6666
+ "learning_rate": 0.00019728632632979746,
6667
+ "loss": 1.5632,
6668
+ "step": 4270
6669
+ },
6670
+ {
6671
+ "epoch": 0.3043029504929352,
6672
+ "grad_norm": 1.039726734161377,
6673
+ "learning_rate": 0.00019727984294415788,
6674
+ "loss": 1.4988,
6675
+ "step": 4275
6676
+ },
6677
+ {
6678
+ "epoch": 0.30465886037655265,
6679
+ "grad_norm": 1.0508739948272705,
6680
+ "learning_rate": 0.0001972733519296387,
6681
+ "loss": 1.6282,
6682
+ "step": 4280
6683
+ },
6684
+ {
6685
+ "epoch": 0.3050147702601701,
6686
+ "grad_norm": 0.9834769368171692,
6687
+ "learning_rate": 0.000197266853286749,
6688
+ "loss": 1.5576,
6689
+ "step": 4285
6690
+ },
6691
+ {
6692
+ "epoch": 0.3053706801437876,
6693
+ "grad_norm": 1.0661753416061401,
6694
+ "learning_rate": 0.0001972603470159983,
6695
+ "loss": 1.5619,
6696
+ "step": 4290
6697
+ },
6698
+ {
6699
+ "epoch": 0.30572659002740504,
6700
+ "grad_norm": 0.9293569326400757,
6701
+ "learning_rate": 0.00019725383311789693,
6702
+ "loss": 1.5036,
6703
+ "step": 4295
6704
+ },
6705
+ {
6706
+ "epoch": 0.30608249991102254,
6707
+ "grad_norm": 1.0038737058639526,
6708
+ "learning_rate": 0.0001972473115929557,
6709
+ "loss": 1.5417,
6710
+ "step": 4300
6711
+ },
6712
+ {
6713
+ "epoch": 0.30608249991102254,
6714
+ "eval_loss": 1.5748103857040405,
6715
+ "eval_runtime": 197.5572,
6716
+ "eval_samples_per_second": 505.676,
6717
+ "eval_steps_per_second": 15.803,
6718
+ "step": 4300
6719
  }
6720
  ],
6721
  "logging_steps": 5,
 
6744
  "attributes": {}
6745
  }
6746
  },
6747
+ "total_flos": 2.6945799231792742e+17,
6748
  "train_batch_size": 8,
6749
  "trial_name": null,
6750
  "trial_params": null