CocoRoF commited on
Commit
4659a02
·
verified ·
1 Parent(s): ffd1e93

Training in progress, step 7000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee3ca4993c74fdba66a4a45cec937d889ea635d5a3363dbf41258ab3cdb82d1e
3
  size 738367848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:320c1a6e527bda174c669c1f951ba0d8511688825b12496cdd957e88966c44d9
3
  size 738367848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:703c818dd2c8e17f4fd71ce7085e8a08cd52a233c9ec0efa1da31b9b6249f59f
3
  size 1476823354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57f4bc2a3c4abe8dda39abccfe59caa42f3c3826a1837f4bf0c5399a3526b447
3
  size 1476823354
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b92a5a9232e10290e92a7ee43e17a65c2d7fd5bd9b7fae4a78bb653de6ff7f1e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6677d2f94c1eeef9fd86043f9720c0ec154224510969ab790d1983791f375b88
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:027e63f43c97b9a1e6e633ec27654b2d81e59843c5c61895f16184d95b5ecfce
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4c2eafdb8010cffd636a540805e21c2ed7a900fa295f85b31d51d1a42084ef2
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ce4c5bab7dcc40e8eda0dabcca4b51013677ae4eb8d8b9aae51fdbac3ff5302
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:243c3ab92fec11f8d6189a1ee0c6563a7d0fa0286f6142dbd247bcce148d357d
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b04fd2cdaec73d40bd342c2736426a28ca23cb93fea46275f9c93f0355e8e51
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12c16967790d71b2e48b6adefaa270162917f0f86335d85480de9fb6860c7492
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fb08899b31a5efc329181f0ecc59c2d36f1c1b6251e03bcab322df2bd5b23a5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:369fdbde0f14abf383325d06785d666884716d0228d7dc8c5e50c13cfb424b27
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1f2ce10faab494375937f049ced3ce1d0fb669dd4ede3a5d75a6c0bc4eebe50
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb8ed41d9a3d876261e27e3c4c409c9b564db5930687ae4ec83f98c5023be0a7
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fb2fd90ebb124ea406837f497911487ba5c20d875615f6a03594328a7dafc26
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a76fd0f99241be8ebb729e7e60a6572e44b68c9f795cc3e8947a8ca4fa56c56
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de98b2885a6933e6cd867d0a9af94f7453c3971e0017fa67668e1a0ca515fd9d
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45c51dab87a7b7a65f8d23876cdf72e77eeca4ff50002d44cd0db1ecc3a43a63
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:662f374a56de952606a2f764f88941c488163b14a1fd8282c0553ed7f96dbcfe
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2dc388ca3bbc2159f39c9ce16c2f601ff32a9453701746d70cc6d36087ab16b
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.623242736644799,
5
  "eval_steps": 250,
6
- "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4591,6 +4591,770 @@
4591
  "eval_spearman_manhattan": 0.7343750357819732,
4592
  "eval_steps_per_second": 7.36,
4593
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4594
  }
4595
  ],
4596
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.560449859418932,
5
  "eval_steps": 250,
6
+ "global_step": 7000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4591
  "eval_spearman_manhattan": 0.7343750357819732,
4592
  "eval_steps_per_second": 7.36,
4593
  "step": 6000
4594
+ },
4595
+ {
4596
+ "epoch": 5.63261480787254,
4597
+ "grad_norm": 1.2122074365615845,
4598
+ "learning_rate": 9.955995841204294e-06,
4599
+ "loss": 0.0665,
4600
+ "step": 6010
4601
+ },
4602
+ {
4603
+ "epoch": 5.641986879100282,
4604
+ "grad_norm": 1.7832310199737549,
4605
+ "learning_rate": 9.955922622970026e-06,
4606
+ "loss": 0.063,
4607
+ "step": 6020
4608
+ },
4609
+ {
4610
+ "epoch": 5.651358950328023,
4611
+ "grad_norm": 1.1854170560836792,
4612
+ "learning_rate": 9.955849404735755e-06,
4613
+ "loss": 0.0573,
4614
+ "step": 6030
4615
+ },
4616
+ {
4617
+ "epoch": 5.660731021555764,
4618
+ "grad_norm": 1.6633968353271484,
4619
+ "learning_rate": 9.955776186501486e-06,
4620
+ "loss": 0.0549,
4621
+ "step": 6040
4622
+ },
4623
+ {
4624
+ "epoch": 5.670103092783505,
4625
+ "grad_norm": 1.31834077835083,
4626
+ "learning_rate": 9.955702968267219e-06,
4627
+ "loss": 0.0478,
4628
+ "step": 6050
4629
+ },
4630
+ {
4631
+ "epoch": 5.679475164011246,
4632
+ "grad_norm": 0.8284873962402344,
4633
+ "learning_rate": 9.95562975003295e-06,
4634
+ "loss": 0.0639,
4635
+ "step": 6060
4636
+ },
4637
+ {
4638
+ "epoch": 5.688847235238988,
4639
+ "grad_norm": 1.2393404245376587,
4640
+ "learning_rate": 9.95555653179868e-06,
4641
+ "loss": 0.0593,
4642
+ "step": 6070
4643
+ },
4644
+ {
4645
+ "epoch": 5.698219306466729,
4646
+ "grad_norm": 1.5327643156051636,
4647
+ "learning_rate": 9.95548331356441e-06,
4648
+ "loss": 0.0644,
4649
+ "step": 6080
4650
+ },
4651
+ {
4652
+ "epoch": 5.70759137769447,
4653
+ "grad_norm": 1.8985389471054077,
4654
+ "learning_rate": 9.955410095330142e-06,
4655
+ "loss": 0.0646,
4656
+ "step": 6090
4657
+ },
4658
+ {
4659
+ "epoch": 5.716963448922212,
4660
+ "grad_norm": 1.5896059274673462,
4661
+ "learning_rate": 9.955336877095872e-06,
4662
+ "loss": 0.0716,
4663
+ "step": 6100
4664
+ },
4665
+ {
4666
+ "epoch": 5.726335520149953,
4667
+ "grad_norm": 1.21624755859375,
4668
+ "learning_rate": 9.955263658861603e-06,
4669
+ "loss": 0.0559,
4670
+ "step": 6110
4671
+ },
4672
+ {
4673
+ "epoch": 5.735707591377695,
4674
+ "grad_norm": 1.3084664344787598,
4675
+ "learning_rate": 9.955190440627336e-06,
4676
+ "loss": 0.065,
4677
+ "step": 6120
4678
+ },
4679
+ {
4680
+ "epoch": 5.745079662605436,
4681
+ "grad_norm": 0.9755469560623169,
4682
+ "learning_rate": 9.955117222393066e-06,
4683
+ "loss": 0.0601,
4684
+ "step": 6130
4685
+ },
4686
+ {
4687
+ "epoch": 5.754451733833177,
4688
+ "grad_norm": 1.1662402153015137,
4689
+ "learning_rate": 9.955044004158797e-06,
4690
+ "loss": 0.0588,
4691
+ "step": 6140
4692
+ },
4693
+ {
4694
+ "epoch": 5.763823805060919,
4695
+ "grad_norm": 1.313323974609375,
4696
+ "learning_rate": 9.954970785924528e-06,
4697
+ "loss": 0.0667,
4698
+ "step": 6150
4699
+ },
4700
+ {
4701
+ "epoch": 5.77319587628866,
4702
+ "grad_norm": 1.4725874662399292,
4703
+ "learning_rate": 9.954897567690259e-06,
4704
+ "loss": 0.0619,
4705
+ "step": 6160
4706
+ },
4707
+ {
4708
+ "epoch": 5.782567947516402,
4709
+ "grad_norm": 1.3176454305648804,
4710
+ "learning_rate": 9.95482434945599e-06,
4711
+ "loss": 0.056,
4712
+ "step": 6170
4713
+ },
4714
+ {
4715
+ "epoch": 5.7919400187441425,
4716
+ "grad_norm": 1.0566222667694092,
4717
+ "learning_rate": 9.95475113122172e-06,
4718
+ "loss": 0.0587,
4719
+ "step": 6180
4720
+ },
4721
+ {
4722
+ "epoch": 5.8013120899718835,
4723
+ "grad_norm": 1.0623878240585327,
4724
+ "learning_rate": 9.95467791298745e-06,
4725
+ "loss": 0.0591,
4726
+ "step": 6190
4727
+ },
4728
+ {
4729
+ "epoch": 5.810684161199625,
4730
+ "grad_norm": 1.6217368841171265,
4731
+ "learning_rate": 9.954604694753183e-06,
4732
+ "loss": 0.0536,
4733
+ "step": 6200
4734
+ },
4735
+ {
4736
+ "epoch": 5.820056232427366,
4737
+ "grad_norm": 1.2574353218078613,
4738
+ "learning_rate": 9.954531476518912e-06,
4739
+ "loss": 0.0552,
4740
+ "step": 6210
4741
+ },
4742
+ {
4743
+ "epoch": 5.829428303655108,
4744
+ "grad_norm": 1.2605924606323242,
4745
+ "learning_rate": 9.954458258284643e-06,
4746
+ "loss": 0.0669,
4747
+ "step": 6220
4748
+ },
4749
+ {
4750
+ "epoch": 5.838800374882849,
4751
+ "grad_norm": 1.8283051252365112,
4752
+ "learning_rate": 9.954385040050375e-06,
4753
+ "loss": 0.0631,
4754
+ "step": 6230
4755
+ },
4756
+ {
4757
+ "epoch": 5.84817244611059,
4758
+ "grad_norm": 1.2457951307296753,
4759
+ "learning_rate": 9.954311821816106e-06,
4760
+ "loss": 0.0578,
4761
+ "step": 6240
4762
+ },
4763
+ {
4764
+ "epoch": 5.857544517338332,
4765
+ "grad_norm": 1.1618739366531372,
4766
+ "learning_rate": 9.954238603581837e-06,
4767
+ "loss": 0.0547,
4768
+ "step": 6250
4769
+ },
4770
+ {
4771
+ "epoch": 5.857544517338332,
4772
+ "eval_loss": 0.03839369863271713,
4773
+ "eval_pearson_cosine": 0.7663547396659851,
4774
+ "eval_pearson_dot": 0.7110079526901245,
4775
+ "eval_pearson_euclidean": 0.7369804978370667,
4776
+ "eval_pearson_manhattan": 0.738224983215332,
4777
+ "eval_runtime": 28.702,
4778
+ "eval_samples_per_second": 52.261,
4779
+ "eval_spearman_cosine": 0.766680322110213,
4780
+ "eval_spearman_dot": 0.7118792296635837,
4781
+ "eval_spearman_euclidean": 0.7420173359570077,
4782
+ "eval_spearman_manhattan": 0.7431811125331302,
4783
+ "eval_steps_per_second": 6.55,
4784
+ "step": 6250
4785
+ },
4786
+ {
4787
+ "epoch": 5.866916588566073,
4788
+ "grad_norm": 1.565491795539856,
4789
+ "learning_rate": 9.954165385347568e-06,
4790
+ "loss": 0.0634,
4791
+ "step": 6260
4792
+ },
4793
+ {
4794
+ "epoch": 5.876288659793815,
4795
+ "grad_norm": 1.412607192993164,
4796
+ "learning_rate": 9.954092167113298e-06,
4797
+ "loss": 0.0641,
4798
+ "step": 6270
4799
+ },
4800
+ {
4801
+ "epoch": 5.885660731021556,
4802
+ "grad_norm": 1.5475645065307617,
4803
+ "learning_rate": 9.95401894887903e-06,
4804
+ "loss": 0.058,
4805
+ "step": 6280
4806
+ },
4807
+ {
4808
+ "epoch": 5.895032802249297,
4809
+ "grad_norm": 1.6942791938781738,
4810
+ "learning_rate": 9.95394573064476e-06,
4811
+ "loss": 0.0668,
4812
+ "step": 6290
4813
+ },
4814
+ {
4815
+ "epoch": 5.904404873477039,
4816
+ "grad_norm": 1.286224603652954,
4817
+ "learning_rate": 9.953872512410492e-06,
4818
+ "loss": 0.058,
4819
+ "step": 6300
4820
+ },
4821
+ {
4822
+ "epoch": 5.91377694470478,
4823
+ "grad_norm": 1.5031893253326416,
4824
+ "learning_rate": 9.953799294176223e-06,
4825
+ "loss": 0.062,
4826
+ "step": 6310
4827
+ },
4828
+ {
4829
+ "epoch": 5.9231490159325215,
4830
+ "grad_norm": 1.416455864906311,
4831
+ "learning_rate": 9.953726075941952e-06,
4832
+ "loss": 0.0596,
4833
+ "step": 6320
4834
+ },
4835
+ {
4836
+ "epoch": 5.9325210871602625,
4837
+ "grad_norm": 1.3160662651062012,
4838
+ "learning_rate": 9.953652857707685e-06,
4839
+ "loss": 0.062,
4840
+ "step": 6330
4841
+ },
4842
+ {
4843
+ "epoch": 5.9418931583880035,
4844
+ "grad_norm": 0.9542105793952942,
4845
+ "learning_rate": 9.953579639473415e-06,
4846
+ "loss": 0.0645,
4847
+ "step": 6340
4848
+ },
4849
+ {
4850
+ "epoch": 5.951265229615745,
4851
+ "grad_norm": 1.4458489418029785,
4852
+ "learning_rate": 9.953506421239146e-06,
4853
+ "loss": 0.0563,
4854
+ "step": 6350
4855
+ },
4856
+ {
4857
+ "epoch": 5.960637300843486,
4858
+ "grad_norm": 1.0310072898864746,
4859
+ "learning_rate": 9.953433203004877e-06,
4860
+ "loss": 0.0567,
4861
+ "step": 6360
4862
+ },
4863
+ {
4864
+ "epoch": 5.970009372071228,
4865
+ "grad_norm": 1.4674971103668213,
4866
+ "learning_rate": 9.95335998477061e-06,
4867
+ "loss": 0.0579,
4868
+ "step": 6370
4869
+ },
4870
+ {
4871
+ "epoch": 5.979381443298969,
4872
+ "grad_norm": 1.229636311531067,
4873
+ "learning_rate": 9.953286766536338e-06,
4874
+ "loss": 0.0589,
4875
+ "step": 6380
4876
+ },
4877
+ {
4878
+ "epoch": 5.98875351452671,
4879
+ "grad_norm": 1.4654268026351929,
4880
+ "learning_rate": 9.95321354830207e-06,
4881
+ "loss": 0.0519,
4882
+ "step": 6390
4883
+ },
4884
+ {
4885
+ "epoch": 5.998125585754452,
4886
+ "grad_norm": 1.276367425918579,
4887
+ "learning_rate": 9.953140330067802e-06,
4888
+ "loss": 0.066,
4889
+ "step": 6400
4890
+ },
4891
+ {
4892
+ "epoch": 6.007497656982193,
4893
+ "grad_norm": 1.0710258483886719,
4894
+ "learning_rate": 9.953067111833532e-06,
4895
+ "loss": 0.0462,
4896
+ "step": 6410
4897
+ },
4898
+ {
4899
+ "epoch": 6.016869728209935,
4900
+ "grad_norm": 0.9316133856773376,
4901
+ "learning_rate": 9.952993893599263e-06,
4902
+ "loss": 0.044,
4903
+ "step": 6420
4904
+ },
4905
+ {
4906
+ "epoch": 6.026241799437676,
4907
+ "grad_norm": 0.8318607211112976,
4908
+ "learning_rate": 9.952920675364994e-06,
4909
+ "loss": 0.0399,
4910
+ "step": 6430
4911
+ },
4912
+ {
4913
+ "epoch": 6.035613870665417,
4914
+ "grad_norm": 0.9682859182357788,
4915
+ "learning_rate": 9.952847457130725e-06,
4916
+ "loss": 0.0371,
4917
+ "step": 6440
4918
+ },
4919
+ {
4920
+ "epoch": 6.044985941893159,
4921
+ "grad_norm": 0.8720560669898987,
4922
+ "learning_rate": 9.952774238896455e-06,
4923
+ "loss": 0.0453,
4924
+ "step": 6450
4925
+ },
4926
+ {
4927
+ "epoch": 6.0543580131209,
4928
+ "grad_norm": 0.7835734486579895,
4929
+ "learning_rate": 9.952701020662186e-06,
4930
+ "loss": 0.0475,
4931
+ "step": 6460
4932
+ },
4933
+ {
4934
+ "epoch": 6.0637300843486415,
4935
+ "grad_norm": 1.4373115301132202,
4936
+ "learning_rate": 9.952627802427917e-06,
4937
+ "loss": 0.0416,
4938
+ "step": 6470
4939
+ },
4940
+ {
4941
+ "epoch": 6.073102155576382,
4942
+ "grad_norm": 1.317517638206482,
4943
+ "learning_rate": 9.95255458419365e-06,
4944
+ "loss": 0.0425,
4945
+ "step": 6480
4946
+ },
4947
+ {
4948
+ "epoch": 6.082474226804123,
4949
+ "grad_norm": 1.1831910610198975,
4950
+ "learning_rate": 9.952481365959378e-06,
4951
+ "loss": 0.0471,
4952
+ "step": 6490
4953
+ },
4954
+ {
4955
+ "epoch": 6.091846298031865,
4956
+ "grad_norm": 1.0449994802474976,
4957
+ "learning_rate": 9.95240814772511e-06,
4958
+ "loss": 0.0476,
4959
+ "step": 6500
4960
+ },
4961
+ {
4962
+ "epoch": 6.091846298031865,
4963
+ "eval_loss": 0.03876839950680733,
4964
+ "eval_pearson_cosine": 0.7637665867805481,
4965
+ "eval_pearson_dot": 0.7007623910903931,
4966
+ "eval_pearson_euclidean": 0.7322614192962646,
4967
+ "eval_pearson_manhattan": 0.7338271141052246,
4968
+ "eval_runtime": 22.3296,
4969
+ "eval_samples_per_second": 67.175,
4970
+ "eval_spearman_cosine": 0.7641548541194557,
4971
+ "eval_spearman_dot": 0.7012776165056044,
4972
+ "eval_spearman_euclidean": 0.7377602855270703,
4973
+ "eval_spearman_manhattan": 0.73918298594716,
4974
+ "eval_steps_per_second": 8.419,
4975
+ "step": 6500
4976
+ },
4977
+ {
4978
+ "epoch": 6.101218369259606,
4979
+ "grad_norm": 0.7369022965431213,
4980
+ "learning_rate": 9.952334929490842e-06,
4981
+ "loss": 0.0364,
4982
+ "step": 6510
4983
+ },
4984
+ {
4985
+ "epoch": 6.110590440487348,
4986
+ "grad_norm": 0.8673484325408936,
4987
+ "learning_rate": 9.952261711256572e-06,
4988
+ "loss": 0.0498,
4989
+ "step": 6520
4990
+ },
4991
+ {
4992
+ "epoch": 6.119962511715089,
4993
+ "grad_norm": 1.5341424942016602,
4994
+ "learning_rate": 9.952188493022303e-06,
4995
+ "loss": 0.045,
4996
+ "step": 6530
4997
+ },
4998
+ {
4999
+ "epoch": 6.12933458294283,
5000
+ "grad_norm": 0.8899186253547668,
5001
+ "learning_rate": 9.952115274788034e-06,
5002
+ "loss": 0.0441,
5003
+ "step": 6540
5004
+ },
5005
+ {
5006
+ "epoch": 6.138706654170572,
5007
+ "grad_norm": 1.0708824396133423,
5008
+ "learning_rate": 9.952042056553765e-06,
5009
+ "loss": 0.0458,
5010
+ "step": 6550
5011
+ },
5012
+ {
5013
+ "epoch": 6.148078725398313,
5014
+ "grad_norm": 1.1551895141601562,
5015
+ "learning_rate": 9.951968838319495e-06,
5016
+ "loss": 0.0421,
5017
+ "step": 6560
5018
+ },
5019
+ {
5020
+ "epoch": 6.157450796626055,
5021
+ "grad_norm": 1.0832526683807373,
5022
+ "learning_rate": 9.951895620085226e-06,
5023
+ "loss": 0.0462,
5024
+ "step": 6570
5025
+ },
5026
+ {
5027
+ "epoch": 6.166822867853796,
5028
+ "grad_norm": 1.303536295890808,
5029
+ "learning_rate": 9.951822401850959e-06,
5030
+ "loss": 0.0423,
5031
+ "step": 6580
5032
+ },
5033
+ {
5034
+ "epoch": 6.176194939081537,
5035
+ "grad_norm": 1.2826794385910034,
5036
+ "learning_rate": 9.95174918361669e-06,
5037
+ "loss": 0.0463,
5038
+ "step": 6590
5039
+ },
5040
+ {
5041
+ "epoch": 6.185567010309279,
5042
+ "grad_norm": 1.0724890232086182,
5043
+ "learning_rate": 9.95167596538242e-06,
5044
+ "loss": 0.043,
5045
+ "step": 6600
5046
+ },
5047
+ {
5048
+ "epoch": 6.1949390815370196,
5049
+ "grad_norm": 0.9407768249511719,
5050
+ "learning_rate": 9.95160274714815e-06,
5051
+ "loss": 0.045,
5052
+ "step": 6610
5053
+ },
5054
+ {
5055
+ "epoch": 6.204311152764761,
5056
+ "grad_norm": 1.1686878204345703,
5057
+ "learning_rate": 9.951529528913882e-06,
5058
+ "loss": 0.0407,
5059
+ "step": 6620
5060
+ },
5061
+ {
5062
+ "epoch": 6.213683223992502,
5063
+ "grad_norm": 1.5972820520401,
5064
+ "learning_rate": 9.951456310679612e-06,
5065
+ "loss": 0.0449,
5066
+ "step": 6630
5067
+ },
5068
+ {
5069
+ "epoch": 6.223055295220243,
5070
+ "grad_norm": 0.7610195875167847,
5071
+ "learning_rate": 9.951383092445343e-06,
5072
+ "loss": 0.0397,
5073
+ "step": 6640
5074
+ },
5075
+ {
5076
+ "epoch": 6.232427366447985,
5077
+ "grad_norm": 1.02704656124115,
5078
+ "learning_rate": 9.951309874211075e-06,
5079
+ "loss": 0.0448,
5080
+ "step": 6650
5081
+ },
5082
+ {
5083
+ "epoch": 6.241799437675726,
5084
+ "grad_norm": 0.8035688400268555,
5085
+ "learning_rate": 9.951236655976805e-06,
5086
+ "loss": 0.0445,
5087
+ "step": 6660
5088
+ },
5089
+ {
5090
+ "epoch": 6.251171508903468,
5091
+ "grad_norm": 1.019539475440979,
5092
+ "learning_rate": 9.951163437742535e-06,
5093
+ "loss": 0.0452,
5094
+ "step": 6670
5095
+ },
5096
+ {
5097
+ "epoch": 6.260543580131209,
5098
+ "grad_norm": 1.662574291229248,
5099
+ "learning_rate": 9.951090219508268e-06,
5100
+ "loss": 0.0517,
5101
+ "step": 6680
5102
+ },
5103
+ {
5104
+ "epoch": 6.26991565135895,
5105
+ "grad_norm": 1.1599600315093994,
5106
+ "learning_rate": 9.951017001273998e-06,
5107
+ "loss": 0.0493,
5108
+ "step": 6690
5109
+ },
5110
+ {
5111
+ "epoch": 6.279287722586692,
5112
+ "grad_norm": 0.7756074070930481,
5113
+ "learning_rate": 9.95094378303973e-06,
5114
+ "loss": 0.048,
5115
+ "step": 6700
5116
+ },
5117
+ {
5118
+ "epoch": 6.288659793814433,
5119
+ "grad_norm": 1.0959285497665405,
5120
+ "learning_rate": 9.95087056480546e-06,
5121
+ "loss": 0.0501,
5122
+ "step": 6710
5123
+ },
5124
+ {
5125
+ "epoch": 6.298031865042175,
5126
+ "grad_norm": 1.2311910390853882,
5127
+ "learning_rate": 9.95079734657119e-06,
5128
+ "loss": 0.0486,
5129
+ "step": 6720
5130
+ },
5131
+ {
5132
+ "epoch": 6.307403936269916,
5133
+ "grad_norm": 1.2149254083633423,
5134
+ "learning_rate": 9.950724128336921e-06,
5135
+ "loss": 0.0389,
5136
+ "step": 6730
5137
+ },
5138
+ {
5139
+ "epoch": 6.316776007497657,
5140
+ "grad_norm": 1.5355291366577148,
5141
+ "learning_rate": 9.950650910102652e-06,
5142
+ "loss": 0.0472,
5143
+ "step": 6740
5144
+ },
5145
+ {
5146
+ "epoch": 6.3261480787253985,
5147
+ "grad_norm": 1.1264081001281738,
5148
+ "learning_rate": 9.950577691868385e-06,
5149
+ "loss": 0.043,
5150
+ "step": 6750
5151
+ },
5152
+ {
5153
+ "epoch": 6.3261480787253985,
5154
+ "eval_loss": 0.03764544054865837,
5155
+ "eval_pearson_cosine": 0.7692497968673706,
5156
+ "eval_pearson_dot": 0.7138222455978394,
5157
+ "eval_pearson_euclidean": 0.7343003749847412,
5158
+ "eval_pearson_manhattan": 0.7356712818145752,
5159
+ "eval_runtime": 22.6897,
5160
+ "eval_samples_per_second": 66.109,
5161
+ "eval_spearman_cosine": 0.7695765922931803,
5162
+ "eval_spearman_dot": 0.7152262336240688,
5163
+ "eval_spearman_euclidean": 0.739557951171161,
5164
+ "eval_spearman_manhattan": 0.7408550126908494,
5165
+ "eval_steps_per_second": 8.286,
5166
+ "step": 6750
5167
+ },
5168
+ {
5169
+ "epoch": 6.3355201499531395,
5170
+ "grad_norm": 0.6277545690536499,
5171
+ "learning_rate": 9.950504473634115e-06,
5172
+ "loss": 0.0406,
5173
+ "step": 6760
5174
+ },
5175
+ {
5176
+ "epoch": 6.344892221180881,
5177
+ "grad_norm": 1.3999137878417969,
5178
+ "learning_rate": 9.950431255399846e-06,
5179
+ "loss": 0.0447,
5180
+ "step": 6770
5181
+ },
5182
+ {
5183
+ "epoch": 6.354264292408622,
5184
+ "grad_norm": 0.7465086579322815,
5185
+ "learning_rate": 9.950358037165577e-06,
5186
+ "loss": 0.0502,
5187
+ "step": 6780
5188
+ },
5189
+ {
5190
+ "epoch": 6.363636363636363,
5191
+ "grad_norm": 1.1154383420944214,
5192
+ "learning_rate": 9.950284818931308e-06,
5193
+ "loss": 0.05,
5194
+ "step": 6790
5195
+ },
5196
+ {
5197
+ "epoch": 6.373008434864105,
5198
+ "grad_norm": 1.1133472919464111,
5199
+ "learning_rate": 9.950211600697038e-06,
5200
+ "loss": 0.0473,
5201
+ "step": 6800
5202
+ },
5203
+ {
5204
+ "epoch": 6.382380506091846,
5205
+ "grad_norm": 1.0995352268218994,
5206
+ "learning_rate": 9.95013838246277e-06,
5207
+ "loss": 0.0414,
5208
+ "step": 6810
5209
+ },
5210
+ {
5211
+ "epoch": 6.391752577319588,
5212
+ "grad_norm": 0.9666862487792969,
5213
+ "learning_rate": 9.9500651642285e-06,
5214
+ "loss": 0.049,
5215
+ "step": 6820
5216
+ },
5217
+ {
5218
+ "epoch": 6.401124648547329,
5219
+ "grad_norm": 1.1517918109893799,
5220
+ "learning_rate": 9.94999194599423e-06,
5221
+ "loss": 0.0413,
5222
+ "step": 6830
5223
+ },
5224
+ {
5225
+ "epoch": 6.41049671977507,
5226
+ "grad_norm": 0.5381759405136108,
5227
+ "learning_rate": 9.949918727759961e-06,
5228
+ "loss": 0.0418,
5229
+ "step": 6840
5230
+ },
5231
+ {
5232
+ "epoch": 6.419868791002812,
5233
+ "grad_norm": 0.973006546497345,
5234
+ "learning_rate": 9.949845509525692e-06,
5235
+ "loss": 0.0495,
5236
+ "step": 6850
5237
+ },
5238
+ {
5239
+ "epoch": 6.429240862230553,
5240
+ "grad_norm": 1.126633882522583,
5241
+ "learning_rate": 9.949772291291425e-06,
5242
+ "loss": 0.0493,
5243
+ "step": 6860
5244
+ },
5245
+ {
5246
+ "epoch": 6.438612933458295,
5247
+ "grad_norm": 0.7894268035888672,
5248
+ "learning_rate": 9.949699073057155e-06,
5249
+ "loss": 0.0436,
5250
+ "step": 6870
5251
+ },
5252
+ {
5253
+ "epoch": 6.447985004686036,
5254
+ "grad_norm": 0.7125422358512878,
5255
+ "learning_rate": 9.949625854822886e-06,
5256
+ "loss": 0.0433,
5257
+ "step": 6880
5258
+ },
5259
+ {
5260
+ "epoch": 6.457357075913777,
5261
+ "grad_norm": 0.9013342261314392,
5262
+ "learning_rate": 9.949552636588617e-06,
5263
+ "loss": 0.0376,
5264
+ "step": 6890
5265
+ },
5266
+ {
5267
+ "epoch": 6.4667291471415185,
5268
+ "grad_norm": 1.132384181022644,
5269
+ "learning_rate": 9.949479418354348e-06,
5270
+ "loss": 0.0482,
5271
+ "step": 6900
5272
+ },
5273
+ {
5274
+ "epoch": 6.4761012183692594,
5275
+ "grad_norm": 1.0104179382324219,
5276
+ "learning_rate": 9.949406200120078e-06,
5277
+ "loss": 0.0485,
5278
+ "step": 6910
5279
+ },
5280
+ {
5281
+ "epoch": 6.485473289597001,
5282
+ "grad_norm": 1.233464241027832,
5283
+ "learning_rate": 9.949332981885809e-06,
5284
+ "loss": 0.0478,
5285
+ "step": 6920
5286
+ },
5287
+ {
5288
+ "epoch": 6.494845360824742,
5289
+ "grad_norm": 0.7077954411506653,
5290
+ "learning_rate": 9.949259763651542e-06,
5291
+ "loss": 0.0464,
5292
+ "step": 6930
5293
+ },
5294
+ {
5295
+ "epoch": 6.504217432052483,
5296
+ "grad_norm": 1.5273882150650024,
5297
+ "learning_rate": 9.949186545417272e-06,
5298
+ "loss": 0.0404,
5299
+ "step": 6940
5300
+ },
5301
+ {
5302
+ "epoch": 6.513589503280225,
5303
+ "grad_norm": 1.2204720973968506,
5304
+ "learning_rate": 9.949113327183001e-06,
5305
+ "loss": 0.0375,
5306
+ "step": 6950
5307
+ },
5308
+ {
5309
+ "epoch": 6.522961574507966,
5310
+ "grad_norm": 0.9539759755134583,
5311
+ "learning_rate": 9.949040108948734e-06,
5312
+ "loss": 0.0397,
5313
+ "step": 6960
5314
+ },
5315
+ {
5316
+ "epoch": 6.532333645735708,
5317
+ "grad_norm": 1.949201226234436,
5318
+ "learning_rate": 9.948966890714465e-06,
5319
+ "loss": 0.0476,
5320
+ "step": 6970
5321
+ },
5322
+ {
5323
+ "epoch": 6.541705716963449,
5324
+ "grad_norm": 1.046915888786316,
5325
+ "learning_rate": 9.948893672480195e-06,
5326
+ "loss": 0.0445,
5327
+ "step": 6980
5328
+ },
5329
+ {
5330
+ "epoch": 6.55107778819119,
5331
+ "grad_norm": 0.8392923474311829,
5332
+ "learning_rate": 9.948820454245926e-06,
5333
+ "loss": 0.0502,
5334
+ "step": 6990
5335
+ },
5336
+ {
5337
+ "epoch": 6.560449859418932,
5338
+ "grad_norm": 1.357014536857605,
5339
+ "learning_rate": 9.948747236011659e-06,
5340
+ "loss": 0.0436,
5341
+ "step": 7000
5342
+ },
5343
+ {
5344
+ "epoch": 6.560449859418932,
5345
+ "eval_loss": 0.03813355416059494,
5346
+ "eval_pearson_cosine": 0.7662351131439209,
5347
+ "eval_pearson_dot": 0.7104849219322205,
5348
+ "eval_pearson_euclidean": 0.7334129810333252,
5349
+ "eval_pearson_manhattan": 0.7350986003875732,
5350
+ "eval_runtime": 22.7512,
5351
+ "eval_samples_per_second": 65.931,
5352
+ "eval_spearman_cosine": 0.7662226343415417,
5353
+ "eval_spearman_dot": 0.7115825441503862,
5354
+ "eval_spearman_euclidean": 0.7384103552275764,
5355
+ "eval_spearman_manhattan": 0.7397995971405482,
5356
+ "eval_steps_per_second": 8.263,
5357
+ "step": 7000
5358
  }
5359
  ],
5360
  "logging_steps": 10,