CocoRoF commited on
Commit
e82b268
·
verified ·
1 Parent(s): 0c594a3

Training in progress, step 7000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bea384ab228f9e4e48eb09750911bf53afac4e27ba8e57f44979634ad866b1c1
3
  size 737580392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb141f64f181f8aaa27c9d3a77af0d93d7afb9857067a90441308d67ea3f00a0
3
  size 737580392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a64a6a144fbcea9043ebf95d58cf6dc0de1643977d324340c43b19a2a87324c9
3
  size 1475248442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd3f4a311baa95b8c3c9a1270d3c59f445a214334d14d8eaebcf84317b8587c7
3
  size 1475248442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e59fd29507b0a3f94de60acf1485068cfbd28d4220459a98545dc01f241293d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9819055317e0aa1215ad120239bc4cecc175225c0dc18c98ca0bffe9f465133f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:923c446f373ad2f0ffab5651c7c112ff9f6139581eac3a16834e284a234062e7
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:879c433ff59d472a2c5d7c4da04ae14ac33f2ec2552c8f49d34302050bc28fb0
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.8116213683223994,
5
  "eval_steps": 250,
6
- "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4591,6 +4591,770 @@
4591
  "eval_spearman_manhattan": 0.8036153637269691,
4592
  "eval_steps_per_second": 29.47,
4593
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4594
  }
4595
  ],
4596
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.280224929709466,
5
  "eval_steps": 250,
6
+ "global_step": 7000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4591
  "eval_spearman_manhattan": 0.8036153637269691,
4592
  "eval_steps_per_second": 29.47,
4593
  "step": 6000
4594
+ },
4595
+ {
4596
+ "epoch": 2.81630740393627,
4597
+ "grad_norm": 1.460700273513794,
4598
+ "learning_rate": 1.8239807872539834e-05,
4599
+ "loss": 0.1745,
4600
+ "step": 6010
4601
+ },
4602
+ {
4603
+ "epoch": 2.820993439550141,
4604
+ "grad_norm": 1.7924867868423462,
4605
+ "learning_rate": 1.8236879100281163e-05,
4606
+ "loss": 0.151,
4607
+ "step": 6020
4608
+ },
4609
+ {
4610
+ "epoch": 2.8256794751640113,
4611
+ "grad_norm": 1.7853138446807861,
4612
+ "learning_rate": 1.8233950328022493e-05,
4613
+ "loss": 0.1629,
4614
+ "step": 6030
4615
+ },
4616
+ {
4617
+ "epoch": 2.830365510777882,
4618
+ "grad_norm": 1.8576308488845825,
4619
+ "learning_rate": 1.8231021555763825e-05,
4620
+ "loss": 0.173,
4621
+ "step": 6040
4622
+ },
4623
+ {
4624
+ "epoch": 2.8350515463917527,
4625
+ "grad_norm": 1.7521380186080933,
4626
+ "learning_rate": 1.8228092783505155e-05,
4627
+ "loss": 0.1741,
4628
+ "step": 6050
4629
+ },
4630
+ {
4631
+ "epoch": 2.839737582005623,
4632
+ "grad_norm": 1.6762547492980957,
4633
+ "learning_rate": 1.8225164011246488e-05,
4634
+ "loss": 0.1662,
4635
+ "step": 6060
4636
+ },
4637
+ {
4638
+ "epoch": 2.844423617619494,
4639
+ "grad_norm": 1.985397219657898,
4640
+ "learning_rate": 1.8222235238987817e-05,
4641
+ "loss": 0.1889,
4642
+ "step": 6070
4643
+ },
4644
+ {
4645
+ "epoch": 2.8491096532333646,
4646
+ "grad_norm": 2.0703446865081787,
4647
+ "learning_rate": 1.821930646672915e-05,
4648
+ "loss": 0.1772,
4649
+ "step": 6080
4650
+ },
4651
+ {
4652
+ "epoch": 2.853795688847235,
4653
+ "grad_norm": 2.128908395767212,
4654
+ "learning_rate": 1.821637769447048e-05,
4655
+ "loss": 0.1644,
4656
+ "step": 6090
4657
+ },
4658
+ {
4659
+ "epoch": 2.858481724461106,
4660
+ "grad_norm": 2.1612260341644287,
4661
+ "learning_rate": 1.821344892221181e-05,
4662
+ "loss": 0.2097,
4663
+ "step": 6100
4664
+ },
4665
+ {
4666
+ "epoch": 2.8631677600749765,
4667
+ "grad_norm": 2.220729351043701,
4668
+ "learning_rate": 1.8210520149953142e-05,
4669
+ "loss": 0.1746,
4670
+ "step": 6110
4671
+ },
4672
+ {
4673
+ "epoch": 2.8678537956888475,
4674
+ "grad_norm": 2.017711639404297,
4675
+ "learning_rate": 1.820759137769447e-05,
4676
+ "loss": 0.1816,
4677
+ "step": 6120
4678
+ },
4679
+ {
4680
+ "epoch": 2.872539831302718,
4681
+ "grad_norm": 1.7267364263534546,
4682
+ "learning_rate": 1.8204662605435805e-05,
4683
+ "loss": 0.1738,
4684
+ "step": 6130
4685
+ },
4686
+ {
4687
+ "epoch": 2.8772258669165884,
4688
+ "grad_norm": 1.7841973304748535,
4689
+ "learning_rate": 1.8201733833177134e-05,
4690
+ "loss": 0.1598,
4691
+ "step": 6140
4692
+ },
4693
+ {
4694
+ "epoch": 2.8819119025304594,
4695
+ "grad_norm": 1.9603809118270874,
4696
+ "learning_rate": 1.8198805060918464e-05,
4697
+ "loss": 0.1867,
4698
+ "step": 6150
4699
+ },
4700
+ {
4701
+ "epoch": 2.88659793814433,
4702
+ "grad_norm": 2.0242254734039307,
4703
+ "learning_rate": 1.8195876288659796e-05,
4704
+ "loss": 0.164,
4705
+ "step": 6160
4706
+ },
4707
+ {
4708
+ "epoch": 2.891283973758201,
4709
+ "grad_norm": 1.3930503129959106,
4710
+ "learning_rate": 1.8192947516401126e-05,
4711
+ "loss": 0.1469,
4712
+ "step": 6170
4713
+ },
4714
+ {
4715
+ "epoch": 2.8959700093720713,
4716
+ "grad_norm": 1.4639194011688232,
4717
+ "learning_rate": 1.8190018744142455e-05,
4718
+ "loss": 0.1676,
4719
+ "step": 6180
4720
+ },
4721
+ {
4722
+ "epoch": 2.9006560449859418,
4723
+ "grad_norm": 1.9063324928283691,
4724
+ "learning_rate": 1.8187089971883788e-05,
4725
+ "loss": 0.1644,
4726
+ "step": 6190
4727
+ },
4728
+ {
4729
+ "epoch": 2.9053420805998127,
4730
+ "grad_norm": 1.8006539344787598,
4731
+ "learning_rate": 1.8184161199625118e-05,
4732
+ "loss": 0.1695,
4733
+ "step": 6200
4734
+ },
4735
+ {
4736
+ "epoch": 2.910028116213683,
4737
+ "grad_norm": 1.7670259475708008,
4738
+ "learning_rate": 1.8181232427366447e-05,
4739
+ "loss": 0.1624,
4740
+ "step": 6210
4741
+ },
4742
+ {
4743
+ "epoch": 2.914714151827554,
4744
+ "grad_norm": 1.9622496366500854,
4745
+ "learning_rate": 1.817830365510778e-05,
4746
+ "loss": 0.1777,
4747
+ "step": 6220
4748
+ },
4749
+ {
4750
+ "epoch": 2.9194001874414246,
4751
+ "grad_norm": 1.9627212285995483,
4752
+ "learning_rate": 1.817537488284911e-05,
4753
+ "loss": 0.1817,
4754
+ "step": 6230
4755
+ },
4756
+ {
4757
+ "epoch": 2.924086223055295,
4758
+ "grad_norm": 1.9101510047912598,
4759
+ "learning_rate": 1.8172446110590443e-05,
4760
+ "loss": 0.1743,
4761
+ "step": 6240
4762
+ },
4763
+ {
4764
+ "epoch": 2.928772258669166,
4765
+ "grad_norm": 1.890410304069519,
4766
+ "learning_rate": 1.8169517338331772e-05,
4767
+ "loss": 0.152,
4768
+ "step": 6250
4769
+ },
4770
+ {
4771
+ "epoch": 2.928772258669166,
4772
+ "eval_loss": 0.03263232484459877,
4773
+ "eval_pearson_cosine": 0.8153769983260584,
4774
+ "eval_pearson_dot": 0.7414947307896895,
4775
+ "eval_pearson_euclidean": 0.7908278401422209,
4776
+ "eval_pearson_manhattan": 0.7928717466268438,
4777
+ "eval_runtime": 3.1717,
4778
+ "eval_samples_per_second": 472.936,
4779
+ "eval_spearman_cosine": 0.8150926580171975,
4780
+ "eval_spearman_dot": 0.7426551131852436,
4781
+ "eval_spearman_euclidean": 0.8000506096598716,
4782
+ "eval_spearman_manhattan": 0.8018413708002138,
4783
+ "eval_steps_per_second": 29.637,
4784
+ "step": 6250
4785
+ },
4786
+ {
4787
+ "epoch": 2.9334582942830365,
4788
+ "grad_norm": 1.8069051504135132,
4789
+ "learning_rate": 1.8166588566073105e-05,
4790
+ "loss": 0.1915,
4791
+ "step": 6260
4792
+ },
4793
+ {
4794
+ "epoch": 2.9381443298969074,
4795
+ "grad_norm": 1.2744157314300537,
4796
+ "learning_rate": 1.8163659793814434e-05,
4797
+ "loss": 0.1493,
4798
+ "step": 6270
4799
+ },
4800
+ {
4801
+ "epoch": 2.942830365510778,
4802
+ "grad_norm": 1.6674821376800537,
4803
+ "learning_rate": 1.8160731021555767e-05,
4804
+ "loss": 0.1618,
4805
+ "step": 6280
4806
+ },
4807
+ {
4808
+ "epoch": 2.9475164011246484,
4809
+ "grad_norm": 2.0318241119384766,
4810
+ "learning_rate": 1.8157802249297097e-05,
4811
+ "loss": 0.1769,
4812
+ "step": 6290
4813
+ },
4814
+ {
4815
+ "epoch": 2.9522024367385193,
4816
+ "grad_norm": 2.4880385398864746,
4817
+ "learning_rate": 1.8154873477038426e-05,
4818
+ "loss": 0.1886,
4819
+ "step": 6300
4820
+ },
4821
+ {
4822
+ "epoch": 2.95688847235239,
4823
+ "grad_norm": 2.0214920043945312,
4824
+ "learning_rate": 1.815194470477976e-05,
4825
+ "loss": 0.1806,
4826
+ "step": 6310
4827
+ },
4828
+ {
4829
+ "epoch": 2.9615745079662608,
4830
+ "grad_norm": 1.6570240259170532,
4831
+ "learning_rate": 1.814901593252109e-05,
4832
+ "loss": 0.1681,
4833
+ "step": 6320
4834
+ },
4835
+ {
4836
+ "epoch": 2.9662605435801312,
4837
+ "grad_norm": 1.9034634828567505,
4838
+ "learning_rate": 1.814608716026242e-05,
4839
+ "loss": 0.1465,
4840
+ "step": 6330
4841
+ },
4842
+ {
4843
+ "epoch": 2.9709465791940017,
4844
+ "grad_norm": 1.3237876892089844,
4845
+ "learning_rate": 1.814315838800375e-05,
4846
+ "loss": 0.1658,
4847
+ "step": 6340
4848
+ },
4849
+ {
4850
+ "epoch": 2.9756326148078727,
4851
+ "grad_norm": 2.253992795944214,
4852
+ "learning_rate": 1.814022961574508e-05,
4853
+ "loss": 0.1993,
4854
+ "step": 6350
4855
+ },
4856
+ {
4857
+ "epoch": 2.980318650421743,
4858
+ "grad_norm": 1.5123599767684937,
4859
+ "learning_rate": 1.813730084348641e-05,
4860
+ "loss": 0.1713,
4861
+ "step": 6360
4862
+ },
4863
+ {
4864
+ "epoch": 2.985004686035614,
4865
+ "grad_norm": 2.3374693393707275,
4866
+ "learning_rate": 1.8134372071227743e-05,
4867
+ "loss": 0.1545,
4868
+ "step": 6370
4869
+ },
4870
+ {
4871
+ "epoch": 2.9896907216494846,
4872
+ "grad_norm": 1.5885390043258667,
4873
+ "learning_rate": 1.8131443298969072e-05,
4874
+ "loss": 0.1689,
4875
+ "step": 6380
4876
+ },
4877
+ {
4878
+ "epoch": 2.994376757263355,
4879
+ "grad_norm": 2.09841251373291,
4880
+ "learning_rate": 1.8128514526710402e-05,
4881
+ "loss": 0.199,
4882
+ "step": 6390
4883
+ },
4884
+ {
4885
+ "epoch": 2.999062792877226,
4886
+ "grad_norm": 1.7730973958969116,
4887
+ "learning_rate": 1.8125585754451735e-05,
4888
+ "loss": 0.1674,
4889
+ "step": 6400
4890
+ },
4891
+ {
4892
+ "epoch": 3.0037488284910965,
4893
+ "grad_norm": 1.6908677816390991,
4894
+ "learning_rate": 1.8122656982193064e-05,
4895
+ "loss": 0.1177,
4896
+ "step": 6410
4897
+ },
4898
+ {
4899
+ "epoch": 3.0084348641049674,
4900
+ "grad_norm": 1.376086711883545,
4901
+ "learning_rate": 1.8119728209934397e-05,
4902
+ "loss": 0.1203,
4903
+ "step": 6420
4904
+ },
4905
+ {
4906
+ "epoch": 3.013120899718838,
4907
+ "grad_norm": 1.7540253400802612,
4908
+ "learning_rate": 1.8116799437675727e-05,
4909
+ "loss": 0.1133,
4910
+ "step": 6430
4911
+ },
4912
+ {
4913
+ "epoch": 3.0178069353327084,
4914
+ "grad_norm": 1.0979869365692139,
4915
+ "learning_rate": 1.811387066541706e-05,
4916
+ "loss": 0.0976,
4917
+ "step": 6440
4918
+ },
4919
+ {
4920
+ "epoch": 3.0224929709465793,
4921
+ "grad_norm": 1.4160518646240234,
4922
+ "learning_rate": 1.811094189315839e-05,
4923
+ "loss": 0.1084,
4924
+ "step": 6450
4925
+ },
4926
+ {
4927
+ "epoch": 3.02717900656045,
4928
+ "grad_norm": 1.0536082983016968,
4929
+ "learning_rate": 1.8108013120899722e-05,
4930
+ "loss": 0.0951,
4931
+ "step": 6460
4932
+ },
4933
+ {
4934
+ "epoch": 3.0318650421743207,
4935
+ "grad_norm": 1.4449706077575684,
4936
+ "learning_rate": 1.810508434864105e-05,
4937
+ "loss": 0.1148,
4938
+ "step": 6470
4939
+ },
4940
+ {
4941
+ "epoch": 3.036551077788191,
4942
+ "grad_norm": 1.9929163455963135,
4943
+ "learning_rate": 1.8102155576382384e-05,
4944
+ "loss": 0.1092,
4945
+ "step": 6480
4946
+ },
4947
+ {
4948
+ "epoch": 3.0412371134020617,
4949
+ "grad_norm": 1.5023281574249268,
4950
+ "learning_rate": 1.8099226804123714e-05,
4951
+ "loss": 0.1366,
4952
+ "step": 6490
4953
+ },
4954
+ {
4955
+ "epoch": 3.0459231490159326,
4956
+ "grad_norm": 1.0478644371032715,
4957
+ "learning_rate": 1.8096298031865043e-05,
4958
+ "loss": 0.1,
4959
+ "step": 6500
4960
+ },
4961
+ {
4962
+ "epoch": 3.0459231490159326,
4963
+ "eval_loss": 0.03124774619936943,
4964
+ "eval_pearson_cosine": 0.819439330096543,
4965
+ "eval_pearson_dot": 0.756545112697367,
4966
+ "eval_pearson_euclidean": 0.7885948455069851,
4967
+ "eval_pearson_manhattan": 0.790849549554963,
4968
+ "eval_runtime": 3.2845,
4969
+ "eval_samples_per_second": 456.685,
4970
+ "eval_spearman_cosine": 0.8189547958390648,
4971
+ "eval_spearman_dot": 0.7570952217597201,
4972
+ "eval_spearman_euclidean": 0.7971512626570622,
4973
+ "eval_spearman_manhattan": 0.7989572580196979,
4974
+ "eval_steps_per_second": 28.619,
4975
+ "step": 6500
4976
+ },
4977
+ {
4978
+ "epoch": 3.050609184629803,
4979
+ "grad_norm": 1.7084600925445557,
4980
+ "learning_rate": 1.8093369259606376e-05,
4981
+ "loss": 0.1258,
4982
+ "step": 6510
4983
+ },
4984
+ {
4985
+ "epoch": 3.055295220243674,
4986
+ "grad_norm": 1.9885269403457642,
4987
+ "learning_rate": 1.8090440487347706e-05,
4988
+ "loss": 0.1275,
4989
+ "step": 6520
4990
+ },
4991
+ {
4992
+ "epoch": 3.0599812558575445,
4993
+ "grad_norm": 1.5580956935882568,
4994
+ "learning_rate": 1.8087511715089035e-05,
4995
+ "loss": 0.1147,
4996
+ "step": 6530
4997
+ },
4998
+ {
4999
+ "epoch": 3.064667291471415,
5000
+ "grad_norm": 1.2762457132339478,
5001
+ "learning_rate": 1.8084582942830368e-05,
5002
+ "loss": 0.1087,
5003
+ "step": 6540
5004
+ },
5005
+ {
5006
+ "epoch": 3.069353327085286,
5007
+ "grad_norm": 1.7397170066833496,
5008
+ "learning_rate": 1.8081654170571698e-05,
5009
+ "loss": 0.107,
5010
+ "step": 6550
5011
+ },
5012
+ {
5013
+ "epoch": 3.0740393626991565,
5014
+ "grad_norm": 1.6765695810317993,
5015
+ "learning_rate": 1.8078725398313027e-05,
5016
+ "loss": 0.1389,
5017
+ "step": 6560
5018
+ },
5019
+ {
5020
+ "epoch": 3.0787253983130274,
5021
+ "grad_norm": 1.6627321243286133,
5022
+ "learning_rate": 1.807579662605436e-05,
5023
+ "loss": 0.1315,
5024
+ "step": 6570
5025
+ },
5026
+ {
5027
+ "epoch": 3.083411433926898,
5028
+ "grad_norm": 1.4617902040481567,
5029
+ "learning_rate": 1.807286785379569e-05,
5030
+ "loss": 0.1003,
5031
+ "step": 6580
5032
+ },
5033
+ {
5034
+ "epoch": 3.0880974695407684,
5035
+ "grad_norm": 1.4063915014266968,
5036
+ "learning_rate": 1.806993908153702e-05,
5037
+ "loss": 0.1256,
5038
+ "step": 6590
5039
+ },
5040
+ {
5041
+ "epoch": 3.0927835051546393,
5042
+ "grad_norm": 1.4291496276855469,
5043
+ "learning_rate": 1.8067010309278352e-05,
5044
+ "loss": 0.0952,
5045
+ "step": 6600
5046
+ },
5047
+ {
5048
+ "epoch": 3.0974695407685098,
5049
+ "grad_norm": 1.7208278179168701,
5050
+ "learning_rate": 1.806408153701968e-05,
5051
+ "loss": 0.1232,
5052
+ "step": 6610
5053
+ },
5054
+ {
5055
+ "epoch": 3.1021555763823807,
5056
+ "grad_norm": 1.4286062717437744,
5057
+ "learning_rate": 1.8061152764761014e-05,
5058
+ "loss": 0.1162,
5059
+ "step": 6620
5060
+ },
5061
+ {
5062
+ "epoch": 3.106841611996251,
5063
+ "grad_norm": 1.6473411321640015,
5064
+ "learning_rate": 1.8058223992502344e-05,
5065
+ "loss": 0.1132,
5066
+ "step": 6630
5067
+ },
5068
+ {
5069
+ "epoch": 3.1115276476101217,
5070
+ "grad_norm": 1.6076815128326416,
5071
+ "learning_rate": 1.8055295220243677e-05,
5072
+ "loss": 0.1224,
5073
+ "step": 6640
5074
+ },
5075
+ {
5076
+ "epoch": 3.1162136832239926,
5077
+ "grad_norm": 1.4402363300323486,
5078
+ "learning_rate": 1.8052366447985006e-05,
5079
+ "loss": 0.1149,
5080
+ "step": 6650
5081
+ },
5082
+ {
5083
+ "epoch": 3.120899718837863,
5084
+ "grad_norm": 1.2964081764221191,
5085
+ "learning_rate": 1.804943767572634e-05,
5086
+ "loss": 0.1084,
5087
+ "step": 6660
5088
+ },
5089
+ {
5090
+ "epoch": 3.125585754451734,
5091
+ "grad_norm": 1.4852274656295776,
5092
+ "learning_rate": 1.804650890346767e-05,
5093
+ "loss": 0.1299,
5094
+ "step": 6670
5095
+ },
5096
+ {
5097
+ "epoch": 3.1302717900656045,
5098
+ "grad_norm": 1.2311766147613525,
5099
+ "learning_rate": 1.8043580131208998e-05,
5100
+ "loss": 0.112,
5101
+ "step": 6680
5102
+ },
5103
+ {
5104
+ "epoch": 3.134957825679475,
5105
+ "grad_norm": 1.7571637630462646,
5106
+ "learning_rate": 1.804065135895033e-05,
5107
+ "loss": 0.1128,
5108
+ "step": 6690
5109
+ },
5110
+ {
5111
+ "epoch": 3.139643861293346,
5112
+ "grad_norm": 2.1391453742980957,
5113
+ "learning_rate": 1.803772258669166e-05,
5114
+ "loss": 0.1323,
5115
+ "step": 6700
5116
+ },
5117
+ {
5118
+ "epoch": 3.1443298969072164,
5119
+ "grad_norm": 1.5623685121536255,
5120
+ "learning_rate": 1.803479381443299e-05,
5121
+ "loss": 0.1139,
5122
+ "step": 6710
5123
+ },
5124
+ {
5125
+ "epoch": 3.1490159325210874,
5126
+ "grad_norm": 1.7942404747009277,
5127
+ "learning_rate": 1.8031865042174323e-05,
5128
+ "loss": 0.1426,
5129
+ "step": 6720
5130
+ },
5131
+ {
5132
+ "epoch": 3.153701968134958,
5133
+ "grad_norm": 1.410545825958252,
5134
+ "learning_rate": 1.8028936269915652e-05,
5135
+ "loss": 0.1056,
5136
+ "step": 6730
5137
+ },
5138
+ {
5139
+ "epoch": 3.1583880037488283,
5140
+ "grad_norm": 1.8002315759658813,
5141
+ "learning_rate": 1.8026007497656982e-05,
5142
+ "loss": 0.1256,
5143
+ "step": 6740
5144
+ },
5145
+ {
5146
+ "epoch": 3.1630740393626993,
5147
+ "grad_norm": 1.1960804462432861,
5148
+ "learning_rate": 1.8023078725398315e-05,
5149
+ "loss": 0.1075,
5150
+ "step": 6750
5151
+ },
5152
+ {
5153
+ "epoch": 3.1630740393626993,
5154
+ "eval_loss": 0.031751763075590134,
5155
+ "eval_pearson_cosine": 0.818392251131673,
5156
+ "eval_pearson_dot": 0.7566814090378315,
5157
+ "eval_pearson_euclidean": 0.7928479508679516,
5158
+ "eval_pearson_manhattan": 0.7948564653512236,
5159
+ "eval_runtime": 3.2248,
5160
+ "eval_samples_per_second": 465.151,
5161
+ "eval_spearman_cosine": 0.8181084179548533,
5162
+ "eval_spearman_dot": 0.7582501700895796,
5163
+ "eval_spearman_euclidean": 0.801587599060446,
5164
+ "eval_spearman_manhattan": 0.8030998382251602,
5165
+ "eval_steps_per_second": 29.149,
5166
+ "step": 6750
5167
+ },
5168
+ {
5169
+ "epoch": 3.1677600749765698,
5170
+ "grad_norm": 1.8303911685943604,
5171
+ "learning_rate": 1.8020149953139644e-05,
5172
+ "loss": 0.1148,
5173
+ "step": 6760
5174
+ },
5175
+ {
5176
+ "epoch": 3.1724461105904407,
5177
+ "grad_norm": 0.8495842814445496,
5178
+ "learning_rate": 1.8017221180880977e-05,
5179
+ "loss": 0.1118,
5180
+ "step": 6770
5181
+ },
5182
+ {
5183
+ "epoch": 3.177132146204311,
5184
+ "grad_norm": 1.8021464347839355,
5185
+ "learning_rate": 1.8014292408622307e-05,
5186
+ "loss": 0.1171,
5187
+ "step": 6780
5188
+ },
5189
+ {
5190
+ "epoch": 3.1818181818181817,
5191
+ "grad_norm": 1.4046647548675537,
5192
+ "learning_rate": 1.8011363636363636e-05,
5193
+ "loss": 0.1126,
5194
+ "step": 6790
5195
+ },
5196
+ {
5197
+ "epoch": 3.1865042174320526,
5198
+ "grad_norm": 1.425398349761963,
5199
+ "learning_rate": 1.800843486410497e-05,
5200
+ "loss": 0.0897,
5201
+ "step": 6800
5202
+ },
5203
+ {
5204
+ "epoch": 3.191190253045923,
5205
+ "grad_norm": 1.938445806503296,
5206
+ "learning_rate": 1.80055060918463e-05,
5207
+ "loss": 0.1173,
5208
+ "step": 6810
5209
+ },
5210
+ {
5211
+ "epoch": 3.195876288659794,
5212
+ "grad_norm": 1.9677798748016357,
5213
+ "learning_rate": 1.800257731958763e-05,
5214
+ "loss": 0.1113,
5215
+ "step": 6820
5216
+ },
5217
+ {
5218
+ "epoch": 3.2005623242736645,
5219
+ "grad_norm": 1.0695815086364746,
5220
+ "learning_rate": 1.799964854732896e-05,
5221
+ "loss": 0.1251,
5222
+ "step": 6830
5223
+ },
5224
+ {
5225
+ "epoch": 3.205248359887535,
5226
+ "grad_norm": 1.5036897659301758,
5227
+ "learning_rate": 1.7996719775070294e-05,
5228
+ "loss": 0.1077,
5229
+ "step": 6840
5230
+ },
5231
+ {
5232
+ "epoch": 3.209934395501406,
5233
+ "grad_norm": 1.0521790981292725,
5234
+ "learning_rate": 1.7993791002811623e-05,
5235
+ "loss": 0.1121,
5236
+ "step": 6850
5237
+ },
5238
+ {
5239
+ "epoch": 3.2146204311152764,
5240
+ "grad_norm": 1.6036179065704346,
5241
+ "learning_rate": 1.7990862230552956e-05,
5242
+ "loss": 0.1087,
5243
+ "step": 6860
5244
+ },
5245
+ {
5246
+ "epoch": 3.2193064667291473,
5247
+ "grad_norm": 1.5137251615524292,
5248
+ "learning_rate": 1.7987933458294286e-05,
5249
+ "loss": 0.1227,
5250
+ "step": 6870
5251
+ },
5252
+ {
5253
+ "epoch": 3.223992502343018,
5254
+ "grad_norm": 1.7321174144744873,
5255
+ "learning_rate": 1.7985004686035615e-05,
5256
+ "loss": 0.1165,
5257
+ "step": 6880
5258
+ },
5259
+ {
5260
+ "epoch": 3.2286785379568883,
5261
+ "grad_norm": 1.744694709777832,
5262
+ "learning_rate": 1.7982075913776945e-05,
5263
+ "loss": 0.1164,
5264
+ "step": 6890
5265
+ },
5266
+ {
5267
+ "epoch": 3.2333645735707592,
5268
+ "grad_norm": 1.5463277101516724,
5269
+ "learning_rate": 1.7979147141518278e-05,
5270
+ "loss": 0.1047,
5271
+ "step": 6900
5272
+ },
5273
+ {
5274
+ "epoch": 3.2380506091846297,
5275
+ "grad_norm": 1.663386583328247,
5276
+ "learning_rate": 1.7976218369259607e-05,
5277
+ "loss": 0.102,
5278
+ "step": 6910
5279
+ },
5280
+ {
5281
+ "epoch": 3.2427366447985007,
5282
+ "grad_norm": 1.3090193271636963,
5283
+ "learning_rate": 1.7973289597000937e-05,
5284
+ "loss": 0.1206,
5285
+ "step": 6920
5286
+ },
5287
+ {
5288
+ "epoch": 3.247422680412371,
5289
+ "grad_norm": 1.535120964050293,
5290
+ "learning_rate": 1.797036082474227e-05,
5291
+ "loss": 0.1131,
5292
+ "step": 6930
5293
+ },
5294
+ {
5295
+ "epoch": 3.2521087160262416,
5296
+ "grad_norm": 2.086014986038208,
5297
+ "learning_rate": 1.79674320524836e-05,
5298
+ "loss": 0.1237,
5299
+ "step": 6940
5300
+ },
5301
+ {
5302
+ "epoch": 3.2567947516401126,
5303
+ "grad_norm": 1.6445001363754272,
5304
+ "learning_rate": 1.7964503280224932e-05,
5305
+ "loss": 0.1079,
5306
+ "step": 6950
5307
+ },
5308
+ {
5309
+ "epoch": 3.261480787253983,
5310
+ "grad_norm": 1.4344754219055176,
5311
+ "learning_rate": 1.796157450796626e-05,
5312
+ "loss": 0.1077,
5313
+ "step": 6960
5314
+ },
5315
+ {
5316
+ "epoch": 3.266166822867854,
5317
+ "grad_norm": 0.9027751088142395,
5318
+ "learning_rate": 1.7958645735707594e-05,
5319
+ "loss": 0.0977,
5320
+ "step": 6970
5321
+ },
5322
+ {
5323
+ "epoch": 3.2708528584817245,
5324
+ "grad_norm": 1.6101003885269165,
5325
+ "learning_rate": 1.7955716963448924e-05,
5326
+ "loss": 0.1233,
5327
+ "step": 6980
5328
+ },
5329
+ {
5330
+ "epoch": 3.275538894095595,
5331
+ "grad_norm": 1.6779502630233765,
5332
+ "learning_rate": 1.7952788191190253e-05,
5333
+ "loss": 0.1259,
5334
+ "step": 6990
5335
+ },
5336
+ {
5337
+ "epoch": 3.280224929709466,
5338
+ "grad_norm": 1.1465091705322266,
5339
+ "learning_rate": 1.7949859418931586e-05,
5340
+ "loss": 0.0971,
5341
+ "step": 7000
5342
+ },
5343
+ {
5344
+ "epoch": 3.280224929709466,
5345
+ "eval_loss": 0.031160470098257065,
5346
+ "eval_pearson_cosine": 0.8183441854138493,
5347
+ "eval_pearson_dot": 0.756100708061723,
5348
+ "eval_pearson_euclidean": 0.7881792865072583,
5349
+ "eval_pearson_manhattan": 0.7905067817277995,
5350
+ "eval_runtime": 3.3694,
5351
+ "eval_samples_per_second": 445.183,
5352
+ "eval_spearman_cosine": 0.8175806864862152,
5353
+ "eval_spearman_dot": 0.7571609305062721,
5354
+ "eval_spearman_euclidean": 0.7969525723417172,
5355
+ "eval_spearman_manhattan": 0.7992497485259732,
5356
+ "eval_steps_per_second": 27.898,
5357
+ "step": 7000
5358
  }
5359
  ],
5360
  "logging_steps": 10,