CocoRoF commited on
Commit
7d22d75
·
verified ·
1 Parent(s): 9e6d107

Training in progress, step 20000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fd5c90298fb3b8e4b3bfc5252ca67d39257c11142359692b801557f737b7e42
3
  size 613004648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a99077961d0a641c5ff38bc41aeb0e96f4e0aa881e97473db5564c741bb8ca1
3
  size 613004648
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3dde98893d5f526a3fd3a3e2c8cc5d0c5ec7ef3827d46a4ac82be414c5ffde16
3
  size 1226096954
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b633f36fabb5fb014eb719663342186c16c8ad074853b96d787c85ceecedc06
3
  size 1226096954
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a62b565b93cfaa85c1ff8e14ede1dbb6d31acf0d0ff726cbda86bec73b0dea2e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc17503afa3bf0eafca6b72efbe6ae8cb454a3c16da90f560f71f4af87c7a4e4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ad567ab1c91260dc0b589aab08ed3b669be820ef88836c60f94ca1975b277c3
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f38d69aa9147d77e04cfc7d1c0433054c65fed5f88ad2cafb308669398f46b3f
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.903467666354265,
5
  "eval_steps": 250,
6
- "global_step": 19000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -14523,6 +14523,770 @@
14523
  "eval_spearman_manhattan": 0.7432616809242956,
14524
  "eval_steps_per_second": 36.716,
14525
  "step": 19000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14526
  }
14527
  ],
14528
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.372071227741332,
5
  "eval_steps": 250,
6
+ "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
14523
  "eval_spearman_manhattan": 0.7432616809242956,
14524
  "eval_steps_per_second": 36.716,
14525
  "step": 19000
14526
+ },
14527
+ {
14528
+ "epoch": 8.908153701968136,
14529
+ "grad_norm": 1.1705414056777954,
14530
+ "learning_rate": 8.886480787253984e-06,
14531
+ "loss": 0.0549,
14532
+ "step": 19010
14533
+ },
14534
+ {
14535
+ "epoch": 8.912839737582006,
14536
+ "grad_norm": 2.2512776851654053,
14537
+ "learning_rate": 8.885895032802249e-06,
14538
+ "loss": 0.0715,
14539
+ "step": 19020
14540
+ },
14541
+ {
14542
+ "epoch": 8.917525773195877,
14543
+ "grad_norm": 1.7541801929473877,
14544
+ "learning_rate": 8.885309278350516e-06,
14545
+ "loss": 0.0657,
14546
+ "step": 19030
14547
+ },
14548
+ {
14549
+ "epoch": 8.922211808809747,
14550
+ "grad_norm": 1.3972922563552856,
14551
+ "learning_rate": 8.884723523898783e-06,
14552
+ "loss": 0.0516,
14553
+ "step": 19040
14554
+ },
14555
+ {
14556
+ "epoch": 8.926897844423618,
14557
+ "grad_norm": 0.9502004384994507,
14558
+ "learning_rate": 8.884137769447048e-06,
14559
+ "loss": 0.0664,
14560
+ "step": 19050
14561
+ },
14562
+ {
14563
+ "epoch": 8.931583880037488,
14564
+ "grad_norm": 2.1048943996429443,
14565
+ "learning_rate": 8.883552014995315e-06,
14566
+ "loss": 0.0668,
14567
+ "step": 19060
14568
+ },
14569
+ {
14570
+ "epoch": 8.936269915651359,
14571
+ "grad_norm": 2.441774368286133,
14572
+ "learning_rate": 8.88296626054358e-06,
14573
+ "loss": 0.0681,
14574
+ "step": 19070
14575
+ },
14576
+ {
14577
+ "epoch": 8.940955951265229,
14578
+ "grad_norm": 1.6815327405929565,
14579
+ "learning_rate": 8.882380506091847e-06,
14580
+ "loss": 0.0583,
14581
+ "step": 19080
14582
+ },
14583
+ {
14584
+ "epoch": 8.9456419868791,
14585
+ "grad_norm": 2.0613820552825928,
14586
+ "learning_rate": 8.881794751640114e-06,
14587
+ "loss": 0.0605,
14588
+ "step": 19090
14589
+ },
14590
+ {
14591
+ "epoch": 8.950328022492972,
14592
+ "grad_norm": 2.164487838745117,
14593
+ "learning_rate": 8.88120899718838e-06,
14594
+ "loss": 0.0749,
14595
+ "step": 19100
14596
+ },
14597
+ {
14598
+ "epoch": 8.955014058106842,
14599
+ "grad_norm": 1.838508129119873,
14600
+ "learning_rate": 8.880623242736646e-06,
14601
+ "loss": 0.0588,
14602
+ "step": 19110
14603
+ },
14604
+ {
14605
+ "epoch": 8.959700093720713,
14606
+ "grad_norm": 2.338103771209717,
14607
+ "learning_rate": 8.880037488284912e-06,
14608
+ "loss": 0.06,
14609
+ "step": 19120
14610
+ },
14611
+ {
14612
+ "epoch": 8.964386129334583,
14613
+ "grad_norm": 1.5945453643798828,
14614
+ "learning_rate": 8.879451733833177e-06,
14615
+ "loss": 0.062,
14616
+ "step": 19130
14617
+ },
14618
+ {
14619
+ "epoch": 8.969072164948454,
14620
+ "grad_norm": 1.4666954278945923,
14621
+ "learning_rate": 8.878865979381444e-06,
14622
+ "loss": 0.0593,
14623
+ "step": 19140
14624
+ },
14625
+ {
14626
+ "epoch": 8.973758200562324,
14627
+ "grad_norm": 2.2944576740264893,
14628
+ "learning_rate": 8.87828022492971e-06,
14629
+ "loss": 0.0621,
14630
+ "step": 19150
14631
+ },
14632
+ {
14633
+ "epoch": 8.978444236176195,
14634
+ "grad_norm": 1.8283967971801758,
14635
+ "learning_rate": 8.877694470477976e-06,
14636
+ "loss": 0.0646,
14637
+ "step": 19160
14638
+ },
14639
+ {
14640
+ "epoch": 8.983130271790065,
14641
+ "grad_norm": 1.3863099813461304,
14642
+ "learning_rate": 8.877108716026243e-06,
14643
+ "loss": 0.0524,
14644
+ "step": 19170
14645
+ },
14646
+ {
14647
+ "epoch": 8.987816307403936,
14648
+ "grad_norm": 2.2185399532318115,
14649
+ "learning_rate": 8.876522961574508e-06,
14650
+ "loss": 0.0741,
14651
+ "step": 19180
14652
+ },
14653
+ {
14654
+ "epoch": 8.992502343017806,
14655
+ "grad_norm": 1.0174760818481445,
14656
+ "learning_rate": 8.875937207122775e-06,
14657
+ "loss": 0.0613,
14658
+ "step": 19190
14659
+ },
14660
+ {
14661
+ "epoch": 8.997188378631678,
14662
+ "grad_norm": 2.3824729919433594,
14663
+ "learning_rate": 8.87535145267104e-06,
14664
+ "loss": 0.0588,
14665
+ "step": 19200
14666
+ },
14667
+ {
14668
+ "epoch": 9.001874414245549,
14669
+ "grad_norm": 0.5327388048171997,
14670
+ "learning_rate": 8.874765698219307e-06,
14671
+ "loss": 0.0531,
14672
+ "step": 19210
14673
+ },
14674
+ {
14675
+ "epoch": 9.00656044985942,
14676
+ "grad_norm": 1.4109519720077515,
14677
+ "learning_rate": 8.874179943767574e-06,
14678
+ "loss": 0.0435,
14679
+ "step": 19220
14680
+ },
14681
+ {
14682
+ "epoch": 9.01124648547329,
14683
+ "grad_norm": 1.2190899848937988,
14684
+ "learning_rate": 8.873594189315839e-06,
14685
+ "loss": 0.0499,
14686
+ "step": 19230
14687
+ },
14688
+ {
14689
+ "epoch": 9.01593252108716,
14690
+ "grad_norm": 1.7541508674621582,
14691
+ "learning_rate": 8.873008434864106e-06,
14692
+ "loss": 0.046,
14693
+ "step": 19240
14694
+ },
14695
+ {
14696
+ "epoch": 9.02061855670103,
14697
+ "grad_norm": 0.7862921953201294,
14698
+ "learning_rate": 8.872422680412371e-06,
14699
+ "loss": 0.0436,
14700
+ "step": 19250
14701
+ },
14702
+ {
14703
+ "epoch": 9.02061855670103,
14704
+ "eval_loss": 0.03772435337305069,
14705
+ "eval_pearson_cosine": 0.7855273741960076,
14706
+ "eval_pearson_dot": 0.6489285063428554,
14707
+ "eval_pearson_euclidean": 0.7292628651619779,
14708
+ "eval_pearson_manhattan": 0.728854161244513,
14709
+ "eval_runtime": 39.9971,
14710
+ "eval_samples_per_second": 37.503,
14711
+ "eval_spearman_cosine": 0.7855635384218336,
14712
+ "eval_spearman_dot": 0.6695729920402427,
14713
+ "eval_spearman_euclidean": 0.7464551903137364,
14714
+ "eval_spearman_manhattan": 0.7461578348935499,
14715
+ "eval_steps_per_second": 37.503,
14716
+ "step": 19250
14717
+ },
14718
+ {
14719
+ "epoch": 9.025304592314901,
14720
+ "grad_norm": 1.5160027742385864,
14721
+ "learning_rate": 8.871836925960638e-06,
14722
+ "loss": 0.0402,
14723
+ "step": 19260
14724
+ },
14725
+ {
14726
+ "epoch": 9.029990627928772,
14727
+ "grad_norm": 0.9218592047691345,
14728
+ "learning_rate": 8.871251171508905e-06,
14729
+ "loss": 0.0548,
14730
+ "step": 19270
14731
+ },
14732
+ {
14733
+ "epoch": 9.034676663542642,
14734
+ "grad_norm": 1.078574299812317,
14735
+ "learning_rate": 8.87066541705717e-06,
14736
+ "loss": 0.0501,
14737
+ "step": 19280
14738
+ },
14739
+ {
14740
+ "epoch": 9.039362699156513,
14741
+ "grad_norm": 1.2724040746688843,
14742
+ "learning_rate": 8.870079662605436e-06,
14743
+ "loss": 0.0455,
14744
+ "step": 19290
14745
+ },
14746
+ {
14747
+ "epoch": 9.044048734770385,
14748
+ "grad_norm": 1.7886642217636108,
14749
+ "learning_rate": 8.869493908153702e-06,
14750
+ "loss": 0.0483,
14751
+ "step": 19300
14752
+ },
14753
+ {
14754
+ "epoch": 9.048734770384256,
14755
+ "grad_norm": 2.028181552886963,
14756
+ "learning_rate": 8.868908153701968e-06,
14757
+ "loss": 0.0453,
14758
+ "step": 19310
14759
+ },
14760
+ {
14761
+ "epoch": 9.053420805998126,
14762
+ "grad_norm": 1.9034656286239624,
14763
+ "learning_rate": 8.868322399250235e-06,
14764
+ "loss": 0.049,
14765
+ "step": 19320
14766
+ },
14767
+ {
14768
+ "epoch": 9.058106841611997,
14769
+ "grad_norm": 1.34951651096344,
14770
+ "learning_rate": 8.867736644798502e-06,
14771
+ "loss": 0.0486,
14772
+ "step": 19330
14773
+ },
14774
+ {
14775
+ "epoch": 9.062792877225867,
14776
+ "grad_norm": 2.447568655014038,
14777
+ "learning_rate": 8.867150890346767e-06,
14778
+ "loss": 0.0464,
14779
+ "step": 19340
14780
+ },
14781
+ {
14782
+ "epoch": 9.067478912839738,
14783
+ "grad_norm": 0.8183003664016724,
14784
+ "learning_rate": 8.866565135895034e-06,
14785
+ "loss": 0.0469,
14786
+ "step": 19350
14787
+ },
14788
+ {
14789
+ "epoch": 9.072164948453608,
14790
+ "grad_norm": 1.3520687818527222,
14791
+ "learning_rate": 8.865979381443299e-06,
14792
+ "loss": 0.052,
14793
+ "step": 19360
14794
+ },
14795
+ {
14796
+ "epoch": 9.076850984067478,
14797
+ "grad_norm": 1.827717900276184,
14798
+ "learning_rate": 8.865393626991566e-06,
14799
+ "loss": 0.052,
14800
+ "step": 19370
14801
+ },
14802
+ {
14803
+ "epoch": 9.081537019681349,
14804
+ "grad_norm": 1.5364701747894287,
14805
+ "learning_rate": 8.864807872539833e-06,
14806
+ "loss": 0.0522,
14807
+ "step": 19380
14808
+ },
14809
+ {
14810
+ "epoch": 9.08622305529522,
14811
+ "grad_norm": 1.0967464447021484,
14812
+ "learning_rate": 8.864222118088098e-06,
14813
+ "loss": 0.0546,
14814
+ "step": 19390
14815
+ },
14816
+ {
14817
+ "epoch": 9.090909090909092,
14818
+ "grad_norm": 1.141569972038269,
14819
+ "learning_rate": 8.863636363636365e-06,
14820
+ "loss": 0.054,
14821
+ "step": 19400
14822
+ },
14823
+ {
14824
+ "epoch": 9.095595126522962,
14825
+ "grad_norm": 1.025898814201355,
14826
+ "learning_rate": 8.86305060918463e-06,
14827
+ "loss": 0.0549,
14828
+ "step": 19410
14829
+ },
14830
+ {
14831
+ "epoch": 9.100281162136833,
14832
+ "grad_norm": 0.9598554968833923,
14833
+ "learning_rate": 8.862464854732897e-06,
14834
+ "loss": 0.0562,
14835
+ "step": 19420
14836
+ },
14837
+ {
14838
+ "epoch": 9.104967197750703,
14839
+ "grad_norm": 1.6386889219284058,
14840
+ "learning_rate": 8.861879100281164e-06,
14841
+ "loss": 0.0447,
14842
+ "step": 19430
14843
+ },
14844
+ {
14845
+ "epoch": 9.109653233364574,
14846
+ "grad_norm": 1.3437844514846802,
14847
+ "learning_rate": 8.861293345829429e-06,
14848
+ "loss": 0.0545,
14849
+ "step": 19440
14850
+ },
14851
+ {
14852
+ "epoch": 9.114339268978444,
14853
+ "grad_norm": 0.9290686249732971,
14854
+ "learning_rate": 8.860707591377694e-06,
14855
+ "loss": 0.0472,
14856
+ "step": 19450
14857
+ },
14858
+ {
14859
+ "epoch": 9.119025304592315,
14860
+ "grad_norm": 1.5257052183151245,
14861
+ "learning_rate": 8.860121836925961e-06,
14862
+ "loss": 0.0499,
14863
+ "step": 19460
14864
+ },
14865
+ {
14866
+ "epoch": 9.123711340206185,
14867
+ "grad_norm": 1.8884594440460205,
14868
+ "learning_rate": 8.859536082474226e-06,
14869
+ "loss": 0.0607,
14870
+ "step": 19470
14871
+ },
14872
+ {
14873
+ "epoch": 9.128397375820056,
14874
+ "grad_norm": 1.6822651624679565,
14875
+ "learning_rate": 8.858950328022493e-06,
14876
+ "loss": 0.0504,
14877
+ "step": 19480
14878
+ },
14879
+ {
14880
+ "epoch": 9.133083411433926,
14881
+ "grad_norm": 1.626015067100525,
14882
+ "learning_rate": 8.85836457357076e-06,
14883
+ "loss": 0.0516,
14884
+ "step": 19490
14885
+ },
14886
+ {
14887
+ "epoch": 9.137769447047798,
14888
+ "grad_norm": 1.1759178638458252,
14889
+ "learning_rate": 8.857778819119026e-06,
14890
+ "loss": 0.047,
14891
+ "step": 19500
14892
+ },
14893
+ {
14894
+ "epoch": 9.137769447047798,
14895
+ "eval_loss": 0.03765318915247917,
14896
+ "eval_pearson_cosine": 0.7869684109175026,
14897
+ "eval_pearson_dot": 0.6458746869453549,
14898
+ "eval_pearson_euclidean": 0.7253508328002916,
14899
+ "eval_pearson_manhattan": 0.7249120216278655,
14900
+ "eval_runtime": 39.8108,
14901
+ "eval_samples_per_second": 37.678,
14902
+ "eval_spearman_cosine": 0.7881589626771033,
14903
+ "eval_spearman_dot": 0.6693627499015223,
14904
+ "eval_spearman_euclidean": 0.7413480639045013,
14905
+ "eval_spearman_manhattan": 0.7414303112939764,
14906
+ "eval_steps_per_second": 37.678,
14907
+ "step": 19500
14908
+ },
14909
+ {
14910
+ "epoch": 9.142455482661669,
14911
+ "grad_norm": 1.134598731994629,
14912
+ "learning_rate": 8.857193064667292e-06,
14913
+ "loss": 0.0531,
14914
+ "step": 19510
14915
+ },
14916
+ {
14917
+ "epoch": 9.14714151827554,
14918
+ "grad_norm": 1.447082757949829,
14919
+ "learning_rate": 8.856607310215558e-06,
14920
+ "loss": 0.0531,
14921
+ "step": 19520
14922
+ },
14923
+ {
14924
+ "epoch": 9.15182755388941,
14925
+ "grad_norm": 1.004354476928711,
14926
+ "learning_rate": 8.856021555763825e-06,
14927
+ "loss": 0.0511,
14928
+ "step": 19530
14929
+ },
14930
+ {
14931
+ "epoch": 9.15651358950328,
14932
+ "grad_norm": 1.6353479623794556,
14933
+ "learning_rate": 8.855435801312092e-06,
14934
+ "loss": 0.0467,
14935
+ "step": 19540
14936
+ },
14937
+ {
14938
+ "epoch": 9.16119962511715,
14939
+ "grad_norm": 1.8899836540222168,
14940
+ "learning_rate": 8.854850046860357e-06,
14941
+ "loss": 0.0582,
14942
+ "step": 19550
14943
+ },
14944
+ {
14945
+ "epoch": 9.165885660731021,
14946
+ "grad_norm": 1.306091070175171,
14947
+ "learning_rate": 8.854264292408624e-06,
14948
+ "loss": 0.0571,
14949
+ "step": 19560
14950
+ },
14951
+ {
14952
+ "epoch": 9.170571696344892,
14953
+ "grad_norm": 1.7783139944076538,
14954
+ "learning_rate": 8.853678537956889e-06,
14955
+ "loss": 0.0543,
14956
+ "step": 19570
14957
+ },
14958
+ {
14959
+ "epoch": 9.175257731958762,
14960
+ "grad_norm": 1.1551589965820312,
14961
+ "learning_rate": 8.853092783505156e-06,
14962
+ "loss": 0.0626,
14963
+ "step": 19580
14964
+ },
14965
+ {
14966
+ "epoch": 9.179943767572633,
14967
+ "grad_norm": 0.8448215126991272,
14968
+ "learning_rate": 8.852507029053423e-06,
14969
+ "loss": 0.0509,
14970
+ "step": 19590
14971
+ },
14972
+ {
14973
+ "epoch": 9.184629803186505,
14974
+ "grad_norm": 1.3088339567184448,
14975
+ "learning_rate": 8.851921274601688e-06,
14976
+ "loss": 0.0532,
14977
+ "step": 19600
14978
+ },
14979
+ {
14980
+ "epoch": 9.189315838800376,
14981
+ "grad_norm": 1.2790261507034302,
14982
+ "learning_rate": 8.851335520149953e-06,
14983
+ "loss": 0.0366,
14984
+ "step": 19610
14985
+ },
14986
+ {
14987
+ "epoch": 9.194001874414246,
14988
+ "grad_norm": 1.4637041091918945,
14989
+ "learning_rate": 8.85074976569822e-06,
14990
+ "loss": 0.0476,
14991
+ "step": 19620
14992
+ },
14993
+ {
14994
+ "epoch": 9.198687910028116,
14995
+ "grad_norm": 1.1702561378479004,
14996
+ "learning_rate": 8.850164011246485e-06,
14997
+ "loss": 0.0539,
14998
+ "step": 19630
14999
+ },
15000
+ {
15001
+ "epoch": 9.203373945641987,
15002
+ "grad_norm": 1.4241745471954346,
15003
+ "learning_rate": 8.849578256794752e-06,
15004
+ "loss": 0.0488,
15005
+ "step": 19640
15006
+ },
15007
+ {
15008
+ "epoch": 9.208059981255857,
15009
+ "grad_norm": 1.3767116069793701,
15010
+ "learning_rate": 8.848992502343019e-06,
15011
+ "loss": 0.0501,
15012
+ "step": 19650
15013
+ },
15014
+ {
15015
+ "epoch": 9.212746016869728,
15016
+ "grad_norm": 0.946832001209259,
15017
+ "learning_rate": 8.848406747891284e-06,
15018
+ "loss": 0.0509,
15019
+ "step": 19660
15020
+ },
15021
+ {
15022
+ "epoch": 9.217432052483598,
15023
+ "grad_norm": 2.132277011871338,
15024
+ "learning_rate": 8.847820993439551e-06,
15025
+ "loss": 0.0517,
15026
+ "step": 19670
15027
+ },
15028
+ {
15029
+ "epoch": 9.222118088097469,
15030
+ "grad_norm": 3.003037929534912,
15031
+ "learning_rate": 8.847235238987816e-06,
15032
+ "loss": 0.0601,
15033
+ "step": 19680
15034
+ },
15035
+ {
15036
+ "epoch": 9.22680412371134,
15037
+ "grad_norm": 0.8297474384307861,
15038
+ "learning_rate": 8.846649484536083e-06,
15039
+ "loss": 0.0381,
15040
+ "step": 19690
15041
+ },
15042
+ {
15043
+ "epoch": 9.231490159325212,
15044
+ "grad_norm": 0.8142613768577576,
15045
+ "learning_rate": 8.846063730084349e-06,
15046
+ "loss": 0.0528,
15047
+ "step": 19700
15048
+ },
15049
+ {
15050
+ "epoch": 9.236176194939082,
15051
+ "grad_norm": 1.9133763313293457,
15052
+ "learning_rate": 8.845477975632616e-06,
15053
+ "loss": 0.0536,
15054
+ "step": 19710
15055
+ },
15056
+ {
15057
+ "epoch": 9.240862230552953,
15058
+ "grad_norm": 1.1931358575820923,
15059
+ "learning_rate": 8.844892221180882e-06,
15060
+ "loss": 0.0558,
15061
+ "step": 19720
15062
+ },
15063
+ {
15064
+ "epoch": 9.245548266166823,
15065
+ "grad_norm": 2.3464787006378174,
15066
+ "learning_rate": 8.844306466729148e-06,
15067
+ "loss": 0.0505,
15068
+ "step": 19730
15069
+ },
15070
+ {
15071
+ "epoch": 9.250234301780694,
15072
+ "grad_norm": 1.3109287023544312,
15073
+ "learning_rate": 8.843720712277415e-06,
15074
+ "loss": 0.0582,
15075
+ "step": 19740
15076
+ },
15077
+ {
15078
+ "epoch": 9.254920337394564,
15079
+ "grad_norm": 1.866816520690918,
15080
+ "learning_rate": 8.84313495782568e-06,
15081
+ "loss": 0.0482,
15082
+ "step": 19750
15083
+ },
15084
+ {
15085
+ "epoch": 9.254920337394564,
15086
+ "eval_loss": 0.03766845539212227,
15087
+ "eval_pearson_cosine": 0.786280047827276,
15088
+ "eval_pearson_dot": 0.6498320134943469,
15089
+ "eval_pearson_euclidean": 0.7306029375409793,
15090
+ "eval_pearson_manhattan": 0.7296493603800656,
15091
+ "eval_runtime": 40.2507,
15092
+ "eval_samples_per_second": 37.266,
15093
+ "eval_spearman_cosine": 0.7871053277749581,
15094
+ "eval_spearman_dot": 0.6689992229589644,
15095
+ "eval_spearman_euclidean": 0.7449412319412662,
15096
+ "eval_spearman_manhattan": 0.7442196282250385,
15097
+ "eval_steps_per_second": 37.266,
15098
+ "step": 19750
15099
+ },
15100
+ {
15101
+ "epoch": 9.259606373008435,
15102
+ "grad_norm": 1.386021614074707,
15103
+ "learning_rate": 8.842549203373947e-06,
15104
+ "loss": 0.0512,
15105
+ "step": 19760
15106
+ },
15107
+ {
15108
+ "epoch": 9.264292408622305,
15109
+ "grad_norm": 1.7170544862747192,
15110
+ "learning_rate": 8.841963448922212e-06,
15111
+ "loss": 0.0567,
15112
+ "step": 19770
15113
+ },
15114
+ {
15115
+ "epoch": 9.268978444236176,
15116
+ "grad_norm": 2.6358816623687744,
15117
+ "learning_rate": 8.841377694470479e-06,
15118
+ "loss": 0.0611,
15119
+ "step": 19780
15120
+ },
15121
+ {
15122
+ "epoch": 9.273664479850046,
15123
+ "grad_norm": 1.0627405643463135,
15124
+ "learning_rate": 8.840791940018744e-06,
15125
+ "loss": 0.0516,
15126
+ "step": 19790
15127
+ },
15128
+ {
15129
+ "epoch": 9.278350515463918,
15130
+ "grad_norm": 2.116197109222412,
15131
+ "learning_rate": 8.840206185567011e-06,
15132
+ "loss": 0.0442,
15133
+ "step": 19800
15134
+ },
15135
+ {
15136
+ "epoch": 9.283036551077789,
15137
+ "grad_norm": 1.0570743083953857,
15138
+ "learning_rate": 8.839620431115276e-06,
15139
+ "loss": 0.0517,
15140
+ "step": 19810
15141
+ },
15142
+ {
15143
+ "epoch": 9.28772258669166,
15144
+ "grad_norm": 0.9444879293441772,
15145
+ "learning_rate": 8.839034676663543e-06,
15146
+ "loss": 0.0427,
15147
+ "step": 19820
15148
+ },
15149
+ {
15150
+ "epoch": 9.29240862230553,
15151
+ "grad_norm": 0.820633590221405,
15152
+ "learning_rate": 8.83844892221181e-06,
15153
+ "loss": 0.0505,
15154
+ "step": 19830
15155
+ },
15156
+ {
15157
+ "epoch": 9.2970946579194,
15158
+ "grad_norm": 0.9164274334907532,
15159
+ "learning_rate": 8.837863167760075e-06,
15160
+ "loss": 0.0539,
15161
+ "step": 19840
15162
+ },
15163
+ {
15164
+ "epoch": 9.30178069353327,
15165
+ "grad_norm": 1.6659798622131348,
15166
+ "learning_rate": 8.837277413308342e-06,
15167
+ "loss": 0.055,
15168
+ "step": 19850
15169
+ },
15170
+ {
15171
+ "epoch": 9.306466729147141,
15172
+ "grad_norm": 1.224489450454712,
15173
+ "learning_rate": 8.836691658856607e-06,
15174
+ "loss": 0.0487,
15175
+ "step": 19860
15176
+ },
15177
+ {
15178
+ "epoch": 9.311152764761012,
15179
+ "grad_norm": 1.6015446186065674,
15180
+ "learning_rate": 8.836105904404874e-06,
15181
+ "loss": 0.0622,
15182
+ "step": 19870
15183
+ },
15184
+ {
15185
+ "epoch": 9.315838800374882,
15186
+ "grad_norm": 2.066589593887329,
15187
+ "learning_rate": 8.835520149953141e-06,
15188
+ "loss": 0.0562,
15189
+ "step": 19880
15190
+ },
15191
+ {
15192
+ "epoch": 9.320524835988753,
15193
+ "grad_norm": 1.8341182470321655,
15194
+ "learning_rate": 8.834934395501406e-06,
15195
+ "loss": 0.0414,
15196
+ "step": 19890
15197
+ },
15198
+ {
15199
+ "epoch": 9.325210871602625,
15200
+ "grad_norm": 2.1060688495635986,
15201
+ "learning_rate": 8.834348641049673e-06,
15202
+ "loss": 0.0423,
15203
+ "step": 19900
15204
+ },
15205
+ {
15206
+ "epoch": 9.329896907216495,
15207
+ "grad_norm": 2.0976791381835938,
15208
+ "learning_rate": 8.833762886597939e-06,
15209
+ "loss": 0.0562,
15210
+ "step": 19910
15211
+ },
15212
+ {
15213
+ "epoch": 9.334582942830366,
15214
+ "grad_norm": 1.7656900882720947,
15215
+ "learning_rate": 8.833177132146204e-06,
15216
+ "loss": 0.0454,
15217
+ "step": 19920
15218
+ },
15219
+ {
15220
+ "epoch": 9.339268978444236,
15221
+ "grad_norm": 0.9391831755638123,
15222
+ "learning_rate": 8.832591377694472e-06,
15223
+ "loss": 0.0471,
15224
+ "step": 19930
15225
+ },
15226
+ {
15227
+ "epoch": 9.343955014058107,
15228
+ "grad_norm": 1.8361108303070068,
15229
+ "learning_rate": 8.832005623242738e-06,
15230
+ "loss": 0.0521,
15231
+ "step": 19940
15232
+ },
15233
+ {
15234
+ "epoch": 9.348641049671977,
15235
+ "grad_norm": 1.4012130498886108,
15236
+ "learning_rate": 8.831419868791003e-06,
15237
+ "loss": 0.0476,
15238
+ "step": 19950
15239
+ },
15240
+ {
15241
+ "epoch": 9.353327085285848,
15242
+ "grad_norm": 1.4812968969345093,
15243
+ "learning_rate": 8.83083411433927e-06,
15244
+ "loss": 0.0356,
15245
+ "step": 19960
15246
+ },
15247
+ {
15248
+ "epoch": 9.358013120899718,
15249
+ "grad_norm": 1.4447283744812012,
15250
+ "learning_rate": 8.830248359887535e-06,
15251
+ "loss": 0.046,
15252
+ "step": 19970
15253
+ },
15254
+ {
15255
+ "epoch": 9.362699156513589,
15256
+ "grad_norm": 1.9198623895645142,
15257
+ "learning_rate": 8.829662605435802e-06,
15258
+ "loss": 0.0546,
15259
+ "step": 19980
15260
+ },
15261
+ {
15262
+ "epoch": 9.36738519212746,
15263
+ "grad_norm": 0.8466697335243225,
15264
+ "learning_rate": 8.829076850984069e-06,
15265
+ "loss": 0.0481,
15266
+ "step": 19990
15267
+ },
15268
+ {
15269
+ "epoch": 9.372071227741332,
15270
+ "grad_norm": 1.5158565044403076,
15271
+ "learning_rate": 8.828491096532334e-06,
15272
+ "loss": 0.0529,
15273
+ "step": 20000
15274
+ },
15275
+ {
15276
+ "epoch": 9.372071227741332,
15277
+ "eval_loss": 0.0377335324883461,
15278
+ "eval_pearson_cosine": 0.7872657190030239,
15279
+ "eval_pearson_dot": 0.6489881022917316,
15280
+ "eval_pearson_euclidean": 0.7290286852364005,
15281
+ "eval_pearson_manhattan": 0.7285143498985862,
15282
+ "eval_runtime": 39.7128,
15283
+ "eval_samples_per_second": 37.771,
15284
+ "eval_spearman_cosine": 0.7888105939241997,
15285
+ "eval_spearman_dot": 0.6689738777456538,
15286
+ "eval_spearman_euclidean": 0.7426040363283044,
15287
+ "eval_spearman_manhattan": 0.742345267890976,
15288
+ "eval_steps_per_second": 37.771,
15289
+ "step": 20000
15290
  }
15291
  ],
15292
  "logging_steps": 10,