CocoRoF committed
Commit 5c09222 · verified · 1 Parent(s): c6832e1

Training in progress, step 15000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e7faa7408a314eb81f420ce1f2568d726ce6012f44614f1f2a60a7364bd5ef3f
+ oid sha256:b233efdfedfad8f103bdeed119d35dcd099eba59c85ffbbf10546b2a64d3674e
  size 613004648
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ed1cf1b200aa0d0cd19c9252edcac54428c1ac0420162221afb866eff0c7bd48
+ oid sha256:d6cd415bf9e90b88d0f924f465a8f839a232fb228bedc13d8305dea0013598c0
  size 1226096954
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:96df60f7a61e3b5eb854abf0dcce2641f189b2c45835fcf9c1fef3b37282da2b
+ oid sha256:35a846db1f952a5c57e0f96eff0e1f51f9bc69325d1c9533532221ee67d3d2cc
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bdb91814ca594bcc53b1d6ad5131ca9e1395fe1b33192ef261102fa12fca871c
+ oid sha256:9ae2d4f55a59e8b5160b468e406d78254547d58c1b3eaf0e1797452533ce3e19
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 6.560449859418932,
+ "epoch": 7.029053420805998,
  "eval_steps": 250,
- "global_step": 14000,
+ "global_step": 15000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -10703,6 +10703,770 @@
  "eval_spearman_manhattan": 0.757795808702236,
  "eval_steps_per_second": 37.345,
  "step": 14000
+ },
+ {
+ "epoch": 6.565135895032802,
+ "grad_norm": 1.1771986484527588,
+ "learning_rate": 9.1793580131209e-06,
+ "loss": 0.0723,
+ "step": 14010
+ },
+ {
+ "epoch": 6.569821930646673,
+ "grad_norm": 3.105875253677368,
+ "learning_rate": 9.178772258669167e-06,
+ "loss": 0.0831,
+ "step": 14020
+ },
+ {
+ "epoch": 6.574507966260543,
+ "grad_norm": 2.368614673614502,
+ "learning_rate": 9.178186504217432e-06,
+ "loss": 0.0896,
+ "step": 14030
+ },
+ {
+ "epoch": 6.579194001874415,
+ "grad_norm": 1.0225666761398315,
+ "learning_rate": 9.1776007497657e-06,
+ "loss": 0.0773,
+ "step": 14040
+ },
+ {
+ "epoch": 6.583880037488285,
+ "grad_norm": 1.7535622119903564,
+ "learning_rate": 9.177014995313966e-06,
+ "loss": 0.0966,
+ "step": 14050
+ },
+ {
+ "epoch": 6.588566073102156,
+ "grad_norm": 1.9074852466583252,
+ "learning_rate": 9.176429240862231e-06,
+ "loss": 0.099,
+ "step": 14060
+ },
+ {
+ "epoch": 6.593252108716026,
+ "grad_norm": 1.9208943843841553,
+ "learning_rate": 9.175843486410497e-06,
+ "loss": 0.0829,
+ "step": 14070
+ },
+ {
+ "epoch": 6.597938144329897,
+ "grad_norm": 1.6164686679840088,
+ "learning_rate": 9.175257731958764e-06,
+ "loss": 0.0791,
+ "step": 14080
+ },
+ {
+ "epoch": 6.602624179943768,
+ "grad_norm": 2.0227229595184326,
+ "learning_rate": 9.17467197750703e-06,
+ "loss": 0.087,
+ "step": 14090
+ },
+ {
+ "epoch": 6.607310215557638,
+ "grad_norm": 2.0741658210754395,
+ "learning_rate": 9.174086223055296e-06,
+ "loss": 0.0832,
+ "step": 14100
+ },
+ {
+ "epoch": 6.611996251171509,
+ "grad_norm": 2.0126988887786865,
+ "learning_rate": 9.173500468603563e-06,
+ "loss": 0.0959,
+ "step": 14110
+ },
+ {
+ "epoch": 6.616682286785379,
+ "grad_norm": 2.8039979934692383,
+ "learning_rate": 9.172914714151828e-06,
+ "loss": 0.0876,
+ "step": 14120
+ },
+ {
+ "epoch": 6.62136832239925,
+ "grad_norm": 2.453516721725464,
+ "learning_rate": 9.172328959700095e-06,
+ "loss": 0.0946,
+ "step": 14130
+ },
+ {
+ "epoch": 6.626054358013121,
+ "grad_norm": 1.891790509223938,
+ "learning_rate": 9.17174320524836e-06,
+ "loss": 0.0995,
+ "step": 14140
+ },
+ {
+ "epoch": 6.630740393626992,
+ "grad_norm": 2.185473680496216,
+ "learning_rate": 9.171157450796627e-06,
+ "loss": 0.0766,
+ "step": 14150
+ },
+ {
+ "epoch": 6.635426429240862,
+ "grad_norm": 1.8330421447753906,
+ "learning_rate": 9.170571696344894e-06,
+ "loss": 0.0944,
+ "step": 14160
+ },
+ {
+ "epoch": 6.640112464854733,
+ "grad_norm": 1.9126405715942383,
+ "learning_rate": 9.169985941893159e-06,
+ "loss": 0.0903,
+ "step": 14170
+ },
+ {
+ "epoch": 6.644798500468603,
+ "grad_norm": 2.1286425590515137,
+ "learning_rate": 9.169400187441426e-06,
+ "loss": 0.0909,
+ "step": 14180
+ },
+ {
+ "epoch": 6.649484536082475,
+ "grad_norm": 2.539160966873169,
+ "learning_rate": 9.168814432989691e-06,
+ "loss": 0.0842,
+ "step": 14190
+ },
+ {
+ "epoch": 6.654170571696345,
+ "grad_norm": 2.383507490158081,
+ "learning_rate": 9.168228678537958e-06,
+ "loss": 0.0884,
+ "step": 14200
+ },
+ {
+ "epoch": 6.658856607310216,
+ "grad_norm": 1.336970567703247,
+ "learning_rate": 9.167642924086225e-06,
+ "loss": 0.0832,
+ "step": 14210
+ },
+ {
+ "epoch": 6.663542642924086,
+ "grad_norm": 1.9437108039855957,
+ "learning_rate": 9.16705716963449e-06,
+ "loss": 0.0939,
+ "step": 14220
+ },
+ {
+ "epoch": 6.6682286785379565,
+ "grad_norm": 1.757957100868225,
+ "learning_rate": 9.166471415182755e-06,
+ "loss": 0.0789,
+ "step": 14230
+ },
+ {
+ "epoch": 6.672914714151828,
+ "grad_norm": 1.8997693061828613,
+ "learning_rate": 9.165885660731022e-06,
+ "loss": 0.0871,
+ "step": 14240
+ },
+ {
+ "epoch": 6.677600749765698,
+ "grad_norm": 2.3691928386688232,
+ "learning_rate": 9.165299906279288e-06,
+ "loss": 0.0919,
+ "step": 14250
+ },
+ {
+ "epoch": 6.677600749765698,
+ "eval_loss": 0.04086451604962349,
+ "eval_pearson_cosine": 0.7820280058953486,
+ "eval_pearson_dot": 0.6341032311060246,
+ "eval_pearson_euclidean": 0.7402547311501273,
+ "eval_pearson_manhattan": 0.7402377003069951,
+ "eval_runtime": 39.165,
+ "eval_samples_per_second": 38.3,
+ "eval_spearman_cosine": 0.7858084087028067,
+ "eval_spearman_dot": 0.6459085165502623,
+ "eval_spearman_euclidean": 0.7544028045089441,
+ "eval_spearman_manhattan": 0.7545435768510885,
+ "eval_steps_per_second": 38.3,
+ "step": 14250
+ },
+ {
+ "epoch": 6.682286785379569,
+ "grad_norm": 1.7762689590454102,
+ "learning_rate": 9.164714151827554e-06,
+ "loss": 0.0917,
+ "step": 14260
+ },
+ {
+ "epoch": 6.686972820993439,
+ "grad_norm": 2.296797037124634,
+ "learning_rate": 9.164128397375821e-06,
+ "loss": 0.0843,
+ "step": 14270
+ },
+ {
+ "epoch": 6.69165885660731,
+ "grad_norm": 2.3844399452209473,
+ "learning_rate": 9.163542642924087e-06,
+ "loss": 0.0987,
+ "step": 14280
+ },
+ {
+ "epoch": 6.696344892221181,
+ "grad_norm": 1.65080988407135,
+ "learning_rate": 9.162956888472354e-06,
+ "loss": 0.0644,
+ "step": 14290
+ },
+ {
+ "epoch": 6.701030927835052,
+ "grad_norm": 2.429184913635254,
+ "learning_rate": 9.162371134020619e-06,
+ "loss": 0.0913,
+ "step": 14300
+ },
+ {
+ "epoch": 6.705716963448922,
+ "grad_norm": 2.7205028533935547,
+ "learning_rate": 9.161785379568886e-06,
+ "loss": 0.0955,
+ "step": 14310
+ },
+ {
+ "epoch": 6.710402999062793,
+ "grad_norm": 1.9049710035324097,
+ "learning_rate": 9.161199625117153e-06,
+ "loss": 0.0943,
+ "step": 14320
+ },
+ {
+ "epoch": 6.715089034676663,
+ "grad_norm": 1.4774081707000732,
+ "learning_rate": 9.160613870665418e-06,
+ "loss": 0.0909,
+ "step": 14330
+ },
+ {
+ "epoch": 6.719775070290535,
+ "grad_norm": 2.102787971496582,
+ "learning_rate": 9.160028116213685e-06,
+ "loss": 0.0757,
+ "step": 14340
+ },
+ {
+ "epoch": 6.724461105904405,
+ "grad_norm": 1.7760144472122192,
+ "learning_rate": 9.15944236176195e-06,
+ "loss": 0.0949,
+ "step": 14350
+ },
+ {
+ "epoch": 6.7291471415182755,
+ "grad_norm": 2.6216742992401123,
+ "learning_rate": 9.158856607310215e-06,
+ "loss": 0.0997,
+ "step": 14360
+ },
+ {
+ "epoch": 6.733833177132146,
+ "grad_norm": 1.8759592771530151,
+ "learning_rate": 9.158270852858484e-06,
+ "loss": 0.0898,
+ "step": 14370
+ },
+ {
+ "epoch": 6.7385192127460165,
+ "grad_norm": 1.7543883323669434,
+ "learning_rate": 9.157685098406749e-06,
+ "loss": 0.0913,
+ "step": 14380
+ },
+ {
+ "epoch": 6.743205248359888,
+ "grad_norm": 2.2106900215148926,
+ "learning_rate": 9.157099343955014e-06,
+ "loss": 0.0996,
+ "step": 14390
+ },
+ {
+ "epoch": 6.747891283973758,
+ "grad_norm": 2.408613443374634,
+ "learning_rate": 9.156513589503281e-06,
+ "loss": 0.0988,
+ "step": 14400
+ },
+ {
+ "epoch": 6.752577319587629,
+ "grad_norm": 3.430854082107544,
+ "learning_rate": 9.155927835051546e-06,
+ "loss": 0.0998,
+ "step": 14410
+ },
+ {
+ "epoch": 6.757263355201499,
+ "grad_norm": 2.024101495742798,
+ "learning_rate": 9.155342080599813e-06,
+ "loss": 0.0784,
+ "step": 14420
+ },
+ {
+ "epoch": 6.76194939081537,
+ "grad_norm": 1.9962611198425293,
+ "learning_rate": 9.15475632614808e-06,
+ "loss": 0.0778,
+ "step": 14430
+ },
+ {
+ "epoch": 6.766635426429241,
+ "grad_norm": 2.7750437259674072,
+ "learning_rate": 9.154170571696345e-06,
+ "loss": 0.0787,
+ "step": 14440
+ },
+ {
+ "epoch": 6.771321462043112,
+ "grad_norm": 2.2293503284454346,
+ "learning_rate": 9.153584817244612e-06,
+ "loss": 0.0866,
+ "step": 14450
+ },
+ {
+ "epoch": 6.776007497656982,
+ "grad_norm": 2.104199171066284,
+ "learning_rate": 9.152999062792878e-06,
+ "loss": 0.0886,
+ "step": 14460
+ },
+ {
+ "epoch": 6.780693533270853,
+ "grad_norm": 2.1825919151306152,
+ "learning_rate": 9.152413308341144e-06,
+ "loss": 0.0881,
+ "step": 14470
+ },
+ {
+ "epoch": 6.785379568884723,
+ "grad_norm": 2.0743556022644043,
+ "learning_rate": 9.151827553889411e-06,
+ "loss": 0.0816,
+ "step": 14480
+ },
+ {
+ "epoch": 6.7900656044985945,
+ "grad_norm": 1.4492499828338623,
+ "learning_rate": 9.151241799437677e-06,
+ "loss": 0.0798,
+ "step": 14490
+ },
+ {
+ "epoch": 6.794751640112465,
+ "grad_norm": 2.3385043144226074,
+ "learning_rate": 9.150656044985944e-06,
+ "loss": 0.0784,
+ "step": 14500
+ },
+ {
+ "epoch": 6.794751640112465,
+ "eval_loss": 0.040780164301395416,
+ "eval_pearson_cosine": 0.7793833022968641,
+ "eval_pearson_dot": 0.6305632848645555,
+ "eval_pearson_euclidean": 0.7311736352551961,
+ "eval_pearson_manhattan": 0.7308022032341484,
+ "eval_runtime": 40.6522,
+ "eval_samples_per_second": 36.898,
+ "eval_spearman_cosine": 0.7838836973690695,
+ "eval_spearman_dot": 0.6427126264330253,
+ "eval_spearman_euclidean": 0.7493899504087712,
+ "eval_spearman_manhattan": 0.7495131408878735,
+ "eval_steps_per_second": 36.898,
+ "step": 14500
+ },
+ {
+ "epoch": 6.7994376757263355,
+ "grad_norm": 2.441380023956299,
+ "learning_rate": 9.150070290534209e-06,
+ "loss": 0.0866,
+ "step": 14510
+ },
+ {
+ "epoch": 6.804123711340206,
+ "grad_norm": 2.3444080352783203,
+ "learning_rate": 9.149484536082474e-06,
+ "loss": 0.1104,
+ "step": 14520
+ },
+ {
+ "epoch": 6.8088097469540765,
+ "grad_norm": 2.7428460121154785,
+ "learning_rate": 9.148898781630741e-06,
+ "loss": 0.1116,
+ "step": 14530
+ },
+ {
+ "epoch": 6.813495782567948,
+ "grad_norm": 2.756432294845581,
+ "learning_rate": 9.148313027179008e-06,
+ "loss": 0.0942,
+ "step": 14540
+ },
+ {
+ "epoch": 6.818181818181818,
+ "grad_norm": 2.891023635864258,
+ "learning_rate": 9.147727272727273e-06,
+ "loss": 0.1101,
+ "step": 14550
+ },
+ {
+ "epoch": 6.822867853795689,
+ "grad_norm": 2.115098476409912,
+ "learning_rate": 9.14714151827554e-06,
+ "loss": 0.0896,
+ "step": 14560
+ },
+ {
+ "epoch": 6.827553889409559,
+ "grad_norm": 1.161385178565979,
+ "learning_rate": 9.146555763823805e-06,
+ "loss": 0.0881,
+ "step": 14570
+ },
+ {
+ "epoch": 6.83223992502343,
+ "grad_norm": 2.266988754272461,
+ "learning_rate": 9.145970009372072e-06,
+ "loss": 0.0904,
+ "step": 14580
+ },
+ {
+ "epoch": 6.836925960637301,
+ "grad_norm": 2.111978769302368,
+ "learning_rate": 9.145384254920339e-06,
+ "loss": 0.0864,
+ "step": 14590
+ },
+ {
+ "epoch": 6.841611996251172,
+ "grad_norm": 2.025771141052246,
+ "learning_rate": 9.144798500468604e-06,
+ "loss": 0.0947,
+ "step": 14600
+ },
+ {
+ "epoch": 6.846298031865042,
+ "grad_norm": 1.6989368200302124,
+ "learning_rate": 9.144212746016871e-06,
+ "loss": 0.0906,
+ "step": 14610
+ },
+ {
+ "epoch": 6.850984067478913,
+ "grad_norm": 2.4871666431427,
+ "learning_rate": 9.143626991565136e-06,
+ "loss": 0.0969,
+ "step": 14620
+ },
+ {
+ "epoch": 6.855670103092783,
+ "grad_norm": 2.155759572982788,
+ "learning_rate": 9.143041237113403e-06,
+ "loss": 0.0978,
+ "step": 14630
+ },
+ {
+ "epoch": 6.8603561387066545,
+ "grad_norm": 3.0879483222961426,
+ "learning_rate": 9.142455482661668e-06,
+ "loss": 0.0928,
+ "step": 14640
+ },
+ {
+ "epoch": 6.865042174320525,
+ "grad_norm": 1.6940726041793823,
+ "learning_rate": 9.141869728209935e-06,
+ "loss": 0.0879,
+ "step": 14650
+ },
+ {
+ "epoch": 6.8697282099343955,
+ "grad_norm": 2.233914613723755,
+ "learning_rate": 9.141283973758202e-06,
+ "loss": 0.0894,
+ "step": 14660
+ },
+ {
+ "epoch": 6.874414245548266,
+ "grad_norm": 2.0306718349456787,
+ "learning_rate": 9.140698219306468e-06,
+ "loss": 0.0844,
+ "step": 14670
+ },
+ {
+ "epoch": 6.8791002811621365,
+ "grad_norm": 1.8942639827728271,
+ "learning_rate": 9.140112464854733e-06,
+ "loss": 0.0817,
+ "step": 14680
+ },
+ {
+ "epoch": 6.883786316776008,
+ "grad_norm": 2.03265643119812,
+ "learning_rate": 9.139526710403e-06,
+ "loss": 0.0852,
+ "step": 14690
+ },
+ {
+ "epoch": 6.888472352389878,
+ "grad_norm": 1.7808202505111694,
+ "learning_rate": 9.138940955951267e-06,
+ "loss": 0.0896,
+ "step": 14700
+ },
+ {
+ "epoch": 6.893158388003749,
+ "grad_norm": 2.668078660964966,
+ "learning_rate": 9.138355201499532e-06,
+ "loss": 0.0973,
+ "step": 14710
+ },
+ {
+ "epoch": 6.897844423617619,
+ "grad_norm": 1.402289867401123,
+ "learning_rate": 9.137769447047799e-06,
+ "loss": 0.1026,
+ "step": 14720
+ },
+ {
+ "epoch": 6.90253045923149,
+ "grad_norm": 2.591413974761963,
+ "learning_rate": 9.137183692596064e-06,
+ "loss": 0.1042,
+ "step": 14730
+ },
+ {
+ "epoch": 6.907216494845361,
+ "grad_norm": 2.172842264175415,
+ "learning_rate": 9.136597938144331e-06,
+ "loss": 0.0896,
+ "step": 14740
+ },
+ {
+ "epoch": 6.911902530459232,
+ "grad_norm": 2.8410751819610596,
+ "learning_rate": 9.136012183692596e-06,
+ "loss": 0.0821,
+ "step": 14750
+ },
+ {
+ "epoch": 6.911902530459232,
+ "eval_loss": 0.04055028408765793,
+ "eval_pearson_cosine": 0.7788665525758081,
+ "eval_pearson_dot": 0.6376645263249117,
+ "eval_pearson_euclidean": 0.727043862062235,
+ "eval_pearson_manhattan": 0.7265471618369332,
+ "eval_runtime": 40.6024,
+ "eval_samples_per_second": 36.944,
+ "eval_spearman_cosine": 0.7822270840908377,
+ "eval_spearman_dot": 0.6567383479059669,
+ "eval_spearman_euclidean": 0.744619349149987,
+ "eval_spearman_manhattan": 0.7446135946974944,
+ "eval_steps_per_second": 36.944,
+ "step": 14750
+ },
+ {
+ "epoch": 6.916588566073102,
+ "grad_norm": 2.586047649383545,
+ "learning_rate": 9.135426429240863e-06,
+ "loss": 0.0965,
+ "step": 14760
+ },
+ {
+ "epoch": 6.921274601686973,
+ "grad_norm": 1.9667673110961914,
+ "learning_rate": 9.13484067478913e-06,
+ "loss": 0.0913,
+ "step": 14770
+ },
+ {
+ "epoch": 6.925960637300843,
+ "grad_norm": 2.6598875522613525,
+ "learning_rate": 9.134254920337395e-06,
+ "loss": 0.0863,
+ "step": 14780
+ },
+ {
+ "epoch": 6.9306466729147145,
+ "grad_norm": 1.5291812419891357,
+ "learning_rate": 9.133669165885662e-06,
+ "loss": 0.0699,
+ "step": 14790
+ },
+ {
+ "epoch": 6.935332708528585,
+ "grad_norm": 1.8771485090255737,
+ "learning_rate": 9.133083411433927e-06,
+ "loss": 0.0836,
+ "step": 14800
+ },
+ {
+ "epoch": 6.9400187441424555,
+ "grad_norm": 1.7967191934585571,
+ "learning_rate": 9.132497656982192e-06,
+ "loss": 0.0833,
+ "step": 14810
+ },
+ {
+ "epoch": 6.944704779756326,
+ "grad_norm": 1.3904474973678589,
+ "learning_rate": 9.131911902530461e-06,
+ "loss": 0.1147,
+ "step": 14820
+ },
+ {
+ "epoch": 6.949390815370196,
+ "grad_norm": 2.315178871154785,
+ "learning_rate": 9.131326148078726e-06,
+ "loss": 0.1054,
+ "step": 14830
+ },
+ {
+ "epoch": 6.954076850984068,
+ "grad_norm": 2.0457489490509033,
+ "learning_rate": 9.130740393626992e-06,
+ "loss": 0.0833,
+ "step": 14840
+ },
+ {
+ "epoch": 6.958762886597938,
+ "grad_norm": 1.5070949792861938,
+ "learning_rate": 9.130154639175258e-06,
+ "loss": 0.0737,
+ "step": 14850
+ },
+ {
+ "epoch": 6.963448922211809,
+ "grad_norm": 2.301478862762451,
+ "learning_rate": 9.129568884723524e-06,
+ "loss": 0.0779,
+ "step": 14860
+ },
+ {
+ "epoch": 6.968134957825679,
+ "grad_norm": 1.9525973796844482,
+ "learning_rate": 9.12898313027179e-06,
+ "loss": 0.0829,
+ "step": 14870
+ },
+ {
+ "epoch": 6.97282099343955,
+ "grad_norm": 2.3320276737213135,
+ "learning_rate": 9.128397375820058e-06,
+ "loss": 0.09,
+ "step": 14880
+ },
+ {
+ "epoch": 6.977507029053421,
+ "grad_norm": 2.358041286468506,
+ "learning_rate": 9.127811621368323e-06,
+ "loss": 0.0848,
+ "step": 14890
+ },
+ {
+ "epoch": 6.982193064667292,
+ "grad_norm": 2.0248255729675293,
+ "learning_rate": 9.12722586691659e-06,
+ "loss": 0.1023,
+ "step": 14900
+ },
+ {
+ "epoch": 6.986879100281162,
+ "grad_norm": 2.4078421592712402,
+ "learning_rate": 9.126640112464855e-06,
+ "loss": 0.0886,
+ "step": 14910
+ },
+ {
+ "epoch": 6.991565135895033,
+ "grad_norm": 2.0203652381896973,
+ "learning_rate": 9.126054358013122e-06,
+ "loss": 0.0989,
+ "step": 14920
+ },
+ {
+ "epoch": 6.996251171508904,
+ "grad_norm": 2.0305638313293457,
+ "learning_rate": 9.125468603561389e-06,
+ "loss": 0.0854,
+ "step": 14930
+ },
+ {
+ "epoch": 7.0009372071227745,
+ "grad_norm": 2.3170547485351562,
+ "learning_rate": 9.124882849109654e-06,
+ "loss": 0.0888,
+ "step": 14940
+ },
+ {
+ "epoch": 7.005623242736645,
+ "grad_norm": 1.3576775789260864,
+ "learning_rate": 9.124297094657921e-06,
+ "loss": 0.0756,
+ "step": 14950
+ },
+ {
+ "epoch": 7.010309278350515,
+ "grad_norm": 2.177962303161621,
+ "learning_rate": 9.123711340206186e-06,
+ "loss": 0.0679,
+ "step": 14960
+ },
+ {
+ "epoch": 7.014995313964386,
+ "grad_norm": 1.6852316856384277,
+ "learning_rate": 9.123125585754451e-06,
+ "loss": 0.0635,
+ "step": 14970
+ },
+ {
+ "epoch": 7.019681349578256,
+ "grad_norm": 2.245973825454712,
+ "learning_rate": 9.12253983130272e-06,
+ "loss": 0.081,
+ "step": 14980
+ },
+ {
+ "epoch": 7.024367385192128,
+ "grad_norm": 1.2212837934494019,
+ "learning_rate": 9.121954076850985e-06,
+ "loss": 0.0684,
+ "step": 14990
+ },
+ {
+ "epoch": 7.029053420805998,
+ "grad_norm": 2.061438798904419,
+ "learning_rate": 9.12136832239925e-06,
+ "loss": 0.0792,
+ "step": 15000
+ },
+ {
+ "epoch": 7.029053420805998,
+ "eval_loss": 0.04008892923593521,
+ "eval_pearson_cosine": 0.7799962553837254,
+ "eval_pearson_dot": 0.6338093099514381,
+ "eval_pearson_euclidean": 0.740542330523235,
+ "eval_pearson_manhattan": 0.7397823209884535,
+ "eval_runtime": 39.3867,
+ "eval_samples_per_second": 38.084,
+ "eval_spearman_cosine": 0.7833374621050089,
+ "eval_spearman_dot": 0.646744349870265,
+ "eval_spearman_euclidean": 0.7572153670081455,
+ "eval_spearman_manhattan": 0.7568637419859118,
+ "eval_steps_per_second": 38.084,
+ "step": 15000
+ },
  }
  ],
  "logging_steps": 10,