CocoRoF commited on
Commit
f48c3fa
·
verified ·
1 Parent(s): 83112d9

Training in progress, step 19000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ca95a7948bce19be5449f8781c37f0268eecbc454dcb50de5ef8e89c3d9a4e6
3
  size 613004648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fd5c90298fb3b8e4b3bfc5252ca67d39257c11142359692b801557f737b7e42
3
  size 613004648
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:667a4ed3665b904cc6a25c6508c89fb468bc4b1b80e08cd26eb7f6e936a1d8ff
3
  size 1226096954
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dde98893d5f526a3fd3a3e2c8cc5d0c5ec7ef3827d46a4ac82be414c5ffde16
3
  size 1226096954
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b67589c462b9c803b3450b2a56b26bd15fd2aad689878137f6e7e3b31569b4d3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a62b565b93cfaa85c1ff8e14ede1dbb6d31acf0d0ff726cbda86bec73b0dea2e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e87f5dbc951603cbe6f4d5f5d51c8e0ef8863d7cd661e58ac58827859c30521
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ad567ab1c91260dc0b589aab08ed3b669be820ef88836c60f94ca1975b277c3
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.434864104967197,
5
  "eval_steps": 250,
6
- "global_step": 18000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -13759,6 +13759,770 @@
13759
  "eval_spearman_manhattan": 0.7537307168421792,
13760
  "eval_steps_per_second": 35.48,
13761
  "step": 18000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13762
  }
13763
  ],
13764
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.903467666354265,
5
  "eval_steps": 250,
6
+ "global_step": 19000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
13759
  "eval_spearman_manhattan": 0.7537307168421792,
13760
  "eval_steps_per_second": 35.48,
13761
  "step": 18000
13762
+ },
13763
+ {
13764
+ "epoch": 8.43955014058107,
13765
+ "grad_norm": 0.9349134564399719,
13766
+ "learning_rate": 8.945056232427367e-06,
13767
+ "loss": 0.0479,
13768
+ "step": 18010
13769
+ },
13770
+ {
13771
+ "epoch": 8.44423617619494,
13772
+ "grad_norm": 0.9666185975074768,
13773
+ "learning_rate": 8.944470477975634e-06,
13774
+ "loss": 0.0595,
13775
+ "step": 18020
13776
+ },
13777
+ {
13778
+ "epoch": 8.44892221180881,
13779
+ "grad_norm": 2.2687034606933594,
13780
+ "learning_rate": 8.943884723523899e-06,
13781
+ "loss": 0.055,
13782
+ "step": 18030
13783
+ },
13784
+ {
13785
+ "epoch": 8.45360824742268,
13786
+ "grad_norm": 1.9229964017868042,
13787
+ "learning_rate": 8.943298969072166e-06,
13788
+ "loss": 0.0602,
13789
+ "step": 18040
13790
+ },
13791
+ {
13792
+ "epoch": 8.458294283036551,
13793
+ "grad_norm": 2.1603922843933105,
13794
+ "learning_rate": 8.942713214620433e-06,
13795
+ "loss": 0.0736,
13796
+ "step": 18050
13797
+ },
13798
+ {
13799
+ "epoch": 8.462980318650422,
13800
+ "grad_norm": 2.0796990394592285,
13801
+ "learning_rate": 8.942127460168698e-06,
13802
+ "loss": 0.0569,
13803
+ "step": 18060
13804
+ },
13805
+ {
13806
+ "epoch": 8.467666354264292,
13807
+ "grad_norm": 1.9972143173217773,
13808
+ "learning_rate": 8.941541705716963e-06,
13809
+ "loss": 0.0678,
13810
+ "step": 18070
13811
+ },
13812
+ {
13813
+ "epoch": 8.472352389878163,
13814
+ "grad_norm": 0.865214467048645,
13815
+ "learning_rate": 8.94095595126523e-06,
13816
+ "loss": 0.0627,
13817
+ "step": 18080
13818
+ },
13819
+ {
13820
+ "epoch": 8.477038425492033,
13821
+ "grad_norm": 2.142777442932129,
13822
+ "learning_rate": 8.940370196813497e-06,
13823
+ "loss": 0.0674,
13824
+ "step": 18090
13825
+ },
13826
+ {
13827
+ "epoch": 8.481724461105905,
13828
+ "grad_norm": 1.4264150857925415,
13829
+ "learning_rate": 8.939784442361762e-06,
13830
+ "loss": 0.0671,
13831
+ "step": 18100
13832
+ },
13833
+ {
13834
+ "epoch": 8.486410496719776,
13835
+ "grad_norm": 0.7674472332000732,
13836
+ "learning_rate": 8.93919868791003e-06,
13837
+ "loss": 0.0559,
13838
+ "step": 18110
13839
+ },
13840
+ {
13841
+ "epoch": 8.491096532333646,
13842
+ "grad_norm": 0.81045001745224,
13843
+ "learning_rate": 8.938612933458294e-06,
13844
+ "loss": 0.0587,
13845
+ "step": 18120
13846
+ },
13847
+ {
13848
+ "epoch": 8.495782567947517,
13849
+ "grad_norm": 1.4473369121551514,
13850
+ "learning_rate": 8.938027179006561e-06,
13851
+ "loss": 0.0613,
13852
+ "step": 18130
13853
+ },
13854
+ {
13855
+ "epoch": 8.500468603561387,
13856
+ "grad_norm": 1.741360068321228,
13857
+ "learning_rate": 8.937441424554827e-06,
13858
+ "loss": 0.0638,
13859
+ "step": 18140
13860
+ },
13861
+ {
13862
+ "epoch": 8.505154639175258,
13863
+ "grad_norm": 2.0288760662078857,
13864
+ "learning_rate": 8.936855670103094e-06,
13865
+ "loss": 0.0586,
13866
+ "step": 18150
13867
+ },
13868
+ {
13869
+ "epoch": 8.509840674789128,
13870
+ "grad_norm": 2.380078077316284,
13871
+ "learning_rate": 8.93626991565136e-06,
13872
+ "loss": 0.0612,
13873
+ "step": 18160
13874
+ },
13875
+ {
13876
+ "epoch": 8.514526710402999,
13877
+ "grad_norm": 1.9383304119110107,
13878
+ "learning_rate": 8.935684161199626e-06,
13879
+ "loss": 0.0601,
13880
+ "step": 18170
13881
+ },
13882
+ {
13883
+ "epoch": 8.51921274601687,
13884
+ "grad_norm": 1.2531105279922485,
13885
+ "learning_rate": 8.935098406747893e-06,
13886
+ "loss": 0.0719,
13887
+ "step": 18180
13888
+ },
13889
+ {
13890
+ "epoch": 8.52389878163074,
13891
+ "grad_norm": 0.9482662081718445,
13892
+ "learning_rate": 8.934512652296158e-06,
13893
+ "loss": 0.0546,
13894
+ "step": 18190
13895
+ },
13896
+ {
13897
+ "epoch": 8.52858481724461,
13898
+ "grad_norm": 1.4870429039001465,
13899
+ "learning_rate": 8.933926897844423e-06,
13900
+ "loss": 0.0568,
13901
+ "step": 18200
13902
+ },
13903
+ {
13904
+ "epoch": 8.533270852858482,
13905
+ "grad_norm": 1.6849005222320557,
13906
+ "learning_rate": 8.933341143392692e-06,
13907
+ "loss": 0.0555,
13908
+ "step": 18210
13909
+ },
13910
+ {
13911
+ "epoch": 8.537956888472353,
13912
+ "grad_norm": 1.955870270729065,
13913
+ "learning_rate": 8.932755388940957e-06,
13914
+ "loss": 0.0689,
13915
+ "step": 18220
13916
+ },
13917
+ {
13918
+ "epoch": 8.542642924086223,
13919
+ "grad_norm": 1.697548747062683,
13920
+ "learning_rate": 8.932169634489222e-06,
13921
+ "loss": 0.056,
13922
+ "step": 18230
13923
+ },
13924
+ {
13925
+ "epoch": 8.547328959700094,
13926
+ "grad_norm": 1.673592209815979,
13927
+ "learning_rate": 8.931583880037489e-06,
13928
+ "loss": 0.0705,
13929
+ "step": 18240
13930
+ },
13931
+ {
13932
+ "epoch": 8.552014995313964,
13933
+ "grad_norm": 1.5452814102172852,
13934
+ "learning_rate": 8.930998125585754e-06,
13935
+ "loss": 0.0568,
13936
+ "step": 18250
13937
+ },
13938
+ {
13939
+ "epoch": 8.552014995313964,
13940
+ "eval_loss": 0.038943566381931305,
13941
+ "eval_pearson_cosine": 0.7825822620756648,
13942
+ "eval_pearson_dot": 0.6378466620068579,
13943
+ "eval_pearson_euclidean": 0.736198412680281,
13944
+ "eval_pearson_manhattan": 0.7358221479501772,
13945
+ "eval_runtime": 39.5923,
13946
+ "eval_samples_per_second": 37.886,
13947
+ "eval_spearman_cosine": 0.7830920224286129,
13948
+ "eval_spearman_dot": 0.6535736820096772,
13949
+ "eval_spearman_euclidean": 0.7509264123559705,
13950
+ "eval_spearman_manhattan": 0.7510068056516,
13951
+ "eval_steps_per_second": 37.886,
13952
+ "step": 18250
13953
+ },
13954
+ {
13955
+ "epoch": 8.556701030927835,
13956
+ "grad_norm": 2.1532504558563232,
13957
+ "learning_rate": 8.930412371134021e-06,
13958
+ "loss": 0.0737,
13959
+ "step": 18260
13960
+ },
13961
+ {
13962
+ "epoch": 8.561387066541705,
13963
+ "grad_norm": 1.189831256866455,
13964
+ "learning_rate": 8.929826616682288e-06,
13965
+ "loss": 0.0641,
13966
+ "step": 18270
13967
+ },
13968
+ {
13969
+ "epoch": 8.566073102155576,
13970
+ "grad_norm": 1.0703136920928955,
13971
+ "learning_rate": 8.929240862230553e-06,
13972
+ "loss": 0.0597,
13973
+ "step": 18280
13974
+ },
13975
+ {
13976
+ "epoch": 8.570759137769446,
13977
+ "grad_norm": 1.7828891277313232,
13978
+ "learning_rate": 8.92865510777882e-06,
13979
+ "loss": 0.056,
13980
+ "step": 18290
13981
+ },
13982
+ {
13983
+ "epoch": 8.575445173383319,
13984
+ "grad_norm": 1.6652967929840088,
13985
+ "learning_rate": 8.928069353327085e-06,
13986
+ "loss": 0.0587,
13987
+ "step": 18300
13988
+ },
13989
+ {
13990
+ "epoch": 8.580131208997189,
13991
+ "grad_norm": 1.5879887342453003,
13992
+ "learning_rate": 8.927483598875352e-06,
13993
+ "loss": 0.0652,
13994
+ "step": 18310
13995
+ },
13996
+ {
13997
+ "epoch": 8.58481724461106,
13998
+ "grad_norm": 1.400453805923462,
13999
+ "learning_rate": 8.92689784442362e-06,
14000
+ "loss": 0.0589,
14001
+ "step": 18320
14002
+ },
14003
+ {
14004
+ "epoch": 8.58950328022493,
14005
+ "grad_norm": 1.2532896995544434,
14006
+ "learning_rate": 8.926312089971884e-06,
14007
+ "loss": 0.0538,
14008
+ "step": 18330
14009
+ },
14010
+ {
14011
+ "epoch": 8.5941893158388,
14012
+ "grad_norm": 1.3725074529647827,
14013
+ "learning_rate": 8.925726335520151e-06,
14014
+ "loss": 0.0458,
14015
+ "step": 18340
14016
+ },
14017
+ {
14018
+ "epoch": 8.598875351452671,
14019
+ "grad_norm": 0.8545303344726562,
14020
+ "learning_rate": 8.925140581068417e-06,
14021
+ "loss": 0.054,
14022
+ "step": 18350
14023
+ },
14024
+ {
14025
+ "epoch": 8.603561387066541,
14026
+ "grad_norm": 2.2644894123077393,
14027
+ "learning_rate": 8.924554826616682e-06,
14028
+ "loss": 0.0727,
14029
+ "step": 18360
14030
+ },
14031
+ {
14032
+ "epoch": 8.608247422680412,
14033
+ "grad_norm": 2.0160939693450928,
14034
+ "learning_rate": 8.92396907216495e-06,
14035
+ "loss": 0.0716,
14036
+ "step": 18370
14037
+ },
14038
+ {
14039
+ "epoch": 8.612933458294282,
14040
+ "grad_norm": 1.2805579900741577,
14041
+ "learning_rate": 8.923383317713216e-06,
14042
+ "loss": 0.06,
14043
+ "step": 18380
14044
+ },
14045
+ {
14046
+ "epoch": 8.617619493908153,
14047
+ "grad_norm": 2.359361410140991,
14048
+ "learning_rate": 8.922797563261481e-06,
14049
+ "loss": 0.0568,
14050
+ "step": 18390
14051
+ },
14052
+ {
14053
+ "epoch": 8.622305529522023,
14054
+ "grad_norm": 2.423886775970459,
14055
+ "learning_rate": 8.922211808809748e-06,
14056
+ "loss": 0.0732,
14057
+ "step": 18400
14058
+ },
14059
+ {
14060
+ "epoch": 8.626991565135896,
14061
+ "grad_norm": 1.2898362874984741,
14062
+ "learning_rate": 8.921626054358013e-06,
14063
+ "loss": 0.0566,
14064
+ "step": 18410
14065
+ },
14066
+ {
14067
+ "epoch": 8.631677600749766,
14068
+ "grad_norm": 0.6553903818130493,
14069
+ "learning_rate": 8.92104029990628e-06,
14070
+ "loss": 0.0507,
14071
+ "step": 18420
14072
+ },
14073
+ {
14074
+ "epoch": 8.636363636363637,
14075
+ "grad_norm": 1.3605937957763672,
14076
+ "learning_rate": 8.920454545454547e-06,
14077
+ "loss": 0.0581,
14078
+ "step": 18430
14079
+ },
14080
+ {
14081
+ "epoch": 8.641049671977507,
14082
+ "grad_norm": 1.9910422563552856,
14083
+ "learning_rate": 8.919868791002812e-06,
14084
+ "loss": 0.0566,
14085
+ "step": 18440
14086
+ },
14087
+ {
14088
+ "epoch": 8.645735707591378,
14089
+ "grad_norm": 2.0107765197753906,
14090
+ "learning_rate": 8.919283036551079e-06,
14091
+ "loss": 0.068,
14092
+ "step": 18450
14093
+ },
14094
+ {
14095
+ "epoch": 8.650421743205248,
14096
+ "grad_norm": 1.168728232383728,
14097
+ "learning_rate": 8.918697282099344e-06,
14098
+ "loss": 0.0589,
14099
+ "step": 18460
14100
+ },
14101
+ {
14102
+ "epoch": 8.655107778819119,
14103
+ "grad_norm": 2.3766093254089355,
14104
+ "learning_rate": 8.918111527647611e-06,
14105
+ "loss": 0.061,
14106
+ "step": 18470
14107
+ },
14108
+ {
14109
+ "epoch": 8.65979381443299,
14110
+ "grad_norm": 1.6704158782958984,
14111
+ "learning_rate": 8.917525773195878e-06,
14112
+ "loss": 0.0629,
14113
+ "step": 18480
14114
+ },
14115
+ {
14116
+ "epoch": 8.66447985004686,
14117
+ "grad_norm": 1.9102870225906372,
14118
+ "learning_rate": 8.916940018744143e-06,
14119
+ "loss": 0.0464,
14120
+ "step": 18490
14121
+ },
14122
+ {
14123
+ "epoch": 8.669165885660732,
14124
+ "grad_norm": 2.742626428604126,
14125
+ "learning_rate": 8.91635426429241e-06,
14126
+ "loss": 0.0645,
14127
+ "step": 18500
14128
+ },
14129
+ {
14130
+ "epoch": 8.669165885660732,
14131
+ "eval_loss": 0.03774439916014671,
14132
+ "eval_pearson_cosine": 0.7887750445614863,
14133
+ "eval_pearson_dot": 0.6513653629224123,
14134
+ "eval_pearson_euclidean": 0.7319388075486906,
14135
+ "eval_pearson_manhattan": 0.7314905753471947,
14136
+ "eval_runtime": 40.5433,
14137
+ "eval_samples_per_second": 36.998,
14138
+ "eval_spearman_cosine": 0.7892064111202951,
14139
+ "eval_spearman_dot": 0.6704252435211006,
14140
+ "eval_spearman_euclidean": 0.7498699934549212,
14141
+ "eval_spearman_manhattan": 0.7495320910792913,
14142
+ "eval_steps_per_second": 36.998,
14143
+ "step": 18500
14144
+ },
14145
+ {
14146
+ "epoch": 8.673851921274602,
14147
+ "grad_norm": 1.4276272058486938,
14148
+ "learning_rate": 8.915768509840675e-06,
14149
+ "loss": 0.0466,
14150
+ "step": 18510
14151
+ },
14152
+ {
14153
+ "epoch": 8.678537956888473,
14154
+ "grad_norm": 1.780705451965332,
14155
+ "learning_rate": 8.91518275538894e-06,
14156
+ "loss": 0.073,
14157
+ "step": 18520
14158
+ },
14159
+ {
14160
+ "epoch": 8.683223992502343,
14161
+ "grad_norm": 1.422787070274353,
14162
+ "learning_rate": 8.914597000937208e-06,
14163
+ "loss": 0.0662,
14164
+ "step": 18530
14165
+ },
14166
+ {
14167
+ "epoch": 8.687910028116214,
14168
+ "grad_norm": 1.8989777565002441,
14169
+ "learning_rate": 8.914011246485474e-06,
14170
+ "loss": 0.053,
14171
+ "step": 18540
14172
+ },
14173
+ {
14174
+ "epoch": 8.692596063730084,
14175
+ "grad_norm": 1.208201289176941,
14176
+ "learning_rate": 8.91342549203374e-06,
14177
+ "loss": 0.0557,
14178
+ "step": 18550
14179
+ },
14180
+ {
14181
+ "epoch": 8.697282099343955,
14182
+ "grad_norm": 1.4029545783996582,
14183
+ "learning_rate": 8.912839737582007e-06,
14184
+ "loss": 0.0498,
14185
+ "step": 18560
14186
+ },
14187
+ {
14188
+ "epoch": 8.701968134957825,
14189
+ "grad_norm": 1.4905900955200195,
14190
+ "learning_rate": 8.912253983130272e-06,
14191
+ "loss": 0.0586,
14192
+ "step": 18570
14193
+ },
14194
+ {
14195
+ "epoch": 8.706654170571696,
14196
+ "grad_norm": 1.494296669960022,
14197
+ "learning_rate": 8.911668228678539e-06,
14198
+ "loss": 0.0597,
14199
+ "step": 18580
14200
+ },
14201
+ {
14202
+ "epoch": 8.711340206185566,
14203
+ "grad_norm": 1.8540481328964233,
14204
+ "learning_rate": 8.911082474226806e-06,
14205
+ "loss": 0.06,
14206
+ "step": 18590
14207
+ },
14208
+ {
14209
+ "epoch": 8.716026241799437,
14210
+ "grad_norm": 0.9429871439933777,
14211
+ "learning_rate": 8.910496719775071e-06,
14212
+ "loss": 0.0708,
14213
+ "step": 18600
14214
+ },
14215
+ {
14216
+ "epoch": 8.720712277413309,
14217
+ "grad_norm": 1.333791732788086,
14218
+ "learning_rate": 8.909910965323338e-06,
14219
+ "loss": 0.0583,
14220
+ "step": 18610
14221
+ },
14222
+ {
14223
+ "epoch": 8.72539831302718,
14224
+ "grad_norm": 1.1609207391738892,
14225
+ "learning_rate": 8.909325210871603e-06,
14226
+ "loss": 0.0499,
14227
+ "step": 18620
14228
+ },
14229
+ {
14230
+ "epoch": 8.73008434864105,
14231
+ "grad_norm": 1.9390841722488403,
14232
+ "learning_rate": 8.90873945641987e-06,
14233
+ "loss": 0.0751,
14234
+ "step": 18630
14235
+ },
14236
+ {
14237
+ "epoch": 8.73477038425492,
14238
+ "grad_norm": 1.693433165550232,
14239
+ "learning_rate": 8.908153701968135e-06,
14240
+ "loss": 0.0685,
14241
+ "step": 18640
14242
+ },
14243
+ {
14244
+ "epoch": 8.739456419868791,
14245
+ "grad_norm": 1.7784210443496704,
14246
+ "learning_rate": 8.907567947516402e-06,
14247
+ "loss": 0.0524,
14248
+ "step": 18650
14249
+ },
14250
+ {
14251
+ "epoch": 8.744142455482661,
14252
+ "grad_norm": 1.4945738315582275,
14253
+ "learning_rate": 8.906982193064669e-06,
14254
+ "loss": 0.064,
14255
+ "step": 18660
14256
+ },
14257
+ {
14258
+ "epoch": 8.748828491096532,
14259
+ "grad_norm": 1.7549676895141602,
14260
+ "learning_rate": 8.906396438612934e-06,
14261
+ "loss": 0.0634,
14262
+ "step": 18670
14263
+ },
14264
+ {
14265
+ "epoch": 8.753514526710402,
14266
+ "grad_norm": 1.1789377927780151,
14267
+ "learning_rate": 8.9058106841612e-06,
14268
+ "loss": 0.0597,
14269
+ "step": 18680
14270
+ },
14271
+ {
14272
+ "epoch": 8.758200562324273,
14273
+ "grad_norm": 1.983936071395874,
14274
+ "learning_rate": 8.905224929709466e-06,
14275
+ "loss": 0.0614,
14276
+ "step": 18690
14277
+ },
14278
+ {
14279
+ "epoch": 8.762886597938145,
14280
+ "grad_norm": 2.564476251602173,
14281
+ "learning_rate": 8.904639175257732e-06,
14282
+ "loss": 0.0711,
14283
+ "step": 18700
14284
+ },
14285
+ {
14286
+ "epoch": 8.767572633552016,
14287
+ "grad_norm": 0.5671543478965759,
14288
+ "learning_rate": 8.904053420805998e-06,
14289
+ "loss": 0.0586,
14290
+ "step": 18710
14291
+ },
14292
+ {
14293
+ "epoch": 8.772258669165886,
14294
+ "grad_norm": 1.1714857816696167,
14295
+ "learning_rate": 8.903467666354265e-06,
14296
+ "loss": 0.06,
14297
+ "step": 18720
14298
+ },
14299
+ {
14300
+ "epoch": 8.776944704779757,
14301
+ "grad_norm": 1.8699477910995483,
14302
+ "learning_rate": 8.90288191190253e-06,
14303
+ "loss": 0.052,
14304
+ "step": 18730
14305
+ },
14306
+ {
14307
+ "epoch": 8.781630740393627,
14308
+ "grad_norm": 1.0824236869812012,
14309
+ "learning_rate": 8.902296157450798e-06,
14310
+ "loss": 0.0638,
14311
+ "step": 18740
14312
+ },
14313
+ {
14314
+ "epoch": 8.786316776007498,
14315
+ "grad_norm": 1.3703303337097168,
14316
+ "learning_rate": 8.901710402999063e-06,
14317
+ "loss": 0.0563,
14318
+ "step": 18750
14319
+ },
14320
+ {
14321
+ "epoch": 8.786316776007498,
14322
+ "eval_loss": 0.037630029022693634,
14323
+ "eval_pearson_cosine": 0.7870129329535697,
14324
+ "eval_pearson_dot": 0.6393485188875303,
14325
+ "eval_pearson_euclidean": 0.7289305204204517,
14326
+ "eval_pearson_manhattan": 0.7285165698261729,
14327
+ "eval_runtime": 40.5046,
14328
+ "eval_samples_per_second": 37.033,
14329
+ "eval_spearman_cosine": 0.7878034848552876,
14330
+ "eval_spearman_dot": 0.6605642491363777,
14331
+ "eval_spearman_euclidean": 0.7454305721470555,
14332
+ "eval_spearman_manhattan": 0.745136975852769,
14333
+ "eval_steps_per_second": 37.033,
14334
+ "step": 18750
14335
+ },
14336
+ {
14337
+ "epoch": 8.791002811621368,
14338
+ "grad_norm": 1.745339035987854,
14339
+ "learning_rate": 8.90112464854733e-06,
14340
+ "loss": 0.0566,
14341
+ "step": 18760
14342
+ },
14343
+ {
14344
+ "epoch": 8.795688847235239,
14345
+ "grad_norm": 1.5828258991241455,
14346
+ "learning_rate": 8.900538894095597e-06,
14347
+ "loss": 0.0602,
14348
+ "step": 18770
14349
+ },
14350
+ {
14351
+ "epoch": 8.800374882849109,
14352
+ "grad_norm": 1.4292279481887817,
14353
+ "learning_rate": 8.899953139643862e-06,
14354
+ "loss": 0.0638,
14355
+ "step": 18780
14356
+ },
14357
+ {
14358
+ "epoch": 8.80506091846298,
14359
+ "grad_norm": 1.956358790397644,
14360
+ "learning_rate": 8.899367385192129e-06,
14361
+ "loss": 0.0667,
14362
+ "step": 18790
14363
+ },
14364
+ {
14365
+ "epoch": 8.80974695407685,
14366
+ "grad_norm": 0.9023747444152832,
14367
+ "learning_rate": 8.898781630740394e-06,
14368
+ "loss": 0.0662,
14369
+ "step": 18800
14370
+ },
14371
+ {
14372
+ "epoch": 8.814432989690722,
14373
+ "grad_norm": 2.1007392406463623,
14374
+ "learning_rate": 8.89819587628866e-06,
14375
+ "loss": 0.0561,
14376
+ "step": 18810
14377
+ },
14378
+ {
14379
+ "epoch": 8.819119025304593,
14380
+ "grad_norm": 2.0597100257873535,
14381
+ "learning_rate": 8.897610121836928e-06,
14382
+ "loss": 0.0666,
14383
+ "step": 18820
14384
+ },
14385
+ {
14386
+ "epoch": 8.823805060918463,
14387
+ "grad_norm": 1.1200934648513794,
14388
+ "learning_rate": 8.897024367385193e-06,
14389
+ "loss": 0.0538,
14390
+ "step": 18830
14391
+ },
14392
+ {
14393
+ "epoch": 8.828491096532334,
14394
+ "grad_norm": 2.032970428466797,
14395
+ "learning_rate": 8.896438612933458e-06,
14396
+ "loss": 0.0734,
14397
+ "step": 18840
14398
+ },
14399
+ {
14400
+ "epoch": 8.833177132146204,
14401
+ "grad_norm": 1.5491752624511719,
14402
+ "learning_rate": 8.895852858481725e-06,
14403
+ "loss": 0.0638,
14404
+ "step": 18850
14405
+ },
14406
+ {
14407
+ "epoch": 8.837863167760075,
14408
+ "grad_norm": 0.7450467348098755,
14409
+ "learning_rate": 8.89526710402999e-06,
14410
+ "loss": 0.0752,
14411
+ "step": 18860
14412
+ },
14413
+ {
14414
+ "epoch": 8.842549203373945,
14415
+ "grad_norm": 1.0671043395996094,
14416
+ "learning_rate": 8.894681349578257e-06,
14417
+ "loss": 0.0562,
14418
+ "step": 18870
14419
+ },
14420
+ {
14421
+ "epoch": 8.847235238987816,
14422
+ "grad_norm": 1.3302968740463257,
14423
+ "learning_rate": 8.894095595126524e-06,
14424
+ "loss": 0.0573,
14425
+ "step": 18880
14426
+ },
14427
+ {
14428
+ "epoch": 8.851921274601686,
14429
+ "grad_norm": 1.423279881477356,
14430
+ "learning_rate": 8.89350984067479e-06,
14431
+ "loss": 0.0645,
14432
+ "step": 18890
14433
+ },
14434
+ {
14435
+ "epoch": 8.856607310215558,
14436
+ "grad_norm": 1.1250574588775635,
14437
+ "learning_rate": 8.892924086223056e-06,
14438
+ "loss": 0.0616,
14439
+ "step": 18900
14440
+ },
14441
+ {
14442
+ "epoch": 8.861293345829429,
14443
+ "grad_norm": 1.3438372611999512,
14444
+ "learning_rate": 8.892338331771322e-06,
14445
+ "loss": 0.0525,
14446
+ "step": 18910
14447
+ },
14448
+ {
14449
+ "epoch": 8.8659793814433,
14450
+ "grad_norm": 1.5097957849502563,
14451
+ "learning_rate": 8.891752577319588e-06,
14452
+ "loss": 0.0593,
14453
+ "step": 18920
14454
+ },
14455
+ {
14456
+ "epoch": 8.87066541705717,
14457
+ "grad_norm": 1.9522205591201782,
14458
+ "learning_rate": 8.891166822867855e-06,
14459
+ "loss": 0.0674,
14460
+ "step": 18930
14461
+ },
14462
+ {
14463
+ "epoch": 8.87535145267104,
14464
+ "grad_norm": 1.1841950416564941,
14465
+ "learning_rate": 8.89058106841612e-06,
14466
+ "loss": 0.0641,
14467
+ "step": 18940
14468
+ },
14469
+ {
14470
+ "epoch": 8.880037488284911,
14471
+ "grad_norm": 1.658074140548706,
14472
+ "learning_rate": 8.889995313964388e-06,
14473
+ "loss": 0.0737,
14474
+ "step": 18950
14475
+ },
14476
+ {
14477
+ "epoch": 8.884723523898781,
14478
+ "grad_norm": 1.5924397706985474,
14479
+ "learning_rate": 8.889409559512653e-06,
14480
+ "loss": 0.07,
14481
+ "step": 18960
14482
+ },
14483
+ {
14484
+ "epoch": 8.889409559512652,
14485
+ "grad_norm": 1.379166603088379,
14486
+ "learning_rate": 8.888823805060918e-06,
14487
+ "loss": 0.0689,
14488
+ "step": 18970
14489
+ },
14490
+ {
14491
+ "epoch": 8.894095595126522,
14492
+ "grad_norm": 1.3292274475097656,
14493
+ "learning_rate": 8.888238050609187e-06,
14494
+ "loss": 0.0564,
14495
+ "step": 18980
14496
+ },
14497
+ {
14498
+ "epoch": 8.898781630740393,
14499
+ "grad_norm": 1.4383434057235718,
14500
+ "learning_rate": 8.887652296157452e-06,
14501
+ "loss": 0.0583,
14502
+ "step": 18990
14503
+ },
14504
+ {
14505
+ "epoch": 8.903467666354265,
14506
+ "grad_norm": 2.1288797855377197,
14507
+ "learning_rate": 8.887066541705717e-06,
14508
+ "loss": 0.0669,
14509
+ "step": 19000
14510
+ },
14511
+ {
14512
+ "epoch": 8.903467666354265,
14513
+ "eval_loss": 0.03827948495745659,
14514
+ "eval_pearson_cosine": 0.7850468616972819,
14515
+ "eval_pearson_dot": 0.6358914679070722,
14516
+ "eval_pearson_euclidean": 0.7244498308050709,
14517
+ "eval_pearson_manhattan": 0.7238488356503296,
14518
+ "eval_runtime": 40.8545,
14519
+ "eval_samples_per_second": 36.716,
14520
+ "eval_spearman_cosine": 0.7865593789879696,
14521
+ "eval_spearman_dot": 0.6571265794919958,
14522
+ "eval_spearman_euclidean": 0.7437161421017117,
14523
+ "eval_spearman_manhattan": 0.7432616809242956,
14524
+ "eval_steps_per_second": 36.716,
14525
+ "step": 19000
14526
  }
14527
  ],
14528
  "logging_steps": 10,