CocoRoF commited on
Commit
82ef49b
·
verified ·
1 Parent(s): 597926a

Training in progress, step 10000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8adc247d305d9f3f48cfa3878ee63336d7fa4374add45ee01ed8dda6005ec778
3
  size 613004648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad6a577dd191618d3b00fe33fe0cdf33b81a5c002fe33712258e536fcc1520a8
3
  size 613004648
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8859d669a35bcbc93780c06ab1a65bd8bc02a7cba417a0b1747229bae278e8c4
3
  size 1226096954
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ffe192963611108e55a8d98be8d1dafc323965fc2794aed1bf226c3a6c17145
3
  size 1226096954
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b9b5cbeb72cb13416d33fb3709b0d7a367e681de773132f1b73f6b02bba3582
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e1f8a9c771c1f6fac3e948b043a309dd550a6e17ac92d5fdcc99c5e406cf1c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdb70058e5fda25192fd22654de739fc2dcf9d8d64b5fc7673d822670c95ff92
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c9a67dc102c0201a7b47ec5ea788b4d7cd182385f8b19e21244eece8e7dbf3b
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.217432052483598,
5
  "eval_steps": 250,
6
- "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6883,6 +6883,770 @@
6883
  "eval_spearman_manhattan": 0.7624729408213863,
6884
  "eval_steps_per_second": 37.636,
6885
  "step": 9000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6886
  }
6887
  ],
6888
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.686035613870665,
5
  "eval_steps": 250,
6
+ "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6883
  "eval_spearman_manhattan": 0.7624729408213863,
6884
  "eval_steps_per_second": 37.636,
6885
  "step": 9000
6886
+ },
6887
+ {
6888
+ "epoch": 4.22211808809747,
6889
+ "grad_norm": 1.666392207145691,
6890
+ "learning_rate": 9.472235238987816e-06,
6891
+ "loss": 0.1418,
6892
+ "step": 9010
6893
+ },
6894
+ {
6895
+ "epoch": 4.22680412371134,
6896
+ "grad_norm": 2.7508490085601807,
6897
+ "learning_rate": 9.471649484536083e-06,
6898
+ "loss": 0.1448,
6899
+ "step": 9020
6900
+ },
6901
+ {
6902
+ "epoch": 4.231490159325211,
6903
+ "grad_norm": 2.883143663406372,
6904
+ "learning_rate": 9.471063730084349e-06,
6905
+ "loss": 0.166,
6906
+ "step": 9030
6907
+ },
6908
+ {
6909
+ "epoch": 4.236176194939081,
6910
+ "grad_norm": 2.5505452156066895,
6911
+ "learning_rate": 9.470477975632615e-06,
6912
+ "loss": 0.1417,
6913
+ "step": 9040
6914
+ },
6915
+ {
6916
+ "epoch": 4.240862230552953,
6917
+ "grad_norm": 2.3343942165374756,
6918
+ "learning_rate": 9.469892221180882e-06,
6919
+ "loss": 0.1569,
6920
+ "step": 9050
6921
+ },
6922
+ {
6923
+ "epoch": 4.245548266166823,
6924
+ "grad_norm": 2.2727818489074707,
6925
+ "learning_rate": 9.469306466729148e-06,
6926
+ "loss": 0.1346,
6927
+ "step": 9060
6928
+ },
6929
+ {
6930
+ "epoch": 4.250234301780694,
6931
+ "grad_norm": 2.8398542404174805,
6932
+ "learning_rate": 9.468720712277415e-06,
6933
+ "loss": 0.151,
6934
+ "step": 9070
6935
+ },
6936
+ {
6937
+ "epoch": 4.254920337394564,
6938
+ "grad_norm": 2.0750465393066406,
6939
+ "learning_rate": 9.46813495782568e-06,
6940
+ "loss": 0.1555,
6941
+ "step": 9080
6942
+ },
6943
+ {
6944
+ "epoch": 4.259606373008435,
6945
+ "grad_norm": 2.1490373611450195,
6946
+ "learning_rate": 9.467549203373947e-06,
6947
+ "loss": 0.1719,
6948
+ "step": 9090
6949
+ },
6950
+ {
6951
+ "epoch": 4.264292408622305,
6952
+ "grad_norm": 1.9958566427230835,
6953
+ "learning_rate": 9.466963448922214e-06,
6954
+ "loss": 0.1332,
6955
+ "step": 9100
6956
+ },
6957
+ {
6958
+ "epoch": 4.2689784442361765,
6959
+ "grad_norm": 2.586869478225708,
6960
+ "learning_rate": 9.466377694470479e-06,
6961
+ "loss": 0.1545,
6962
+ "step": 9110
6963
+ },
6964
+ {
6965
+ "epoch": 4.273664479850047,
6966
+ "grad_norm": 1.7282586097717285,
6967
+ "learning_rate": 9.465791940018744e-06,
6968
+ "loss": 0.1608,
6969
+ "step": 9120
6970
+ },
6971
+ {
6972
+ "epoch": 4.278350515463917,
6973
+ "grad_norm": 2.7652742862701416,
6974
+ "learning_rate": 9.465206185567011e-06,
6975
+ "loss": 0.1319,
6976
+ "step": 9130
6977
+ },
6978
+ {
6979
+ "epoch": 4.283036551077788,
6980
+ "grad_norm": 2.3922176361083984,
6981
+ "learning_rate": 9.464620431115276e-06,
6982
+ "loss": 0.1569,
6983
+ "step": 9140
6984
+ },
6985
+ {
6986
+ "epoch": 4.287722586691659,
6987
+ "grad_norm": 2.223822832107544,
6988
+ "learning_rate": 9.464034676663543e-06,
6989
+ "loss": 0.1494,
6990
+ "step": 9150
6991
+ },
6992
+ {
6993
+ "epoch": 4.29240862230553,
6994
+ "grad_norm": 2.0714290142059326,
6995
+ "learning_rate": 9.46344892221181e-06,
6996
+ "loss": 0.1506,
6997
+ "step": 9160
6998
+ },
6999
+ {
7000
+ "epoch": 4.2970946579194,
7001
+ "grad_norm": 2.4368040561676025,
7002
+ "learning_rate": 9.462863167760075e-06,
7003
+ "loss": 0.1413,
7004
+ "step": 9170
7005
+ },
7006
+ {
7007
+ "epoch": 4.301780693533271,
7008
+ "grad_norm": 2.760032892227173,
7009
+ "learning_rate": 9.462277413308342e-06,
7010
+ "loss": 0.1599,
7011
+ "step": 9180
7012
+ },
7013
+ {
7014
+ "epoch": 4.306466729147141,
7015
+ "grad_norm": 2.8202686309814453,
7016
+ "learning_rate": 9.461691658856607e-06,
7017
+ "loss": 0.1444,
7018
+ "step": 9190
7019
+ },
7020
+ {
7021
+ "epoch": 4.311152764761012,
7022
+ "grad_norm": 2.5001349449157715,
7023
+ "learning_rate": 9.461105904404874e-06,
7024
+ "loss": 0.1322,
7025
+ "step": 9200
7026
+ },
7027
+ {
7028
+ "epoch": 4.315838800374883,
7029
+ "grad_norm": 2.5955960750579834,
7030
+ "learning_rate": 9.460520149953141e-06,
7031
+ "loss": 0.1522,
7032
+ "step": 9210
7033
+ },
7034
+ {
7035
+ "epoch": 4.320524835988754,
7036
+ "grad_norm": 1.966848611831665,
7037
+ "learning_rate": 9.459934395501406e-06,
7038
+ "loss": 0.133,
7039
+ "step": 9220
7040
+ },
7041
+ {
7042
+ "epoch": 4.325210871602624,
7043
+ "grad_norm": 2.4600915908813477,
7044
+ "learning_rate": 9.459348641049673e-06,
7045
+ "loss": 0.1381,
7046
+ "step": 9230
7047
+ },
7048
+ {
7049
+ "epoch": 4.329896907216495,
7050
+ "grad_norm": 2.6355576515197754,
7051
+ "learning_rate": 9.458762886597939e-06,
7052
+ "loss": 0.1671,
7053
+ "step": 9240
7054
+ },
7055
+ {
7056
+ "epoch": 4.334582942830366,
7057
+ "grad_norm": 1.9884010553359985,
7058
+ "learning_rate": 9.458177132146204e-06,
7059
+ "loss": 0.1565,
7060
+ "step": 9250
7061
+ },
7062
+ {
7063
+ "epoch": 4.334582942830366,
7064
+ "eval_loss": 0.04769788682460785,
7065
+ "eval_pearson_cosine": 0.7716762907463419,
7066
+ "eval_pearson_dot": 0.6025868220654615,
7067
+ "eval_pearson_euclidean": 0.7486154713041202,
7068
+ "eval_pearson_manhattan": 0.7480947777024909,
7069
+ "eval_runtime": 39.7245,
7070
+ "eval_samples_per_second": 37.76,
7071
+ "eval_spearman_cosine": 0.7767828328865971,
7072
+ "eval_spearman_dot": 0.6102486537359278,
7073
+ "eval_spearman_euclidean": 0.764536418480009,
7074
+ "eval_spearman_manhattan": 0.7641318665907693,
7075
+ "eval_steps_per_second": 37.76,
7076
+ "step": 9250
7077
+ },
7078
+ {
7079
+ "epoch": 4.339268978444236,
7080
+ "grad_norm": 2.462095260620117,
7081
+ "learning_rate": 9.457591377694472e-06,
7082
+ "loss": 0.1574,
7083
+ "step": 9260
7084
+ },
7085
+ {
7086
+ "epoch": 4.343955014058107,
7087
+ "grad_norm": 2.8646531105041504,
7088
+ "learning_rate": 9.457005623242738e-06,
7089
+ "loss": 0.1447,
7090
+ "step": 9270
7091
+ },
7092
+ {
7093
+ "epoch": 4.348641049671977,
7094
+ "grad_norm": 2.456422805786133,
7095
+ "learning_rate": 9.456419868791003e-06,
7096
+ "loss": 0.1481,
7097
+ "step": 9280
7098
+ },
7099
+ {
7100
+ "epoch": 4.353327085285848,
7101
+ "grad_norm": 3.1123046875,
7102
+ "learning_rate": 9.45583411433927e-06,
7103
+ "loss": 0.1465,
7104
+ "step": 9290
7105
+ },
7106
+ {
7107
+ "epoch": 4.358013120899718,
7108
+ "grad_norm": 3.3074183464050293,
7109
+ "learning_rate": 9.455248359887535e-06,
7110
+ "loss": 0.1642,
7111
+ "step": 9300
7112
+ },
7113
+ {
7114
+ "epoch": 4.36269915651359,
7115
+ "grad_norm": 1.6989983320236206,
7116
+ "learning_rate": 9.454662605435802e-06,
7117
+ "loss": 0.1282,
7118
+ "step": 9310
7119
+ },
7120
+ {
7121
+ "epoch": 4.36738519212746,
7122
+ "grad_norm": 1.7987381219863892,
7123
+ "learning_rate": 9.454076850984069e-06,
7124
+ "loss": 0.1456,
7125
+ "step": 9320
7126
+ },
7127
+ {
7128
+ "epoch": 4.372071227741331,
7129
+ "grad_norm": 1.6666337251663208,
7130
+ "learning_rate": 9.453491096532334e-06,
7131
+ "loss": 0.1618,
7132
+ "step": 9330
7133
+ },
7134
+ {
7135
+ "epoch": 4.376757263355201,
7136
+ "grad_norm": 2.1631813049316406,
7137
+ "learning_rate": 9.452905342080601e-06,
7138
+ "loss": 0.142,
7139
+ "step": 9340
7140
+ },
7141
+ {
7142
+ "epoch": 4.381443298969073,
7143
+ "grad_norm": 2.5329623222351074,
7144
+ "learning_rate": 9.452319587628866e-06,
7145
+ "loss": 0.1436,
7146
+ "step": 9350
7147
+ },
7148
+ {
7149
+ "epoch": 4.386129334582943,
7150
+ "grad_norm": 1.9239972829818726,
7151
+ "learning_rate": 9.451733833177133e-06,
7152
+ "loss": 0.1361,
7153
+ "step": 9360
7154
+ },
7155
+ {
7156
+ "epoch": 4.390815370196814,
7157
+ "grad_norm": 2.091343641281128,
7158
+ "learning_rate": 9.4511480787254e-06,
7159
+ "loss": 0.1525,
7160
+ "step": 9370
7161
+ },
7162
+ {
7163
+ "epoch": 4.395501405810684,
7164
+ "grad_norm": 2.544008255004883,
7165
+ "learning_rate": 9.450562324273665e-06,
7166
+ "loss": 0.1454,
7167
+ "step": 9380
7168
+ },
7169
+ {
7170
+ "epoch": 4.4001874414245545,
7171
+ "grad_norm": 2.5722694396972656,
7172
+ "learning_rate": 9.449976569821932e-06,
7173
+ "loss": 0.1619,
7174
+ "step": 9390
7175
+ },
7176
+ {
7177
+ "epoch": 4.404873477038425,
7178
+ "grad_norm": 2.6824848651885986,
7179
+ "learning_rate": 9.449390815370197e-06,
7180
+ "loss": 0.1529,
7181
+ "step": 9400
7182
+ },
7183
+ {
7184
+ "epoch": 4.409559512652296,
7185
+ "grad_norm": 2.406972885131836,
7186
+ "learning_rate": 9.448805060918463e-06,
7187
+ "loss": 0.153,
7188
+ "step": 9410
7189
+ },
7190
+ {
7191
+ "epoch": 4.414245548266167,
7192
+ "grad_norm": 2.7198727130889893,
7193
+ "learning_rate": 9.448219306466731e-06,
7194
+ "loss": 0.1657,
7195
+ "step": 9420
7196
+ },
7197
+ {
7198
+ "epoch": 4.418931583880037,
7199
+ "grad_norm": 2.511366844177246,
7200
+ "learning_rate": 9.447633552014996e-06,
7201
+ "loss": 0.1409,
7202
+ "step": 9430
7203
+ },
7204
+ {
7205
+ "epoch": 4.423617619493908,
7206
+ "grad_norm": 2.4278454780578613,
7207
+ "learning_rate": 9.447047797563262e-06,
7208
+ "loss": 0.1392,
7209
+ "step": 9440
7210
+ },
7211
+ {
7212
+ "epoch": 4.428303655107779,
7213
+ "grad_norm": 3.2954795360565186,
7214
+ "learning_rate": 9.446462043111529e-06,
7215
+ "loss": 0.142,
7216
+ "step": 9450
7217
+ },
7218
+ {
7219
+ "epoch": 4.43298969072165,
7220
+ "grad_norm": 2.079050302505493,
7221
+ "learning_rate": 9.445876288659794e-06,
7222
+ "loss": 0.1444,
7223
+ "step": 9460
7224
+ },
7225
+ {
7226
+ "epoch": 4.43767572633552,
7227
+ "grad_norm": 2.284108877182007,
7228
+ "learning_rate": 9.44529053420806e-06,
7229
+ "loss": 0.1549,
7230
+ "step": 9470
7231
+ },
7232
+ {
7233
+ "epoch": 4.442361761949391,
7234
+ "grad_norm": 1.7953790426254272,
7235
+ "learning_rate": 9.444704779756328e-06,
7236
+ "loss": 0.1632,
7237
+ "step": 9480
7238
+ },
7239
+ {
7240
+ "epoch": 4.447047797563261,
7241
+ "grad_norm": 2.6434693336486816,
7242
+ "learning_rate": 9.444119025304593e-06,
7243
+ "loss": 0.1453,
7244
+ "step": 9490
7245
+ },
7246
+ {
7247
+ "epoch": 4.451733833177133,
7248
+ "grad_norm": 2.6489696502685547,
7249
+ "learning_rate": 9.44353327085286e-06,
7250
+ "loss": 0.1577,
7251
+ "step": 9500
7252
+ },
7253
+ {
7254
+ "epoch": 4.451733833177133,
7255
+ "eval_loss": 0.0442158505320549,
7256
+ "eval_pearson_cosine": 0.7793668842918748,
7257
+ "eval_pearson_dot": 0.6181762937224704,
7258
+ "eval_pearson_euclidean": 0.7443899044656206,
7259
+ "eval_pearson_manhattan": 0.7439473885249299,
7260
+ "eval_runtime": 39.5492,
7261
+ "eval_samples_per_second": 37.927,
7262
+ "eval_spearman_cosine": 0.7823666516115477,
7263
+ "eval_spearman_dot": 0.6290855072192552,
7264
+ "eval_spearman_euclidean": 0.7629748815703942,
7265
+ "eval_spearman_manhattan": 0.7626889580909112,
7266
+ "eval_steps_per_second": 37.927,
7267
+ "step": 9500
7268
+ },
7269
+ {
7270
+ "epoch": 4.456419868791003,
7271
+ "grad_norm": 2.769876003265381,
7272
+ "learning_rate": 9.442947516401125e-06,
7273
+ "loss": 0.1474,
7274
+ "step": 9510
7275
+ },
7276
+ {
7277
+ "epoch": 4.4611059044048735,
7278
+ "grad_norm": 2.8958747386932373,
7279
+ "learning_rate": 9.442361761949392e-06,
7280
+ "loss": 0.1479,
7281
+ "step": 9520
7282
+ },
7283
+ {
7284
+ "epoch": 4.465791940018744,
7285
+ "grad_norm": 3.219896078109741,
7286
+ "learning_rate": 9.441776007497657e-06,
7287
+ "loss": 0.1621,
7288
+ "step": 9530
7289
+ },
7290
+ {
7291
+ "epoch": 4.4704779756326145,
7292
+ "grad_norm": 2.0421993732452393,
7293
+ "learning_rate": 9.441190253045924e-06,
7294
+ "loss": 0.1473,
7295
+ "step": 9540
7296
+ },
7297
+ {
7298
+ "epoch": 4.475164011246486,
7299
+ "grad_norm": 2.189121961593628,
7300
+ "learning_rate": 9.440604498594191e-06,
7301
+ "loss": 0.1751,
7302
+ "step": 9550
7303
+ },
7304
+ {
7305
+ "epoch": 4.479850046860356,
7306
+ "grad_norm": 2.7012038230895996,
7307
+ "learning_rate": 9.440018744142456e-06,
7308
+ "loss": 0.1598,
7309
+ "step": 9560
7310
+ },
7311
+ {
7312
+ "epoch": 4.484536082474227,
7313
+ "grad_norm": 2.8359875679016113,
7314
+ "learning_rate": 9.439432989690721e-06,
7315
+ "loss": 0.1217,
7316
+ "step": 9570
7317
+ },
7318
+ {
7319
+ "epoch": 4.489222118088097,
7320
+ "grad_norm": 2.1257011890411377,
7321
+ "learning_rate": 9.438847235238988e-06,
7322
+ "loss": 0.1395,
7323
+ "step": 9580
7324
+ },
7325
+ {
7326
+ "epoch": 4.493908153701968,
7327
+ "grad_norm": 2.8070883750915527,
7328
+ "learning_rate": 9.438261480787255e-06,
7329
+ "loss": 0.1654,
7330
+ "step": 9590
7331
+ },
7332
+ {
7333
+ "epoch": 4.498594189315839,
7334
+ "grad_norm": 2.672773838043213,
7335
+ "learning_rate": 9.43767572633552e-06,
7336
+ "loss": 0.133,
7337
+ "step": 9600
7338
+ },
7339
+ {
7340
+ "epoch": 4.50328022492971,
7341
+ "grad_norm": 2.5079360008239746,
7342
+ "learning_rate": 9.437089971883787e-06,
7343
+ "loss": 0.131,
7344
+ "step": 9610
7345
+ },
7346
+ {
7347
+ "epoch": 4.50796626054358,
7348
+ "grad_norm": 3.078028678894043,
7349
+ "learning_rate": 9.436504217432053e-06,
7350
+ "loss": 0.1634,
7351
+ "step": 9620
7352
+ },
7353
+ {
7354
+ "epoch": 4.512652296157451,
7355
+ "grad_norm": 2.3852665424346924,
7356
+ "learning_rate": 9.43591846298032e-06,
7357
+ "loss": 0.1382,
7358
+ "step": 9630
7359
+ },
7360
+ {
7361
+ "epoch": 4.517338331771321,
7362
+ "grad_norm": 2.917783260345459,
7363
+ "learning_rate": 9.435332708528585e-06,
7364
+ "loss": 0.1573,
7365
+ "step": 9640
7366
+ },
7367
+ {
7368
+ "epoch": 4.5220243673851925,
7369
+ "grad_norm": 2.0304133892059326,
7370
+ "learning_rate": 9.434746954076852e-06,
7371
+ "loss": 0.1261,
7372
+ "step": 9650
7373
+ },
7374
+ {
7375
+ "epoch": 4.526710402999063,
7376
+ "grad_norm": 2.4420013427734375,
7377
+ "learning_rate": 9.434161199625119e-06,
7378
+ "loss": 0.1529,
7379
+ "step": 9660
7380
+ },
7381
+ {
7382
+ "epoch": 4.5313964386129335,
7383
+ "grad_norm": 3.5100250244140625,
7384
+ "learning_rate": 9.433575445173384e-06,
7385
+ "loss": 0.1415,
7386
+ "step": 9670
7387
+ },
7388
+ {
7389
+ "epoch": 4.536082474226804,
7390
+ "grad_norm": 2.250225782394409,
7391
+ "learning_rate": 9.43298969072165e-06,
7392
+ "loss": 0.1389,
7393
+ "step": 9680
7394
+ },
7395
+ {
7396
+ "epoch": 4.5407685098406745,
7397
+ "grad_norm": 2.866528272628784,
7398
+ "learning_rate": 9.432403936269916e-06,
7399
+ "loss": 0.1366,
7400
+ "step": 9690
7401
+ },
7402
+ {
7403
+ "epoch": 4.545454545454545,
7404
+ "grad_norm": 2.196103096008301,
7405
+ "learning_rate": 9.431818181818183e-06,
7406
+ "loss": 0.139,
7407
+ "step": 9700
7408
+ },
7409
+ {
7410
+ "epoch": 4.550140581068416,
7411
+ "grad_norm": 2.383646249771118,
7412
+ "learning_rate": 9.43123242736645e-06,
7413
+ "loss": 0.1503,
7414
+ "step": 9710
7415
+ },
7416
+ {
7417
+ "epoch": 4.554826616682287,
7418
+ "grad_norm": 2.5517141819000244,
7419
+ "learning_rate": 9.430646672914715e-06,
7420
+ "loss": 0.1588,
7421
+ "step": 9720
7422
+ },
7423
+ {
7424
+ "epoch": 4.559512652296157,
7425
+ "grad_norm": 2.985891103744507,
7426
+ "learning_rate": 9.43006091846298e-06,
7427
+ "loss": 0.1476,
7428
+ "step": 9730
7429
+ },
7430
+ {
7431
+ "epoch": 4.564198687910028,
7432
+ "grad_norm": 2.9456253051757812,
7433
+ "learning_rate": 9.429475164011247e-06,
7434
+ "loss": 0.1698,
7435
+ "step": 9740
7436
+ },
7437
+ {
7438
+ "epoch": 4.568884723523899,
7439
+ "grad_norm": 2.052727699279785,
7440
+ "learning_rate": 9.428889409559512e-06,
7441
+ "loss": 0.1463,
7442
+ "step": 9750
7443
+ },
7444
+ {
7445
+ "epoch": 4.568884723523899,
7446
+ "eval_loss": 0.0455799400806427,
7447
+ "eval_pearson_cosine": 0.7764385842406938,
7448
+ "eval_pearson_dot": 0.5941173098885884,
7449
+ "eval_pearson_euclidean": 0.7405048981360327,
7450
+ "eval_pearson_manhattan": 0.740129834669768,
7451
+ "eval_runtime": 40.4889,
7452
+ "eval_samples_per_second": 37.047,
7453
+ "eval_spearman_cosine": 0.782117792198136,
7454
+ "eval_spearman_dot": 0.599132023248896,
7455
+ "eval_spearman_euclidean": 0.7603847963092804,
7456
+ "eval_spearman_manhattan": 0.7601623940158222,
7457
+ "eval_steps_per_second": 37.047,
7458
+ "step": 9750
7459
+ },
7460
+ {
7461
+ "epoch": 4.57357075913777,
7462
+ "grad_norm": 2.1978442668914795,
7463
+ "learning_rate": 9.42830365510778e-06,
7464
+ "loss": 0.1537,
7465
+ "step": 9760
7466
+ },
7467
+ {
7468
+ "epoch": 4.57825679475164,
7469
+ "grad_norm": 2.976311206817627,
7470
+ "learning_rate": 9.427717900656046e-06,
7471
+ "loss": 0.1506,
7472
+ "step": 9770
7473
+ },
7474
+ {
7475
+ "epoch": 4.582942830365511,
7476
+ "grad_norm": 1.1756877899169922,
7477
+ "learning_rate": 9.427132146204311e-06,
7478
+ "loss": 0.127,
7479
+ "step": 9780
7480
+ },
7481
+ {
7482
+ "epoch": 4.587628865979381,
7483
+ "grad_norm": 2.0622363090515137,
7484
+ "learning_rate": 9.426546391752578e-06,
7485
+ "loss": 0.1508,
7486
+ "step": 9790
7487
+ },
7488
+ {
7489
+ "epoch": 4.592314901593252,
7490
+ "grad_norm": 2.6205925941467285,
7491
+ "learning_rate": 9.425960637300844e-06,
7492
+ "loss": 0.142,
7493
+ "step": 9800
7494
+ },
7495
+ {
7496
+ "epoch": 4.597000937207123,
7497
+ "grad_norm": 1.8698289394378662,
7498
+ "learning_rate": 9.42537488284911e-06,
7499
+ "loss": 0.1375,
7500
+ "step": 9810
7501
+ },
7502
+ {
7503
+ "epoch": 4.6016869728209935,
7504
+ "grad_norm": 1.9030426740646362,
7505
+ "learning_rate": 9.424789128397377e-06,
7506
+ "loss": 0.1626,
7507
+ "step": 9820
7508
+ },
7509
+ {
7510
+ "epoch": 4.606373008434864,
7511
+ "grad_norm": 2.2842605113983154,
7512
+ "learning_rate": 9.424203373945643e-06,
7513
+ "loss": 0.1518,
7514
+ "step": 9830
7515
+ },
7516
+ {
7517
+ "epoch": 4.6110590440487345,
7518
+ "grad_norm": 1.5332591533660889,
7519
+ "learning_rate": 9.42361761949391e-06,
7520
+ "loss": 0.1612,
7521
+ "step": 9840
7522
+ },
7523
+ {
7524
+ "epoch": 4.615745079662606,
7525
+ "grad_norm": 2.012329339981079,
7526
+ "learning_rate": 9.423031865042175e-06,
7527
+ "loss": 0.159,
7528
+ "step": 9850
7529
+ },
7530
+ {
7531
+ "epoch": 4.620431115276476,
7532
+ "grad_norm": 1.955610752105713,
7533
+ "learning_rate": 9.42244611059044e-06,
7534
+ "loss": 0.1672,
7535
+ "step": 9860
7536
+ },
7537
+ {
7538
+ "epoch": 4.625117150890347,
7539
+ "grad_norm": 2.124481439590454,
7540
+ "learning_rate": 9.421860356138709e-06,
7541
+ "loss": 0.1325,
7542
+ "step": 9870
7543
+ },
7544
+ {
7545
+ "epoch": 4.629803186504217,
7546
+ "grad_norm": 2.279585599899292,
7547
+ "learning_rate": 9.421274601686974e-06,
7548
+ "loss": 0.1611,
7549
+ "step": 9880
7550
+ },
7551
+ {
7552
+ "epoch": 4.634489222118088,
7553
+ "grad_norm": 1.9664572477340698,
7554
+ "learning_rate": 9.420688847235239e-06,
7555
+ "loss": 0.1299,
7556
+ "step": 9890
7557
+ },
7558
+ {
7559
+ "epoch": 4.639175257731958,
7560
+ "grad_norm": 2.385551691055298,
7561
+ "learning_rate": 9.420103092783506e-06,
7562
+ "loss": 0.1631,
7563
+ "step": 9900
7564
+ },
7565
+ {
7566
+ "epoch": 4.64386129334583,
7567
+ "grad_norm": 1.469383955001831,
7568
+ "learning_rate": 9.419517338331771e-06,
7569
+ "loss": 0.1205,
7570
+ "step": 9910
7571
+ },
7572
+ {
7573
+ "epoch": 4.6485473289597,
7574
+ "grad_norm": 2.6736183166503906,
7575
+ "learning_rate": 9.418931583880038e-06,
7576
+ "loss": 0.1675,
7577
+ "step": 9920
7578
+ },
7579
+ {
7580
+ "epoch": 4.653233364573571,
7581
+ "grad_norm": 3.122366189956665,
7582
+ "learning_rate": 9.418345829428305e-06,
7583
+ "loss": 0.135,
7584
+ "step": 9930
7585
+ },
7586
+ {
7587
+ "epoch": 4.657919400187441,
7588
+ "grad_norm": 2.8057267665863037,
7589
+ "learning_rate": 9.41776007497657e-06,
7590
+ "loss": 0.1318,
7591
+ "step": 9940
7592
+ },
7593
+ {
7594
+ "epoch": 4.6626054358013125,
7595
+ "grad_norm": 3.2085843086242676,
7596
+ "learning_rate": 9.417174320524837e-06,
7597
+ "loss": 0.163,
7598
+ "step": 9950
7599
+ },
7600
+ {
7601
+ "epoch": 4.667291471415183,
7602
+ "grad_norm": 3.1496477127075195,
7603
+ "learning_rate": 9.416588566073102e-06,
7604
+ "loss": 0.1655,
7605
+ "step": 9960
7606
+ },
7607
+ {
7608
+ "epoch": 4.6719775070290535,
7609
+ "grad_norm": 2.3873279094696045,
7610
+ "learning_rate": 9.41600281162137e-06,
7611
+ "loss": 0.1381,
7612
+ "step": 9970
7613
+ },
7614
+ {
7615
+ "epoch": 4.676663542642924,
7616
+ "grad_norm": 2.0492842197418213,
7617
+ "learning_rate": 9.415417057169636e-06,
7618
+ "loss": 0.1462,
7619
+ "step": 9980
7620
+ },
7621
+ {
7622
+ "epoch": 4.681349578256794,
7623
+ "grad_norm": 3.0173768997192383,
7624
+ "learning_rate": 9.414831302717901e-06,
7625
+ "loss": 0.1711,
7626
+ "step": 9990
7627
+ },
7628
+ {
7629
+ "epoch": 4.686035613870665,
7630
+ "grad_norm": 2.1712629795074463,
7631
+ "learning_rate": 9.414245548266168e-06,
7632
+ "loss": 0.16,
7633
+ "step": 10000
7634
+ },
7635
+ {
7636
+ "epoch": 4.686035613870665,
7637
+ "eval_loss": 0.0459674596786499,
7638
+ "eval_pearson_cosine": 0.7749308103807095,
7639
+ "eval_pearson_dot": 0.6140435552912393,
7640
+ "eval_pearson_euclidean": 0.7497870505171651,
7641
+ "eval_pearson_manhattan": 0.7494772460672863,
7642
+ "eval_runtime": 40.3087,
7643
+ "eval_samples_per_second": 37.213,
7644
+ "eval_spearman_cosine": 0.7793432902242333,
7645
+ "eval_spearman_dot": 0.6191753058355182,
7646
+ "eval_spearman_euclidean": 0.7659596686028919,
7647
+ "eval_spearman_manhattan": 0.7658321542772971,
7648
+ "eval_steps_per_second": 37.213,
7649
+ "step": 10000
7650
  }
7651
  ],
7652
  "logging_steps": 10,