CocoRoF committed (verified)
Commit c79763c · 1 parent: 3aea149

Training in progress, step 18000, checkpoint

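This checkpoint commit updates the five artifacts needed to resume training from step 18000: the model weights, the optimizer and LR-scheduler state, the RNG state, and the trainer's log history. A minimal sketch of resuming, assuming an already-configured transformers.Trainer (the variable "trainer" and the local path are illustrative):

# Minimal sketch: resume training from this checkpoint directory.
# Assumes `trainer` is an already-configured transformers.Trainer;
# "last-checkpoint" is an illustrative local copy of this repo's folder.
trainer.train(resume_from_checkpoint="last-checkpoint")

Trainer restores the optimizer, scheduler, and RNG state alongside the weights, which is why all five files below change together.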
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:723de6fd746cbba66015f7a7da153864465a825d5f4e24435edd8645a25ac837
+ oid sha256:0ca95a7948bce19be5449f8781c37f0268eecbc454dcb50de5ef8e89c3d9a4e6
  size 613004648
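The updated weights can be inspected directly without a Trainer. A minimal sketch using the safetensors library (the local path is illustrative):

# Minimal sketch: load the ~613 MB weights file from the diff above.
# Assumes a local copy at an illustrative path.
from safetensors.torch import load_file

state_dict = load_file("last-checkpoint/model.safetensors")
print(len(state_dict), "tensors loaded")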
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dafb37725ed184eeb99653a88821c53652c298a048e783bcf251a3b487c248c8
+ oid sha256:667a4ed3665b904cc6a25c6508c89fb468bc4b1b80e08cd26eb7f6e936a1d8ff
  size 1226096954
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c551e87aa3f069465eab6343f6462d0da8c27e46770ad44ff0400698bec95cda
+ oid sha256:b67589c462b9c803b3450b2a56b26bd15fd2aad689878137f6e7e3b31569b4d3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:64e32232d2d68b6508947dd18795fe7ac8dd583abb7b016b68d853036e32fd4b
+ oid sha256:8e87f5dbc951603cbe6f4d5f5d51c8e0ef8863d7cd661e58ac58827859c30521
  size 1000
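Each diff above touches a Git LFS pointer rather than the binary itself: the repository stores only the three-line stub (spec version, sha256 oid, byte size), so a checkpoint overwrite changes just the oid line. A downloaded blob can be checked against its pointer by hashing it locally; a minimal sketch with an illustrative path:

import hashlib

def lfs_oid(path, chunk_size=1 << 20):
    # Stream the file through sha256 so large checkpoints fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Illustrative local path; the expected value is the new oid from the
# model.safetensors pointer above.
assert lfs_oid("last-checkpoint/model.safetensors") == (
    "0ca95a7948bce19be5449f8781c37f0268eecbc454dcb50de5ef8e89c3d9a4e6")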
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 7.966260543580131,
+ "epoch": 8.434864104967197,
  "eval_steps": 250,
- "global_step": 17000,
+ "global_step": 18000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -12995,6 +12995,770 @@
  "eval_spearman_manhattan": 0.745468210963869,
  "eval_steps_per_second": 37.654,
  "step": 17000
+ },
+ {
+ "epoch": 7.970946579194002,
+ "grad_norm": 1.712897539138794,
+ "learning_rate": 9.00363167760075e-06,
+ "loss": 0.0638,
+ "step": 17010
+ },
+ {
+ "epoch": 7.975632614807872,
+ "grad_norm": 1.6661624908447266,
+ "learning_rate": 9.003045923149017e-06,
+ "loss": 0.0721,
+ "step": 17020
+ },
+ {
+ "epoch": 7.980318650421744,
+ "grad_norm": 1.8900232315063477,
+ "learning_rate": 9.002460168697282e-06,
+ "loss": 0.0697,
+ "step": 17030
+ },
+ {
+ "epoch": 7.985004686035614,
+ "grad_norm": 1.7580076456069946,
+ "learning_rate": 9.001874414245549e-06,
+ "loss": 0.0829,
+ "step": 17040
+ },
+ {
+ "epoch": 7.989690721649485,
+ "grad_norm": 1.0621994733810425,
+ "learning_rate": 9.001288659793816e-06,
+ "loss": 0.0654,
+ "step": 17050
+ },
+ {
+ "epoch": 7.994376757263355,
+ "grad_norm": 2.382904529571533,
+ "learning_rate": 9.000702905342081e-06,
+ "loss": 0.0832,
+ "step": 17060
+ },
+ {
+ "epoch": 7.9990627928772255,
+ "grad_norm": 2.5280025005340576,
+ "learning_rate": 9.000117150890348e-06,
+ "loss": 0.0671,
+ "step": 17070
+ },
+ {
+ "epoch": 8.003748828491096,
+ "grad_norm": 1.5230673551559448,
+ "learning_rate": 8.999531396438613e-06,
+ "loss": 0.0705,
+ "step": 17080
+ },
+ {
+ "epoch": 8.008434864104967,
+ "grad_norm": 1.430708646774292,
+ "learning_rate": 8.99894564198688e-06,
+ "loss": 0.0469,
+ "step": 17090
+ },
+ {
+ "epoch": 8.013120899718837,
+ "grad_norm": 1.6292754411697388,
+ "learning_rate": 8.998359887535147e-06,
+ "loss": 0.0511,
+ "step": 17100
+ },
+ {
+ "epoch": 8.01780693533271,
+ "grad_norm": 1.5162855386734009,
+ "learning_rate": 8.997774133083412e-06,
+ "loss": 0.0562,
+ "step": 17110
+ },
+ {
+ "epoch": 8.02249297094658,
+ "grad_norm": 1.8024640083312988,
+ "learning_rate": 8.997188378631679e-06,
+ "loss": 0.0591,
+ "step": 17120
+ },
+ {
+ "epoch": 8.02717900656045,
+ "grad_norm": 1.7311487197875977,
+ "learning_rate": 8.996602624179944e-06,
+ "loss": 0.0588,
+ "step": 17130
+ },
+ {
+ "epoch": 8.03186504217432,
+ "grad_norm": 0.8079742193222046,
+ "learning_rate": 8.99601686972821e-06,
+ "loss": 0.0575,
+ "step": 17140
+ },
+ {
+ "epoch": 8.036551077788191,
+ "grad_norm": 1.5149396657943726,
+ "learning_rate": 8.995431115276478e-06,
+ "loss": 0.0631,
+ "step": 17150
+ },
+ {
+ "epoch": 8.041237113402062,
+ "grad_norm": 1.4858596324920654,
+ "learning_rate": 8.994845360824743e-06,
+ "loss": 0.0531,
+ "step": 17160
+ },
+ {
+ "epoch": 8.045923149015932,
+ "grad_norm": 0.9805922508239746,
+ "learning_rate": 8.994259606373009e-06,
+ "loss": 0.0507,
+ "step": 17170
+ },
+ {
+ "epoch": 8.050609184629803,
+ "grad_norm": 1.2604528665542603,
+ "learning_rate": 8.993673851921275e-06,
+ "loss": 0.0552,
+ "step": 17180
+ },
+ {
+ "epoch": 8.055295220243673,
+ "grad_norm": 1.1252182722091675,
+ "learning_rate": 8.99308809746954e-06,
+ "loss": 0.05,
+ "step": 17190
+ },
+ {
+ "epoch": 8.059981255857544,
+ "grad_norm": 2.151175022125244,
+ "learning_rate": 8.992502343017808e-06,
+ "loss": 0.0678,
+ "step": 17200
+ },
+ {
+ "epoch": 8.064667291471416,
+ "grad_norm": 1.468262791633606,
+ "learning_rate": 8.991916588566075e-06,
+ "loss": 0.0542,
+ "step": 17210
+ },
+ {
+ "epoch": 8.069353327085286,
+ "grad_norm": 1.679754376411438,
+ "learning_rate": 8.99133083411434e-06,
+ "loss": 0.0574,
+ "step": 17220
+ },
+ {
+ "epoch": 8.074039362699157,
+ "grad_norm": 2.2520573139190674,
+ "learning_rate": 8.990745079662607e-06,
+ "loss": 0.0517,
+ "step": 17230
+ },
+ {
+ "epoch": 8.078725398313027,
+ "grad_norm": 1.5211695432662964,
+ "learning_rate": 8.990159325210872e-06,
+ "loss": 0.0527,
+ "step": 17240
+ },
+ {
+ "epoch": 8.083411433926898,
+ "grad_norm": 1.7770270109176636,
+ "learning_rate": 8.989573570759139e-06,
+ "loss": 0.0682,
+ "step": 17250
+ },
+ {
+ "epoch": 8.083411433926898,
+ "eval_loss": 0.03860222175717354,
+ "eval_pearson_cosine": 0.7845454144482034,
+ "eval_pearson_dot": 0.6431296048602846,
+ "eval_pearson_euclidean": 0.7311376660170836,
+ "eval_pearson_manhattan": 0.7305724358867849,
+ "eval_runtime": 40.1479,
+ "eval_samples_per_second": 37.362,
+ "eval_spearman_cosine": 0.7869140607349678,
+ "eval_spearman_dot": 0.6613187727914379,
+ "eval_spearman_euclidean": 0.744930207684551,
+ "eval_spearman_manhattan": 0.7446744595094797,
+ "eval_steps_per_second": 37.362,
+ "step": 17250
+ },
+ {
+ "epoch": 8.088097469540768,
+ "grad_norm": 1.6006652116775513,
+ "learning_rate": 8.988987816307406e-06,
+ "loss": 0.0604,
+ "step": 17260
+ },
+ {
+ "epoch": 8.092783505154639,
+ "grad_norm": 1.7531373500823975,
+ "learning_rate": 8.988402061855671e-06,
+ "loss": 0.063,
+ "step": 17270
+ },
+ {
+ "epoch": 8.09746954076851,
+ "grad_norm": 2.294930934906006,
+ "learning_rate": 8.987816307403938e-06,
+ "loss": 0.0568,
+ "step": 17280
+ },
+ {
+ "epoch": 8.10215557638238,
+ "grad_norm": 1.9267457723617554,
+ "learning_rate": 8.987230552952203e-06,
+ "loss": 0.0565,
+ "step": 17290
+ },
+ {
+ "epoch": 8.10684161199625,
+ "grad_norm": 2.1076624393463135,
+ "learning_rate": 8.986644798500468e-06,
+ "loss": 0.05,
+ "step": 17300
+ },
+ {
+ "epoch": 8.111527647610123,
+ "grad_norm": 2.3128514289855957,
+ "learning_rate": 8.986059044048735e-06,
+ "loss": 0.0656,
+ "step": 17310
+ },
+ {
+ "epoch": 8.116213683223993,
+ "grad_norm": 1.6104718446731567,
+ "learning_rate": 8.985473289597002e-06,
+ "loss": 0.0554,
+ "step": 17320
+ },
+ {
+ "epoch": 8.120899718837864,
+ "grad_norm": 1.1439037322998047,
+ "learning_rate": 8.984887535145267e-06,
+ "loss": 0.0588,
+ "step": 17330
+ },
+ {
+ "epoch": 8.125585754451734,
+ "grad_norm": 1.342757225036621,
+ "learning_rate": 8.984301780693534e-06,
+ "loss": 0.0539,
+ "step": 17340
+ },
+ {
+ "epoch": 8.130271790065605,
+ "grad_norm": 0.8630651235580444,
+ "learning_rate": 8.9837160262418e-06,
+ "loss": 0.0602,
+ "step": 17350
+ },
+ {
+ "epoch": 8.134957825679475,
+ "grad_norm": 2.1189727783203125,
+ "learning_rate": 8.983130271790066e-06,
+ "loss": 0.0676,
+ "step": 17360
+ },
+ {
+ "epoch": 8.139643861293345,
+ "grad_norm": 1.943943977355957,
+ "learning_rate": 8.982544517338333e-06,
+ "loss": 0.0564,
+ "step": 17370
+ },
+ {
+ "epoch": 8.144329896907216,
+ "grad_norm": 2.4925365447998047,
+ "learning_rate": 8.981958762886599e-06,
+ "loss": 0.058,
+ "step": 17380
+ },
+ {
+ "epoch": 8.149015932521086,
+ "grad_norm": 0.8549938797950745,
+ "learning_rate": 8.981373008434865e-06,
+ "loss": 0.0583,
+ "step": 17390
+ },
+ {
+ "epoch": 8.153701968134957,
+ "grad_norm": 2.259129762649536,
+ "learning_rate": 8.98078725398313e-06,
+ "loss": 0.064,
+ "step": 17400
+ },
+ {
+ "epoch": 8.15838800374883,
+ "grad_norm": 1.5482234954833984,
+ "learning_rate": 8.980201499531398e-06,
+ "loss": 0.0608,
+ "step": 17410
+ },
+ {
+ "epoch": 8.1630740393627,
+ "grad_norm": 1.5130146741867065,
+ "learning_rate": 8.979615745079663e-06,
+ "loss": 0.0487,
+ "step": 17420
+ },
+ {
+ "epoch": 8.16776007497657,
+ "grad_norm": 2.1378371715545654,
+ "learning_rate": 8.97902999062793e-06,
+ "loss": 0.0687,
+ "step": 17430
+ },
+ {
+ "epoch": 8.17244611059044,
+ "grad_norm": 1.4148082733154297,
+ "learning_rate": 8.978444236176197e-06,
+ "loss": 0.0527,
+ "step": 17440
+ },
+ {
+ "epoch": 8.177132146204311,
+ "grad_norm": 1.621864676475525,
+ "learning_rate": 8.977858481724462e-06,
+ "loss": 0.0541,
+ "step": 17450
+ },
+ {
+ "epoch": 8.181818181818182,
+ "grad_norm": 1.5040533542633057,
+ "learning_rate": 8.977272727272727e-06,
+ "loss": 0.0505,
+ "step": 17460
+ },
+ {
+ "epoch": 8.186504217432052,
+ "grad_norm": 1.7481443881988525,
+ "learning_rate": 8.976686972820994e-06,
+ "loss": 0.0658,
+ "step": 17470
+ },
+ {
+ "epoch": 8.191190253045923,
+ "grad_norm": 1.6171940565109253,
+ "learning_rate": 8.976101218369261e-06,
+ "loss": 0.0545,
+ "step": 17480
+ },
+ {
+ "epoch": 8.195876288659793,
+ "grad_norm": 3.027470350265503,
+ "learning_rate": 8.975515463917526e-06,
+ "loss": 0.0497,
+ "step": 17490
+ },
+ {
+ "epoch": 8.200562324273664,
+ "grad_norm": 1.5770045518875122,
+ "learning_rate": 8.974929709465793e-06,
+ "loss": 0.0526,
+ "step": 17500
+ },
+ {
+ "epoch": 8.200562324273664,
+ "eval_loss": 0.03888610377907753,
+ "eval_pearson_cosine": 0.7824405710209184,
+ "eval_pearson_dot": 0.6370045075889941,
+ "eval_pearson_euclidean": 0.7275292814047258,
+ "eval_pearson_manhattan": 0.7271617935348544,
+ "eval_runtime": 40.0736,
+ "eval_samples_per_second": 37.431,
+ "eval_spearman_cosine": 0.7832356097193793,
+ "eval_spearman_dot": 0.6538613957323862,
+ "eval_spearman_euclidean": 0.7430558622725291,
+ "eval_spearman_manhattan": 0.7430619174369794,
+ "eval_steps_per_second": 37.431,
+ "step": 17500
+ },
+ {
+ "epoch": 8.205248359887536,
+ "grad_norm": 2.2228381633758545,
+ "learning_rate": 8.974343955014058e-06,
+ "loss": 0.0457,
+ "step": 17510
+ },
+ {
+ "epoch": 8.209934395501406,
+ "grad_norm": 2.4519641399383545,
+ "learning_rate": 8.973758200562325e-06,
+ "loss": 0.051,
+ "step": 17520
+ },
+ {
+ "epoch": 8.214620431115277,
+ "grad_norm": 1.8084455728530884,
+ "learning_rate": 8.97317244611059e-06,
+ "loss": 0.0575,
+ "step": 17530
+ },
+ {
+ "epoch": 8.219306466729147,
+ "grad_norm": 1.3803386688232422,
+ "learning_rate": 8.972586691658857e-06,
+ "loss": 0.053,
+ "step": 17540
+ },
+ {
+ "epoch": 8.223992502343018,
+ "grad_norm": 1.3450793027877808,
+ "learning_rate": 8.972000937207124e-06,
+ "loss": 0.0556,
+ "step": 17550
+ },
+ {
+ "epoch": 8.228678537956888,
+ "grad_norm": 2.0758721828460693,
+ "learning_rate": 8.97141518275539e-06,
+ "loss": 0.0521,
+ "step": 17560
+ },
+ {
+ "epoch": 8.233364573570759,
+ "grad_norm": 1.4197956323623657,
+ "learning_rate": 8.970829428303656e-06,
+ "loss": 0.0631,
+ "step": 17570
+ },
+ {
+ "epoch": 8.23805060918463,
+ "grad_norm": 1.497050166130066,
+ "learning_rate": 8.970243673851922e-06,
+ "loss": 0.0614,
+ "step": 17580
+ },
+ {
+ "epoch": 8.2427366447985,
+ "grad_norm": 1.0769314765930176,
+ "learning_rate": 8.969657919400189e-06,
+ "loss": 0.0587,
+ "step": 17590
+ },
+ {
+ "epoch": 8.24742268041237,
+ "grad_norm": 1.9401723146438599,
+ "learning_rate": 8.969072164948455e-06,
+ "loss": 0.0584,
+ "step": 17600
+ },
+ {
+ "epoch": 8.252108716026243,
+ "grad_norm": 0.6708168387413025,
+ "learning_rate": 8.96848641049672e-06,
+ "loss": 0.058,
+ "step": 17610
+ },
+ {
+ "epoch": 8.256794751640113,
+ "grad_norm": 1.555535912513733,
+ "learning_rate": 8.967900656044986e-06,
+ "loss": 0.0623,
+ "step": 17620
+ },
+ {
+ "epoch": 8.261480787253983,
+ "grad_norm": 1.182997703552246,
+ "learning_rate": 8.967314901593253e-06,
+ "loss": 0.0521,
+ "step": 17630
+ },
+ {
+ "epoch": 8.266166822867854,
+ "grad_norm": 1.7748857736587524,
+ "learning_rate": 8.966729147141518e-06,
+ "loss": 0.0573,
+ "step": 17640
+ },
+ {
+ "epoch": 8.270852858481724,
+ "grad_norm": 1.558457851409912,
+ "learning_rate": 8.966143392689785e-06,
+ "loss": 0.0586,
+ "step": 17650
+ },
+ {
+ "epoch": 8.275538894095595,
+ "grad_norm": 2.463069438934326,
+ "learning_rate": 8.965557638238052e-06,
+ "loss": 0.0581,
+ "step": 17660
+ },
+ {
+ "epoch": 8.280224929709465,
+ "grad_norm": 1.325049877166748,
+ "learning_rate": 8.964971883786317e-06,
+ "loss": 0.0526,
+ "step": 17670
+ },
+ {
+ "epoch": 8.284910965323336,
+ "grad_norm": 1.9136682748794556,
+ "learning_rate": 8.964386129334584e-06,
+ "loss": 0.0717,
+ "step": 17680
+ },
+ {
+ "epoch": 8.289597000937206,
+ "grad_norm": 0.9149712920188904,
+ "learning_rate": 8.96380037488285e-06,
+ "loss": 0.0551,
+ "step": 17690
+ },
+ {
+ "epoch": 8.294283036551079,
+ "grad_norm": 1.0004934072494507,
+ "learning_rate": 8.963214620431116e-06,
+ "loss": 0.0552,
+ "step": 17700
+ },
+ {
+ "epoch": 8.29896907216495,
+ "grad_norm": 2.1920504570007324,
+ "learning_rate": 8.962628865979383e-06,
+ "loss": 0.0631,
+ "step": 17710
+ },
+ {
+ "epoch": 8.30365510777882,
+ "grad_norm": 1.7555533647537231,
+ "learning_rate": 8.962043111527648e-06,
+ "loss": 0.0643,
+ "step": 17720
+ },
+ {
+ "epoch": 8.30834114339269,
+ "grad_norm": 1.980637550354004,
+ "learning_rate": 8.961457357075915e-06,
+ "loss": 0.0594,
+ "step": 17730
+ },
+ {
+ "epoch": 8.31302717900656,
+ "grad_norm": 1.4178955554962158,
+ "learning_rate": 8.96087160262418e-06,
+ "loss": 0.0584,
+ "step": 17740
+ },
+ {
+ "epoch": 8.317713214620431,
+ "grad_norm": 1.375645399093628,
+ "learning_rate": 8.960285848172446e-06,
+ "loss": 0.0558,
+ "step": 17750
+ },
+ {
+ "epoch": 8.317713214620431,
+ "eval_loss": 0.03849739581346512,
+ "eval_pearson_cosine": 0.7855877317949194,
+ "eval_pearson_dot": 0.651727283647233,
+ "eval_pearson_euclidean": 0.7376296235813697,
+ "eval_pearson_manhattan": 0.7370097948427539,
+ "eval_runtime": 40.7984,
+ "eval_samples_per_second": 36.766,
+ "eval_spearman_cosine": 0.7865254359033228,
+ "eval_spearman_dot": 0.6678553912046729,
+ "eval_spearman_euclidean": 0.7518223898617357,
+ "eval_spearman_manhattan": 0.7512717468993468,
+ "eval_steps_per_second": 36.766,
+ "step": 17750
+ },
+ {
+ "epoch": 8.322399250234302,
+ "grad_norm": 1.6528228521347046,
+ "learning_rate": 8.959700093720714e-06,
+ "loss": 0.0671,
+ "step": 17760
+ },
+ {
+ "epoch": 8.327085285848172,
+ "grad_norm": 1.526089072227478,
+ "learning_rate": 8.95911433926898e-06,
+ "loss": 0.0661,
+ "step": 17770
+ },
+ {
+ "epoch": 8.331771321462043,
+ "grad_norm": 1.9455267190933228,
+ "learning_rate": 8.958528584817245e-06,
+ "loss": 0.059,
+ "step": 17780
+ },
+ {
+ "epoch": 8.336457357075913,
+ "grad_norm": 2.1176974773406982,
+ "learning_rate": 8.957942830365512e-06,
+ "loss": 0.0628,
+ "step": 17790
+ },
+ {
+ "epoch": 8.341143392689784,
+ "grad_norm": 1.9059792757034302,
+ "learning_rate": 8.957357075913777e-06,
+ "loss": 0.0547,
+ "step": 17800
+ },
+ {
+ "epoch": 8.345829428303656,
+ "grad_norm": 1.9086081981658936,
+ "learning_rate": 8.956771321462044e-06,
+ "loss": 0.0598,
+ "step": 17810
+ },
+ {
+ "epoch": 8.350515463917526,
+ "grad_norm": 1.835897445678711,
+ "learning_rate": 8.95618556701031e-06,
+ "loss": 0.0528,
+ "step": 17820
+ },
+ {
+ "epoch": 8.355201499531397,
+ "grad_norm": 1.4925363063812256,
+ "learning_rate": 8.955599812558576e-06,
+ "loss": 0.054,
+ "step": 17830
+ },
+ {
+ "epoch": 8.359887535145267,
+ "grad_norm": 1.8737494945526123,
+ "learning_rate": 8.955014058106843e-06,
+ "loss": 0.0592,
+ "step": 17840
+ },
+ {
+ "epoch": 8.364573570759138,
+ "grad_norm": 2.0734856128692627,
+ "learning_rate": 8.954428303655108e-06,
+ "loss": 0.0577,
+ "step": 17850
+ },
+ {
+ "epoch": 8.369259606373008,
+ "grad_norm": 1.1876471042633057,
+ "learning_rate": 8.953842549203375e-06,
+ "loss": 0.0529,
+ "step": 17860
+ },
+ {
+ "epoch": 8.373945641986879,
+ "grad_norm": 0.8391751646995544,
+ "learning_rate": 8.953256794751642e-06,
+ "loss": 0.0513,
+ "step": 17870
+ },
+ {
+ "epoch": 8.37863167760075,
+ "grad_norm": 2.0527615547180176,
+ "learning_rate": 8.952671040299907e-06,
+ "loss": 0.0802,
+ "step": 17880
+ },
+ {
+ "epoch": 8.38331771321462,
+ "grad_norm": 1.1670820713043213,
+ "learning_rate": 8.952085285848174e-06,
+ "loss": 0.0567,
+ "step": 17890
+ },
+ {
+ "epoch": 8.388003748828492,
+ "grad_norm": 1.0440400838851929,
+ "learning_rate": 8.95149953139644e-06,
+ "loss": 0.0589,
+ "step": 17900
+ },
+ {
+ "epoch": 8.392689784442362,
+ "grad_norm": 1.3903789520263672,
+ "learning_rate": 8.950913776944704e-06,
+ "loss": 0.0555,
+ "step": 17910
+ },
+ {
+ "epoch": 8.397375820056233,
+ "grad_norm": 2.042224407196045,
+ "learning_rate": 8.950328022492971e-06,
+ "loss": 0.0705,
+ "step": 17920
+ },
+ {
+ "epoch": 8.402061855670103,
+ "grad_norm": 1.8270450830459595,
+ "learning_rate": 8.949742268041238e-06,
+ "loss": 0.0568,
+ "step": 17930
+ },
+ {
+ "epoch": 8.406747891283974,
+ "grad_norm": 1.7498126029968262,
+ "learning_rate": 8.949156513589504e-06,
+ "loss": 0.0584,
+ "step": 17940
+ },
+ {
+ "epoch": 8.411433926897844,
+ "grad_norm": 1.2420893907546997,
+ "learning_rate": 8.94857075913777e-06,
+ "loss": 0.06,
+ "step": 17950
+ },
+ {
+ "epoch": 8.416119962511715,
+ "grad_norm": 1.9896409511566162,
+ "learning_rate": 8.947985004686036e-06,
+ "loss": 0.0505,
+ "step": 17960
+ },
+ {
+ "epoch": 8.420805998125585,
+ "grad_norm": 1.1669880151748657,
+ "learning_rate": 8.947399250234303e-06,
+ "loss": 0.0595,
+ "step": 17970
+ },
+ {
+ "epoch": 8.425492033739456,
+ "grad_norm": 1.2261865139007568,
+ "learning_rate": 8.94681349578257e-06,
+ "loss": 0.0604,
+ "step": 17980
+ },
+ {
+ "epoch": 8.430178069353326,
+ "grad_norm": 1.5421935319900513,
+ "learning_rate": 8.946227741330835e-06,
+ "loss": 0.0621,
+ "step": 17990
+ },
+ {
+ "epoch": 8.434864104967197,
+ "grad_norm": 1.9026983976364136,
+ "learning_rate": 8.945641986879102e-06,
+ "loss": 0.0633,
+ "step": 18000
+ },
+ {
+ "epoch": 8.434864104967197,
+ "eval_loss": 0.039177875965833664,
+ "eval_pearson_cosine": 0.7822495113035757,
+ "eval_pearson_dot": 0.6511666258149553,
+ "eval_pearson_euclidean": 0.7395462188066446,
+ "eval_pearson_manhattan": 0.7387984914454222,
+ "eval_runtime": 42.2768,
+ "eval_samples_per_second": 35.48,
+ "eval_spearman_cosine": 0.7845228935533591,
+ "eval_spearman_dot": 0.6664111108433938,
+ "eval_spearman_euclidean": 0.7541690232038317,
+ "eval_spearman_manhattan": 0.7537307168421792,
+ "eval_steps_per_second": 35.48,
+ "step": 18000
  }
  ],
  "logging_steps": 10,