CocoRoF commited on
Commit
899bf83
·
verified ·
1 Parent(s): 6e57482

Training in progress, step 14000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d153a85db882a8d2ec877dfba2d9b581b46d201ce2501b713d912d9b724be90d
3
  size 613004648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7faa7408a314eb81f420ce1f2568d726ce6012f44614f1f2a60a7364bd5ef3f
3
  size 613004648
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e64c3f6b99c05bc4af3b1afc1105f63d286ccdb944360ccc4c6c03aaa0867281
3
  size 1226096954
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed1cf1b200aa0d0cd19c9252edcac54428c1ac0420162221afb866eff0c7bd48
3
  size 1226096954
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feb76d7d56395cb464f4c6b097cc298c265886f58499ea053baed20b9e64abbb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96df60f7a61e3b5eb854abf0dcce2641f189b2c45835fcf9c1fef3b37282da2b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61bccafd9792e811bc7ce6d26e59618969221a81768981a24e66ff1e4f6c92d4
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdb91814ca594bcc53b1d6ad5131ca9e1395fe1b33192ef261102fa12fca871c
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.091846298031865,
5
  "eval_steps": 250,
6
- "global_step": 13000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -9939,6 +9939,770 @@
9939
  "eval_spearman_manhattan": 0.752705655614685,
9940
  "eval_steps_per_second": 37.444,
9941
  "step": 13000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9942
  }
9943
  ],
9944
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.560449859418932,
5
  "eval_steps": 250,
6
+ "global_step": 14000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
9939
  "eval_spearman_manhattan": 0.752705655614685,
9940
  "eval_steps_per_second": 37.444,
9941
  "step": 13000
9942
+ },
9943
+ {
9944
+ "epoch": 6.096532333645736,
9945
+ "grad_norm": 2.591386556625366,
9946
+ "learning_rate": 9.237933458294283e-06,
9947
+ "loss": 0.074,
9948
+ "step": 13010
9949
+ },
9950
+ {
9951
+ "epoch": 6.101218369259606,
9952
+ "grad_norm": 3.0938313007354736,
9953
+ "learning_rate": 9.23734770384255e-06,
9954
+ "loss": 0.0889,
9955
+ "step": 13020
9956
+ },
9957
+ {
9958
+ "epoch": 6.105904404873477,
9959
+ "grad_norm": 1.073983907699585,
9960
+ "learning_rate": 9.236761949390815e-06,
9961
+ "loss": 0.0702,
9962
+ "step": 13030
9963
+ },
9964
+ {
9965
+ "epoch": 6.110590440487348,
9966
+ "grad_norm": 1.7386090755462646,
9967
+ "learning_rate": 9.236176194939082e-06,
9968
+ "loss": 0.0965,
9969
+ "step": 13040
9970
+ },
9971
+ {
9972
+ "epoch": 6.115276476101219,
9973
+ "grad_norm": 2.1852550506591797,
9974
+ "learning_rate": 9.235590440487349e-06,
9975
+ "loss": 0.0818,
9976
+ "step": 13050
9977
+ },
9978
+ {
9979
+ "epoch": 6.119962511715089,
9980
+ "grad_norm": 2.9571776390075684,
9981
+ "learning_rate": 9.235004686035614e-06,
9982
+ "loss": 0.0909,
9983
+ "step": 13060
9984
+ },
9985
+ {
9986
+ "epoch": 6.12464854732896,
9987
+ "grad_norm": 1.568499207496643,
9988
+ "learning_rate": 9.234418931583881e-06,
9989
+ "loss": 0.0935,
9990
+ "step": 13070
9991
+ },
9992
+ {
9993
+ "epoch": 6.12933458294283,
9994
+ "grad_norm": 2.0286030769348145,
9995
+ "learning_rate": 9.233833177132146e-06,
9996
+ "loss": 0.0923,
9997
+ "step": 13080
9998
+ },
9999
+ {
10000
+ "epoch": 6.134020618556701,
10001
+ "grad_norm": 1.3567001819610596,
10002
+ "learning_rate": 9.233247422680413e-06,
10003
+ "loss": 0.0874,
10004
+ "step": 13090
10005
+ },
10006
+ {
10007
+ "epoch": 6.138706654170572,
10008
+ "grad_norm": 3.1520678997039795,
10009
+ "learning_rate": 9.23266166822868e-06,
10010
+ "loss": 0.0907,
10011
+ "step": 13100
10012
+ },
10013
+ {
10014
+ "epoch": 6.143392689784442,
10015
+ "grad_norm": 1.935091495513916,
10016
+ "learning_rate": 9.232075913776945e-06,
10017
+ "loss": 0.0927,
10018
+ "step": 13110
10019
+ },
10020
+ {
10021
+ "epoch": 6.148078725398313,
10022
+ "grad_norm": 2.9123198986053467,
10023
+ "learning_rate": 9.23149015932521e-06,
10024
+ "loss": 0.0986,
10025
+ "step": 13120
10026
+ },
10027
+ {
10028
+ "epoch": 6.152764761012183,
10029
+ "grad_norm": 2.592006206512451,
10030
+ "learning_rate": 9.230904404873478e-06,
10031
+ "loss": 0.0864,
10032
+ "step": 13130
10033
+ },
10034
+ {
10035
+ "epoch": 6.157450796626055,
10036
+ "grad_norm": 2.0214340686798096,
10037
+ "learning_rate": 9.230318650421743e-06,
10038
+ "loss": 0.1066,
10039
+ "step": 13140
10040
+ },
10041
+ {
10042
+ "epoch": 6.162136832239925,
10043
+ "grad_norm": 1.743891716003418,
10044
+ "learning_rate": 9.22973289597001e-06,
10045
+ "loss": 0.0871,
10046
+ "step": 13150
10047
+ },
10048
+ {
10049
+ "epoch": 6.166822867853796,
10050
+ "grad_norm": 2.368562698364258,
10051
+ "learning_rate": 9.229147141518277e-06,
10052
+ "loss": 0.0754,
10053
+ "step": 13160
10054
+ },
10055
+ {
10056
+ "epoch": 6.171508903467666,
10057
+ "grad_norm": 2.369436264038086,
10058
+ "learning_rate": 9.228561387066542e-06,
10059
+ "loss": 0.0788,
10060
+ "step": 13170
10061
+ },
10062
+ {
10063
+ "epoch": 6.176194939081537,
10064
+ "grad_norm": 1.3312957286834717,
10065
+ "learning_rate": 9.227975632614809e-06,
10066
+ "loss": 0.0816,
10067
+ "step": 13180
10068
+ },
10069
+ {
10070
+ "epoch": 6.180880974695408,
10071
+ "grad_norm": 1.15755295753479,
10072
+ "learning_rate": 9.227389878163074e-06,
10073
+ "loss": 0.0867,
10074
+ "step": 13190
10075
+ },
10076
+ {
10077
+ "epoch": 6.185567010309279,
10078
+ "grad_norm": 1.4255220890045166,
10079
+ "learning_rate": 9.226804123711341e-06,
10080
+ "loss": 0.085,
10081
+ "step": 13200
10082
+ },
10083
+ {
10084
+ "epoch": 6.190253045923149,
10085
+ "grad_norm": 2.264315605163574,
10086
+ "learning_rate": 9.226218369259608e-06,
10087
+ "loss": 0.0939,
10088
+ "step": 13210
10089
+ },
10090
+ {
10091
+ "epoch": 6.1949390815370196,
10092
+ "grad_norm": 1.314502239227295,
10093
+ "learning_rate": 9.225632614807873e-06,
10094
+ "loss": 0.0847,
10095
+ "step": 13220
10096
+ },
10097
+ {
10098
+ "epoch": 6.19962511715089,
10099
+ "grad_norm": 1.463553786277771,
10100
+ "learning_rate": 9.22504686035614e-06,
10101
+ "loss": 0.0733,
10102
+ "step": 13230
10103
+ },
10104
+ {
10105
+ "epoch": 6.204311152764761,
10106
+ "grad_norm": 2.397528648376465,
10107
+ "learning_rate": 9.224461105904405e-06,
10108
+ "loss": 0.1004,
10109
+ "step": 13240
10110
+ },
10111
+ {
10112
+ "epoch": 6.208997188378632,
10113
+ "grad_norm": 1.8068519830703735,
10114
+ "learning_rate": 9.223875351452672e-06,
10115
+ "loss": 0.081,
10116
+ "step": 13250
10117
+ },
10118
+ {
10119
+ "epoch": 6.208997188378632,
10120
+ "eval_loss": 0.04053681343793869,
10121
+ "eval_pearson_cosine": 0.7813660928551514,
10122
+ "eval_pearson_dot": 0.6355779688477607,
10123
+ "eval_pearson_euclidean": 0.7355234843709724,
10124
+ "eval_pearson_manhattan": 0.7352503996205417,
10125
+ "eval_runtime": 39.4829,
10126
+ "eval_samples_per_second": 37.991,
10127
+ "eval_spearman_cosine": 0.7853597304476542,
10128
+ "eval_spearman_dot": 0.6478331015151749,
10129
+ "eval_spearman_euclidean": 0.7514050247961205,
10130
+ "eval_spearman_manhattan": 0.751296576320389,
10131
+ "eval_steps_per_second": 37.991,
10132
+ "step": 13250
10133
+ },
10134
+ {
10135
+ "epoch": 6.213683223992502,
10136
+ "grad_norm": 2.0780019760131836,
10137
+ "learning_rate": 9.223289597000939e-06,
10138
+ "loss": 0.1108,
10139
+ "step": 13260
10140
+ },
10141
+ {
10142
+ "epoch": 6.218369259606373,
10143
+ "grad_norm": 1.3198286294937134,
10144
+ "learning_rate": 9.222703842549204e-06,
10145
+ "loss": 0.0809,
10146
+ "step": 13270
10147
+ },
10148
+ {
10149
+ "epoch": 6.223055295220243,
10150
+ "grad_norm": 1.4620928764343262,
10151
+ "learning_rate": 9.22211808809747e-06,
10152
+ "loss": 0.0803,
10153
+ "step": 13280
10154
+ },
10155
+ {
10156
+ "epoch": 6.227741330834115,
10157
+ "grad_norm": 2.1493215560913086,
10158
+ "learning_rate": 9.221532333645736e-06,
10159
+ "loss": 0.0925,
10160
+ "step": 13290
10161
+ },
10162
+ {
10163
+ "epoch": 6.232427366447985,
10164
+ "grad_norm": 2.0909435749053955,
10165
+ "learning_rate": 9.220946579194002e-06,
10166
+ "loss": 0.1007,
10167
+ "step": 13300
10168
+ },
10169
+ {
10170
+ "epoch": 6.237113402061856,
10171
+ "grad_norm": 1.8717360496520996,
10172
+ "learning_rate": 9.220360824742269e-06,
10173
+ "loss": 0.0907,
10174
+ "step": 13310
10175
+ },
10176
+ {
10177
+ "epoch": 6.241799437675726,
10178
+ "grad_norm": 2.817506790161133,
10179
+ "learning_rate": 9.219775070290535e-06,
10180
+ "loss": 0.1027,
10181
+ "step": 13320
10182
+ },
10183
+ {
10184
+ "epoch": 6.246485473289597,
10185
+ "grad_norm": 2.344900369644165,
10186
+ "learning_rate": 9.2191893158388e-06,
10187
+ "loss": 0.0993,
10188
+ "step": 13330
10189
+ },
10190
+ {
10191
+ "epoch": 6.251171508903468,
10192
+ "grad_norm": 1.5306053161621094,
10193
+ "learning_rate": 9.218603561387068e-06,
10194
+ "loss": 0.0717,
10195
+ "step": 13340
10196
+ },
10197
+ {
10198
+ "epoch": 6.2558575445173386,
10199
+ "grad_norm": 1.2907652854919434,
10200
+ "learning_rate": 9.218017806935333e-06,
10201
+ "loss": 0.0813,
10202
+ "step": 13350
10203
+ },
10204
+ {
10205
+ "epoch": 6.260543580131209,
10206
+ "grad_norm": 2.5026895999908447,
10207
+ "learning_rate": 9.2174320524836e-06,
10208
+ "loss": 0.107,
10209
+ "step": 13360
10210
+ },
10211
+ {
10212
+ "epoch": 6.2652296157450795,
10213
+ "grad_norm": 2.076305389404297,
10214
+ "learning_rate": 9.216846298031867e-06,
10215
+ "loss": 0.0725,
10216
+ "step": 13370
10217
+ },
10218
+ {
10219
+ "epoch": 6.26991565135895,
10220
+ "grad_norm": 2.985992908477783,
10221
+ "learning_rate": 9.216260543580132e-06,
10222
+ "loss": 0.1041,
10223
+ "step": 13380
10224
+ },
10225
+ {
10226
+ "epoch": 6.274601686972821,
10227
+ "grad_norm": 1.430763602256775,
10228
+ "learning_rate": 9.215674789128399e-06,
10229
+ "loss": 0.0941,
10230
+ "step": 13390
10231
+ },
10232
+ {
10233
+ "epoch": 6.279287722586692,
10234
+ "grad_norm": 2.2910046577453613,
10235
+ "learning_rate": 9.215089034676664e-06,
10236
+ "loss": 0.07,
10237
+ "step": 13400
10238
+ },
10239
+ {
10240
+ "epoch": 6.283973758200562,
10241
+ "grad_norm": 2.093707799911499,
10242
+ "learning_rate": 9.214503280224931e-06,
10243
+ "loss": 0.085,
10244
+ "step": 13410
10245
+ },
10246
+ {
10247
+ "epoch": 6.288659793814433,
10248
+ "grad_norm": 1.2367407083511353,
10249
+ "learning_rate": 9.213917525773196e-06,
10250
+ "loss": 0.0895,
10251
+ "step": 13420
10252
+ },
10253
+ {
10254
+ "epoch": 6.293345829428303,
10255
+ "grad_norm": 1.8991947174072266,
10256
+ "learning_rate": 9.213331771321463e-06,
10257
+ "loss": 0.0992,
10258
+ "step": 13430
10259
+ },
10260
+ {
10261
+ "epoch": 6.298031865042175,
10262
+ "grad_norm": 2.4072980880737305,
10263
+ "learning_rate": 9.212746016869728e-06,
10264
+ "loss": 0.1033,
10265
+ "step": 13440
10266
+ },
10267
+ {
10268
+ "epoch": 6.302717900656045,
10269
+ "grad_norm": 1.8667056560516357,
10270
+ "learning_rate": 9.212160262417995e-06,
10271
+ "loss": 0.0825,
10272
+ "step": 13450
10273
+ },
10274
+ {
10275
+ "epoch": 6.307403936269916,
10276
+ "grad_norm": 2.0823960304260254,
10277
+ "learning_rate": 9.21157450796626e-06,
10278
+ "loss": 0.095,
10279
+ "step": 13460
10280
+ },
10281
+ {
10282
+ "epoch": 6.312089971883786,
10283
+ "grad_norm": 1.7069936990737915,
10284
+ "learning_rate": 9.210988753514527e-06,
10285
+ "loss": 0.088,
10286
+ "step": 13470
10287
+ },
10288
+ {
10289
+ "epoch": 6.316776007497657,
10290
+ "grad_norm": 2.103296995162964,
10291
+ "learning_rate": 9.210402999062794e-06,
10292
+ "loss": 0.0867,
10293
+ "step": 13480
10294
+ },
10295
+ {
10296
+ "epoch": 6.321462043111528,
10297
+ "grad_norm": 1.0462993383407593,
10298
+ "learning_rate": 9.20981724461106e-06,
10299
+ "loss": 0.0813,
10300
+ "step": 13490
10301
+ },
10302
+ {
10303
+ "epoch": 6.3261480787253985,
10304
+ "grad_norm": 1.5300264358520508,
10305
+ "learning_rate": 9.209231490159326e-06,
10306
+ "loss": 0.0807,
10307
+ "step": 13500
10308
+ },
10309
+ {
10310
+ "epoch": 6.3261480787253985,
10311
+ "eval_loss": 0.040135517716407776,
10312
+ "eval_pearson_cosine": 0.7837547061964756,
10313
+ "eval_pearson_dot": 0.6449662712980881,
10314
+ "eval_pearson_euclidean": 0.7344489051322731,
10315
+ "eval_pearson_manhattan": 0.733928021220585,
10316
+ "eval_runtime": 40.9066,
10317
+ "eval_samples_per_second": 36.669,
10318
+ "eval_spearman_cosine": 0.787895557874529,
10319
+ "eval_spearman_dot": 0.6615144668773622,
10320
+ "eval_spearman_euclidean": 0.7513080603143243,
10321
+ "eval_spearman_manhattan": 0.7509655476151297,
10322
+ "eval_steps_per_second": 36.669,
10323
+ "step": 13500
10324
+ },
10325
+ {
10326
+ "epoch": 6.330834114339269,
10327
+ "grad_norm": 2.420490264892578,
10328
+ "learning_rate": 9.208645735707592e-06,
10329
+ "loss": 0.086,
10330
+ "step": 13510
10331
+ },
10332
+ {
10333
+ "epoch": 6.3355201499531395,
10334
+ "grad_norm": 2.568124771118164,
10335
+ "learning_rate": 9.208059981255859e-06,
10336
+ "loss": 0.0972,
10337
+ "step": 13520
10338
+ },
10339
+ {
10340
+ "epoch": 6.34020618556701,
10341
+ "grad_norm": 2.0971317291259766,
10342
+ "learning_rate": 9.207474226804124e-06,
10343
+ "loss": 0.0851,
10344
+ "step": 13530
10345
+ },
10346
+ {
10347
+ "epoch": 6.344892221180881,
10348
+ "grad_norm": 2.3092575073242188,
10349
+ "learning_rate": 9.20688847235239e-06,
10350
+ "loss": 0.1103,
10351
+ "step": 13540
10352
+ },
10353
+ {
10354
+ "epoch": 6.349578256794752,
10355
+ "grad_norm": 1.0552003383636475,
10356
+ "learning_rate": 9.206302717900658e-06,
10357
+ "loss": 0.0927,
10358
+ "step": 13550
10359
+ },
10360
+ {
10361
+ "epoch": 6.354264292408622,
10362
+ "grad_norm": 2.5293657779693604,
10363
+ "learning_rate": 9.205716963448923e-06,
10364
+ "loss": 0.0948,
10365
+ "step": 13560
10366
+ },
10367
+ {
10368
+ "epoch": 6.358950328022493,
10369
+ "grad_norm": 2.3516149520874023,
10370
+ "learning_rate": 9.20513120899719e-06,
10371
+ "loss": 0.0997,
10372
+ "step": 13570
10373
+ },
10374
+ {
10375
+ "epoch": 6.363636363636363,
10376
+ "grad_norm": 2.1005942821502686,
10377
+ "learning_rate": 9.204545454545455e-06,
10378
+ "loss": 0.0839,
10379
+ "step": 13580
10380
+ },
10381
+ {
10382
+ "epoch": 6.368322399250235,
10383
+ "grad_norm": 1.8200384378433228,
10384
+ "learning_rate": 9.203959700093722e-06,
10385
+ "loss": 0.0876,
10386
+ "step": 13590
10387
+ },
10388
+ {
10389
+ "epoch": 6.373008434864105,
10390
+ "grad_norm": 2.4939422607421875,
10391
+ "learning_rate": 9.203373945641987e-06,
10392
+ "loss": 0.0912,
10393
+ "step": 13600
10394
+ },
10395
+ {
10396
+ "epoch": 6.377694470477976,
10397
+ "grad_norm": 2.695819616317749,
10398
+ "learning_rate": 9.202788191190254e-06,
10399
+ "loss": 0.0797,
10400
+ "step": 13610
10401
+ },
10402
+ {
10403
+ "epoch": 6.382380506091846,
10404
+ "grad_norm": 2.2140467166900635,
10405
+ "learning_rate": 9.20220243673852e-06,
10406
+ "loss": 0.0908,
10407
+ "step": 13620
10408
+ },
10409
+ {
10410
+ "epoch": 6.387066541705717,
10411
+ "grad_norm": 1.4225996732711792,
10412
+ "learning_rate": 9.201616682286786e-06,
10413
+ "loss": 0.0888,
10414
+ "step": 13630
10415
+ },
10416
+ {
10417
+ "epoch": 6.391752577319588,
10418
+ "grad_norm": 1.8974356651306152,
10419
+ "learning_rate": 9.201030927835051e-06,
10420
+ "loss": 0.0721,
10421
+ "step": 13640
10422
+ },
10423
+ {
10424
+ "epoch": 6.3964386129334585,
10425
+ "grad_norm": 2.853360176086426,
10426
+ "learning_rate": 9.200445173383318e-06,
10427
+ "loss": 0.0998,
10428
+ "step": 13650
10429
+ },
10430
+ {
10431
+ "epoch": 6.401124648547329,
10432
+ "grad_norm": 1.7699189186096191,
10433
+ "learning_rate": 9.199859418931585e-06,
10434
+ "loss": 0.075,
10435
+ "step": 13660
10436
+ },
10437
+ {
10438
+ "epoch": 6.4058106841611995,
10439
+ "grad_norm": 1.9779934883117676,
10440
+ "learning_rate": 9.19927366447985e-06,
10441
+ "loss": 0.0854,
10442
+ "step": 13670
10443
+ },
10444
+ {
10445
+ "epoch": 6.41049671977507,
10446
+ "grad_norm": 1.0165759325027466,
10447
+ "learning_rate": 9.198687910028117e-06,
10448
+ "loss": 0.0694,
10449
+ "step": 13680
10450
+ },
10451
+ {
10452
+ "epoch": 6.415182755388941,
10453
+ "grad_norm": 2.2649970054626465,
10454
+ "learning_rate": 9.198102155576383e-06,
10455
+ "loss": 0.1138,
10456
+ "step": 13690
10457
+ },
10458
+ {
10459
+ "epoch": 6.419868791002812,
10460
+ "grad_norm": 1.683463454246521,
10461
+ "learning_rate": 9.19751640112465e-06,
10462
+ "loss": 0.0838,
10463
+ "step": 13700
10464
+ },
10465
+ {
10466
+ "epoch": 6.424554826616682,
10467
+ "grad_norm": 2.579838275909424,
10468
+ "learning_rate": 9.196930646672916e-06,
10469
+ "loss": 0.1073,
10470
+ "step": 13710
10471
+ },
10472
+ {
10473
+ "epoch": 6.429240862230553,
10474
+ "grad_norm": 2.041942834854126,
10475
+ "learning_rate": 9.196344892221182e-06,
10476
+ "loss": 0.0804,
10477
+ "step": 13720
10478
+ },
10479
+ {
10480
+ "epoch": 6.433926897844423,
10481
+ "grad_norm": 1.7387852668762207,
10482
+ "learning_rate": 9.195759137769449e-06,
10483
+ "loss": 0.0994,
10484
+ "step": 13730
10485
+ },
10486
+ {
10487
+ "epoch": 6.438612933458295,
10488
+ "grad_norm": 1.3456875085830688,
10489
+ "learning_rate": 9.195173383317714e-06,
10490
+ "loss": 0.0957,
10491
+ "step": 13740
10492
+ },
10493
+ {
10494
+ "epoch": 6.443298969072165,
10495
+ "grad_norm": 1.8419511318206787,
10496
+ "learning_rate": 9.194587628865979e-06,
10497
+ "loss": 0.0863,
10498
+ "step": 13750
10499
+ },
10500
+ {
10501
+ "epoch": 6.443298969072165,
10502
+ "eval_loss": 0.04049157723784447,
10503
+ "eval_pearson_cosine": 0.7813714988340337,
10504
+ "eval_pearson_dot": 0.6324385596537514,
10505
+ "eval_pearson_euclidean": 0.7407851614356957,
10506
+ "eval_pearson_manhattan": 0.7403546898217854,
10507
+ "eval_runtime": 39.3844,
10508
+ "eval_samples_per_second": 38.086,
10509
+ "eval_spearman_cosine": 0.7841122028697166,
10510
+ "eval_spearman_dot": 0.6478511175419069,
10511
+ "eval_spearman_euclidean": 0.7589082543335365,
10512
+ "eval_spearman_manhattan": 0.7586951153918645,
10513
+ "eval_steps_per_second": 38.086,
10514
+ "step": 13750
10515
+ },
10516
+ {
10517
+ "epoch": 6.447985004686036,
10518
+ "grad_norm": 2.0003013610839844,
10519
+ "learning_rate": 9.194001874414246e-06,
10520
+ "loss": 0.0752,
10521
+ "step": 13760
10522
+ },
10523
+ {
10524
+ "epoch": 6.452671040299906,
10525
+ "grad_norm": 1.2026764154434204,
10526
+ "learning_rate": 9.193416119962513e-06,
10527
+ "loss": 0.0879,
10528
+ "step": 13770
10529
+ },
10530
+ {
10531
+ "epoch": 6.457357075913777,
10532
+ "grad_norm": 1.3370740413665771,
10533
+ "learning_rate": 9.192830365510778e-06,
10534
+ "loss": 0.1001,
10535
+ "step": 13780
10536
+ },
10537
+ {
10538
+ "epoch": 6.462043111527648,
10539
+ "grad_norm": 2.915313482284546,
10540
+ "learning_rate": 9.192244611059045e-06,
10541
+ "loss": 0.0893,
10542
+ "step": 13790
10543
+ },
10544
+ {
10545
+ "epoch": 6.4667291471415185,
10546
+ "grad_norm": 2.472095012664795,
10547
+ "learning_rate": 9.19165885660731e-06,
10548
+ "loss": 0.0833,
10549
+ "step": 13800
10550
+ },
10551
+ {
10552
+ "epoch": 6.471415182755389,
10553
+ "grad_norm": 2.4989047050476074,
10554
+ "learning_rate": 9.191073102155577e-06,
10555
+ "loss": 0.0808,
10556
+ "step": 13810
10557
+ },
10558
+ {
10559
+ "epoch": 6.4761012183692594,
10560
+ "grad_norm": 1.9011194705963135,
10561
+ "learning_rate": 9.190487347703844e-06,
10562
+ "loss": 0.0845,
10563
+ "step": 13820
10564
+ },
10565
+ {
10566
+ "epoch": 6.48078725398313,
10567
+ "grad_norm": 1.4347310066223145,
10568
+ "learning_rate": 9.18990159325211e-06,
10569
+ "loss": 0.1029,
10570
+ "step": 13830
10571
+ },
10572
+ {
10573
+ "epoch": 6.485473289597001,
10574
+ "grad_norm": 2.54189395904541,
10575
+ "learning_rate": 9.189315838800376e-06,
10576
+ "loss": 0.0779,
10577
+ "step": 13840
10578
+ },
10579
+ {
10580
+ "epoch": 6.490159325210872,
10581
+ "grad_norm": 1.2057007551193237,
10582
+ "learning_rate": 9.188730084348641e-06,
10583
+ "loss": 0.0894,
10584
+ "step": 13850
10585
+ },
10586
+ {
10587
+ "epoch": 6.494845360824742,
10588
+ "grad_norm": 1.7278800010681152,
10589
+ "learning_rate": 9.188144329896908e-06,
10590
+ "loss": 0.0779,
10591
+ "step": 13860
10592
+ },
10593
+ {
10594
+ "epoch": 6.499531396438613,
10595
+ "grad_norm": 1.5743318796157837,
10596
+ "learning_rate": 9.187558575445175e-06,
10597
+ "loss": 0.0927,
10598
+ "step": 13870
10599
+ },
10600
+ {
10601
+ "epoch": 6.504217432052483,
10602
+ "grad_norm": 1.5857350826263428,
10603
+ "learning_rate": 9.18697282099344e-06,
10604
+ "loss": 0.0947,
10605
+ "step": 13880
10606
+ },
10607
+ {
10608
+ "epoch": 6.508903467666355,
10609
+ "grad_norm": 1.6598036289215088,
10610
+ "learning_rate": 9.186387066541707e-06,
10611
+ "loss": 0.0686,
10612
+ "step": 13890
10613
+ },
10614
+ {
10615
+ "epoch": 6.513589503280225,
10616
+ "grad_norm": 1.861210823059082,
10617
+ "learning_rate": 9.185801312089973e-06,
10618
+ "loss": 0.0785,
10619
+ "step": 13900
10620
+ },
10621
+ {
10622
+ "epoch": 6.518275538894096,
10623
+ "grad_norm": 2.236298084259033,
10624
+ "learning_rate": 9.185215557638238e-06,
10625
+ "loss": 0.0786,
10626
+ "step": 13910
10627
+ },
10628
+ {
10629
+ "epoch": 6.522961574507966,
10630
+ "grad_norm": 2.7919623851776123,
10631
+ "learning_rate": 9.184629803186505e-06,
10632
+ "loss": 0.091,
10633
+ "step": 13920
10634
+ },
10635
+ {
10636
+ "epoch": 6.527647610121837,
10637
+ "grad_norm": 2.071950674057007,
10638
+ "learning_rate": 9.184044048734772e-06,
10639
+ "loss": 0.0815,
10640
+ "step": 13930
10641
+ },
10642
+ {
10643
+ "epoch": 6.532333645735708,
10644
+ "grad_norm": 2.533116102218628,
10645
+ "learning_rate": 9.183458294283037e-06,
10646
+ "loss": 0.0787,
10647
+ "step": 13940
10648
+ },
10649
+ {
10650
+ "epoch": 6.5370196813495784,
10651
+ "grad_norm": 2.1527774333953857,
10652
+ "learning_rate": 9.182872539831304e-06,
10653
+ "loss": 0.0749,
10654
+ "step": 13950
10655
+ },
10656
+ {
10657
+ "epoch": 6.541705716963449,
10658
+ "grad_norm": 1.961341142654419,
10659
+ "learning_rate": 9.182286785379569e-06,
10660
+ "loss": 0.0953,
10661
+ "step": 13960
10662
+ },
10663
+ {
10664
+ "epoch": 6.546391752577319,
10665
+ "grad_norm": 2.28048038482666,
10666
+ "learning_rate": 9.181701030927836e-06,
10667
+ "loss": 0.0915,
10668
+ "step": 13970
10669
+ },
10670
+ {
10671
+ "epoch": 6.55107778819119,
10672
+ "grad_norm": 2.0521295070648193,
10673
+ "learning_rate": 9.181115276476103e-06,
10674
+ "loss": 0.0895,
10675
+ "step": 13980
10676
+ },
10677
+ {
10678
+ "epoch": 6.555763823805061,
10679
+ "grad_norm": 1.6671199798583984,
10680
+ "learning_rate": 9.180529522024368e-06,
10681
+ "loss": 0.0728,
10682
+ "step": 13990
10683
+ },
10684
+ {
10685
+ "epoch": 6.560449859418932,
10686
+ "grad_norm": 1.6179335117340088,
10687
+ "learning_rate": 9.179943767572635e-06,
10688
+ "loss": 0.0948,
10689
+ "step": 14000
10690
+ },
10691
+ {
10692
+ "epoch": 6.560449859418932,
10693
+ "eval_loss": 0.03971678018569946,
10694
+ "eval_pearson_cosine": 0.7830490563978572,
10695
+ "eval_pearson_dot": 0.6307782966974682,
10696
+ "eval_pearson_euclidean": 0.7414858554074186,
10697
+ "eval_pearson_manhattan": 0.7410214751653541,
10698
+ "eval_runtime": 40.1664,
10699
+ "eval_samples_per_second": 37.345,
10700
+ "eval_spearman_cosine": 0.7866220854382757,
10701
+ "eval_spearman_dot": 0.6460234579111318,
10702
+ "eval_spearman_euclidean": 0.7578877607249501,
10703
+ "eval_spearman_manhattan": 0.757795808702236,
10704
+ "eval_steps_per_second": 37.345,
10705
+ "step": 14000
10706
  }
10707
  ],
10708
  "logging_steps": 10,