CocoRoF commited on
Commit
72ff931
·
verified ·
1 Parent(s): 55e45ea

Training in progress, step 13000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3b588761b50be65d8796badee7ef8b1410198e0580687e0e9b4fb211a20c99b
3
  size 613004648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d153a85db882a8d2ec877dfba2d9b581b46d201ce2501b713d912d9b724be90d
3
  size 613004648
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a271571edbbdb0723201c1dcd4e66117d96525b325fc1fd6acf49af21cc25818
3
  size 1226096954
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e64c3f6b99c05bc4af3b1afc1105f63d286ccdb944360ccc4c6c03aaa0867281
3
  size 1226096954
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f92435eb6dc7f41ea95fa3d980a0666d4a13b153aeb2cd8cebe90dc94dc1f10
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feb76d7d56395cb464f4c6b097cc298c265886f58499ea053baed20b9e64abbb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8db93a2f6abb2c48f8c464d140e6d8e94f07e6e65ef70a39fa77270a82587ab1
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61bccafd9792e811bc7ce6d26e59618969221a81768981a24e66ff1e4f6c92d4
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.623242736644799,
5
  "eval_steps": 250,
6
- "global_step": 12000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -9175,6 +9175,770 @@
9175
  "eval_spearman_manhattan": 0.7598359774134882,
9176
  "eval_steps_per_second": 37.573,
9177
  "step": 12000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9178
  }
9179
  ],
9180
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.091846298031865,
5
  "eval_steps": 250,
6
+ "global_step": 13000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
9175
  "eval_spearman_manhattan": 0.7598359774134882,
9176
  "eval_steps_per_second": 37.573,
9177
  "step": 12000
9178
+ },
9179
+ {
9180
+ "epoch": 5.627928772258669,
9181
+ "grad_norm": 2.1159090995788574,
9182
+ "learning_rate": 9.296508903467668e-06,
9183
+ "loss": 0.1142,
9184
+ "step": 12010
9185
+ },
9186
+ {
9187
+ "epoch": 5.63261480787254,
9188
+ "grad_norm": 2.249617576599121,
9189
+ "learning_rate": 9.295923149015933e-06,
9190
+ "loss": 0.1091,
9191
+ "step": 12020
9192
+ },
9193
+ {
9194
+ "epoch": 5.63730084348641,
9195
+ "grad_norm": 2.0257644653320312,
9196
+ "learning_rate": 9.295337394564198e-06,
9197
+ "loss": 0.1094,
9198
+ "step": 12030
9199
+ },
9200
+ {
9201
+ "epoch": 5.641986879100282,
9202
+ "grad_norm": 3.4566030502319336,
9203
+ "learning_rate": 9.294751640112467e-06,
9204
+ "loss": 0.1203,
9205
+ "step": 12040
9206
+ },
9207
+ {
9208
+ "epoch": 5.646672914714152,
9209
+ "grad_norm": 3.4752063751220703,
9210
+ "learning_rate": 9.294165885660732e-06,
9211
+ "loss": 0.1359,
9212
+ "step": 12050
9213
+ },
9214
+ {
9215
+ "epoch": 5.651358950328023,
9216
+ "grad_norm": 2.0857534408569336,
9217
+ "learning_rate": 9.293580131208997e-06,
9218
+ "loss": 0.0959,
9219
+ "step": 12060
9220
+ },
9221
+ {
9222
+ "epoch": 5.656044985941893,
9223
+ "grad_norm": 1.1136995553970337,
9224
+ "learning_rate": 9.292994376757264e-06,
9225
+ "loss": 0.0922,
9226
+ "step": 12070
9227
+ },
9228
+ {
9229
+ "epoch": 5.660731021555764,
9230
+ "grad_norm": 1.7703429460525513,
9231
+ "learning_rate": 9.29240862230553e-06,
9232
+ "loss": 0.1314,
9233
+ "step": 12080
9234
+ },
9235
+ {
9236
+ "epoch": 5.665417057169634,
9237
+ "grad_norm": 2.678006172180176,
9238
+ "learning_rate": 9.291822867853796e-06,
9239
+ "loss": 0.1124,
9240
+ "step": 12090
9241
+ },
9242
+ {
9243
+ "epoch": 5.670103092783505,
9244
+ "grad_norm": 2.3180134296417236,
9245
+ "learning_rate": 9.291237113402063e-06,
9246
+ "loss": 0.0916,
9247
+ "step": 12100
9248
+ },
9249
+ {
9250
+ "epoch": 5.674789128397376,
9251
+ "grad_norm": 1.4481223821640015,
9252
+ "learning_rate": 9.290651358950328e-06,
9253
+ "loss": 0.0934,
9254
+ "step": 12110
9255
+ },
9256
+ {
9257
+ "epoch": 5.679475164011246,
9258
+ "grad_norm": 1.6825222969055176,
9259
+ "learning_rate": 9.290065604498595e-06,
9260
+ "loss": 0.112,
9261
+ "step": 12120
9262
+ },
9263
+ {
9264
+ "epoch": 5.684161199625117,
9265
+ "grad_norm": 1.8293483257293701,
9266
+ "learning_rate": 9.28947985004686e-06,
9267
+ "loss": 0.1006,
9268
+ "step": 12130
9269
+ },
9270
+ {
9271
+ "epoch": 5.688847235238988,
9272
+ "grad_norm": 1.5621511936187744,
9273
+ "learning_rate": 9.288894095595127e-06,
9274
+ "loss": 0.1069,
9275
+ "step": 12140
9276
+ },
9277
+ {
9278
+ "epoch": 5.693533270852859,
9279
+ "grad_norm": 1.9712047576904297,
9280
+ "learning_rate": 9.288308341143394e-06,
9281
+ "loss": 0.1121,
9282
+ "step": 12150
9283
+ },
9284
+ {
9285
+ "epoch": 5.698219306466729,
9286
+ "grad_norm": 1.5489860773086548,
9287
+ "learning_rate": 9.28772258669166e-06,
9288
+ "loss": 0.1152,
9289
+ "step": 12160
9290
+ },
9291
+ {
9292
+ "epoch": 5.7029053420806,
9293
+ "grad_norm": 1.5992718935012817,
9294
+ "learning_rate": 9.287136832239927e-06,
9295
+ "loss": 0.1081,
9296
+ "step": 12170
9297
+ },
9298
+ {
9299
+ "epoch": 5.70759137769447,
9300
+ "grad_norm": 2.584080219268799,
9301
+ "learning_rate": 9.286551077788192e-06,
9302
+ "loss": 0.115,
9303
+ "step": 12180
9304
+ },
9305
+ {
9306
+ "epoch": 5.712277413308341,
9307
+ "grad_norm": 1.9940451383590698,
9308
+ "learning_rate": 9.285965323336457e-06,
9309
+ "loss": 0.1334,
9310
+ "step": 12190
9311
+ },
9312
+ {
9313
+ "epoch": 5.716963448922212,
9314
+ "grad_norm": 2.244067668914795,
9315
+ "learning_rate": 9.285379568884726e-06,
9316
+ "loss": 0.1126,
9317
+ "step": 12200
9318
+ },
9319
+ {
9320
+ "epoch": 5.721649484536083,
9321
+ "grad_norm": 2.828308343887329,
9322
+ "learning_rate": 9.28479381443299e-06,
9323
+ "loss": 0.0978,
9324
+ "step": 12210
9325
+ },
9326
+ {
9327
+ "epoch": 5.726335520149953,
9328
+ "grad_norm": 2.3048787117004395,
9329
+ "learning_rate": 9.284208059981256e-06,
9330
+ "loss": 0.1285,
9331
+ "step": 12220
9332
+ },
9333
+ {
9334
+ "epoch": 5.7310215557638235,
9335
+ "grad_norm": 1.9416192770004272,
9336
+ "learning_rate": 9.283622305529523e-06,
9337
+ "loss": 0.114,
9338
+ "step": 12230
9339
+ },
9340
+ {
9341
+ "epoch": 5.735707591377695,
9342
+ "grad_norm": 2.0904664993286133,
9343
+ "learning_rate": 9.283036551077788e-06,
9344
+ "loss": 0.1135,
9345
+ "step": 12240
9346
+ },
9347
+ {
9348
+ "epoch": 5.740393626991565,
9349
+ "grad_norm": 2.0567378997802734,
9350
+ "learning_rate": 9.282450796626055e-06,
9351
+ "loss": 0.1269,
9352
+ "step": 12250
9353
+ },
9354
+ {
9355
+ "epoch": 5.740393626991565,
9356
+ "eval_loss": 0.042026255279779434,
9357
+ "eval_pearson_cosine": 0.7802074426247394,
9358
+ "eval_pearson_dot": 0.621680331450122,
9359
+ "eval_pearson_euclidean": 0.7417166161845756,
9360
+ "eval_pearson_manhattan": 0.7412630516460794,
9361
+ "eval_runtime": 40.3256,
9362
+ "eval_samples_per_second": 37.197,
9363
+ "eval_spearman_cosine": 0.7839546315832364,
9364
+ "eval_spearman_dot": 0.6311338337036988,
9365
+ "eval_spearman_euclidean": 0.7564314536390471,
9366
+ "eval_spearman_manhattan": 0.7562308413966785,
9367
+ "eval_steps_per_second": 37.197,
9368
+ "step": 12250
9369
+ },
9370
+ {
9371
+ "epoch": 5.745079662605436,
9372
+ "grad_norm": 1.8017923831939697,
9373
+ "learning_rate": 9.281865042174322e-06,
9374
+ "loss": 0.116,
9375
+ "step": 12260
9376
+ },
9377
+ {
9378
+ "epoch": 5.749765698219306,
9379
+ "grad_norm": 2.184885025024414,
9380
+ "learning_rate": 9.281279287722587e-06,
9381
+ "loss": 0.1141,
9382
+ "step": 12270
9383
+ },
9384
+ {
9385
+ "epoch": 5.754451733833177,
9386
+ "grad_norm": 2.258493423461914,
9387
+ "learning_rate": 9.280693533270854e-06,
9388
+ "loss": 0.1179,
9389
+ "step": 12280
9390
+ },
9391
+ {
9392
+ "epoch": 5.759137769447047,
9393
+ "grad_norm": 3.2758543491363525,
9394
+ "learning_rate": 9.28010777881912e-06,
9395
+ "loss": 0.1354,
9396
+ "step": 12290
9397
+ },
9398
+ {
9399
+ "epoch": 5.763823805060919,
9400
+ "grad_norm": 2.4894609451293945,
9401
+ "learning_rate": 9.279522024367386e-06,
9402
+ "loss": 0.1088,
9403
+ "step": 12300
9404
+ },
9405
+ {
9406
+ "epoch": 5.768509840674789,
9407
+ "grad_norm": 1.9505615234375,
9408
+ "learning_rate": 9.278936269915653e-06,
9409
+ "loss": 0.1104,
9410
+ "step": 12310
9411
+ },
9412
+ {
9413
+ "epoch": 5.77319587628866,
9414
+ "grad_norm": 2.9411964416503906,
9415
+ "learning_rate": 9.278350515463918e-06,
9416
+ "loss": 0.1333,
9417
+ "step": 12320
9418
+ },
9419
+ {
9420
+ "epoch": 5.77788191190253,
9421
+ "grad_norm": 2.877175807952881,
9422
+ "learning_rate": 9.277764761012185e-06,
9423
+ "loss": 0.1038,
9424
+ "step": 12330
9425
+ },
9426
+ {
9427
+ "epoch": 5.782567947516402,
9428
+ "grad_norm": 2.866086006164551,
9429
+ "learning_rate": 9.27717900656045e-06,
9430
+ "loss": 0.1119,
9431
+ "step": 12340
9432
+ },
9433
+ {
9434
+ "epoch": 5.787253983130272,
9435
+ "grad_norm": 2.0350656509399414,
9436
+ "learning_rate": 9.276593252108716e-06,
9437
+ "loss": 0.1218,
9438
+ "step": 12350
9439
+ },
9440
+ {
9441
+ "epoch": 5.7919400187441425,
9442
+ "grad_norm": 1.9179691076278687,
9443
+ "learning_rate": 9.276007497656983e-06,
9444
+ "loss": 0.117,
9445
+ "step": 12360
9446
+ },
9447
+ {
9448
+ "epoch": 5.796626054358013,
9449
+ "grad_norm": 1.894805669784546,
9450
+ "learning_rate": 9.27542174320525e-06,
9451
+ "loss": 0.1148,
9452
+ "step": 12370
9453
+ },
9454
+ {
9455
+ "epoch": 5.8013120899718835,
9456
+ "grad_norm": 1.7460695505142212,
9457
+ "learning_rate": 9.274835988753515e-06,
9458
+ "loss": 0.1347,
9459
+ "step": 12380
9460
+ },
9461
+ {
9462
+ "epoch": 5.805998125585754,
9463
+ "grad_norm": 2.7748680114746094,
9464
+ "learning_rate": 9.274250234301782e-06,
9465
+ "loss": 0.1077,
9466
+ "step": 12390
9467
+ },
9468
+ {
9469
+ "epoch": 5.810684161199625,
9470
+ "grad_norm": 2.6616406440734863,
9471
+ "learning_rate": 9.273664479850047e-06,
9472
+ "loss": 0.111,
9473
+ "step": 12400
9474
+ },
9475
+ {
9476
+ "epoch": 5.815370196813496,
9477
+ "grad_norm": 2.389298439025879,
9478
+ "learning_rate": 9.273078725398314e-06,
9479
+ "loss": 0.1061,
9480
+ "step": 12410
9481
+ },
9482
+ {
9483
+ "epoch": 5.820056232427366,
9484
+ "grad_norm": 1.6245344877243042,
9485
+ "learning_rate": 9.272492970946579e-06,
9486
+ "loss": 0.1196,
9487
+ "step": 12420
9488
+ },
9489
+ {
9490
+ "epoch": 5.824742268041237,
9491
+ "grad_norm": 2.8195879459381104,
9492
+ "learning_rate": 9.271907216494846e-06,
9493
+ "loss": 0.1265,
9494
+ "step": 12430
9495
+ },
9496
+ {
9497
+ "epoch": 5.829428303655108,
9498
+ "grad_norm": 2.538292169570923,
9499
+ "learning_rate": 9.271321462043113e-06,
9500
+ "loss": 0.1038,
9501
+ "step": 12440
9502
+ },
9503
+ {
9504
+ "epoch": 5.834114339268979,
9505
+ "grad_norm": 1.4378900527954102,
9506
+ "learning_rate": 9.270735707591378e-06,
9507
+ "loss": 0.1097,
9508
+ "step": 12450
9509
+ },
9510
+ {
9511
+ "epoch": 5.838800374882849,
9512
+ "grad_norm": 2.120596170425415,
9513
+ "learning_rate": 9.270149953139645e-06,
9514
+ "loss": 0.1054,
9515
+ "step": 12460
9516
+ },
9517
+ {
9518
+ "epoch": 5.84348641049672,
9519
+ "grad_norm": 1.7521088123321533,
9520
+ "learning_rate": 9.26956419868791e-06,
9521
+ "loss": 0.0985,
9522
+ "step": 12470
9523
+ },
9524
+ {
9525
+ "epoch": 5.84817244611059,
9526
+ "grad_norm": 2.082510471343994,
9527
+ "learning_rate": 9.268978444236177e-06,
9528
+ "loss": 0.1142,
9529
+ "step": 12480
9530
+ },
9531
+ {
9532
+ "epoch": 5.852858481724461,
9533
+ "grad_norm": 2.3451695442199707,
9534
+ "learning_rate": 9.268392689784444e-06,
9535
+ "loss": 0.135,
9536
+ "step": 12490
9537
+ },
9538
+ {
9539
+ "epoch": 5.857544517338332,
9540
+ "grad_norm": 1.9797242879867554,
9541
+ "learning_rate": 9.26780693533271e-06,
9542
+ "loss": 0.0888,
9543
+ "step": 12500
9544
+ },
9545
+ {
9546
+ "epoch": 5.857544517338332,
9547
+ "eval_loss": 0.04142308607697487,
9548
+ "eval_pearson_cosine": 0.7805016780478695,
9549
+ "eval_pearson_dot": 0.6245128907955291,
9550
+ "eval_pearson_euclidean": 0.7411648320805888,
9551
+ "eval_pearson_manhattan": 0.7407809523735267,
9552
+ "eval_runtime": 39.8943,
9553
+ "eval_samples_per_second": 37.599,
9554
+ "eval_spearman_cosine": 0.7841450480888137,
9555
+ "eval_spearman_dot": 0.636499292941551,
9556
+ "eval_spearman_euclidean": 0.7567573577855005,
9557
+ "eval_spearman_manhattan": 0.7567068203829979,
9558
+ "eval_steps_per_second": 37.599,
9559
+ "step": 12500
9560
+ },
9561
+ {
9562
+ "epoch": 5.8622305529522025,
9563
+ "grad_norm": 2.519564628601074,
9564
+ "learning_rate": 9.267221180880975e-06,
9565
+ "loss": 0.1118,
9566
+ "step": 12510
9567
+ },
9568
+ {
9569
+ "epoch": 5.866916588566073,
9570
+ "grad_norm": 2.348604679107666,
9571
+ "learning_rate": 9.266635426429241e-06,
9572
+ "loss": 0.1165,
9573
+ "step": 12520
9574
+ },
9575
+ {
9576
+ "epoch": 5.8716026241799435,
9577
+ "grad_norm": 1.9285309314727783,
9578
+ "learning_rate": 9.266049671977507e-06,
9579
+ "loss": 0.1168,
9580
+ "step": 12530
9581
+ },
9582
+ {
9583
+ "epoch": 5.876288659793815,
9584
+ "grad_norm": 2.3968348503112793,
9585
+ "learning_rate": 9.265463917525774e-06,
9586
+ "loss": 0.1226,
9587
+ "step": 12540
9588
+ },
9589
+ {
9590
+ "epoch": 5.880974695407685,
9591
+ "grad_norm": 1.3296688795089722,
9592
+ "learning_rate": 9.26487816307404e-06,
9593
+ "loss": 0.0979,
9594
+ "step": 12550
9595
+ },
9596
+ {
9597
+ "epoch": 5.885660731021556,
9598
+ "grad_norm": 2.3655405044555664,
9599
+ "learning_rate": 9.264292408622306e-06,
9600
+ "loss": 0.1163,
9601
+ "step": 12560
9602
+ },
9603
+ {
9604
+ "epoch": 5.890346766635426,
9605
+ "grad_norm": 1.9741175174713135,
9606
+ "learning_rate": 9.263706654170573e-06,
9607
+ "loss": 0.1193,
9608
+ "step": 12570
9609
+ },
9610
+ {
9611
+ "epoch": 5.895032802249297,
9612
+ "grad_norm": 2.2787790298461914,
9613
+ "learning_rate": 9.263120899718838e-06,
9614
+ "loss": 0.1053,
9615
+ "step": 12580
9616
+ },
9617
+ {
9618
+ "epoch": 5.899718837863167,
9619
+ "grad_norm": 2.3028697967529297,
9620
+ "learning_rate": 9.262535145267105e-06,
9621
+ "loss": 0.105,
9622
+ "step": 12590
9623
+ },
9624
+ {
9625
+ "epoch": 5.904404873477039,
9626
+ "grad_norm": 2.420567274093628,
9627
+ "learning_rate": 9.261949390815372e-06,
9628
+ "loss": 0.1153,
9629
+ "step": 12600
9630
+ },
9631
+ {
9632
+ "epoch": 5.909090909090909,
9633
+ "grad_norm": 1.8667070865631104,
9634
+ "learning_rate": 9.261363636363637e-06,
9635
+ "loss": 0.1206,
9636
+ "step": 12610
9637
+ },
9638
+ {
9639
+ "epoch": 5.91377694470478,
9640
+ "grad_norm": 2.433323621749878,
9641
+ "learning_rate": 9.260777881911904e-06,
9642
+ "loss": 0.1107,
9643
+ "step": 12620
9644
+ },
9645
+ {
9646
+ "epoch": 5.91846298031865,
9647
+ "grad_norm": 1.6899259090423584,
9648
+ "learning_rate": 9.260192127460169e-06,
9649
+ "loss": 0.1006,
9650
+ "step": 12630
9651
+ },
9652
+ {
9653
+ "epoch": 5.9231490159325215,
9654
+ "grad_norm": 3.0744214057922363,
9655
+ "learning_rate": 9.259606373008434e-06,
9656
+ "loss": 0.1165,
9657
+ "step": 12640
9658
+ },
9659
+ {
9660
+ "epoch": 5.927835051546392,
9661
+ "grad_norm": 1.6527074575424194,
9662
+ "learning_rate": 9.259020618556703e-06,
9663
+ "loss": 0.1134,
9664
+ "step": 12650
9665
+ },
9666
+ {
9667
+ "epoch": 5.9325210871602625,
9668
+ "grad_norm": 2.3836679458618164,
9669
+ "learning_rate": 9.258434864104968e-06,
9670
+ "loss": 0.1195,
9671
+ "step": 12660
9672
+ },
9673
+ {
9674
+ "epoch": 5.937207122774133,
9675
+ "grad_norm": 1.6903315782546997,
9676
+ "learning_rate": 9.257849109653233e-06,
9677
+ "loss": 0.125,
9678
+ "step": 12670
9679
+ },
9680
+ {
9681
+ "epoch": 5.9418931583880035,
9682
+ "grad_norm": 2.0928590297698975,
9683
+ "learning_rate": 9.2572633552015e-06,
9684
+ "loss": 0.114,
9685
+ "step": 12680
9686
+ },
9687
+ {
9688
+ "epoch": 5.946579194001874,
9689
+ "grad_norm": 1.6326929330825806,
9690
+ "learning_rate": 9.256677600749765e-06,
9691
+ "loss": 0.1056,
9692
+ "step": 12690
9693
+ },
9694
+ {
9695
+ "epoch": 5.951265229615745,
9696
+ "grad_norm": 2.0911965370178223,
9697
+ "learning_rate": 9.256091846298032e-06,
9698
+ "loss": 0.128,
9699
+ "step": 12700
9700
+ },
9701
+ {
9702
+ "epoch": 5.955951265229616,
9703
+ "grad_norm": 1.6815580129623413,
9704
+ "learning_rate": 9.2555060918463e-06,
9705
+ "loss": 0.1211,
9706
+ "step": 12710
9707
+ },
9708
+ {
9709
+ "epoch": 5.960637300843486,
9710
+ "grad_norm": 2.4735517501831055,
9711
+ "learning_rate": 9.254920337394565e-06,
9712
+ "loss": 0.1246,
9713
+ "step": 12720
9714
+ },
9715
+ {
9716
+ "epoch": 5.965323336457357,
9717
+ "grad_norm": 1.822643756866455,
9718
+ "learning_rate": 9.254334582942831e-06,
9719
+ "loss": 0.1119,
9720
+ "step": 12730
9721
+ },
9722
+ {
9723
+ "epoch": 5.970009372071228,
9724
+ "grad_norm": 2.694791793823242,
9725
+ "learning_rate": 9.253748828491097e-06,
9726
+ "loss": 0.1186,
9727
+ "step": 12740
9728
+ },
9729
+ {
9730
+ "epoch": 5.974695407685099,
9731
+ "grad_norm": 1.8677020072937012,
9732
+ "learning_rate": 9.253163074039364e-06,
9733
+ "loss": 0.1202,
9734
+ "step": 12750
9735
+ },
9736
+ {
9737
+ "epoch": 5.974695407685099,
9738
+ "eval_loss": 0.04308323189616203,
9739
+ "eval_pearson_cosine": 0.7792983938024989,
9740
+ "eval_pearson_dot": 0.6261386080869897,
9741
+ "eval_pearson_euclidean": 0.7413977396293134,
9742
+ "eval_pearson_manhattan": 0.7411537960595762,
9743
+ "eval_runtime": 41.5128,
9744
+ "eval_samples_per_second": 36.133,
9745
+ "eval_spearman_cosine": 0.7834591025676726,
9746
+ "eval_spearman_dot": 0.6404906337885011,
9747
+ "eval_spearman_euclidean": 0.7574889490533175,
9748
+ "eval_spearman_manhattan": 0.7571743616408941,
9749
+ "eval_steps_per_second": 36.133,
9750
+ "step": 12750
9751
+ },
9752
+ {
9753
+ "epoch": 5.979381443298969,
9754
+ "grad_norm": 2.534433126449585,
9755
+ "learning_rate": 9.25257731958763e-06,
9756
+ "loss": 0.1275,
9757
+ "step": 12760
9758
+ },
9759
+ {
9760
+ "epoch": 5.98406747891284,
9761
+ "grad_norm": 1.7585105895996094,
9762
+ "learning_rate": 9.251991565135896e-06,
9763
+ "loss": 0.1129,
9764
+ "step": 12770
9765
+ },
9766
+ {
9767
+ "epoch": 5.98875351452671,
9768
+ "grad_norm": 2.6499111652374268,
9769
+ "learning_rate": 9.251405810684163e-06,
9770
+ "loss": 0.1117,
9771
+ "step": 12780
9772
+ },
9773
+ {
9774
+ "epoch": 5.993439550140581,
9775
+ "grad_norm": 2.0610055923461914,
9776
+ "learning_rate": 9.250820056232428e-06,
9777
+ "loss": 0.1137,
9778
+ "step": 12790
9779
+ },
9780
+ {
9781
+ "epoch": 5.998125585754452,
9782
+ "grad_norm": 2.293468952178955,
9783
+ "learning_rate": 9.250234301780693e-06,
9784
+ "loss": 0.1178,
9785
+ "step": 12800
9786
+ },
9787
+ {
9788
+ "epoch": 6.0028116213683225,
9789
+ "grad_norm": 1.97608482837677,
9790
+ "learning_rate": 9.249648547328962e-06,
9791
+ "loss": 0.1105,
9792
+ "step": 12810
9793
+ },
9794
+ {
9795
+ "epoch": 6.007497656982193,
9796
+ "grad_norm": 1.9157034158706665,
9797
+ "learning_rate": 9.249062792877227e-06,
9798
+ "loss": 0.0914,
9799
+ "step": 12820
9800
+ },
9801
+ {
9802
+ "epoch": 6.0121836925960634,
9803
+ "grad_norm": 1.4950352907180786,
9804
+ "learning_rate": 9.248477038425492e-06,
9805
+ "loss": 0.0983,
9806
+ "step": 12830
9807
+ },
9808
+ {
9809
+ "epoch": 6.016869728209935,
9810
+ "grad_norm": 1.4796631336212158,
9811
+ "learning_rate": 9.247891283973759e-06,
9812
+ "loss": 0.0799,
9813
+ "step": 12840
9814
+ },
9815
+ {
9816
+ "epoch": 6.021555763823805,
9817
+ "grad_norm": 1.68351149559021,
9818
+ "learning_rate": 9.247305529522024e-06,
9819
+ "loss": 0.079,
9820
+ "step": 12850
9821
+ },
9822
+ {
9823
+ "epoch": 6.026241799437676,
9824
+ "grad_norm": 2.24094295501709,
9825
+ "learning_rate": 9.246719775070291e-06,
9826
+ "loss": 0.0908,
9827
+ "step": 12860
9828
+ },
9829
+ {
9830
+ "epoch": 6.030927835051546,
9831
+ "grad_norm": 2.414583683013916,
9832
+ "learning_rate": 9.246134020618558e-06,
9833
+ "loss": 0.0908,
9834
+ "step": 12870
9835
+ },
9836
+ {
9837
+ "epoch": 6.035613870665417,
9838
+ "grad_norm": 2.87400221824646,
9839
+ "learning_rate": 9.245548266166823e-06,
9840
+ "loss": 0.085,
9841
+ "step": 12880
9842
+ },
9843
+ {
9844
+ "epoch": 6.040299906279288,
9845
+ "grad_norm": 1.8591458797454834,
9846
+ "learning_rate": 9.24496251171509e-06,
9847
+ "loss": 0.0825,
9848
+ "step": 12890
9849
+ },
9850
+ {
9851
+ "epoch": 6.044985941893159,
9852
+ "grad_norm": 2.2384636402130127,
9853
+ "learning_rate": 9.244376757263355e-06,
9854
+ "loss": 0.0826,
9855
+ "step": 12900
9856
+ },
9857
+ {
9858
+ "epoch": 6.049671977507029,
9859
+ "grad_norm": 1.670571208000183,
9860
+ "learning_rate": 9.243791002811622e-06,
9861
+ "loss": 0.0746,
9862
+ "step": 12910
9863
+ },
9864
+ {
9865
+ "epoch": 6.0543580131209,
9866
+ "grad_norm": 1.607620358467102,
9867
+ "learning_rate": 9.243205248359888e-06,
9868
+ "loss": 0.106,
9869
+ "step": 12920
9870
+ },
9871
+ {
9872
+ "epoch": 6.05904404873477,
9873
+ "grad_norm": 1.543734073638916,
9874
+ "learning_rate": 9.242619493908155e-06,
9875
+ "loss": 0.0788,
9876
+ "step": 12930
9877
+ },
9878
+ {
9879
+ "epoch": 6.0637300843486415,
9880
+ "grad_norm": 2.0840065479278564,
9881
+ "learning_rate": 9.242033739456421e-06,
9882
+ "loss": 0.1013,
9883
+ "step": 12940
9884
+ },
9885
+ {
9886
+ "epoch": 6.068416119962512,
9887
+ "grad_norm": 1.8061577081680298,
9888
+ "learning_rate": 9.241447985004687e-06,
9889
+ "loss": 0.0895,
9890
+ "step": 12950
9891
+ },
9892
+ {
9893
+ "epoch": 6.073102155576382,
9894
+ "grad_norm": 1.341036081314087,
9895
+ "learning_rate": 9.240862230552952e-06,
9896
+ "loss": 0.0714,
9897
+ "step": 12960
9898
+ },
9899
+ {
9900
+ "epoch": 6.077788191190253,
9901
+ "grad_norm": 2.1150712966918945,
9902
+ "learning_rate": 9.240276476101219e-06,
9903
+ "loss": 0.0899,
9904
+ "step": 12970
9905
+ },
9906
+ {
9907
+ "epoch": 6.082474226804123,
9908
+ "grad_norm": 2.214730739593506,
9909
+ "learning_rate": 9.239690721649486e-06,
9910
+ "loss": 0.0758,
9911
+ "step": 12980
9912
+ },
9913
+ {
9914
+ "epoch": 6.087160262417995,
9915
+ "grad_norm": 1.489686369895935,
9916
+ "learning_rate": 9.239104967197751e-06,
9917
+ "loss": 0.0784,
9918
+ "step": 12990
9919
+ },
9920
+ {
9921
+ "epoch": 6.091846298031865,
9922
+ "grad_norm": 1.2778211832046509,
9923
+ "learning_rate": 9.238519212746018e-06,
9924
+ "loss": 0.0941,
9925
+ "step": 13000
9926
+ },
9927
+ {
9928
+ "epoch": 6.091846298031865,
9929
+ "eval_loss": 0.0399174839258194,
9930
+ "eval_pearson_cosine": 0.7838266464106027,
9931
+ "eval_pearson_dot": 0.6493223534201924,
9932
+ "eval_pearson_euclidean": 0.739064666910151,
9933
+ "eval_pearson_manhattan": 0.7387769365054666,
9934
+ "eval_runtime": 40.0598,
9935
+ "eval_samples_per_second": 37.444,
9936
+ "eval_spearman_cosine": 0.7872885894711749,
9937
+ "eval_spearman_dot": 0.6641643317048077,
9938
+ "eval_spearman_euclidean": 0.7529671041992676,
9939
+ "eval_spearman_manhattan": 0.752705655614685,
9940
+ "eval_steps_per_second": 37.444,
9941
+ "step": 13000
9942
  }
9943
  ],
9944
  "logging_steps": 10,