Commit 0489850 (verified) · 1 Parent(s): daa4051
CocoRoF committed

Training in progress, step 17000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a2d2a0f5d2fd2db2c00ba8019c5d26c7e05ea6254b391695cc519d7dab59b225
+ oid sha256:723de6fd746cbba66015f7a7da153864465a825d5f4e24435edd8645a25ac837
  size 613004648
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4b5ee984e35f64350e857f17403ecda5095a0c3d2917a731f8237c213d237bae
+ oid sha256:dafb37725ed184eeb99653a88821c53652c298a048e783bcf251a3b487c248c8
  size 1226096954
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2be093747f5a4a232618c3318bfffdf24560aea746cf4c11903c465c5179b6c9
+ oid sha256:c551e87aa3f069465eab6343f6462d0da8c27e46770ad44ff0400698bec95cda
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:975ed305a3fe7b4927a3b3d12f66d6b14051cd85dfe6e94defa4d7c56781b5ac
+ oid sha256:64e32232d2d68b6508947dd18795fe7ac8dd583abb7b016b68d853036e32fd4b
  size 1000
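
The four files above are Git LFS pointers, not the binaries themselves: each pointer records the spec version, an oid sha256 content hash, and the blob size, and this commit only swaps the oid lines for the step-17000 blobs (the sizes are unchanged). As a minimal sketch, assuming the checkpoint has been pulled locally at the path shown in the diff, the new model.safetensors pointer could be verified by re-hashing the blob:

import hashlib

# Expected hash taken from the "+ oid sha256:..." line in the diff above.
EXPECTED_OID = "723de6fd746cbba66015f7a7da153864465a825d5f4e24435edd8645a25ac837"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream in 1 MiB chunks so the ~613 MB checkpoint never sits in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

actual = sha256_of("last-checkpoint/model.safetensors")
print("match" if actual == EXPECTED_OID else f"mismatch: {actual}")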
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 7.497656982193065,
+ "epoch": 7.966260543580131,
  "eval_steps": 250,
- "global_step": 16000,
+ "global_step": 17000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -12231,6 +12231,770 @@
  "eval_spearman_manhattan": 0.7524283280152466,
  "eval_steps_per_second": 37.268,
  "step": 16000
+ },
+ {
+ "epoch": 7.502343017806935,
+ "grad_norm": 2.1431262493133545,
+ "learning_rate": 9.062207122774134e-06,
+ "loss": 0.0768,
+ "step": 16010
+ },
+ {
+ "epoch": 7.507029053420806,
+ "grad_norm": 1.5847636461257935,
+ "learning_rate": 9.0616213683224e-06,
+ "loss": 0.0772,
+ "step": 16020
+ },
+ {
+ "epoch": 7.511715089034677,
+ "grad_norm": 1.5291898250579834,
+ "learning_rate": 9.061035613870666e-06,
+ "loss": 0.0642,
+ "step": 16030
+ },
+ {
+ "epoch": 7.516401124648548,
+ "grad_norm": 1.497979998588562,
+ "learning_rate": 9.060449859418933e-06,
+ "loss": 0.0846,
+ "step": 16040
+ },
+ {
+ "epoch": 7.521087160262418,
+ "grad_norm": 2.9073336124420166,
+ "learning_rate": 9.059864104967199e-06,
+ "loss": 0.0735,
+ "step": 16050
+ },
+ {
+ "epoch": 7.525773195876289,
+ "grad_norm": 2.264319896697998,
+ "learning_rate": 9.059278350515464e-06,
+ "loss": 0.0752,
+ "step": 16060
+ },
+ {
+ "epoch": 7.530459231490159,
+ "grad_norm": 1.6372432708740234,
+ "learning_rate": 9.05869259606373e-06,
+ "loss": 0.0842,
+ "step": 16070
+ },
+ {
+ "epoch": 7.5351452671040295,
+ "grad_norm": 1.1619336605072021,
+ "learning_rate": 9.058106841611996e-06,
+ "loss": 0.0687,
+ "step": 16080
+ },
+ {
+ "epoch": 7.539831302717901,
+ "grad_norm": 0.9320247173309326,
+ "learning_rate": 9.057521087160263e-06,
+ "loss": 0.0709,
+ "step": 16090
+ },
+ {
+ "epoch": 7.544517338331771,
+ "grad_norm": 1.3003836870193481,
+ "learning_rate": 9.05693533270853e-06,
+ "loss": 0.062,
+ "step": 16100
+ },
+ {
+ "epoch": 7.549203373945642,
+ "grad_norm": 1.8614999055862427,
+ "learning_rate": 9.056349578256795e-06,
+ "loss": 0.0713,
+ "step": 16110
+ },
+ {
+ "epoch": 7.553889409559512,
+ "grad_norm": 2.049309492111206,
+ "learning_rate": 9.055763823805062e-06,
+ "loss": 0.0763,
+ "step": 16120
+ },
+ {
+ "epoch": 7.558575445173384,
+ "grad_norm": 1.3978779315948486,
+ "learning_rate": 9.055178069353327e-06,
+ "loss": 0.0778,
+ "step": 16130
+ },
+ {
+ "epoch": 7.563261480787254,
+ "grad_norm": 1.9440947771072388,
+ "learning_rate": 9.054592314901594e-06,
+ "loss": 0.0637,
+ "step": 16140
+ },
+ {
+ "epoch": 7.567947516401125,
+ "grad_norm": 1.5653728246688843,
+ "learning_rate": 9.054006560449861e-06,
+ "loss": 0.0791,
+ "step": 16150
+ },
+ {
+ "epoch": 7.572633552014995,
+ "grad_norm": 1.3674747943878174,
+ "learning_rate": 9.053420805998126e-06,
+ "loss": 0.0662,
+ "step": 16160
+ },
+ {
+ "epoch": 7.577319587628866,
+ "grad_norm": 1.2067365646362305,
+ "learning_rate": 9.052835051546393e-06,
+ "loss": 0.0677,
+ "step": 16170
+ },
+ {
+ "epoch": 7.582005623242736,
+ "grad_norm": 1.9453731775283813,
+ "learning_rate": 9.052249297094658e-06,
+ "loss": 0.0776,
+ "step": 16180
+ },
+ {
+ "epoch": 7.586691658856608,
+ "grad_norm": 1.6629338264465332,
+ "learning_rate": 9.051663542642925e-06,
+ "loss": 0.0615,
+ "step": 16190
+ },
+ {
+ "epoch": 7.591377694470478,
+ "grad_norm": 2.192781925201416,
+ "learning_rate": 9.051077788191192e-06,
+ "loss": 0.0762,
+ "step": 16200
+ },
+ {
+ "epoch": 7.5960637300843485,
+ "grad_norm": 1.7695443630218506,
+ "learning_rate": 9.050492033739457e-06,
+ "loss": 0.0798,
+ "step": 16210
+ },
+ {
+ "epoch": 7.600749765698219,
+ "grad_norm": 2.5343542098999023,
+ "learning_rate": 9.049906279287723e-06,
+ "loss": 0.0642,
+ "step": 16220
+ },
+ {
+ "epoch": 7.60543580131209,
+ "grad_norm": 2.3569960594177246,
+ "learning_rate": 9.04932052483599e-06,
+ "loss": 0.0791,
+ "step": 16230
+ },
+ {
+ "epoch": 7.610121836925961,
+ "grad_norm": 1.6627905368804932,
+ "learning_rate": 9.048734770384255e-06,
+ "loss": 0.0751,
+ "step": 16240
+ },
+ {
+ "epoch": 7.614807872539831,
+ "grad_norm": 2.732750177383423,
+ "learning_rate": 9.048149015932522e-06,
+ "loss": 0.0779,
+ "step": 16250
+ },
+ {
+ "epoch": 7.614807872539831,
+ "eval_loss": 0.03914293646812439,
+ "eval_pearson_cosine": 0.7825741148617453,
+ "eval_pearson_dot": 0.6371994676784709,
+ "eval_pearson_euclidean": 0.7333148039136894,
+ "eval_pearson_manhattan": 0.7325802747620713,
+ "eval_runtime": 39.7805,
+ "eval_samples_per_second": 37.707,
+ "eval_spearman_cosine": 0.7845511113526636,
+ "eval_spearman_dot": 0.6532379504600656,
+ "eval_spearman_euclidean": 0.746661992951968,
+ "eval_spearman_manhattan": 0.7461572363651041,
+ "eval_steps_per_second": 37.707,
+ "step": 16250
+ },
+ {
+ "epoch": 7.619493908153702,
+ "grad_norm": 1.1624847650527954,
+ "learning_rate": 9.047563261480789e-06,
+ "loss": 0.0677,
+ "step": 16260
+ },
+ {
+ "epoch": 7.624179943767572,
+ "grad_norm": 2.173877477645874,
+ "learning_rate": 9.046977507029054e-06,
+ "loss": 0.0697,
+ "step": 16270
+ },
+ {
+ "epoch": 7.628865979381443,
+ "grad_norm": 2.1271920204162598,
+ "learning_rate": 9.04639175257732e-06,
+ "loss": 0.0736,
+ "step": 16280
+ },
+ {
+ "epoch": 7.633552014995314,
+ "grad_norm": 1.3459683656692505,
+ "learning_rate": 9.045805998125586e-06,
+ "loss": 0.082,
+ "step": 16290
+ },
+ {
+ "epoch": 7.638238050609185,
+ "grad_norm": 2.0023584365844727,
+ "learning_rate": 9.045220243673853e-06,
+ "loss": 0.0662,
+ "step": 16300
+ },
+ {
+ "epoch": 7.642924086223055,
+ "grad_norm": 1.8559486865997314,
+ "learning_rate": 9.044634489222118e-06,
+ "loss": 0.0685,
+ "step": 16310
+ },
+ {
+ "epoch": 7.647610121836926,
+ "grad_norm": 2.1703007221221924,
+ "learning_rate": 9.044048734770385e-06,
+ "loss": 0.0659,
+ "step": 16320
+ },
+ {
+ "epoch": 7.652296157450797,
+ "grad_norm": 2.0675439834594727,
+ "learning_rate": 9.043462980318652e-06,
+ "loss": 0.0708,
+ "step": 16330
+ },
+ {
+ "epoch": 7.6569821930646675,
+ "grad_norm": 1.600040316581726,
+ "learning_rate": 9.042877225866917e-06,
+ "loss": 0.0682,
+ "step": 16340
+ },
+ {
+ "epoch": 7.661668228678538,
+ "grad_norm": 2.5737037658691406,
+ "learning_rate": 9.042291471415184e-06,
+ "loss": 0.077,
+ "step": 16350
+ },
+ {
+ "epoch": 7.6663542642924085,
+ "grad_norm": 1.909056544303894,
+ "learning_rate": 9.04170571696345e-06,
+ "loss": 0.076,
+ "step": 16360
+ },
+ {
+ "epoch": 7.671040299906279,
+ "grad_norm": 2.5772509574890137,
+ "learning_rate": 9.041119962511716e-06,
+ "loss": 0.0743,
+ "step": 16370
+ },
+ {
+ "epoch": 7.6757263355201495,
+ "grad_norm": 1.6890363693237305,
+ "learning_rate": 9.040534208059981e-06,
+ "loss": 0.0696,
+ "step": 16380
+ },
+ {
+ "epoch": 7.680412371134021,
+ "grad_norm": 1.6115903854370117,
+ "learning_rate": 9.039948453608248e-06,
+ "loss": 0.0849,
+ "step": 16390
+ },
+ {
+ "epoch": 7.685098406747891,
+ "grad_norm": 2.3779239654541016,
+ "learning_rate": 9.039362699156514e-06,
+ "loss": 0.0819,
+ "step": 16400
+ },
+ {
+ "epoch": 7.689784442361762,
+ "grad_norm": 1.5444949865341187,
+ "learning_rate": 9.03877694470478e-06,
+ "loss": 0.0718,
+ "step": 16410
+ },
+ {
+ "epoch": 7.694470477975632,
+ "grad_norm": 2.221595525741577,
+ "learning_rate": 9.038191190253046e-06,
+ "loss": 0.084,
+ "step": 16420
+ },
+ {
+ "epoch": 7.699156513589504,
+ "grad_norm": 2.68977427482605,
+ "learning_rate": 9.037605435801313e-06,
+ "loss": 0.0828,
+ "step": 16430
+ },
+ {
+ "epoch": 7.703842549203374,
+ "grad_norm": 1.9625297784805298,
+ "learning_rate": 9.03701968134958e-06,
+ "loss": 0.0645,
+ "step": 16440
+ },
+ {
+ "epoch": 7.708528584817245,
+ "grad_norm": 1.4848051071166992,
+ "learning_rate": 9.036433926897845e-06,
+ "loss": 0.0775,
+ "step": 16450
+ },
+ {
+ "epoch": 7.713214620431115,
+ "grad_norm": 1.2312238216400146,
+ "learning_rate": 9.035848172446112e-06,
+ "loss": 0.0654,
+ "step": 16460
+ },
+ {
+ "epoch": 7.717900656044986,
+ "grad_norm": 2.353621006011963,
+ "learning_rate": 9.035262417994377e-06,
+ "loss": 0.0798,
+ "step": 16470
+ },
+ {
+ "epoch": 7.722586691658856,
+ "grad_norm": 2.2561025619506836,
+ "learning_rate": 9.034676663542644e-06,
+ "loss": 0.0679,
+ "step": 16480
+ },
+ {
+ "epoch": 7.7272727272727275,
+ "grad_norm": 1.7776751518249512,
+ "learning_rate": 9.03409090909091e-06,
+ "loss": 0.0619,
+ "step": 16490
+ },
+ {
+ "epoch": 7.731958762886598,
+ "grad_norm": 1.9327503442764282,
+ "learning_rate": 9.033505154639176e-06,
+ "loss": 0.078,
+ "step": 16500
+ },
+ {
+ "epoch": 7.731958762886598,
+ "eval_loss": 0.039704494178295135,
+ "eval_pearson_cosine": 0.7809507850262349,
+ "eval_pearson_dot": 0.636416760886064,
+ "eval_pearson_euclidean": 0.7299738247708838,
+ "eval_pearson_manhattan": 0.7298765220753651,
+ "eval_runtime": 40.0275,
+ "eval_samples_per_second": 37.474,
+ "eval_spearman_cosine": 0.782649086170428,
+ "eval_spearman_dot": 0.6554585356303039,
+ "eval_spearman_euclidean": 0.7456788267720733,
+ "eval_spearman_manhattan": 0.7461425779916862,
+ "eval_steps_per_second": 37.474,
+ "step": 16500
+ },
+ {
+ "epoch": 7.7366447985004685,
+ "grad_norm": 1.9791489839553833,
+ "learning_rate": 9.032919400187443e-06,
+ "loss": 0.08,
+ "step": 16510
+ },
+ {
+ "epoch": 7.741330834114339,
+ "grad_norm": 2.5181267261505127,
+ "learning_rate": 9.032333645735708e-06,
+ "loss": 0.0822,
+ "step": 16520
+ },
+ {
+ "epoch": 7.74601686972821,
+ "grad_norm": 1.2553796768188477,
+ "learning_rate": 9.031747891283973e-06,
+ "loss": 0.0713,
+ "step": 16530
+ },
+ {
+ "epoch": 7.750702905342081,
+ "grad_norm": 2.394421100616455,
+ "learning_rate": 9.03116213683224e-06,
+ "loss": 0.0852,
+ "step": 16540
+ },
+ {
+ "epoch": 7.755388940955951,
+ "grad_norm": 2.388476848602295,
+ "learning_rate": 9.030576382380507e-06,
+ "loss": 0.0703,
+ "step": 16550
+ },
+ {
+ "epoch": 7.760074976569822,
+ "grad_norm": 1.3286539316177368,
+ "learning_rate": 9.029990627928772e-06,
+ "loss": 0.0606,
+ "step": 16560
+ },
+ {
+ "epoch": 7.764761012183692,
+ "grad_norm": 2.0466766357421875,
+ "learning_rate": 9.02940487347704e-06,
+ "loss": 0.0729,
+ "step": 16570
+ },
+ {
+ "epoch": 7.769447047797563,
+ "grad_norm": 1.3759188652038574,
+ "learning_rate": 9.028819119025305e-06,
+ "loss": 0.0776,
+ "step": 16580
+ },
+ {
+ "epoch": 7.774133083411434,
+ "grad_norm": 1.6511011123657227,
+ "learning_rate": 9.028233364573571e-06,
+ "loss": 0.0728,
+ "step": 16590
+ },
+ {
+ "epoch": 7.778819119025305,
+ "grad_norm": 2.05136775970459,
+ "learning_rate": 9.027647610121838e-06,
+ "loss": 0.0719,
+ "step": 16600
+ },
+ {
+ "epoch": 7.783505154639175,
+ "grad_norm": 2.3014705181121826,
+ "learning_rate": 9.027061855670104e-06,
+ "loss": 0.0682,
+ "step": 16610
+ },
+ {
+ "epoch": 7.788191190253046,
+ "grad_norm": 2.6752190589904785,
+ "learning_rate": 9.02647610121837e-06,
+ "loss": 0.0856,
+ "step": 16620
+ },
+ {
+ "epoch": 7.792877225866917,
+ "grad_norm": 1.7644881010055542,
+ "learning_rate": 9.025890346766636e-06,
+ "loss": 0.0705,
+ "step": 16630
+ },
+ {
+ "epoch": 7.7975632614807875,
+ "grad_norm": 2.1563751697540283,
+ "learning_rate": 9.025304592314903e-06,
+ "loss": 0.0842,
+ "step": 16640
+ },
+ {
+ "epoch": 7.802249297094658,
+ "grad_norm": 1.4930392503738403,
+ "learning_rate": 9.02471883786317e-06,
+ "loss": 0.0759,
+ "step": 16650
+ },
+ {
+ "epoch": 7.8069353327085285,
+ "grad_norm": 2.3332340717315674,
+ "learning_rate": 9.024133083411435e-06,
+ "loss": 0.0668,
+ "step": 16660
+ },
+ {
+ "epoch": 7.811621368322399,
+ "grad_norm": 2.424914836883545,
+ "learning_rate": 9.023547328959702e-06,
+ "loss": 0.0619,
+ "step": 16670
+ },
+ {
+ "epoch": 7.816307403936269,
+ "grad_norm": 2.246410369873047,
+ "learning_rate": 9.022961574507967e-06,
+ "loss": 0.0772,
+ "step": 16680
+ },
+ {
+ "epoch": 7.820993439550141,
+ "grad_norm": 1.8411740064620972,
+ "learning_rate": 9.022375820056232e-06,
+ "loss": 0.0685,
+ "step": 16690
+ },
+ {
+ "epoch": 7.825679475164011,
+ "grad_norm": 1.6910183429718018,
+ "learning_rate": 9.021790065604499e-06,
+ "loss": 0.0666,
+ "step": 16700
+ },
+ {
+ "epoch": 7.830365510777882,
+ "grad_norm": 1.7055261135101318,
+ "learning_rate": 9.021204311152766e-06,
+ "loss": 0.0555,
+ "step": 16710
+ },
+ {
+ "epoch": 7.835051546391752,
+ "grad_norm": 1.0138518810272217,
+ "learning_rate": 9.020618556701031e-06,
+ "loss": 0.0709,
+ "step": 16720
+ },
+ {
+ "epoch": 7.839737582005624,
+ "grad_norm": 1.5108051300048828,
+ "learning_rate": 9.020032802249298e-06,
+ "loss": 0.0563,
+ "step": 16730
+ },
+ {
+ "epoch": 7.844423617619494,
+ "grad_norm": 1.4900165796279907,
+ "learning_rate": 9.019447047797563e-06,
+ "loss": 0.0725,
+ "step": 16740
+ },
+ {
+ "epoch": 7.849109653233365,
+ "grad_norm": 2.11224627494812,
+ "learning_rate": 9.01886129334583e-06,
+ "loss": 0.0699,
+ "step": 16750
+ },
+ {
+ "epoch": 7.849109653233365,
+ "eval_loss": 0.04045228287577629,
+ "eval_pearson_cosine": 0.7810519865633125,
+ "eval_pearson_dot": 0.6315366018290618,
+ "eval_pearson_euclidean": 0.7311610922445455,
+ "eval_pearson_manhattan": 0.7308498943499657,
+ "eval_runtime": 40.5903,
+ "eval_samples_per_second": 36.955,
+ "eval_spearman_cosine": 0.7836878662688926,
+ "eval_spearman_dot": 0.642582986344888,
+ "eval_spearman_euclidean": 0.7470082334118219,
+ "eval_spearman_manhattan": 0.7467779627853639,
+ "eval_steps_per_second": 36.955,
+ "step": 16750
+ },
+ {
+ "epoch": 7.853795688847235,
+ "grad_norm": 1.3252798318862915,
+ "learning_rate": 9.018275538894097e-06,
+ "loss": 0.0691,
+ "step": 16760
+ },
+ {
+ "epoch": 7.858481724461106,
+ "grad_norm": 0.9918208122253418,
+ "learning_rate": 9.017689784442362e-06,
+ "loss": 0.0723,
+ "step": 16770
+ },
+ {
+ "epoch": 7.863167760074976,
+ "grad_norm": 2.2344889640808105,
+ "learning_rate": 9.01710402999063e-06,
+ "loss": 0.0739,
+ "step": 16780
+ },
+ {
+ "epoch": 7.8678537956888475,
+ "grad_norm": 1.8005706071853638,
+ "learning_rate": 9.016518275538895e-06,
+ "loss": 0.0831,
+ "step": 16790
+ },
+ {
+ "epoch": 7.872539831302718,
+ "grad_norm": 1.365945816040039,
+ "learning_rate": 9.015932521087161e-06,
+ "loss": 0.0759,
+ "step": 16800
+ },
+ {
+ "epoch": 7.877225866916588,
+ "grad_norm": 1.3977360725402832,
+ "learning_rate": 9.015346766635427e-06,
+ "loss": 0.0806,
+ "step": 16810
+ },
+ {
+ "epoch": 7.881911902530459,
+ "grad_norm": 1.3826375007629395,
+ "learning_rate": 9.014761012183694e-06,
+ "loss": 0.0744,
+ "step": 16820
+ },
+ {
+ "epoch": 7.88659793814433,
+ "grad_norm": 2.0823261737823486,
+ "learning_rate": 9.01417525773196e-06,
+ "loss": 0.0731,
+ "step": 16830
+ },
+ {
+ "epoch": 7.891283973758201,
+ "grad_norm": 1.4947584867477417,
+ "learning_rate": 9.013589503280226e-06,
+ "loss": 0.0788,
+ "step": 16840
+ },
+ {
+ "epoch": 7.895970009372071,
+ "grad_norm": 1.659224033355713,
+ "learning_rate": 9.013003748828491e-06,
+ "loss": 0.0733,
+ "step": 16850
+ },
+ {
+ "epoch": 7.900656044985942,
+ "grad_norm": 1.4698199033737183,
+ "learning_rate": 9.012417994376758e-06,
+ "loss": 0.0789,
+ "step": 16860
+ },
+ {
+ "epoch": 7.905342080599812,
+ "grad_norm": 1.6106451749801636,
+ "learning_rate": 9.011832239925025e-06,
+ "loss": 0.0656,
+ "step": 16870
+ },
+ {
+ "epoch": 7.910028116213683,
+ "grad_norm": 1.2820615768432617,
+ "learning_rate": 9.01124648547329e-06,
+ "loss": 0.0648,
+ "step": 16880
+ },
+ {
+ "epoch": 7.914714151827554,
+ "grad_norm": 2.3736705780029297,
+ "learning_rate": 9.010660731021557e-06,
+ "loss": 0.0884,
+ "step": 16890
+ },
+ {
+ "epoch": 7.919400187441425,
+ "grad_norm": 1.1591442823410034,
+ "learning_rate": 9.010074976569822e-06,
+ "loss": 0.0657,
+ "step": 16900
+ },
+ {
+ "epoch": 7.924086223055295,
+ "grad_norm": 1.9707759618759155,
+ "learning_rate": 9.009489222118089e-06,
+ "loss": 0.0833,
+ "step": 16910
+ },
+ {
+ "epoch": 7.928772258669166,
+ "grad_norm": 2.5806972980499268,
+ "learning_rate": 9.008903467666354e-06,
+ "loss": 0.073,
+ "step": 16920
+ },
+ {
+ "epoch": 7.933458294283037,
+ "grad_norm": 0.8301031589508057,
+ "learning_rate": 9.008317713214621e-06,
+ "loss": 0.0694,
+ "step": 16930
+ },
+ {
+ "epoch": 7.938144329896907,
+ "grad_norm": 2.491325855255127,
+ "learning_rate": 9.007731958762888e-06,
+ "loss": 0.07,
+ "step": 16940
+ },
+ {
+ "epoch": 7.942830365510778,
+ "grad_norm": 1.3585147857666016,
+ "learning_rate": 9.007146204311153e-06,
+ "loss": 0.0844,
+ "step": 16950
+ },
+ {
+ "epoch": 7.947516401124648,
+ "grad_norm": 0.8648898601531982,
+ "learning_rate": 9.00656044985942e-06,
+ "loss": 0.0706,
+ "step": 16960
+ },
+ {
+ "epoch": 7.952202436738519,
+ "grad_norm": 1.6157063245773315,
+ "learning_rate": 9.005974695407685e-06,
+ "loss": 0.0849,
+ "step": 16970
+ },
+ {
+ "epoch": 7.956888472352389,
+ "grad_norm": 2.6578354835510254,
+ "learning_rate": 9.005388940955952e-06,
+ "loss": 0.0785,
+ "step": 16980
+ },
+ {
+ "epoch": 7.961574507966261,
+ "grad_norm": 2.8184850215911865,
+ "learning_rate": 9.00480318650422e-06,
+ "loss": 0.0769,
+ "step": 16990
+ },
+ {
+ "epoch": 7.966260543580131,
+ "grad_norm": 1.8346798419952393,
+ "learning_rate": 9.004217432052485e-06,
+ "loss": 0.0735,
+ "step": 17000
+ },
+ {
+ "epoch": 7.966260543580131,
+ "eval_loss": 0.03939095139503479,
+ "eval_pearson_cosine": 0.780422600052205,
+ "eval_pearson_dot": 0.646788551622171,
+ "eval_pearson_euclidean": 0.7325980054422985,
+ "eval_pearson_manhattan": 0.731991687137608,
+ "eval_runtime": 39.8363,
+ "eval_samples_per_second": 37.654,
+ "eval_spearman_cosine": 0.7823114033515521,
+ "eval_spearman_dot": 0.6607344073150395,
+ "eval_spearman_euclidean": 0.7461718651526544,
+ "eval_spearman_manhattan": 0.745468210963869,
+ "eval_steps_per_second": 37.654,
+ "step": 17000
  }
  ],
  "logging_steps": 10,