CocoRoF committed on
Commit a9f6019 · verified · 1 Parent(s): 9dd433d

Training in progress, step 21000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9a99077961d0a641c5ff38bc41aeb0e96f4e0aa881e97473db5564c741bb8ca1
+oid sha256:e2345f93cb689f8d7f41eab40d0cef18241e972878e7fb6948d71f1371719ca8
 size 613004648
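
The three lines above are the entire Git LFS pointer format: a spec version, the blob's SHA-256, and its byte size. The real ~613 MB weight file lives in LFS storage, so each checkpoint commit only rewrites the hash; the same pattern repeats for the three files below. As a minimal sketch, a downloaded blob can be checked against its pointer like this (the paths are illustrative, not part of the repo):

# Sketch: verify a downloaded LFS blob against its pointer file.
# Paths are examples; point them at the actual pointer and blob locations.
import hashlib

def parse_lfs_pointer(path):
    """Read the 'key value' lines (version / oid / size) into a dict."""
    with open(path, encoding="utf-8") as f:
        return dict(line.strip().split(" ", 1) for line in f if line.strip())

def verify_blob(pointer_path, blob_path):
    pointer = parse_lfs_pointer(pointer_path)
    expected_hash = pointer["oid"].split(":", 1)[1]  # strip the "sha256:" prefix
    digest = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
            size += len(chunk)
    return size == int(pointer["size"]) and digest.hexdigest() == expected_hash

# e.g. verify_blob("model.safetensors.pointer", "model.safetensors")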
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b633f36fabb5fb014eb719663342186c16c8ad074853b96d787c85ceecedc06
+oid sha256:88aacbb6072c3cfcd0a072fac3b759771484a894347ed77a4b36afa5c1d0bc3b
 size 1226096954
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc17503afa3bf0eafca6b72efbe6ae8cb454a3c16da90f560f71f4af87c7a4e4
+oid sha256:28d77b9708e785984189dd87311c593d951d08be1862b45c82e09f23e0a264bc
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f38d69aa9147d77e04cfc7d1c0433054c65fed5f88ad2cafb308669398f46b3f
+oid sha256:9a12c30886420598486baa82bdd0616396462f1a93af3275146e2f56424c6d27
 size 1000
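
Taken together, the four updated files are the standard transformers Trainer checkpoint payload: weights (model.safetensors), optimizer and LR-scheduler state (optimizer.pt, scheduler.pt), and RNG state (rng_state.pth) so a resumed run reproduces the same data order and dropout masks. The 1,226,096,954-byte optimizer file being almost exactly twice the 613,004,648-byte weight file is consistent with an Adam-style optimizer keeping two fp32 moment buffers per parameter. A hedged inspection sketch follows; the file names come from this commit, everything else is generic and assumes the blobs have been pulled out of LFS into last-checkpoint/:

# Sketch: peek inside the checkpoint files. Requires torch and safetensors.
import math
import torch
from safetensors import safe_open

# model.safetensors holds just the weights, keyed by parameter name;
# get_slice lets us read shapes without loading the tensors themselves.
with safe_open("last-checkpoint/model.safetensors", framework="pt", device="cpu") as f:
    keys = list(f.keys())
    n_params = sum(math.prod(f.get_slice(k).get_shape()) for k in keys)
    print(f"{len(keys)} tensors, {n_params / 1e6:.0f}M parameters")

# optimizer.pt is a plain torch pickle of the optimizer state dict;
# weights_only=False is acceptable here because it is our own checkpoint.
opt = torch.load("last-checkpoint/optimizer.pt", map_location="cpu", weights_only=False)
print(opt.keys())  # typically dict_keys(['state', 'param_groups'])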
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 9.372071227741332,
+  "epoch": 9.840674789128398,
   "eval_steps": 250,
-  "global_step": 20000,
+  "global_step": 21000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -15287,6 +15287,770 @@
       "eval_spearman_manhattan": 0.742345267890976,
       "eval_steps_per_second": 37.771,
       "step": 20000
+    },
+    {
+      "epoch": 9.376757263355202,
+      "grad_norm": 1.831284999847412,
+      "learning_rate": 8.827905342080601e-06,
+      "loss": 0.0489,
+      "step": 20010
+    },
+    {
+      "epoch": 9.381443298969073,
+      "grad_norm": 1.498917818069458,
+      "learning_rate": 8.827319587628866e-06,
+      "loss": 0.0497,
+      "step": 20020
+    },
+    {
+      "epoch": 9.386129334582943,
+      "grad_norm": 1.7997996807098389,
+      "learning_rate": 8.826733833177133e-06,
+      "loss": 0.0543,
+      "step": 20030
+    },
+    {
+      "epoch": 9.390815370196814,
+      "grad_norm": 1.4676984548568726,
+      "learning_rate": 8.8261480787254e-06,
+      "loss": 0.0402,
+      "step": 20040
+    },
+    {
+      "epoch": 9.395501405810684,
+      "grad_norm": 1.4647475481033325,
+      "learning_rate": 8.825562324273665e-06,
+      "loss": 0.0483,
+      "step": 20050
+    },
+    {
+      "epoch": 9.400187441424555,
+      "grad_norm": 1.9055359363555908,
+      "learning_rate": 8.824976569821932e-06,
+      "loss": 0.057,
+      "step": 20060
+    },
+    {
+      "epoch": 9.404873477038425,
+      "grad_norm": 1.243730068206787,
+      "learning_rate": 8.824390815370197e-06,
+      "loss": 0.0521,
+      "step": 20070
+    },
+    {
+      "epoch": 9.409559512652296,
+      "grad_norm": 2.290194272994995,
+      "learning_rate": 8.823805060918463e-06,
+      "loss": 0.0536,
+      "step": 20080
+    },
+    {
+      "epoch": 9.414245548266166,
+      "grad_norm": 1.28463613986969,
+      "learning_rate": 8.823219306466731e-06,
+      "loss": 0.0627,
+      "step": 20090
+    },
+    {
+      "epoch": 9.418931583880038,
+      "grad_norm": 1.6804534196853638,
+      "learning_rate": 8.822633552014996e-06,
+      "loss": 0.0512,
+      "step": 20100
+    },
+    {
+      "epoch": 9.423617619493909,
+      "grad_norm": 0.8809636831283569,
+      "learning_rate": 8.822047797563262e-06,
+      "loss": 0.0429,
+      "step": 20110
+    },
+    {
+      "epoch": 9.42830365510778,
+      "grad_norm": 1.8962526321411133,
+      "learning_rate": 8.821462043111529e-06,
+      "loss": 0.0531,
+      "step": 20120
+    },
+    {
+      "epoch": 9.43298969072165,
+      "grad_norm": 1.0176962614059448,
+      "learning_rate": 8.820876288659794e-06,
+      "loss": 0.0467,
+      "step": 20130
+    },
+    {
+      "epoch": 9.43767572633552,
+      "grad_norm": 1.49270761013031,
+      "learning_rate": 8.82029053420806e-06,
+      "loss": 0.0578,
+      "step": 20140
+    },
+    {
+      "epoch": 9.44236176194939,
+      "grad_norm": 1.4182747602462769,
+      "learning_rate": 8.819704779756328e-06,
+      "loss": 0.051,
+      "step": 20150
+    },
+    {
+      "epoch": 9.447047797563261,
+      "grad_norm": 1.2575933933258057,
+      "learning_rate": 8.819119025304593e-06,
+      "loss": 0.0503,
+      "step": 20160
+    },
+    {
+      "epoch": 9.451733833177132,
+      "grad_norm": 1.8485591411590576,
+      "learning_rate": 8.81853327085286e-06,
+      "loss": 0.0578,
+      "step": 20170
+    },
+    {
+      "epoch": 9.456419868791002,
+      "grad_norm": 1.7406198978424072,
+      "learning_rate": 8.817947516401125e-06,
+      "loss": 0.0532,
+      "step": 20180
+    },
+    {
+      "epoch": 9.461105904404873,
+      "grad_norm": 1.138297438621521,
+      "learning_rate": 8.817361761949392e-06,
+      "loss": 0.0484,
+      "step": 20190
+    },
+    {
+      "epoch": 9.465791940018745,
+      "grad_norm": 1.2107694149017334,
+      "learning_rate": 8.816776007497657e-06,
+      "loss": 0.045,
+      "step": 20200
+    },
+    {
+      "epoch": 9.470477975632615,
+      "grad_norm": 1.5909892320632935,
+      "learning_rate": 8.816190253045924e-06,
+      "loss": 0.0463,
+      "step": 20210
+    },
+    {
+      "epoch": 9.475164011246486,
+      "grad_norm": 1.1377689838409424,
+      "learning_rate": 8.815604498594191e-06,
+      "loss": 0.0688,
+      "step": 20220
+    },
+    {
+      "epoch": 9.479850046860356,
+      "grad_norm": 2.0724937915802,
+      "learning_rate": 8.815018744142456e-06,
+      "loss": 0.0547,
+      "step": 20230
+    },
+    {
+      "epoch": 9.484536082474227,
+      "grad_norm": 0.9459996819496155,
+      "learning_rate": 8.814432989690721e-06,
+      "loss": 0.0482,
+      "step": 20240
+    },
+    {
+      "epoch": 9.489222118088097,
+      "grad_norm": 0.7871867418289185,
+      "learning_rate": 8.813847235238988e-06,
+      "loss": 0.0429,
+      "step": 20250
+    },
+    {
+      "epoch": 9.489222118088097,
+      "eval_loss": 0.03782571852207184,
+      "eval_pearson_cosine": 0.786819398080425,
+      "eval_pearson_dot": 0.6502888686958528,
+      "eval_pearson_euclidean": 0.7291925678539002,
+      "eval_pearson_manhattan": 0.7285750403533555,
+      "eval_runtime": 41.4193,
+      "eval_samples_per_second": 36.215,
+      "eval_spearman_cosine": 0.7882680919473954,
+      "eval_spearman_dot": 0.6683716175414093,
+      "eval_spearman_euclidean": 0.7431012804543077,
+      "eval_spearman_manhattan": 0.7425560629845656,
+      "eval_steps_per_second": 36.215,
+      "step": 20250
+    },
+    {
+      "epoch": 9.493908153701968,
+      "grad_norm": 1.718775749206543,
+      "learning_rate": 8.813261480787255e-06,
+      "loss": 0.0528,
+      "step": 20260
+    },
+    {
+      "epoch": 9.498594189315838,
+      "grad_norm": 1.860888957977295,
+      "learning_rate": 8.81267572633552e-06,
+      "loss": 0.051,
+      "step": 20270
+    },
+    {
+      "epoch": 9.503280224929709,
+      "grad_norm": 1.33186674118042,
+      "learning_rate": 8.812089971883787e-06,
+      "loss": 0.0558,
+      "step": 20280
+    },
+    {
+      "epoch": 9.50796626054358,
+      "grad_norm": 1.3585968017578125,
+      "learning_rate": 8.811504217432053e-06,
+      "loss": 0.0418,
+      "step": 20290
+    },
+    {
+      "epoch": 9.512652296157452,
+      "grad_norm": 1.2041314840316772,
+      "learning_rate": 8.81091846298032e-06,
+      "loss": 0.0661,
+      "step": 20300
+    },
+    {
+      "epoch": 9.517338331771322,
+      "grad_norm": 1.2717355489730835,
+      "learning_rate": 8.810332708528585e-06,
+      "loss": 0.0511,
+      "step": 20310
+    },
+    {
+      "epoch": 9.522024367385193,
+      "grad_norm": 0.9652617573738098,
+      "learning_rate": 8.809746954076852e-06,
+      "loss": 0.0514,
+      "step": 20320
+    },
+    {
+      "epoch": 9.526710402999063,
+      "grad_norm": 1.9312084913253784,
+      "learning_rate": 8.809161199625119e-06,
+      "loss": 0.0607,
+      "step": 20330
+    },
+    {
+      "epoch": 9.531396438612934,
+      "grad_norm": 1.669273018836975,
+      "learning_rate": 8.808575445173384e-06,
+      "loss": 0.0468,
+      "step": 20340
+    },
+    {
+      "epoch": 9.536082474226804,
+      "grad_norm": 1.204368233680725,
+      "learning_rate": 8.80798969072165e-06,
+      "loss": 0.0409,
+      "step": 20350
+    },
+    {
+      "epoch": 9.540768509840674,
+      "grad_norm": 1.2132142782211304,
+      "learning_rate": 8.807403936269916e-06,
+      "loss": 0.0448,
+      "step": 20360
+    },
+    {
+      "epoch": 9.545454545454545,
+      "grad_norm": 0.8759263157844543,
+      "learning_rate": 8.806818181818183e-06,
+      "loss": 0.0486,
+      "step": 20370
+    },
+    {
+      "epoch": 9.550140581068415,
+      "grad_norm": 0.980694591999054,
+      "learning_rate": 8.80623242736645e-06,
+      "loss": 0.04,
+      "step": 20380
+    },
+    {
+      "epoch": 9.554826616682288,
+      "grad_norm": 1.7257814407348633,
+      "learning_rate": 8.805646672914715e-06,
+      "loss": 0.0551,
+      "step": 20390
+    },
+    {
+      "epoch": 9.559512652296158,
+      "grad_norm": 0.9855765700340271,
+      "learning_rate": 8.80506091846298e-06,
+      "loss": 0.0442,
+      "step": 20400
+    },
+    {
+      "epoch": 9.564198687910029,
+      "grad_norm": 2.2688076496124268,
+      "learning_rate": 8.804475164011247e-06,
+      "loss": 0.0474,
+      "step": 20410
+    },
+    {
+      "epoch": 9.5688847235239,
+      "grad_norm": 0.8345751762390137,
+      "learning_rate": 8.803889409559512e-06,
+      "loss": 0.0507,
+      "step": 20420
+    },
+    {
+      "epoch": 9.57357075913777,
+      "grad_norm": 1.0074180364608765,
+      "learning_rate": 8.80330365510778e-06,
+      "loss": 0.0487,
+      "step": 20430
+    },
+    {
+      "epoch": 9.57825679475164,
+      "grad_norm": 1.1515982151031494,
+      "learning_rate": 8.802717900656046e-06,
+      "loss": 0.0443,
+      "step": 20440
+    },
+    {
+      "epoch": 9.58294283036551,
+      "grad_norm": 0.5248059630393982,
+      "learning_rate": 8.802132146204311e-06,
+      "loss": 0.0561,
+      "step": 20450
+    },
+    {
+      "epoch": 9.587628865979381,
+      "grad_norm": 1.2470523118972778,
+      "learning_rate": 8.801546391752578e-06,
+      "loss": 0.0469,
+      "step": 20460
+    },
+    {
+      "epoch": 9.592314901593252,
+      "grad_norm": 2.120579957962036,
+      "learning_rate": 8.800960637300844e-06,
+      "loss": 0.0513,
+      "step": 20470
+    },
+    {
+      "epoch": 9.597000937207122,
+      "grad_norm": 2.442443609237671,
+      "learning_rate": 8.80037488284911e-06,
+      "loss": 0.0635,
+      "step": 20480
+    },
+    {
+      "epoch": 9.601686972820993,
+      "grad_norm": 2.420138120651245,
+      "learning_rate": 8.799789128397377e-06,
+      "loss": 0.0626,
+      "step": 20490
+    },
+    {
+      "epoch": 9.606373008434865,
+      "grad_norm": 2.3432815074920654,
+      "learning_rate": 8.799203373945643e-06,
+      "loss": 0.0534,
+      "step": 20500
+    },
+    {
+      "epoch": 9.606373008434865,
+      "eval_loss": 0.037995509803295135,
+      "eval_pearson_cosine": 0.786149907730362,
+      "eval_pearson_dot": 0.6445644977545584,
+      "eval_pearson_euclidean": 0.7304901967314237,
+      "eval_pearson_manhattan": 0.7299852754916856,
+      "eval_runtime": 40.4167,
+      "eval_samples_per_second": 37.113,
+      "eval_spearman_cosine": 0.788096924565833,
+      "eval_spearman_dot": 0.6634744984860802,
+      "eval_spearman_euclidean": 0.7450878530420201,
+      "eval_spearman_manhattan": 0.7443460197740337,
+      "eval_steps_per_second": 37.113,
+      "step": 20500
+    },
+    {
+      "epoch": 9.611059044048735,
+      "grad_norm": 2.5431413650512695,
+      "learning_rate": 8.79861761949391e-06,
+      "loss": 0.0499,
+      "step": 20510
+    },
+    {
+      "epoch": 9.615745079662606,
+      "grad_norm": 1.4701391458511353,
+      "learning_rate": 8.798031865042175e-06,
+      "loss": 0.0528,
+      "step": 20520
+    },
+    {
+      "epoch": 9.620431115276476,
+      "grad_norm": 1.0605581998825073,
+      "learning_rate": 8.79744611059044e-06,
+      "loss": 0.0513,
+      "step": 20530
+    },
+    {
+      "epoch": 9.625117150890347,
+      "grad_norm": 1.7231255769729614,
+      "learning_rate": 8.796860356138709e-06,
+      "loss": 0.0432,
+      "step": 20540
+    },
+    {
+      "epoch": 9.629803186504217,
+      "grad_norm": 2.4519450664520264,
+      "learning_rate": 8.796274601686974e-06,
+      "loss": 0.0555,
+      "step": 20550
+    },
+    {
+      "epoch": 9.634489222118088,
+      "grad_norm": 1.7406028509140015,
+      "learning_rate": 8.795688847235239e-06,
+      "loss": 0.0547,
+      "step": 20560
+    },
+    {
+      "epoch": 9.639175257731958,
+      "grad_norm": 1.357200026512146,
+      "learning_rate": 8.795103092783506e-06,
+      "loss": 0.0548,
+      "step": 20570
+    },
+    {
+      "epoch": 9.643861293345829,
+      "grad_norm": 1.7510253190994263,
+      "learning_rate": 8.794517338331771e-06,
+      "loss": 0.0541,
+      "step": 20580
+    },
+    {
+      "epoch": 9.648547328959701,
+      "grad_norm": 2.1982178688049316,
+      "learning_rate": 8.793931583880038e-06,
+      "loss": 0.0572,
+      "step": 20590
+    },
+    {
+      "epoch": 9.653233364573572,
+      "grad_norm": 1.6132203340530396,
+      "learning_rate": 8.793345829428305e-06,
+      "loss": 0.0467,
+      "step": 20600
+    },
+    {
+      "epoch": 9.657919400187442,
+      "grad_norm": 1.165385127067566,
+      "learning_rate": 8.79276007497657e-06,
+      "loss": 0.0463,
+      "step": 20610
+    },
+    {
+      "epoch": 9.662605435801312,
+      "grad_norm": 2.306887149810791,
+      "learning_rate": 8.792174320524837e-06,
+      "loss": 0.0529,
+      "step": 20620
+    },
+    {
+      "epoch": 9.667291471415183,
+      "grad_norm": 1.740670084953308,
+      "learning_rate": 8.791588566073102e-06,
+      "loss": 0.0497,
+      "step": 20630
+    },
+    {
+      "epoch": 9.671977507029053,
+      "grad_norm": 1.0078073740005493,
+      "learning_rate": 8.79100281162137e-06,
+      "loss": 0.0495,
+      "step": 20640
+    },
+    {
+      "epoch": 9.676663542642924,
+      "grad_norm": 1.454647421836853,
+      "learning_rate": 8.790417057169636e-06,
+      "loss": 0.0477,
+      "step": 20650
+    },
+    {
+      "epoch": 9.681349578256794,
+      "grad_norm": 1.6520277261734009,
+      "learning_rate": 8.789831302717901e-06,
+      "loss": 0.0499,
+      "step": 20660
+    },
+    {
+      "epoch": 9.686035613870665,
+      "grad_norm": 2.0566940307617188,
+      "learning_rate": 8.789245548266168e-06,
+      "loss": 0.0504,
+      "step": 20670
+    },
+    {
+      "epoch": 9.690721649484535,
+      "grad_norm": 1.7212245464324951,
+      "learning_rate": 8.788659793814434e-06,
+      "loss": 0.0558,
+      "step": 20680
+    },
+    {
+      "epoch": 9.695407685098406,
+      "grad_norm": 0.9179878234863281,
+      "learning_rate": 8.788074039362699e-06,
+      "loss": 0.055,
+      "step": 20690
+    },
+    {
+      "epoch": 9.700093720712278,
+      "grad_norm": 1.1311330795288086,
+      "learning_rate": 8.787488284910966e-06,
+      "loss": 0.0555,
+      "step": 20700
+    },
+    {
+      "epoch": 9.704779756326149,
+      "grad_norm": 1.4247910976409912,
+      "learning_rate": 8.786902530459233e-06,
+      "loss": 0.0522,
+      "step": 20710
+    },
+    {
+      "epoch": 9.70946579194002,
+      "grad_norm": 2.309624195098877,
+      "learning_rate": 8.786316776007498e-06,
+      "loss": 0.0492,
+      "step": 20720
+    },
+    {
+      "epoch": 9.71415182755389,
+      "grad_norm": 0.9960254430770874,
+      "learning_rate": 8.785731021555765e-06,
+      "loss": 0.0461,
+      "step": 20730
+    },
+    {
+      "epoch": 9.71883786316776,
+      "grad_norm": 0.9048061966896057,
+      "learning_rate": 8.78514526710403e-06,
+      "loss": 0.0497,
+      "step": 20740
+    },
+    {
+      "epoch": 9.72352389878163,
+      "grad_norm": 1.7553735971450806,
+      "learning_rate": 8.784559512652297e-06,
+      "loss": 0.0531,
+      "step": 20750
+    },
+    {
+      "epoch": 9.72352389878163,
+      "eval_loss": 0.0375310480594635,
+      "eval_pearson_cosine": 0.7885717010435052,
+      "eval_pearson_dot": 0.6441669695807519,
+      "eval_pearson_euclidean": 0.7356023128188269,
+      "eval_pearson_manhattan": 0.7349906496289833,
+      "eval_runtime": 42.7003,
+      "eval_samples_per_second": 35.129,
+      "eval_spearman_cosine": 0.7894128881355192,
+      "eval_spearman_dot": 0.6634003738795025,
+      "eval_spearman_euclidean": 0.7498228606359407,
+      "eval_spearman_manhattan": 0.7492125285743606,
+      "eval_steps_per_second": 35.129,
+      "step": 20750
+    },
+    {
+      "epoch": 9.728209934395501,
+      "grad_norm": 2.050300121307373,
+      "learning_rate": 8.783973758200564e-06,
+      "loss": 0.0559,
+      "step": 20760
+    },
+    {
+      "epoch": 9.732895970009372,
+      "grad_norm": 1.7900549173355103,
+      "learning_rate": 8.783388003748829e-06,
+      "loss": 0.0468,
+      "step": 20770
+    },
+    {
+      "epoch": 9.737582005623242,
+      "grad_norm": 2.7999625205993652,
+      "learning_rate": 8.782802249297096e-06,
+      "loss": 0.0567,
+      "step": 20780
+    },
+    {
+      "epoch": 9.742268041237114,
+      "grad_norm": 1.746066927909851,
+      "learning_rate": 8.782216494845361e-06,
+      "loss": 0.0462,
+      "step": 20790
+    },
+    {
+      "epoch": 9.746954076850985,
+      "grad_norm": 1.6031302213668823,
+      "learning_rate": 8.781630740393628e-06,
+      "loss": 0.0569,
+      "step": 20800
+    },
+    {
+      "epoch": 9.751640112464855,
+      "grad_norm": 0.795835554599762,
+      "learning_rate": 8.781044985941893e-06,
+      "loss": 0.0471,
+      "step": 20810
+    },
+    {
+      "epoch": 9.756326148078726,
+      "grad_norm": 1.4143311977386475,
+      "learning_rate": 8.78045923149016e-06,
+      "loss": 0.0495,
+      "step": 20820
+    },
+    {
+      "epoch": 9.761012183692596,
+      "grad_norm": 1.2782717943191528,
+      "learning_rate": 8.779873477038427e-06,
+      "loss": 0.0594,
+      "step": 20830
+    },
+    {
+      "epoch": 9.765698219306467,
+      "grad_norm": 0.9974650144577026,
+      "learning_rate": 8.779287722586692e-06,
+      "loss": 0.0485,
+      "step": 20840
+    },
+    {
+      "epoch": 9.770384254920337,
+      "grad_norm": 1.5415414571762085,
+      "learning_rate": 8.778701968134958e-06,
+      "loss": 0.0476,
+      "step": 20850
+    },
+    {
+      "epoch": 9.775070290534208,
+      "grad_norm": 0.8162183165550232,
+      "learning_rate": 8.778116213683225e-06,
+      "loss": 0.0529,
+      "step": 20860
+    },
+    {
+      "epoch": 9.779756326148078,
+      "grad_norm": 1.532882571220398,
+      "learning_rate": 8.777530459231491e-06,
+      "loss": 0.0484,
+      "step": 20870
+    },
+    {
+      "epoch": 9.784442361761949,
+      "grad_norm": 1.609947919845581,
+      "learning_rate": 8.776944704779757e-06,
+      "loss": 0.0575,
+      "step": 20880
+    },
+    {
+      "epoch": 9.78912839737582,
+      "grad_norm": 1.7726844549179077,
+      "learning_rate": 8.776358950328024e-06,
+      "loss": 0.055,
+      "step": 20890
+    },
+    {
+      "epoch": 9.793814432989691,
+      "grad_norm": 0.7471759915351868,
+      "learning_rate": 8.775773195876289e-06,
+      "loss": 0.0439,
+      "step": 20900
+    },
+    {
+      "epoch": 9.798500468603562,
+      "grad_norm": 1.8393468856811523,
+      "learning_rate": 8.775187441424556e-06,
+      "loss": 0.0576,
+      "step": 20910
+    },
+    {
+      "epoch": 9.803186504217432,
+      "grad_norm": 1.570793867111206,
+      "learning_rate": 8.774601686972821e-06,
+      "loss": 0.0518,
+      "step": 20920
+    },
+    {
+      "epoch": 9.807872539831303,
+      "grad_norm": 2.121197462081909,
+      "learning_rate": 8.774015932521088e-06,
+      "loss": 0.0499,
+      "step": 20930
+    },
+    {
+      "epoch": 9.812558575445173,
+      "grad_norm": 1.4100779294967651,
+      "learning_rate": 8.773430178069355e-06,
+      "loss": 0.0518,
+      "step": 20940
+    },
+    {
+      "epoch": 9.817244611059044,
+      "grad_norm": 2.057370901107788,
+      "learning_rate": 8.77284442361762e-06,
+      "loss": 0.0445,
+      "step": 20950
+    },
+    {
+      "epoch": 9.821930646672914,
+      "grad_norm": 1.0159096717834473,
+      "learning_rate": 8.772258669165887e-06,
+      "loss": 0.0488,
+      "step": 20960
+    },
+    {
+      "epoch": 9.826616682286785,
+      "grad_norm": 2.1321892738342285,
+      "learning_rate": 8.771672914714152e-06,
+      "loss": 0.0435,
+      "step": 20970
+    },
+    {
+      "epoch": 9.831302717900655,
+      "grad_norm": 1.727754831314087,
+      "learning_rate": 8.771087160262419e-06,
+      "loss": 0.0506,
+      "step": 20980
+    },
+    {
+      "epoch": 9.835988753514528,
+      "grad_norm": 1.4544596672058105,
+      "learning_rate": 8.770501405810686e-06,
+      "loss": 0.0538,
+      "step": 20990
+    },
+    {
+      "epoch": 9.840674789128398,
+      "grad_norm": 1.8320542573928833,
+      "learning_rate": 8.769915651358951e-06,
+      "loss": 0.0464,
+      "step": 21000
+    },
+    {
+      "epoch": 9.840674789128398,
+      "eval_loss": 0.037997569888830185,
+      "eval_pearson_cosine": 0.7860642455644182,
+      "eval_pearson_dot": 0.6414601204917716,
+      "eval_pearson_euclidean": 0.7319623204333681,
+      "eval_pearson_manhattan": 0.731351329880491,
+      "eval_runtime": 42.4574,
+      "eval_samples_per_second": 35.33,
+      "eval_spearman_cosine": 0.7870558046080526,
+      "eval_spearman_dot": 0.6600440085619812,
+      "eval_spearman_euclidean": 0.746835302683809,
+      "eval_spearman_manhattan": 0.7463752489757238,
+      "eval_steps_per_second": 35.33,
+      "step": 21000
     }
   ],
   "logging_steps": 10,