CocoRoF commited on
Commit
3b172e0
·
verified ·
1 Parent(s): a9a429f

Training in progress, step 8000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb141f64f181f8aaa27c9d3a77af0d93d7afb9857067a90441308d67ea3f00a0
3
  size 737580392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14971b6e04cadaa88534f712e721171d13999a95ce9d9ac46c4729800b89e946
3
  size 737580392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd3f4a311baa95b8c3c9a1270d3c59f445a214334d14d8eaebcf84317b8587c7
3
  size 1475248442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cebce73c6f9897e73bc658e05632b2e976a2c1891e8ef3f6c3c2f8924ba60b4c
3
  size 1475248442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9819055317e0aa1215ad120239bc4cecc175225c0dc18c98ca0bffe9f465133f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff2a98b7b58dd272a86869334fd0edf641ab47ceb102b634b242c3ff92151a26
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:879c433ff59d472a2c5d7c4da04ae14ac33f2ec2552c8f49d34302050bc28fb0
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7a23627952aa878a89c58e1effd3a883c81420a06e0fccd761ecb8b1539b91f
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.280224929709466,
5
  "eval_steps": 250,
6
- "global_step": 7000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5355,6 +5355,770 @@
5355
  "eval_spearman_manhattan": 0.7992497485259732,
5356
  "eval_steps_per_second": 27.898,
5357
  "step": 7000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5358
  }
5359
  ],
5360
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.7488284910965324,
5
  "eval_steps": 250,
6
+ "global_step": 8000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5355
  "eval_spearman_manhattan": 0.7992497485259732,
5356
  "eval_steps_per_second": 27.898,
5357
  "step": 7000
5358
+ },
5359
+ {
5360
+ "epoch": 3.2849109653233364,
5361
+ "grad_norm": 1.3195544481277466,
5362
+ "learning_rate": 1.7946930646672916e-05,
5363
+ "loss": 0.1136,
5364
+ "step": 7010
5365
+ },
5366
+ {
5367
+ "epoch": 3.2895970009372073,
5368
+ "grad_norm": 1.7174955606460571,
5369
+ "learning_rate": 1.794400187441425e-05,
5370
+ "loss": 0.1114,
5371
+ "step": 7020
5372
+ },
5373
+ {
5374
+ "epoch": 3.294283036551078,
5375
+ "grad_norm": 1.8189715147018433,
5376
+ "learning_rate": 1.7941073102155578e-05,
5377
+ "loss": 0.133,
5378
+ "step": 7030
5379
+ },
5380
+ {
5381
+ "epoch": 3.2989690721649483,
5382
+ "grad_norm": 1.8281652927398682,
5383
+ "learning_rate": 1.793814432989691e-05,
5384
+ "loss": 0.1079,
5385
+ "step": 7040
5386
+ },
5387
+ {
5388
+ "epoch": 3.303655107778819,
5389
+ "grad_norm": 1.686579704284668,
5390
+ "learning_rate": 1.793521555763824e-05,
5391
+ "loss": 0.1038,
5392
+ "step": 7050
5393
+ },
5394
+ {
5395
+ "epoch": 3.3083411433926897,
5396
+ "grad_norm": 1.5126315355300903,
5397
+ "learning_rate": 1.793228678537957e-05,
5398
+ "loss": 0.1181,
5399
+ "step": 7060
5400
+ },
5401
+ {
5402
+ "epoch": 3.3130271790065606,
5403
+ "grad_norm": 1.5008283853530884,
5404
+ "learning_rate": 1.79293580131209e-05,
5405
+ "loss": 0.1223,
5406
+ "step": 7070
5407
+ },
5408
+ {
5409
+ "epoch": 3.317713214620431,
5410
+ "grad_norm": 0.7563474178314209,
5411
+ "learning_rate": 1.7926429240862232e-05,
5412
+ "loss": 0.1028,
5413
+ "step": 7080
5414
+ },
5415
+ {
5416
+ "epoch": 3.3223992502343016,
5417
+ "grad_norm": 0.9533982872962952,
5418
+ "learning_rate": 1.7923500468603562e-05,
5419
+ "loss": 0.1032,
5420
+ "step": 7090
5421
+ },
5422
+ {
5423
+ "epoch": 3.3270852858481725,
5424
+ "grad_norm": 1.5246295928955078,
5425
+ "learning_rate": 1.792057169634489e-05,
5426
+ "loss": 0.128,
5427
+ "step": 7100
5428
+ },
5429
+ {
5430
+ "epoch": 3.331771321462043,
5431
+ "grad_norm": 1.4202959537506104,
5432
+ "learning_rate": 1.7917642924086224e-05,
5433
+ "loss": 0.1173,
5434
+ "step": 7110
5435
+ },
5436
+ {
5437
+ "epoch": 3.336457357075914,
5438
+ "grad_norm": 1.5270695686340332,
5439
+ "learning_rate": 1.7914714151827554e-05,
5440
+ "loss": 0.1188,
5441
+ "step": 7120
5442
+ },
5443
+ {
5444
+ "epoch": 3.3411433926897844,
5445
+ "grad_norm": 1.5747121572494507,
5446
+ "learning_rate": 1.7911785379568887e-05,
5447
+ "loss": 0.1154,
5448
+ "step": 7130
5449
+ },
5450
+ {
5451
+ "epoch": 3.345829428303655,
5452
+ "grad_norm": 1.4199724197387695,
5453
+ "learning_rate": 1.7908856607310216e-05,
5454
+ "loss": 0.1174,
5455
+ "step": 7140
5456
+ },
5457
+ {
5458
+ "epoch": 3.350515463917526,
5459
+ "grad_norm": 1.601879596710205,
5460
+ "learning_rate": 1.790592783505155e-05,
5461
+ "loss": 0.1095,
5462
+ "step": 7150
5463
+ },
5464
+ {
5465
+ "epoch": 3.3552014995313963,
5466
+ "grad_norm": 1.8753916025161743,
5467
+ "learning_rate": 1.790299906279288e-05,
5468
+ "loss": 0.1137,
5469
+ "step": 7160
5470
+ },
5471
+ {
5472
+ "epoch": 3.3598875351452673,
5473
+ "grad_norm": 2.2938902378082275,
5474
+ "learning_rate": 1.790007029053421e-05,
5475
+ "loss": 0.1187,
5476
+ "step": 7170
5477
+ },
5478
+ {
5479
+ "epoch": 3.3645735707591378,
5480
+ "grad_norm": 1.0570533275604248,
5481
+ "learning_rate": 1.789714151827554e-05,
5482
+ "loss": 0.1122,
5483
+ "step": 7180
5484
+ },
5485
+ {
5486
+ "epoch": 3.3692596063730083,
5487
+ "grad_norm": 1.3693780899047852,
5488
+ "learning_rate": 1.789421274601687e-05,
5489
+ "loss": 0.1056,
5490
+ "step": 7190
5491
+ },
5492
+ {
5493
+ "epoch": 3.373945641986879,
5494
+ "grad_norm": 1.088611364364624,
5495
+ "learning_rate": 1.7891283973758203e-05,
5496
+ "loss": 0.0943,
5497
+ "step": 7200
5498
+ },
5499
+ {
5500
+ "epoch": 3.3786316776007497,
5501
+ "grad_norm": 1.651551604270935,
5502
+ "learning_rate": 1.7888355201499533e-05,
5503
+ "loss": 0.1148,
5504
+ "step": 7210
5505
+ },
5506
+ {
5507
+ "epoch": 3.3833177132146206,
5508
+ "grad_norm": 1.9416500329971313,
5509
+ "learning_rate": 1.7885426429240866e-05,
5510
+ "loss": 0.1162,
5511
+ "step": 7220
5512
+ },
5513
+ {
5514
+ "epoch": 3.388003748828491,
5515
+ "grad_norm": 1.549742341041565,
5516
+ "learning_rate": 1.7882497656982195e-05,
5517
+ "loss": 0.0984,
5518
+ "step": 7230
5519
+ },
5520
+ {
5521
+ "epoch": 3.3926897844423616,
5522
+ "grad_norm": 1.9772891998291016,
5523
+ "learning_rate": 1.7879568884723525e-05,
5524
+ "loss": 0.1146,
5525
+ "step": 7240
5526
+ },
5527
+ {
5528
+ "epoch": 3.3973758200562325,
5529
+ "grad_norm": 1.915805459022522,
5530
+ "learning_rate": 1.7876640112464858e-05,
5531
+ "loss": 0.12,
5532
+ "step": 7250
5533
+ },
5534
+ {
5535
+ "epoch": 3.3973758200562325,
5536
+ "eval_loss": 0.030257537961006165,
5537
+ "eval_pearson_cosine": 0.8236641701952188,
5538
+ "eval_pearson_dot": 0.7683165917501924,
5539
+ "eval_pearson_euclidean": 0.7929510591670237,
5540
+ "eval_pearson_manhattan": 0.7953417991908651,
5541
+ "eval_runtime": 3.1268,
5542
+ "eval_samples_per_second": 479.717,
5543
+ "eval_spearman_cosine": 0.8229564695132245,
5544
+ "eval_spearman_dot": 0.7689794551234463,
5545
+ "eval_spearman_euclidean": 0.8016168710764218,
5546
+ "eval_spearman_manhattan": 0.8035095618864339,
5547
+ "eval_steps_per_second": 30.062,
5548
+ "step": 7250
5549
+ },
5550
+ {
5551
+ "epoch": 3.402061855670103,
5552
+ "grad_norm": 1.2766612768173218,
5553
+ "learning_rate": 1.7873711340206187e-05,
5554
+ "loss": 0.1256,
5555
+ "step": 7260
5556
+ },
5557
+ {
5558
+ "epoch": 3.406747891283974,
5559
+ "grad_norm": 1.6186020374298096,
5560
+ "learning_rate": 1.7870782567947517e-05,
5561
+ "loss": 0.106,
5562
+ "step": 7270
5563
+ },
5564
+ {
5565
+ "epoch": 3.4114339268978444,
5566
+ "grad_norm": 1.5091776847839355,
5567
+ "learning_rate": 1.7867853795688846e-05,
5568
+ "loss": 0.1157,
5569
+ "step": 7280
5570
+ },
5571
+ {
5572
+ "epoch": 3.416119962511715,
5573
+ "grad_norm": 1.5809932947158813,
5574
+ "learning_rate": 1.786492502343018e-05,
5575
+ "loss": 0.1151,
5576
+ "step": 7290
5577
+ },
5578
+ {
5579
+ "epoch": 3.420805998125586,
5580
+ "grad_norm": 1.6943892240524292,
5581
+ "learning_rate": 1.786199625117151e-05,
5582
+ "loss": 0.1131,
5583
+ "step": 7300
5584
+ },
5585
+ {
5586
+ "epoch": 3.4254920337394563,
5587
+ "grad_norm": 1.5174516439437866,
5588
+ "learning_rate": 1.785906747891284e-05,
5589
+ "loss": 0.0843,
5590
+ "step": 7310
5591
+ },
5592
+ {
5593
+ "epoch": 3.4301780693533273,
5594
+ "grad_norm": 1.4588013887405396,
5595
+ "learning_rate": 1.785613870665417e-05,
5596
+ "loss": 0.1079,
5597
+ "step": 7320
5598
+ },
5599
+ {
5600
+ "epoch": 3.4348641049671977,
5601
+ "grad_norm": 1.5333396196365356,
5602
+ "learning_rate": 1.7853209934395504e-05,
5603
+ "loss": 0.1309,
5604
+ "step": 7330
5605
+ },
5606
+ {
5607
+ "epoch": 3.4395501405810682,
5608
+ "grad_norm": 1.5144264698028564,
5609
+ "learning_rate": 1.7850281162136833e-05,
5610
+ "loss": 0.1006,
5611
+ "step": 7340
5612
+ },
5613
+ {
5614
+ "epoch": 3.444236176194939,
5615
+ "grad_norm": 1.2702832221984863,
5616
+ "learning_rate": 1.7847352389878166e-05,
5617
+ "loss": 0.1173,
5618
+ "step": 7350
5619
+ },
5620
+ {
5621
+ "epoch": 3.4489222118088096,
5622
+ "grad_norm": 1.808031678199768,
5623
+ "learning_rate": 1.7844423617619496e-05,
5624
+ "loss": 0.1321,
5625
+ "step": 7360
5626
+ },
5627
+ {
5628
+ "epoch": 3.4536082474226806,
5629
+ "grad_norm": 1.8384732007980347,
5630
+ "learning_rate": 1.784149484536083e-05,
5631
+ "loss": 0.1093,
5632
+ "step": 7370
5633
+ },
5634
+ {
5635
+ "epoch": 3.458294283036551,
5636
+ "grad_norm": 1.468562126159668,
5637
+ "learning_rate": 1.7838566073102158e-05,
5638
+ "loss": 0.1117,
5639
+ "step": 7380
5640
+ },
5641
+ {
5642
+ "epoch": 3.4629803186504216,
5643
+ "grad_norm": 1.647537350654602,
5644
+ "learning_rate": 1.7835637300843487e-05,
5645
+ "loss": 0.1161,
5646
+ "step": 7390
5647
+ },
5648
+ {
5649
+ "epoch": 3.4676663542642925,
5650
+ "grad_norm": 1.5217511653900146,
5651
+ "learning_rate": 1.783270852858482e-05,
5652
+ "loss": 0.1287,
5653
+ "step": 7400
5654
+ },
5655
+ {
5656
+ "epoch": 3.472352389878163,
5657
+ "grad_norm": 1.3853079080581665,
5658
+ "learning_rate": 1.782977975632615e-05,
5659
+ "loss": 0.1273,
5660
+ "step": 7410
5661
+ },
5662
+ {
5663
+ "epoch": 3.477038425492034,
5664
+ "grad_norm": 1.938225269317627,
5665
+ "learning_rate": 1.782685098406748e-05,
5666
+ "loss": 0.1381,
5667
+ "step": 7420
5668
+ },
5669
+ {
5670
+ "epoch": 3.4817244611059044,
5671
+ "grad_norm": 1.4750497341156006,
5672
+ "learning_rate": 1.7823922211808812e-05,
5673
+ "loss": 0.1173,
5674
+ "step": 7430
5675
+ },
5676
+ {
5677
+ "epoch": 3.486410496719775,
5678
+ "grad_norm": 1.421303391456604,
5679
+ "learning_rate": 1.7820993439550142e-05,
5680
+ "loss": 0.104,
5681
+ "step": 7440
5682
+ },
5683
+ {
5684
+ "epoch": 3.491096532333646,
5685
+ "grad_norm": 1.1640669107437134,
5686
+ "learning_rate": 1.781806466729147e-05,
5687
+ "loss": 0.1018,
5688
+ "step": 7450
5689
+ },
5690
+ {
5691
+ "epoch": 3.4957825679475163,
5692
+ "grad_norm": 1.3329296112060547,
5693
+ "learning_rate": 1.7815135895032804e-05,
5694
+ "loss": 0.1127,
5695
+ "step": 7460
5696
+ },
5697
+ {
5698
+ "epoch": 3.5004686035613872,
5699
+ "grad_norm": 2.0254921913146973,
5700
+ "learning_rate": 1.7812207122774134e-05,
5701
+ "loss": 0.1361,
5702
+ "step": 7470
5703
+ },
5704
+ {
5705
+ "epoch": 3.5051546391752577,
5706
+ "grad_norm": 1.3234291076660156,
5707
+ "learning_rate": 1.7809278350515463e-05,
5708
+ "loss": 0.1211,
5709
+ "step": 7480
5710
+ },
5711
+ {
5712
+ "epoch": 3.509840674789128,
5713
+ "grad_norm": 1.448867678642273,
5714
+ "learning_rate": 1.7806349578256796e-05,
5715
+ "loss": 0.0999,
5716
+ "step": 7490
5717
+ },
5718
+ {
5719
+ "epoch": 3.514526710402999,
5720
+ "grad_norm": 1.56028151512146,
5721
+ "learning_rate": 1.7803420805998125e-05,
5722
+ "loss": 0.1003,
5723
+ "step": 7500
5724
+ },
5725
+ {
5726
+ "epoch": 3.514526710402999,
5727
+ "eval_loss": 0.031522952020168304,
5728
+ "eval_pearson_cosine": 0.81808882827319,
5729
+ "eval_pearson_dot": 0.750183242563736,
5730
+ "eval_pearson_euclidean": 0.7941389529600116,
5731
+ "eval_pearson_manhattan": 0.7963593749181541,
5732
+ "eval_runtime": 3.4063,
5733
+ "eval_samples_per_second": 440.357,
5734
+ "eval_spearman_cosine": 0.8172168876953952,
5735
+ "eval_spearman_dot": 0.7505334150938179,
5736
+ "eval_spearman_euclidean": 0.8027508928771647,
5737
+ "eval_spearman_manhattan": 0.8047390955898335,
5738
+ "eval_steps_per_second": 27.596,
5739
+ "step": 7500
5740
+ },
5741
+ {
5742
+ "epoch": 3.5192127460168696,
5743
+ "grad_norm": 1.775154948234558,
5744
+ "learning_rate": 1.780049203373946e-05,
5745
+ "loss": 0.1058,
5746
+ "step": 7510
5747
+ },
5748
+ {
5749
+ "epoch": 3.5238987816307406,
5750
+ "grad_norm": 2.034623622894287,
5751
+ "learning_rate": 1.7797563261480788e-05,
5752
+ "loss": 0.1198,
5753
+ "step": 7520
5754
+ },
5755
+ {
5756
+ "epoch": 3.528584817244611,
5757
+ "grad_norm": 2.037757635116577,
5758
+ "learning_rate": 1.779463448922212e-05,
5759
+ "loss": 0.1047,
5760
+ "step": 7530
5761
+ },
5762
+ {
5763
+ "epoch": 3.5332708528584815,
5764
+ "grad_norm": 1.7488856315612793,
5765
+ "learning_rate": 1.779170571696345e-05,
5766
+ "loss": 0.1096,
5767
+ "step": 7540
5768
+ },
5769
+ {
5770
+ "epoch": 3.5379568884723525,
5771
+ "grad_norm": 1.4379994869232178,
5772
+ "learning_rate": 1.7788776944704783e-05,
5773
+ "loss": 0.1294,
5774
+ "step": 7550
5775
+ },
5776
+ {
5777
+ "epoch": 3.542642924086223,
5778
+ "grad_norm": 1.6399285793304443,
5779
+ "learning_rate": 1.7785848172446113e-05,
5780
+ "loss": 0.1022,
5781
+ "step": 7560
5782
+ },
5783
+ {
5784
+ "epoch": 3.547328959700094,
5785
+ "grad_norm": 2.018709182739258,
5786
+ "learning_rate": 1.7782919400187446e-05,
5787
+ "loss": 0.1109,
5788
+ "step": 7570
5789
+ },
5790
+ {
5791
+ "epoch": 3.5520149953139644,
5792
+ "grad_norm": 0.8838659524917603,
5793
+ "learning_rate": 1.7779990627928775e-05,
5794
+ "loss": 0.1174,
5795
+ "step": 7580
5796
+ },
5797
+ {
5798
+ "epoch": 3.556701030927835,
5799
+ "grad_norm": 2.33553147315979,
5800
+ "learning_rate": 1.7777061855670105e-05,
5801
+ "loss": 0.1303,
5802
+ "step": 7590
5803
+ },
5804
+ {
5805
+ "epoch": 3.561387066541706,
5806
+ "grad_norm": 1.3626232147216797,
5807
+ "learning_rate": 1.7774133083411434e-05,
5808
+ "loss": 0.1261,
5809
+ "step": 7600
5810
+ },
5811
+ {
5812
+ "epoch": 3.5660731021555763,
5813
+ "grad_norm": 1.8817883729934692,
5814
+ "learning_rate": 1.7771204311152767e-05,
5815
+ "loss": 0.129,
5816
+ "step": 7610
5817
+ },
5818
+ {
5819
+ "epoch": 3.570759137769447,
5820
+ "grad_norm": 1.817522644996643,
5821
+ "learning_rate": 1.7768275538894096e-05,
5822
+ "loss": 0.1029,
5823
+ "step": 7620
5824
+ },
5825
+ {
5826
+ "epoch": 3.5754451733833177,
5827
+ "grad_norm": 2.126809597015381,
5828
+ "learning_rate": 1.7765346766635426e-05,
5829
+ "loss": 0.133,
5830
+ "step": 7630
5831
+ },
5832
+ {
5833
+ "epoch": 3.580131208997188,
5834
+ "grad_norm": 1.5726145505905151,
5835
+ "learning_rate": 1.776241799437676e-05,
5836
+ "loss": 0.1174,
5837
+ "step": 7640
5838
+ },
5839
+ {
5840
+ "epoch": 3.584817244611059,
5841
+ "grad_norm": 1.2460345029830933,
5842
+ "learning_rate": 1.7759489222118088e-05,
5843
+ "loss": 0.104,
5844
+ "step": 7650
5845
+ },
5846
+ {
5847
+ "epoch": 3.5895032802249296,
5848
+ "grad_norm": 0.8555458784103394,
5849
+ "learning_rate": 1.775656044985942e-05,
5850
+ "loss": 0.0922,
5851
+ "step": 7660
5852
+ },
5853
+ {
5854
+ "epoch": 3.5941893158388005,
5855
+ "grad_norm": 1.0801823139190674,
5856
+ "learning_rate": 1.775363167760075e-05,
5857
+ "loss": 0.1155,
5858
+ "step": 7670
5859
+ },
5860
+ {
5861
+ "epoch": 3.598875351452671,
5862
+ "grad_norm": 1.5534123182296753,
5863
+ "learning_rate": 1.775070290534208e-05,
5864
+ "loss": 0.1188,
5865
+ "step": 7680
5866
+ },
5867
+ {
5868
+ "epoch": 3.6035613870665415,
5869
+ "grad_norm": 1.877568244934082,
5870
+ "learning_rate": 1.7747774133083413e-05,
5871
+ "loss": 0.1247,
5872
+ "step": 7690
5873
+ },
5874
+ {
5875
+ "epoch": 3.6082474226804124,
5876
+ "grad_norm": 1.3344906568527222,
5877
+ "learning_rate": 1.7744845360824743e-05,
5878
+ "loss": 0.1026,
5879
+ "step": 7700
5880
+ },
5881
+ {
5882
+ "epoch": 3.612933458294283,
5883
+ "grad_norm": 1.3001571893692017,
5884
+ "learning_rate": 1.7741916588566075e-05,
5885
+ "loss": 0.1084,
5886
+ "step": 7710
5887
+ },
5888
+ {
5889
+ "epoch": 3.617619493908154,
5890
+ "grad_norm": 1.6098321676254272,
5891
+ "learning_rate": 1.7738987816307405e-05,
5892
+ "loss": 0.1198,
5893
+ "step": 7720
5894
+ },
5895
+ {
5896
+ "epoch": 3.6223055295220243,
5897
+ "grad_norm": 1.9912426471710205,
5898
+ "learning_rate": 1.7736059044048738e-05,
5899
+ "loss": 0.1207,
5900
+ "step": 7730
5901
+ },
5902
+ {
5903
+ "epoch": 3.626991565135895,
5904
+ "grad_norm": 1.3719514608383179,
5905
+ "learning_rate": 1.7733130271790067e-05,
5906
+ "loss": 0.1146,
5907
+ "step": 7740
5908
+ },
5909
+ {
5910
+ "epoch": 3.6316776007497658,
5911
+ "grad_norm": 1.8076539039611816,
5912
+ "learning_rate": 1.77302014995314e-05,
5913
+ "loss": 0.1237,
5914
+ "step": 7750
5915
+ },
5916
+ {
5917
+ "epoch": 3.6316776007497658,
5918
+ "eval_loss": 0.030823856592178345,
5919
+ "eval_pearson_cosine": 0.8190383769297682,
5920
+ "eval_pearson_dot": 0.7588954827942871,
5921
+ "eval_pearson_euclidean": 0.7885622153014467,
5922
+ "eval_pearson_manhattan": 0.7914609723187311,
5923
+ "eval_runtime": 3.3684,
5924
+ "eval_samples_per_second": 445.315,
5925
+ "eval_spearman_cosine": 0.8178038191764202,
5926
+ "eval_spearman_dot": 0.7582507405982527,
5927
+ "eval_spearman_euclidean": 0.7968649602561575,
5928
+ "eval_spearman_manhattan": 0.7990160021869986,
5929
+ "eval_steps_per_second": 27.906,
5930
+ "step": 7750
5931
+ },
5932
+ {
5933
+ "epoch": 3.6363636363636362,
5934
+ "grad_norm": 1.93159818649292,
5935
+ "learning_rate": 1.772727272727273e-05,
5936
+ "loss": 0.1406,
5937
+ "step": 7760
5938
+ },
5939
+ {
5940
+ "epoch": 3.641049671977507,
5941
+ "grad_norm": 1.5332342386245728,
5942
+ "learning_rate": 1.772434395501406e-05,
5943
+ "loss": 0.1012,
5944
+ "step": 7770
5945
+ },
5946
+ {
5947
+ "epoch": 3.6457357075913777,
5948
+ "grad_norm": 1.2360097169876099,
5949
+ "learning_rate": 1.772141518275539e-05,
5950
+ "loss": 0.1226,
5951
+ "step": 7780
5952
+ },
5953
+ {
5954
+ "epoch": 3.650421743205248,
5955
+ "grad_norm": 1.1280817985534668,
5956
+ "learning_rate": 1.771848641049672e-05,
5957
+ "loss": 0.1096,
5958
+ "step": 7790
5959
+ },
5960
+ {
5961
+ "epoch": 3.655107778819119,
5962
+ "grad_norm": 1.785524606704712,
5963
+ "learning_rate": 1.771555763823805e-05,
5964
+ "loss": 0.0999,
5965
+ "step": 7800
5966
+ },
5967
+ {
5968
+ "epoch": 3.6597938144329896,
5969
+ "grad_norm": 1.4114161729812622,
5970
+ "learning_rate": 1.771262886597938e-05,
5971
+ "loss": 0.1516,
5972
+ "step": 7810
5973
+ },
5974
+ {
5975
+ "epoch": 3.6644798500468605,
5976
+ "grad_norm": 0.764554500579834,
5977
+ "learning_rate": 1.7709700093720713e-05,
5978
+ "loss": 0.1187,
5979
+ "step": 7820
5980
+ },
5981
+ {
5982
+ "epoch": 3.669165885660731,
5983
+ "grad_norm": 1.6357208490371704,
5984
+ "learning_rate": 1.7706771321462043e-05,
5985
+ "loss": 0.125,
5986
+ "step": 7830
5987
+ },
5988
+ {
5989
+ "epoch": 3.6738519212746015,
5990
+ "grad_norm": 1.6706812381744385,
5991
+ "learning_rate": 1.7703842549203376e-05,
5992
+ "loss": 0.1231,
5993
+ "step": 7840
5994
+ },
5995
+ {
5996
+ "epoch": 3.6785379568884724,
5997
+ "grad_norm": 1.300350308418274,
5998
+ "learning_rate": 1.7700913776944705e-05,
5999
+ "loss": 0.1322,
6000
+ "step": 7850
6001
+ },
6002
+ {
6003
+ "epoch": 3.683223992502343,
6004
+ "grad_norm": 2.3312079906463623,
6005
+ "learning_rate": 1.7697985004686038e-05,
6006
+ "loss": 0.1215,
6007
+ "step": 7860
6008
+ },
6009
+ {
6010
+ "epoch": 3.687910028116214,
6011
+ "grad_norm": 1.8526086807250977,
6012
+ "learning_rate": 1.7695056232427368e-05,
6013
+ "loss": 0.1221,
6014
+ "step": 7870
6015
+ },
6016
+ {
6017
+ "epoch": 3.6925960637300843,
6018
+ "grad_norm": 1.1056420803070068,
6019
+ "learning_rate": 1.7692127460168697e-05,
6020
+ "loss": 0.1133,
6021
+ "step": 7880
6022
+ },
6023
+ {
6024
+ "epoch": 3.697282099343955,
6025
+ "grad_norm": 1.6572386026382446,
6026
+ "learning_rate": 1.768919868791003e-05,
6027
+ "loss": 0.1088,
6028
+ "step": 7890
6029
+ },
6030
+ {
6031
+ "epoch": 3.7019681349578257,
6032
+ "grad_norm": 2.0055267810821533,
6033
+ "learning_rate": 1.768626991565136e-05,
6034
+ "loss": 0.1329,
6035
+ "step": 7900
6036
+ },
6037
+ {
6038
+ "epoch": 3.706654170571696,
6039
+ "grad_norm": 1.336578130722046,
6040
+ "learning_rate": 1.7683341143392693e-05,
6041
+ "loss": 0.1383,
6042
+ "step": 7910
6043
+ },
6044
+ {
6045
+ "epoch": 3.711340206185567,
6046
+ "grad_norm": 1.4480973482131958,
6047
+ "learning_rate": 1.7680412371134022e-05,
6048
+ "loss": 0.1315,
6049
+ "step": 7920
6050
+ },
6051
+ {
6052
+ "epoch": 3.7160262417994376,
6053
+ "grad_norm": 1.1693243980407715,
6054
+ "learning_rate": 1.7677483598875355e-05,
6055
+ "loss": 0.1103,
6056
+ "step": 7930
6057
+ },
6058
+ {
6059
+ "epoch": 3.720712277413308,
6060
+ "grad_norm": 1.479332447052002,
6061
+ "learning_rate": 1.7674554826616684e-05,
6062
+ "loss": 0.1113,
6063
+ "step": 7940
6064
+ },
6065
+ {
6066
+ "epoch": 3.725398313027179,
6067
+ "grad_norm": 1.9750406742095947,
6068
+ "learning_rate": 1.7671626054358014e-05,
6069
+ "loss": 0.1241,
6070
+ "step": 7950
6071
+ },
6072
+ {
6073
+ "epoch": 3.7300843486410495,
6074
+ "grad_norm": 1.7816461324691772,
6075
+ "learning_rate": 1.7668697282099347e-05,
6076
+ "loss": 0.1437,
6077
+ "step": 7960
6078
+ },
6079
+ {
6080
+ "epoch": 3.7347703842549205,
6081
+ "grad_norm": 1.6916552782058716,
6082
+ "learning_rate": 1.7665768509840676e-05,
6083
+ "loss": 0.1114,
6084
+ "step": 7970
6085
+ },
6086
+ {
6087
+ "epoch": 3.739456419868791,
6088
+ "grad_norm": 2.0555579662323,
6089
+ "learning_rate": 1.7662839737582006e-05,
6090
+ "loss": 0.1294,
6091
+ "step": 7980
6092
+ },
6093
+ {
6094
+ "epoch": 3.7441424554826614,
6095
+ "grad_norm": 1.6788338422775269,
6096
+ "learning_rate": 1.7659910965323335e-05,
6097
+ "loss": 0.1097,
6098
+ "step": 7990
6099
+ },
6100
+ {
6101
+ "epoch": 3.7488284910965324,
6102
+ "grad_norm": 1.449519395828247,
6103
+ "learning_rate": 1.7656982193064668e-05,
6104
+ "loss": 0.0991,
6105
+ "step": 8000
6106
+ },
6107
+ {
6108
+ "epoch": 3.7488284910965324,
6109
+ "eval_loss": 0.031480398029088974,
6110
+ "eval_pearson_cosine": 0.8186177727574986,
6111
+ "eval_pearson_dot": 0.7539932234321896,
6112
+ "eval_pearson_euclidean": 0.7925447531882952,
6113
+ "eval_pearson_manhattan": 0.7952092015244716,
6114
+ "eval_runtime": 3.1967,
6115
+ "eval_samples_per_second": 469.239,
6116
+ "eval_spearman_cosine": 0.8171537163292079,
6117
+ "eval_spearman_dot": 0.7531451005912094,
6118
+ "eval_spearman_euclidean": 0.8000355030273796,
6119
+ "eval_spearman_manhattan": 0.8023527525471515,
6120
+ "eval_steps_per_second": 29.406,
6121
+ "step": 8000
6122
  }
6123
  ],
6124
  "logging_steps": 10,