CocoRoF committed
Commit 42e08ab · verified · 1 Parent(s): b96132f

Training in progress, step 16000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b233efdfedfad8f103bdeed119d35dcd099eba59c85ffbbf10546b2a64d3674e
+ oid sha256:a2d2a0f5d2fd2db2c00ba8019c5d26c7e05ea6254b391695cc519d7dab59b225
  size 613004648
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d6cd415bf9e90b88d0f924f465a8f839a232fb228bedc13d8305dea0013598c0
+ oid sha256:4b5ee984e35f64350e857f17403ecda5095a0c3d2917a731f8237c213d237bae
  size 1226096954
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:35a846db1f952a5c57e0f96eff0e1f51f9bc69325d1c9533532221ee67d3d2cc
+ oid sha256:2be093747f5a4a232618c3318bfffdf24560aea746cf4c11903c465c5179b6c9
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9ae2d4f55a59e8b5160b468e406d78254547d58c1b3eaf0e1797452533ce3e19
+ oid sha256:975ed305a3fe7b4927a3b3d12f66d6b14051cd85dfe6e94defa4d7c56781b5ac
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 7.029053420805998,
+ "epoch": 7.497656982193065,
  "eval_steps": 250,
- "global_step": 15000,
+ "global_step": 16000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -11467,6 +11467,770 @@
  "eval_spearman_manhattan": 0.7568637419859118,
  "eval_steps_per_second": 38.084,
  "step": 15000
+ },
11471
+ {
11472
+ "epoch": 7.033739456419869,
11473
+ "grad_norm": 1.905097246170044,
11474
+ "learning_rate": 9.120782567947517e-06,
11475
+ "loss": 0.058,
11476
+ "step": 15010
11477
+ },
11478
+ {
11479
+ "epoch": 7.038425492033739,
11480
+ "grad_norm": 1.7693982124328613,
11481
+ "learning_rate": 9.120196813495782e-06,
11482
+ "loss": 0.0661,
11483
+ "step": 15020
11484
+ },
11485
+ {
11486
+ "epoch": 7.04311152764761,
11487
+ "grad_norm": 2.130645990371704,
11488
+ "learning_rate": 9.11961105904405e-06,
11489
+ "loss": 0.0749,
11490
+ "step": 15030
11491
+ },
11492
+ {
11493
+ "epoch": 7.047797563261481,
11494
+ "grad_norm": 1.778387427330017,
11495
+ "learning_rate": 9.119025304592316e-06,
11496
+ "loss": 0.063,
11497
+ "step": 15040
11498
+ },
11499
+ {
11500
+ "epoch": 7.052483598875352,
11501
+ "grad_norm": 1.038841724395752,
11502
+ "learning_rate": 9.118439550140582e-06,
11503
+ "loss": 0.0717,
11504
+ "step": 15050
11505
+ },
11506
+ {
11507
+ "epoch": 7.057169634489222,
11508
+ "grad_norm": 1.8356302976608276,
11509
+ "learning_rate": 9.117853795688848e-06,
11510
+ "loss": 0.0607,
11511
+ "step": 15060
11512
+ },
11513
+ {
11514
+ "epoch": 7.061855670103093,
11515
+ "grad_norm": 1.5863852500915527,
11516
+ "learning_rate": 9.117268041237114e-06,
11517
+ "loss": 0.0609,
11518
+ "step": 15070
11519
+ },
11520
+ {
11521
+ "epoch": 7.066541705716963,
11522
+ "grad_norm": 1.3317904472351074,
11523
+ "learning_rate": 9.11668228678538e-06,
11524
+ "loss": 0.0576,
11525
+ "step": 15080
11526
+ },
11527
+ {
11528
+ "epoch": 7.071227741330834,
11529
+ "grad_norm": 2.148087978363037,
11530
+ "learning_rate": 9.116096532333648e-06,
11531
+ "loss": 0.0694,
11532
+ "step": 15090
11533
+ },
11534
+ {
11535
+ "epoch": 7.075913776944705,
11536
+ "grad_norm": 1.34135901927948,
11537
+ "learning_rate": 9.115510777881913e-06,
11538
+ "loss": 0.063,
11539
+ "step": 15100
11540
+ },
11541
+ {
11542
+ "epoch": 7.080599812558575,
11543
+ "grad_norm": 1.9813562631607056,
11544
+ "learning_rate": 9.11492502343018e-06,
11545
+ "loss": 0.0843,
11546
+ "step": 15110
11547
+ },
11548
+ {
11549
+ "epoch": 7.085285848172446,
11550
+ "grad_norm": 1.3236151933670044,
11551
+ "learning_rate": 9.114339268978445e-06,
11552
+ "loss": 0.0682,
11553
+ "step": 15120
11554
+ },
11555
+ {
11556
+ "epoch": 7.089971883786316,
11557
+ "grad_norm": 1.483312726020813,
11558
+ "learning_rate": 9.11375351452671e-06,
11559
+ "loss": 0.0742,
11560
+ "step": 15130
11561
+ },
11562
+ {
11563
+ "epoch": 7.094657919400188,
11564
+ "grad_norm": 1.7315001487731934,
11565
+ "learning_rate": 9.113167760074977e-06,
11566
+ "loss": 0.0544,
11567
+ "step": 15140
11568
+ },
11569
+ {
11570
+ "epoch": 7.099343955014058,
11571
+ "grad_norm": 2.530773162841797,
11572
+ "learning_rate": 9.112582005623244e-06,
11573
+ "loss": 0.0768,
11574
+ "step": 15150
11575
+ },
11576
+ {
11577
+ "epoch": 7.104029990627929,
11578
+ "grad_norm": 2.089907169342041,
11579
+ "learning_rate": 9.11199625117151e-06,
11580
+ "loss": 0.0617,
11581
+ "step": 15160
11582
+ },
11583
+ {
11584
+ "epoch": 7.108716026241799,
11585
+ "grad_norm": 1.7428967952728271,
11586
+ "learning_rate": 9.111410496719776e-06,
11587
+ "loss": 0.0763,
11588
+ "step": 15170
11589
+ },
11590
+ {
11591
+ "epoch": 7.11340206185567,
11592
+ "grad_norm": 1.5844217538833618,
11593
+ "learning_rate": 9.110824742268041e-06,
11594
+ "loss": 0.0554,
11595
+ "step": 15180
11596
+ },
11597
+ {
11598
+ "epoch": 7.118088097469541,
11599
+ "grad_norm": 1.881791353225708,
11600
+ "learning_rate": 9.110238987816308e-06,
11601
+ "loss": 0.0662,
11602
+ "step": 15190
11603
+ },
11604
+ {
11605
+ "epoch": 7.122774133083412,
11606
+ "grad_norm": 1.2586263418197632,
11607
+ "learning_rate": 9.109653233364575e-06,
11608
+ "loss": 0.0648,
11609
+ "step": 15200
11610
+ },
11611
+ {
11612
+ "epoch": 7.127460168697282,
11613
+ "grad_norm": 1.9210679531097412,
11614
+ "learning_rate": 9.10906747891284e-06,
11615
+ "loss": 0.0647,
11616
+ "step": 15210
11617
+ },
11618
+ {
11619
+ "epoch": 7.1321462043111525,
11620
+ "grad_norm": 2.3099005222320557,
11621
+ "learning_rate": 9.108481724461107e-06,
11622
+ "loss": 0.0632,
11623
+ "step": 15220
11624
+ },
11625
+ {
11626
+ "epoch": 7.136832239925023,
11627
+ "grad_norm": 2.189209222793579,
11628
+ "learning_rate": 9.107895970009372e-06,
11629
+ "loss": 0.0779,
11630
+ "step": 15230
11631
+ },
11632
+ {
11633
+ "epoch": 7.141518275538894,
11634
+ "grad_norm": 1.2473788261413574,
11635
+ "learning_rate": 9.10731021555764e-06,
11636
+ "loss": 0.0684,
11637
+ "step": 15240
11638
+ },
11639
+ {
11640
+ "epoch": 7.146204311152765,
11641
+ "grad_norm": 1.381177306175232,
11642
+ "learning_rate": 9.106724461105905e-06,
11643
+ "loss": 0.0698,
11644
+ "step": 15250
11645
+ },
11646
+ {
11647
+ "epoch": 7.146204311152765,
11648
+ "eval_loss": 0.039560701698064804,
11649
+ "eval_pearson_cosine": 0.7821626687438226,
11650
+ "eval_pearson_dot": 0.6380954414398445,
11651
+ "eval_pearson_euclidean": 0.7346215848669857,
11652
+ "eval_pearson_manhattan": 0.7341262164749853,
11653
+ "eval_runtime": 40.552,
11654
+ "eval_samples_per_second": 36.99,
11655
+ "eval_spearman_cosine": 0.7855493497996395,
11656
+ "eval_spearman_dot": 0.6551626564215037,
11657
+ "eval_spearman_euclidean": 0.7509105689283286,
11658
+ "eval_spearman_manhattan": 0.7507208072274875,
11659
+ "eval_steps_per_second": 36.99,
11660
+ "step": 15250
11661
+ },
11662
+ {
11663
+ "epoch": 7.150890346766635,
11664
+ "grad_norm": 1.887905478477478,
11665
+ "learning_rate": 9.106138706654172e-06,
11666
+ "loss": 0.0744,
11667
+ "step": 15260
11668
+ },
11669
+ {
11670
+ "epoch": 7.155576382380506,
11671
+ "grad_norm": 1.1103498935699463,
11672
+ "learning_rate": 9.105552952202438e-06,
11673
+ "loss": 0.0614,
11674
+ "step": 15270
11675
+ },
11676
+ {
11677
+ "epoch": 7.160262417994376,
11678
+ "grad_norm": 1.2665252685546875,
11679
+ "learning_rate": 9.104967197750704e-06,
11680
+ "loss": 0.0668,
11681
+ "step": 15280
11682
+ },
11683
+ {
11684
+ "epoch": 7.164948453608248,
11685
+ "grad_norm": 1.8980008363723755,
11686
+ "learning_rate": 9.104381443298969e-06,
11687
+ "loss": 0.0615,
11688
+ "step": 15290
11689
+ },
11690
+ {
11691
+ "epoch": 7.169634489222118,
11692
+ "grad_norm": 1.789542317390442,
11693
+ "learning_rate": 9.103795688847236e-06,
11694
+ "loss": 0.0857,
11695
+ "step": 15300
11696
+ },
11697
+ {
11698
+ "epoch": 7.174320524835989,
11699
+ "grad_norm": 1.84969162940979,
11700
+ "learning_rate": 9.103209934395501e-06,
11701
+ "loss": 0.069,
11702
+ "step": 15310
11703
+ },
11704
+ {
11705
+ "epoch": 7.179006560449859,
11706
+ "grad_norm": 1.0945720672607422,
11707
+ "learning_rate": 9.102624179943768e-06,
11708
+ "loss": 0.0665,
11709
+ "step": 15320
11710
+ },
11711
+ {
11712
+ "epoch": 7.18369259606373,
11713
+ "grad_norm": 1.3088226318359375,
11714
+ "learning_rate": 9.102038425492035e-06,
11715
+ "loss": 0.0747,
11716
+ "step": 15330
11717
+ },
11718
+ {
11719
+ "epoch": 7.188378631677601,
11720
+ "grad_norm": 1.0556889772415161,
11721
+ "learning_rate": 9.1014526710403e-06,
11722
+ "loss": 0.0642,
11723
+ "step": 15340
11724
+ },
11725
+ {
11726
+ "epoch": 7.1930646672914715,
11727
+ "grad_norm": 1.7667440176010132,
11728
+ "learning_rate": 9.100866916588567e-06,
11729
+ "loss": 0.0644,
11730
+ "step": 15350
11731
+ },
11732
+ {
11733
+ "epoch": 7.197750702905342,
11734
+ "grad_norm": 1.3899027109146118,
11735
+ "learning_rate": 9.100281162136832e-06,
11736
+ "loss": 0.0712,
11737
+ "step": 15360
11738
+ },
11739
+ {
11740
+ "epoch": 7.2024367385192125,
11741
+ "grad_norm": 1.9200291633605957,
11742
+ "learning_rate": 9.099695407685099e-06,
11743
+ "loss": 0.08,
11744
+ "step": 15370
11745
+ },
11746
+ {
11747
+ "epoch": 7.207122774133083,
11748
+ "grad_norm": 2.3893768787384033,
11749
+ "learning_rate": 9.099109653233366e-06,
11750
+ "loss": 0.0741,
11751
+ "step": 15380
11752
+ },
11753
+ {
11754
+ "epoch": 7.211808809746954,
11755
+ "grad_norm": 1.2158704996109009,
11756
+ "learning_rate": 9.098523898781631e-06,
11757
+ "loss": 0.0866,
11758
+ "step": 15390
11759
+ },
11760
+ {
11761
+ "epoch": 7.216494845360825,
11762
+ "grad_norm": 2.252181053161621,
11763
+ "learning_rate": 9.097938144329898e-06,
11764
+ "loss": 0.076,
11765
+ "step": 15400
11766
+ },
11767
+ {
11768
+ "epoch": 7.221180880974695,
11769
+ "grad_norm": 1.2606340646743774,
11770
+ "learning_rate": 9.097352389878163e-06,
11771
+ "loss": 0.0661,
11772
+ "step": 15410
11773
+ },
11774
+ {
11775
+ "epoch": 7.225866916588566,
11776
+ "grad_norm": 1.1483300924301147,
11777
+ "learning_rate": 9.096766635426429e-06,
11778
+ "loss": 0.0776,
11779
+ "step": 15420
11780
+ },
11781
+ {
11782
+ "epoch": 7.230552952202436,
11783
+ "grad_norm": 1.4554270505905151,
11784
+ "learning_rate": 9.096180880974697e-06,
11785
+ "loss": 0.0713,
11786
+ "step": 15430
11787
+ },
11788
+ {
11789
+ "epoch": 7.235238987816308,
11790
+ "grad_norm": 1.8985337018966675,
11791
+ "learning_rate": 9.095595126522962e-06,
11792
+ "loss": 0.0787,
11793
+ "step": 15440
11794
+ },
11795
+ {
11796
+ "epoch": 7.239925023430178,
11797
+ "grad_norm": 2.394465208053589,
11798
+ "learning_rate": 9.095009372071228e-06,
11799
+ "loss": 0.0767,
11800
+ "step": 15450
11801
+ },
11802
+ {
11803
+ "epoch": 7.244611059044049,
11804
+ "grad_norm": 1.2314172983169556,
11805
+ "learning_rate": 9.094423617619495e-06,
11806
+ "loss": 0.0647,
11807
+ "step": 15460
11808
+ },
11809
+ {
11810
+ "epoch": 7.249297094657919,
11811
+ "grad_norm": 2.137882947921753,
11812
+ "learning_rate": 9.09383786316776e-06,
11813
+ "loss": 0.0656,
11814
+ "step": 15470
11815
+ },
11816
+ {
11817
+ "epoch": 7.25398313027179,
11818
+ "grad_norm": 1.7702836990356445,
11819
+ "learning_rate": 9.093252108716027e-06,
11820
+ "loss": 0.0576,
11821
+ "step": 15480
11822
+ },
11823
+ {
11824
+ "epoch": 7.258669165885661,
11825
+ "grad_norm": 2.0788486003875732,
11826
+ "learning_rate": 9.092666354264294e-06,
11827
+ "loss": 0.0582,
11828
+ "step": 15490
11829
+ },
11830
+ {
11831
+ "epoch": 7.2633552014995315,
11832
+ "grad_norm": 1.0218828916549683,
11833
+ "learning_rate": 9.092080599812559e-06,
11834
+ "loss": 0.0699,
11835
+ "step": 15500
11836
+ },
11837
+ {
11838
+ "epoch": 7.2633552014995315,
11839
+ "eval_loss": 0.03923952579498291,
11840
+ "eval_pearson_cosine": 0.7819686811712643,
11841
+ "eval_pearson_dot": 0.6466329055139823,
11842
+ "eval_pearson_euclidean": 0.732531031261928,
11843
+ "eval_pearson_manhattan": 0.732247137892952,
11844
+ "eval_runtime": 39.7194,
11845
+ "eval_samples_per_second": 37.765,
11846
+ "eval_spearman_cosine": 0.7850868672642034,
11847
+ "eval_spearman_dot": 0.6628931528870909,
11848
+ "eval_spearman_euclidean": 0.7501575772894145,
11849
+ "eval_spearman_manhattan": 0.7501561306691681,
11850
+ "eval_steps_per_second": 37.765,
11851
+ "step": 15500
11852
+ },
11853
+ {
11854
+ "epoch": 7.268041237113402,
11855
+ "grad_norm": 0.7509507536888123,
11856
+ "learning_rate": 9.091494845360826e-06,
11857
+ "loss": 0.0823,
11858
+ "step": 15510
11859
+ },
11860
+ {
11861
+ "epoch": 7.2727272727272725,
11862
+ "grad_norm": 2.109041690826416,
11863
+ "learning_rate": 9.090909090909091e-06,
11864
+ "loss": 0.0701,
11865
+ "step": 15520
11866
+ },
11867
+ {
11868
+ "epoch": 7.277413308341144,
11869
+ "grad_norm": 1.6811095476150513,
11870
+ "learning_rate": 9.090323336457358e-06,
11871
+ "loss": 0.0726,
11872
+ "step": 15530
11873
+ },
11874
+ {
11875
+ "epoch": 7.282099343955014,
11876
+ "grad_norm": 1.9557669162750244,
11877
+ "learning_rate": 9.089737582005625e-06,
11878
+ "loss": 0.0796,
11879
+ "step": 15540
11880
+ },
11881
+ {
11882
+ "epoch": 7.286785379568885,
11883
+ "grad_norm": 1.6342480182647705,
11884
+ "learning_rate": 9.08915182755389e-06,
11885
+ "loss": 0.0618,
11886
+ "step": 15550
11887
+ },
11888
+ {
11889
+ "epoch": 7.291471415182755,
11890
+ "grad_norm": 1.3443505764007568,
11891
+ "learning_rate": 9.088566073102157e-06,
11892
+ "loss": 0.0767,
11893
+ "step": 15560
11894
+ },
11895
+ {
11896
+ "epoch": 7.296157450796626,
11897
+ "grad_norm": 2.0768396854400635,
11898
+ "learning_rate": 9.087980318650422e-06,
11899
+ "loss": 0.0699,
11900
+ "step": 15570
11901
+ },
11902
+ {
11903
+ "epoch": 7.300843486410496,
11904
+ "grad_norm": 1.989401936531067,
11905
+ "learning_rate": 9.087394564198687e-06,
11906
+ "loss": 0.0706,
11907
+ "step": 15580
11908
+ },
11909
+ {
11910
+ "epoch": 7.305529522024368,
11911
+ "grad_norm": 1.7831469774246216,
11912
+ "learning_rate": 9.086808809746956e-06,
11913
+ "loss": 0.0701,
11914
+ "step": 15590
11915
+ },
11916
+ {
11917
+ "epoch": 7.310215557638238,
11918
+ "grad_norm": 2.3312692642211914,
11919
+ "learning_rate": 9.086223055295221e-06,
11920
+ "loss": 0.0703,
11921
+ "step": 15600
11922
+ },
11923
+ {
11924
+ "epoch": 7.314901593252109,
11925
+ "grad_norm": 1.7669209241867065,
11926
+ "learning_rate": 9.085637300843487e-06,
11927
+ "loss": 0.0646,
11928
+ "step": 15610
11929
+ },
11930
+ {
11931
+ "epoch": 7.319587628865979,
11932
+ "grad_norm": 1.880066156387329,
11933
+ "learning_rate": 9.085051546391753e-06,
11934
+ "loss": 0.0798,
11935
+ "step": 15620
11936
+ },
11937
+ {
11938
+ "epoch": 7.3242736644798505,
11939
+ "grad_norm": 1.3240752220153809,
11940
+ "learning_rate": 9.084465791940019e-06,
11941
+ "loss": 0.0749,
11942
+ "step": 15630
11943
+ },
11944
+ {
11945
+ "epoch": 7.328959700093721,
11946
+ "grad_norm": 1.0103267431259155,
11947
+ "learning_rate": 9.083880037488286e-06,
11948
+ "loss": 0.0635,
11949
+ "step": 15640
11950
+ },
11951
+ {
11952
+ "epoch": 7.3336457357075915,
11953
+ "grad_norm": 1.4677484035491943,
11954
+ "learning_rate": 9.083294283036552e-06,
11955
+ "loss": 0.0705,
11956
+ "step": 15650
11957
+ },
11958
+ {
11959
+ "epoch": 7.338331771321462,
11960
+ "grad_norm": 2.090219736099243,
11961
+ "learning_rate": 9.082708528584818e-06,
11962
+ "loss": 0.0693,
11963
+ "step": 15660
11964
+ },
11965
+ {
11966
+ "epoch": 7.3430178069353325,
11967
+ "grad_norm": 2.349215030670166,
11968
+ "learning_rate": 9.082122774133085e-06,
11969
+ "loss": 0.0714,
11970
+ "step": 15670
11971
+ },
11972
+ {
11973
+ "epoch": 7.347703842549203,
11974
+ "grad_norm": 0.8705586791038513,
11975
+ "learning_rate": 9.08153701968135e-06,
11976
+ "loss": 0.0761,
11977
+ "step": 15680
11978
+ },
11979
+ {
11980
+ "epoch": 7.352389878163074,
11981
+ "grad_norm": 1.986405372619629,
11982
+ "learning_rate": 9.080951265229617e-06,
11983
+ "loss": 0.0579,
11984
+ "step": 15690
11985
+ },
11986
+ {
11987
+ "epoch": 7.357075913776945,
11988
+ "grad_norm": 2.267803430557251,
11989
+ "learning_rate": 9.080365510777884e-06,
11990
+ "loss": 0.0925,
11991
+ "step": 15700
11992
+ },
11993
+ {
11994
+ "epoch": 7.361761949390815,
11995
+ "grad_norm": 1.7816276550292969,
11996
+ "learning_rate": 9.079779756326149e-06,
11997
+ "loss": 0.0713,
11998
+ "step": 15710
11999
+ },
12000
+ {
12001
+ "epoch": 7.366447985004686,
12002
+ "grad_norm": 3.1647868156433105,
12003
+ "learning_rate": 9.079194001874416e-06,
12004
+ "loss": 0.089,
12005
+ "step": 15720
12006
+ },
12007
+ {
12008
+ "epoch": 7.371134020618557,
12009
+ "grad_norm": 2.082855463027954,
12010
+ "learning_rate": 9.078608247422681e-06,
12011
+ "loss": 0.0695,
12012
+ "step": 15730
12013
+ },
12014
+ {
12015
+ "epoch": 7.375820056232428,
12016
+ "grad_norm": 1.4253464937210083,
12017
+ "learning_rate": 9.078022492970946e-06,
12018
+ "loss": 0.0681,
12019
+ "step": 15740
12020
+ },
12021
+ {
12022
+ "epoch": 7.380506091846298,
12023
+ "grad_norm": 1.7833616733551025,
12024
+ "learning_rate": 9.077436738519213e-06,
12025
+ "loss": 0.0739,
12026
+ "step": 15750
12027
+ },
12028
+ {
12029
+ "epoch": 7.380506091846298,
12030
+ "eval_loss": 0.03890243172645569,
12031
+ "eval_pearson_cosine": 0.7865226942731169,
12032
+ "eval_pearson_dot": 0.6411769886141485,
12033
+ "eval_pearson_euclidean": 0.7328063007950192,
12034
+ "eval_pearson_manhattan": 0.7322937842561661,
12035
+ "eval_runtime": 39.8796,
12036
+ "eval_samples_per_second": 37.613,
12037
+ "eval_spearman_cosine": 0.7886056526857715,
12038
+ "eval_spearman_dot": 0.6589435896491915,
12039
+ "eval_spearman_euclidean": 0.7495362761356495,
12040
+ "eval_spearman_manhattan": 0.7491310374131812,
12041
+ "eval_steps_per_second": 37.613,
12042
+ "step": 15750
12043
+ },
12044
+ {
12045
+ "epoch": 7.385192127460169,
12046
+ "grad_norm": 1.368802785873413,
12047
+ "learning_rate": 9.07685098406748e-06,
12048
+ "loss": 0.0634,
12049
+ "step": 15760
12050
+ },
12051
+ {
12052
+ "epoch": 7.389878163074039,
12053
+ "grad_norm": 2.0611209869384766,
12054
+ "learning_rate": 9.076265229615745e-06,
12055
+ "loss": 0.0732,
12056
+ "step": 15770
12057
+ },
12058
+ {
12059
+ "epoch": 7.39456419868791,
12060
+ "grad_norm": 1.3949185609817505,
12061
+ "learning_rate": 9.075679475164012e-06,
12062
+ "loss": 0.0643,
12063
+ "step": 15780
12064
+ },
12065
+ {
12066
+ "epoch": 7.399250234301781,
12067
+ "grad_norm": 2.267596960067749,
12068
+ "learning_rate": 9.075093720712277e-06,
12069
+ "loss": 0.0721,
12070
+ "step": 15790
12071
+ },
12072
+ {
12073
+ "epoch": 7.4039362699156515,
12074
+ "grad_norm": 1.2794581651687622,
12075
+ "learning_rate": 9.074507966260544e-06,
12076
+ "loss": 0.0718,
12077
+ "step": 15800
12078
+ },
12079
+ {
12080
+ "epoch": 7.408622305529522,
12081
+ "grad_norm": 1.8668746948242188,
12082
+ "learning_rate": 9.07392221180881e-06,
12083
+ "loss": 0.0734,
12084
+ "step": 15810
12085
+ },
12086
+ {
12087
+ "epoch": 7.413308341143392,
12088
+ "grad_norm": 2.141602039337158,
12089
+ "learning_rate": 9.073336457357077e-06,
12090
+ "loss": 0.0637,
12091
+ "step": 15820
12092
+ },
12093
+ {
12094
+ "epoch": 7.417994376757264,
12095
+ "grad_norm": 1.9020168781280518,
12096
+ "learning_rate": 9.072750702905343e-06,
12097
+ "loss": 0.0727,
12098
+ "step": 15830
12099
+ },
12100
+ {
12101
+ "epoch": 7.422680412371134,
12102
+ "grad_norm": 1.7302427291870117,
12103
+ "learning_rate": 9.072164948453609e-06,
12104
+ "loss": 0.0742,
12105
+ "step": 15840
12106
+ },
12107
+ {
12108
+ "epoch": 7.427366447985005,
12109
+ "grad_norm": 1.6152589321136475,
12110
+ "learning_rate": 9.071579194001876e-06,
12111
+ "loss": 0.0764,
12112
+ "step": 15850
12113
+ },
12114
+ {
12115
+ "epoch": 7.432052483598875,
12116
+ "grad_norm": 2.491912364959717,
12117
+ "learning_rate": 9.07099343955014e-06,
12118
+ "loss": 0.0746,
12119
+ "step": 15860
12120
+ },
12121
+ {
12122
+ "epoch": 7.436738519212746,
12123
+ "grad_norm": 1.8737932443618774,
12124
+ "learning_rate": 9.070407685098408e-06,
12125
+ "loss": 0.0665,
12126
+ "step": 15870
12127
+ },
12128
+ {
12129
+ "epoch": 7.441424554826616,
12130
+ "grad_norm": 2.3536882400512695,
12131
+ "learning_rate": 9.069821930646675e-06,
12132
+ "loss": 0.0702,
12133
+ "step": 15880
12134
+ },
12135
+ {
12136
+ "epoch": 7.446110590440488,
12137
+ "grad_norm": 1.0954251289367676,
12138
+ "learning_rate": 9.06923617619494e-06,
12139
+ "loss": 0.0768,
12140
+ "step": 15890
12141
+ },
12142
+ {
12143
+ "epoch": 7.450796626054358,
12144
+ "grad_norm": 1.973325490951538,
12145
+ "learning_rate": 9.068650421743205e-06,
12146
+ "loss": 0.0666,
12147
+ "step": 15900
12148
+ },
12149
+ {
12150
+ "epoch": 7.455482661668229,
12151
+ "grad_norm": 0.6486696600914001,
12152
+ "learning_rate": 9.068064667291472e-06,
12153
+ "loss": 0.0813,
12154
+ "step": 15910
12155
+ },
12156
+ {
12157
+ "epoch": 7.460168697282099,
12158
+ "grad_norm": 2.4715213775634766,
12159
+ "learning_rate": 9.067478912839737e-06,
12160
+ "loss": 0.0698,
12161
+ "step": 15920
12162
+ },
12163
+ {
12164
+ "epoch": 7.4648547328959705,
12165
+ "grad_norm": 1.3833788633346558,
12166
+ "learning_rate": 9.066893158388004e-06,
12167
+ "loss": 0.0783,
12168
+ "step": 15930
12169
+ },
12170
+ {
12171
+ "epoch": 7.469540768509841,
12172
+ "grad_norm": 1.5679866075515747,
12173
+ "learning_rate": 9.066307403936271e-06,
12174
+ "loss": 0.0675,
12175
+ "step": 15940
12176
+ },
12177
+ {
12178
+ "epoch": 7.474226804123711,
12179
+ "grad_norm": 1.173086166381836,
12180
+ "learning_rate": 9.065721649484536e-06,
12181
+ "loss": 0.0705,
12182
+ "step": 15950
12183
+ },
12184
+ {
12185
+ "epoch": 7.478912839737582,
12186
+ "grad_norm": 2.0635769367218018,
12187
+ "learning_rate": 9.065135895032803e-06,
12188
+ "loss": 0.0717,
12189
+ "step": 15960
12190
+ },
12191
+ {
12192
+ "epoch": 7.483598875351452,
12193
+ "grad_norm": 2.0800647735595703,
12194
+ "learning_rate": 9.064550140581068e-06,
12195
+ "loss": 0.0712,
12196
+ "step": 15970
12197
+ },
12198
+ {
12199
+ "epoch": 7.488284910965323,
12200
+ "grad_norm": 1.0860838890075684,
12201
+ "learning_rate": 9.063964386129335e-06,
12202
+ "loss": 0.0637,
12203
+ "step": 15980
12204
+ },
12205
+ {
12206
+ "epoch": 7.492970946579194,
12207
+ "grad_norm": 2.794854164123535,
12208
+ "learning_rate": 9.063378631677602e-06,
12209
+ "loss": 0.0799,
12210
+ "step": 15990
12211
+ },
12212
+ {
12213
+ "epoch": 7.497656982193065,
12214
+ "grad_norm": 0.8473652005195618,
12215
+ "learning_rate": 9.062792877225867e-06,
12216
+ "loss": 0.0745,
12217
+ "step": 16000
12218
+ },
12219
+ {
12220
+ "epoch": 7.497656982193065,
12221
+ "eval_loss": 0.039655983448028564,
12222
+ "eval_pearson_cosine": 0.779382095257283,
12223
+ "eval_pearson_dot": 0.6379781011095105,
12224
+ "eval_pearson_euclidean": 0.7372894551077778,
12225
+ "eval_pearson_manhattan": 0.736644904985166,
12226
+ "eval_runtime": 40.2485,
12227
+ "eval_samples_per_second": 37.268,
12228
+ "eval_spearman_cosine": 0.7827440097255054,
12229
+ "eval_spearman_dot": 0.6504351353485877,
12230
+ "eval_spearman_euclidean": 0.752448571801891,
12231
+ "eval_spearman_manhattan": 0.7524283280152466,
12232
+ "eval_steps_per_second": 37.268,
12233
+ "step": 16000
  }
  ],
  "logging_steps": 10,