CocoRoF commited on
Commit
feed1bc
·
verified ·
1 Parent(s): cb6cef5

Training in progress, step 11000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad6a577dd191618d3b00fe33fe0cdf33b81a5c002fe33712258e536fcc1520a8
3
  size 613004648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d7c345217153b5ffddd752365249e312bba97798c34c7461fe730d174535d39
3
  size 613004648
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ffe192963611108e55a8d98be8d1dafc323965fc2794aed1bf226c3a6c17145
3
  size 1226096954
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:528a95b443b1dbf7f94d3bb468ca4f53192fe7e23b946971046c87c0b9045e56
3
  size 1226096954
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5e1f8a9c771c1f6fac3e948b043a309dd550a6e17ac92d5fdcc99c5e406cf1c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b017ee0b8d5b0208434fd4a2a199fc3b93d7395c3472e6edb0f63512bbc20c3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c9a67dc102c0201a7b47ec5ea788b4d7cd182385f8b19e21244eece8e7dbf3b
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:193283a3db7c41b10c55462d4cd2b8ef19c4b39369364b96fdc0d6e4e4e07810
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.686035613870665,
5
  "eval_steps": 250,
6
- "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7647,6 +7647,770 @@
7647
  "eval_spearman_manhattan": 0.7658321542772971,
7648
  "eval_steps_per_second": 37.213,
7649
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7650
  }
7651
  ],
7652
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.154639175257732,
5
  "eval_steps": 250,
6
+ "global_step": 11000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7647
  "eval_spearman_manhattan": 0.7658321542772971,
7648
  "eval_steps_per_second": 37.213,
7649
  "step": 10000
7650
+ },
7651
+ {
7652
+ "epoch": 4.690721649484536,
7653
+ "grad_norm": 2.401972532272339,
7654
+ "learning_rate": 9.413659793814434e-06,
7655
+ "loss": 0.1564,
7656
+ "step": 10010
7657
+ },
7658
+ {
7659
+ "epoch": 4.695407685098407,
7660
+ "grad_norm": 1.4866012334823608,
7661
+ "learning_rate": 9.413074039362699e-06,
7662
+ "loss": 0.1573,
7663
+ "step": 10020
7664
+ },
7665
+ {
7666
+ "epoch": 4.700093720712277,
7667
+ "grad_norm": 2.4179933071136475,
7668
+ "learning_rate": 9.412488284910966e-06,
7669
+ "loss": 0.1613,
7670
+ "step": 10030
7671
+ },
7672
+ {
7673
+ "epoch": 4.704779756326148,
7674
+ "grad_norm": 2.4758639335632324,
7675
+ "learning_rate": 9.411902530459233e-06,
7676
+ "loss": 0.1621,
7677
+ "step": 10040
7678
+ },
7679
+ {
7680
+ "epoch": 4.709465791940019,
7681
+ "grad_norm": 2.450495481491089,
7682
+ "learning_rate": 9.411316776007498e-06,
7683
+ "loss": 0.1355,
7684
+ "step": 10050
7685
+ },
7686
+ {
7687
+ "epoch": 4.71415182755389,
7688
+ "grad_norm": 2.5910768508911133,
7689
+ "learning_rate": 9.410731021555765e-06,
7690
+ "loss": 0.1382,
7691
+ "step": 10060
7692
+ },
7693
+ {
7694
+ "epoch": 4.71883786316776,
7695
+ "grad_norm": 3.6344025135040283,
7696
+ "learning_rate": 9.41014526710403e-06,
7697
+ "loss": 0.1609,
7698
+ "step": 10070
7699
+ },
7700
+ {
7701
+ "epoch": 4.723523898781631,
7702
+ "grad_norm": 2.099355697631836,
7703
+ "learning_rate": 9.409559512652297e-06,
7704
+ "loss": 0.1476,
7705
+ "step": 10080
7706
+ },
7707
+ {
7708
+ "epoch": 4.728209934395501,
7709
+ "grad_norm": 2.334122896194458,
7710
+ "learning_rate": 9.408973758200564e-06,
7711
+ "loss": 0.1444,
7712
+ "step": 10090
7713
+ },
7714
+ {
7715
+ "epoch": 4.7328959700093725,
7716
+ "grad_norm": 2.061877965927124,
7717
+ "learning_rate": 9.408388003748829e-06,
7718
+ "loss": 0.1507,
7719
+ "step": 10100
7720
+ },
7721
+ {
7722
+ "epoch": 4.737582005623243,
7723
+ "grad_norm": 1.8726963996887207,
7724
+ "learning_rate": 9.407802249297096e-06,
7725
+ "loss": 0.1546,
7726
+ "step": 10110
7727
+ },
7728
+ {
7729
+ "epoch": 4.742268041237113,
7730
+ "grad_norm": 2.147475004196167,
7731
+ "learning_rate": 9.407216494845361e-06,
7732
+ "loss": 0.1378,
7733
+ "step": 10120
7734
+ },
7735
+ {
7736
+ "epoch": 4.746954076850984,
7737
+ "grad_norm": 2.2211480140686035,
7738
+ "learning_rate": 9.406630740393628e-06,
7739
+ "loss": 0.1636,
7740
+ "step": 10130
7741
+ },
7742
+ {
7743
+ "epoch": 4.751640112464854,
7744
+ "grad_norm": 2.4492108821868896,
7745
+ "learning_rate": 9.406044985941893e-06,
7746
+ "loss": 0.1485,
7747
+ "step": 10140
7748
+ },
7749
+ {
7750
+ "epoch": 4.756326148078726,
7751
+ "grad_norm": 2.647960662841797,
7752
+ "learning_rate": 9.40545923149016e-06,
7753
+ "loss": 0.1518,
7754
+ "step": 10150
7755
+ },
7756
+ {
7757
+ "epoch": 4.761012183692596,
7758
+ "grad_norm": 2.5408220291137695,
7759
+ "learning_rate": 9.404873477038427e-06,
7760
+ "loss": 0.1088,
7761
+ "step": 10160
7762
+ },
7763
+ {
7764
+ "epoch": 4.765698219306467,
7765
+ "grad_norm": 2.3026010990142822,
7766
+ "learning_rate": 9.404287722586692e-06,
7767
+ "loss": 0.1414,
7768
+ "step": 10170
7769
+ },
7770
+ {
7771
+ "epoch": 4.770384254920337,
7772
+ "grad_norm": 2.1535961627960205,
7773
+ "learning_rate": 9.403701968134958e-06,
7774
+ "loss": 0.1438,
7775
+ "step": 10180
7776
+ },
7777
+ {
7778
+ "epoch": 4.775070290534208,
7779
+ "grad_norm": 2.3178975582122803,
7780
+ "learning_rate": 9.403116213683224e-06,
7781
+ "loss": 0.1579,
7782
+ "step": 10190
7783
+ },
7784
+ {
7785
+ "epoch": 4.779756326148079,
7786
+ "grad_norm": 1.7218929529190063,
7787
+ "learning_rate": 9.402530459231491e-06,
7788
+ "loss": 0.1565,
7789
+ "step": 10200
7790
+ },
7791
+ {
7792
+ "epoch": 4.78444236176195,
7793
+ "grad_norm": 1.7112232446670532,
7794
+ "learning_rate": 9.401944704779757e-06,
7795
+ "loss": 0.1576,
7796
+ "step": 10210
7797
+ },
7798
+ {
7799
+ "epoch": 4.78912839737582,
7800
+ "grad_norm": 2.9099996089935303,
7801
+ "learning_rate": 9.401358950328024e-06,
7802
+ "loss": 0.1404,
7803
+ "step": 10220
7804
+ },
7805
+ {
7806
+ "epoch": 4.793814432989691,
7807
+ "grad_norm": 2.4656243324279785,
7808
+ "learning_rate": 9.400773195876289e-06,
7809
+ "loss": 0.1537,
7810
+ "step": 10230
7811
+ },
7812
+ {
7813
+ "epoch": 4.798500468603561,
7814
+ "grad_norm": 2.836191415786743,
7815
+ "learning_rate": 9.400187441424556e-06,
7816
+ "loss": 0.1444,
7817
+ "step": 10240
7818
+ },
7819
+ {
7820
+ "epoch": 4.803186504217432,
7821
+ "grad_norm": 1.7375587224960327,
7822
+ "learning_rate": 9.399601686972821e-06,
7823
+ "loss": 0.148,
7824
+ "step": 10250
7825
+ },
7826
+ {
7827
+ "epoch": 4.803186504217432,
7828
+ "eval_loss": 0.04357453063130379,
7829
+ "eval_pearson_cosine": 0.7816648988028874,
7830
+ "eval_pearson_dot": 0.617140121100384,
7831
+ "eval_pearson_euclidean": 0.7424818050017805,
7832
+ "eval_pearson_manhattan": 0.74207686822308,
7833
+ "eval_runtime": 40.6143,
7834
+ "eval_samples_per_second": 36.933,
7835
+ "eval_spearman_cosine": 0.7855171487091146,
7836
+ "eval_spearman_dot": 0.623924985286794,
7837
+ "eval_spearman_euclidean": 0.7600678672581802,
7838
+ "eval_spearman_manhattan": 0.7596066338339531,
7839
+ "eval_steps_per_second": 36.933,
7840
+ "step": 10250
7841
+ },
7842
+ {
7843
+ "epoch": 4.807872539831303,
7844
+ "grad_norm": 2.1630876064300537,
7845
+ "learning_rate": 9.399015932521088e-06,
7846
+ "loss": 0.1382,
7847
+ "step": 10260
7848
+ },
7849
+ {
7850
+ "epoch": 4.812558575445173,
7851
+ "grad_norm": 2.035651445388794,
7852
+ "learning_rate": 9.398430178069355e-06,
7853
+ "loss": 0.1449,
7854
+ "step": 10270
7855
+ },
7856
+ {
7857
+ "epoch": 4.817244611059044,
7858
+ "grad_norm": 3.047879219055176,
7859
+ "learning_rate": 9.39784442361762e-06,
7860
+ "loss": 0.1438,
7861
+ "step": 10280
7862
+ },
7863
+ {
7864
+ "epoch": 4.821930646672914,
7865
+ "grad_norm": 3.29184889793396,
7866
+ "learning_rate": 9.397258669165887e-06,
7867
+ "loss": 0.1794,
7868
+ "step": 10290
7869
+ },
7870
+ {
7871
+ "epoch": 4.826616682286786,
7872
+ "grad_norm": 2.301718235015869,
7873
+ "learning_rate": 9.396672914714152e-06,
7874
+ "loss": 0.1511,
7875
+ "step": 10300
7876
+ },
7877
+ {
7878
+ "epoch": 4.831302717900656,
7879
+ "grad_norm": 3.2951526641845703,
7880
+ "learning_rate": 9.396087160262419e-06,
7881
+ "loss": 0.1624,
7882
+ "step": 10310
7883
+ },
7884
+ {
7885
+ "epoch": 4.835988753514527,
7886
+ "grad_norm": 2.597886800765991,
7887
+ "learning_rate": 9.395501405810686e-06,
7888
+ "loss": 0.1464,
7889
+ "step": 10320
7890
+ },
7891
+ {
7892
+ "epoch": 4.840674789128397,
7893
+ "grad_norm": 2.1214399337768555,
7894
+ "learning_rate": 9.394915651358951e-06,
7895
+ "loss": 0.1434,
7896
+ "step": 10330
7897
+ },
7898
+ {
7899
+ "epoch": 4.845360824742268,
7900
+ "grad_norm": 2.453712224960327,
7901
+ "learning_rate": 9.394329896907216e-06,
7902
+ "loss": 0.145,
7903
+ "step": 10340
7904
+ },
7905
+ {
7906
+ "epoch": 4.850046860356139,
7907
+ "grad_norm": 1.6721562147140503,
7908
+ "learning_rate": 9.393744142455483e-06,
7909
+ "loss": 0.1481,
7910
+ "step": 10350
7911
+ },
7912
+ {
7913
+ "epoch": 4.85473289597001,
7914
+ "grad_norm": 3.1657016277313232,
7915
+ "learning_rate": 9.393158388003748e-06,
7916
+ "loss": 0.1661,
7917
+ "step": 10360
7918
+ },
7919
+ {
7920
+ "epoch": 4.85941893158388,
7921
+ "grad_norm": 2.637096881866455,
7922
+ "learning_rate": 9.392572633552015e-06,
7923
+ "loss": 0.1445,
7924
+ "step": 10370
7925
+ },
7926
+ {
7927
+ "epoch": 4.8641049671977505,
7928
+ "grad_norm": 2.9260575771331787,
7929
+ "learning_rate": 9.391986879100282e-06,
7930
+ "loss": 0.1446,
7931
+ "step": 10380
7932
+ },
7933
+ {
7934
+ "epoch": 4.868791002811621,
7935
+ "grad_norm": 2.740488052368164,
7936
+ "learning_rate": 9.391401124648548e-06,
7937
+ "loss": 0.1233,
7938
+ "step": 10390
7939
+ },
7940
+ {
7941
+ "epoch": 4.873477038425492,
7942
+ "grad_norm": 3.028416156768799,
7943
+ "learning_rate": 9.390815370196814e-06,
7944
+ "loss": 0.1496,
7945
+ "step": 10400
7946
+ },
7947
+ {
7948
+ "epoch": 4.878163074039363,
7949
+ "grad_norm": 2.666135787963867,
7950
+ "learning_rate": 9.39022961574508e-06,
7951
+ "loss": 0.1509,
7952
+ "step": 10410
7953
+ },
7954
+ {
7955
+ "epoch": 4.882849109653233,
7956
+ "grad_norm": 1.6811872720718384,
7957
+ "learning_rate": 9.389643861293347e-06,
7958
+ "loss": 0.1288,
7959
+ "step": 10420
7960
+ },
7961
+ {
7962
+ "epoch": 4.887535145267104,
7963
+ "grad_norm": 3.225088357925415,
7964
+ "learning_rate": 9.389058106841614e-06,
7965
+ "loss": 0.1674,
7966
+ "step": 10430
7967
+ },
7968
+ {
7969
+ "epoch": 4.892221180880974,
7970
+ "grad_norm": 2.038783311843872,
7971
+ "learning_rate": 9.388472352389879e-06,
7972
+ "loss": 0.1628,
7973
+ "step": 10440
7974
+ },
7975
+ {
7976
+ "epoch": 4.896907216494846,
7977
+ "grad_norm": 2.5650861263275146,
7978
+ "learning_rate": 9.387886597938146e-06,
7979
+ "loss": 0.1423,
7980
+ "step": 10450
7981
+ },
7982
+ {
7983
+ "epoch": 4.901593252108716,
7984
+ "grad_norm": 2.9860751628875732,
7985
+ "learning_rate": 9.387300843486411e-06,
7986
+ "loss": 0.14,
7987
+ "step": 10460
7988
+ },
7989
+ {
7990
+ "epoch": 4.906279287722587,
7991
+ "grad_norm": 2.6722230911254883,
7992
+ "learning_rate": 9.386715089034678e-06,
7993
+ "loss": 0.1416,
7994
+ "step": 10470
7995
+ },
7996
+ {
7997
+ "epoch": 4.910965323336457,
7998
+ "grad_norm": 2.631300926208496,
7999
+ "learning_rate": 9.386129334582945e-06,
8000
+ "loss": 0.1482,
8001
+ "step": 10480
8002
+ },
8003
+ {
8004
+ "epoch": 4.915651358950328,
8005
+ "grad_norm": 2.792668104171753,
8006
+ "learning_rate": 9.38554358013121e-06,
8007
+ "loss": 0.1639,
8008
+ "step": 10490
8009
+ },
8010
+ {
8011
+ "epoch": 4.920337394564199,
8012
+ "grad_norm": 2.3472137451171875,
8013
+ "learning_rate": 9.384957825679475e-06,
8014
+ "loss": 0.1382,
8015
+ "step": 10500
8016
+ },
8017
+ {
8018
+ "epoch": 4.920337394564199,
8019
+ "eval_loss": 0.04462406784296036,
8020
+ "eval_pearson_cosine": 0.7824275239321139,
8021
+ "eval_pearson_dot": 0.6329557539240795,
8022
+ "eval_pearson_euclidean": 0.7443049370608668,
8023
+ "eval_pearson_manhattan": 0.7436570805807037,
8024
+ "eval_runtime": 39.5855,
8025
+ "eval_samples_per_second": 37.893,
8026
+ "eval_spearman_cosine": 0.7871549274543737,
8027
+ "eval_spearman_dot": 0.6424130372572389,
8028
+ "eval_spearman_euclidean": 0.7624881768092765,
8029
+ "eval_spearman_manhattan": 0.7619956515478402,
8030
+ "eval_steps_per_second": 37.893,
8031
+ "step": 10500
8032
+ },
8033
+ {
8034
+ "epoch": 4.9250234301780695,
8035
+ "grad_norm": 2.011869192123413,
8036
+ "learning_rate": 9.384372071227742e-06,
8037
+ "loss": 0.1682,
8038
+ "step": 10510
8039
+ },
8040
+ {
8041
+ "epoch": 4.92970946579194,
8042
+ "grad_norm": 2.2601213455200195,
8043
+ "learning_rate": 9.383786316776007e-06,
8044
+ "loss": 0.1386,
8045
+ "step": 10520
8046
+ },
8047
+ {
8048
+ "epoch": 4.9343955014058105,
8049
+ "grad_norm": 3.00738263130188,
8050
+ "learning_rate": 9.383200562324274e-06,
8051
+ "loss": 0.1712,
8052
+ "step": 10530
8053
+ },
8054
+ {
8055
+ "epoch": 4.939081537019681,
8056
+ "grad_norm": 2.7528035640716553,
8057
+ "learning_rate": 9.382614807872541e-06,
8058
+ "loss": 0.163,
8059
+ "step": 10540
8060
+ },
8061
+ {
8062
+ "epoch": 4.943767572633552,
8063
+ "grad_norm": 2.2604238986968994,
8064
+ "learning_rate": 9.382029053420806e-06,
8065
+ "loss": 0.1638,
8066
+ "step": 10550
8067
+ },
8068
+ {
8069
+ "epoch": 4.948453608247423,
8070
+ "grad_norm": 2.3817455768585205,
8071
+ "learning_rate": 9.381443298969073e-06,
8072
+ "loss": 0.1478,
8073
+ "step": 10560
8074
+ },
8075
+ {
8076
+ "epoch": 4.953139643861293,
8077
+ "grad_norm": 2.392261028289795,
8078
+ "learning_rate": 9.380857544517338e-06,
8079
+ "loss": 0.1521,
8080
+ "step": 10570
8081
+ },
8082
+ {
8083
+ "epoch": 4.957825679475164,
8084
+ "grad_norm": 2.8620009422302246,
8085
+ "learning_rate": 9.380271790065605e-06,
8086
+ "loss": 0.1477,
8087
+ "step": 10580
8088
+ },
8089
+ {
8090
+ "epoch": 4.962511715089034,
8091
+ "grad_norm": 2.658115863800049,
8092
+ "learning_rate": 9.379686035613872e-06,
8093
+ "loss": 0.1251,
8094
+ "step": 10590
8095
+ },
8096
+ {
8097
+ "epoch": 4.967197750702906,
8098
+ "grad_norm": 2.8949685096740723,
8099
+ "learning_rate": 9.379100281162138e-06,
8100
+ "loss": 0.1595,
8101
+ "step": 10600
8102
+ },
8103
+ {
8104
+ "epoch": 4.971883786316776,
8105
+ "grad_norm": 2.472933292388916,
8106
+ "learning_rate": 9.378514526710404e-06,
8107
+ "loss": 0.1611,
8108
+ "step": 10610
8109
+ },
8110
+ {
8111
+ "epoch": 4.976569821930647,
8112
+ "grad_norm": 2.650047540664673,
8113
+ "learning_rate": 9.37792877225867e-06,
8114
+ "loss": 0.1483,
8115
+ "step": 10620
8116
+ },
8117
+ {
8118
+ "epoch": 4.981255857544517,
8119
+ "grad_norm": 2.293938398361206,
8120
+ "learning_rate": 9.377343017806937e-06,
8121
+ "loss": 0.1535,
8122
+ "step": 10630
8123
+ },
8124
+ {
8125
+ "epoch": 4.985941893158388,
8126
+ "grad_norm": 2.1445188522338867,
8127
+ "learning_rate": 9.376757263355202e-06,
8128
+ "loss": 0.1562,
8129
+ "step": 10640
8130
+ },
8131
+ {
8132
+ "epoch": 4.990627928772259,
8133
+ "grad_norm": 1.898871660232544,
8134
+ "learning_rate": 9.376171508903469e-06,
8135
+ "loss": 0.16,
8136
+ "step": 10650
8137
+ },
8138
+ {
8139
+ "epoch": 4.9953139643861295,
8140
+ "grad_norm": 1.8987932205200195,
8141
+ "learning_rate": 9.375585754451734e-06,
8142
+ "loss": 0.1348,
8143
+ "step": 10660
8144
+ },
8145
+ {
8146
+ "epoch": 5.0,
8147
+ "grad_norm": 1.9247934818267822,
8148
+ "learning_rate": 9.375000000000001e-06,
8149
+ "loss": 0.1357,
8150
+ "step": 10670
8151
+ },
8152
+ {
8153
+ "epoch": 5.0046860356138705,
8154
+ "grad_norm": 2.254408836364746,
8155
+ "learning_rate": 9.374414245548266e-06,
8156
+ "loss": 0.0979,
8157
+ "step": 10680
8158
+ },
8159
+ {
8160
+ "epoch": 5.009372071227741,
8161
+ "grad_norm": 2.336634874343872,
8162
+ "learning_rate": 9.373828491096533e-06,
8163
+ "loss": 0.1008,
8164
+ "step": 10690
8165
+ },
8166
+ {
8167
+ "epoch": 5.014058106841612,
8168
+ "grad_norm": 2.0158193111419678,
8169
+ "learning_rate": 9.3732427366448e-06,
8170
+ "loss": 0.1206,
8171
+ "step": 10700
8172
+ },
8173
+ {
8174
+ "epoch": 5.018744142455483,
8175
+ "grad_norm": 2.12335205078125,
8176
+ "learning_rate": 9.372656982193065e-06,
8177
+ "loss": 0.1153,
8178
+ "step": 10710
8179
+ },
8180
+ {
8181
+ "epoch": 5.023430178069353,
8182
+ "grad_norm": 1.9908068180084229,
8183
+ "learning_rate": 9.372071227741332e-06,
8184
+ "loss": 0.096,
8185
+ "step": 10720
8186
+ },
8187
+ {
8188
+ "epoch": 5.028116213683224,
8189
+ "grad_norm": 2.713801145553589,
8190
+ "learning_rate": 9.371485473289597e-06,
8191
+ "loss": 0.1183,
8192
+ "step": 10730
8193
+ },
8194
+ {
8195
+ "epoch": 5.032802249297094,
8196
+ "grad_norm": 1.922958493232727,
8197
+ "learning_rate": 9.370899718837864e-06,
8198
+ "loss": 0.0975,
8199
+ "step": 10740
8200
+ },
8201
+ {
8202
+ "epoch": 5.037488284910966,
8203
+ "grad_norm": 1.787152647972107,
8204
+ "learning_rate": 9.37031396438613e-06,
8205
+ "loss": 0.1109,
8206
+ "step": 10750
8207
+ },
8208
+ {
8209
+ "epoch": 5.037488284910966,
8210
+ "eval_loss": 0.042590245604515076,
8211
+ "eval_pearson_cosine": 0.7796460620464813,
8212
+ "eval_pearson_dot": 0.6195124617279077,
8213
+ "eval_pearson_euclidean": 0.7433733944090761,
8214
+ "eval_pearson_manhattan": 0.7430945401829945,
8215
+ "eval_runtime": 39.7087,
8216
+ "eval_samples_per_second": 37.775,
8217
+ "eval_spearman_cosine": 0.7845566035417548,
8218
+ "eval_spearman_dot": 0.6248763613915714,
8219
+ "eval_spearman_euclidean": 0.7601630737316083,
8220
+ "eval_spearman_manhattan": 0.7599814027838542,
8221
+ "eval_steps_per_second": 37.775,
8222
+ "step": 10750
8223
+ },
8224
+ {
8225
+ "epoch": 5.042174320524836,
8226
+ "grad_norm": 2.3772122859954834,
8227
+ "learning_rate": 9.369728209934396e-06,
8228
+ "loss": 0.1063,
8229
+ "step": 10760
8230
+ },
8231
+ {
8232
+ "epoch": 5.046860356138707,
8233
+ "grad_norm": 2.4249303340911865,
8234
+ "learning_rate": 9.369142455482663e-06,
8235
+ "loss": 0.0909,
8236
+ "step": 10770
8237
+ },
8238
+ {
8239
+ "epoch": 5.051546391752577,
8240
+ "grad_norm": 2.354619264602661,
8241
+ "learning_rate": 9.368556701030928e-06,
8242
+ "loss": 0.1121,
8243
+ "step": 10780
8244
+ },
8245
+ {
8246
+ "epoch": 5.056232427366448,
8247
+ "grad_norm": 2.3761093616485596,
8248
+ "learning_rate": 9.367970946579195e-06,
8249
+ "loss": 0.0962,
8250
+ "step": 10790
8251
+ },
8252
+ {
8253
+ "epoch": 5.060918462980319,
8254
+ "grad_norm": 1.9781490564346313,
8255
+ "learning_rate": 9.36738519212746e-06,
8256
+ "loss": 0.1159,
8257
+ "step": 10800
8258
+ },
8259
+ {
8260
+ "epoch": 5.0656044985941895,
8261
+ "grad_norm": 2.9498939514160156,
8262
+ "learning_rate": 9.366799437675728e-06,
8263
+ "loss": 0.1046,
8264
+ "step": 10810
8265
+ },
8266
+ {
8267
+ "epoch": 5.07029053420806,
8268
+ "grad_norm": 1.618909478187561,
8269
+ "learning_rate": 9.366213683223993e-06,
8270
+ "loss": 0.1145,
8271
+ "step": 10820
8272
+ },
8273
+ {
8274
+ "epoch": 5.0749765698219305,
8275
+ "grad_norm": 1.5350381135940552,
8276
+ "learning_rate": 9.36562792877226e-06,
8277
+ "loss": 0.125,
8278
+ "step": 10830
8279
+ },
8280
+ {
8281
+ "epoch": 5.079662605435801,
8282
+ "grad_norm": 2.1376562118530273,
8283
+ "learning_rate": 9.365042174320525e-06,
8284
+ "loss": 0.11,
8285
+ "step": 10840
8286
+ },
8287
+ {
8288
+ "epoch": 5.084348641049672,
8289
+ "grad_norm": 2.638848066329956,
8290
+ "learning_rate": 9.364456419868792e-06,
8291
+ "loss": 0.1257,
8292
+ "step": 10850
8293
+ },
8294
+ {
8295
+ "epoch": 5.089034676663543,
8296
+ "grad_norm": 2.5730180740356445,
8297
+ "learning_rate": 9.363870665417057e-06,
8298
+ "loss": 0.1131,
8299
+ "step": 10860
8300
+ },
8301
+ {
8302
+ "epoch": 5.093720712277413,
8303
+ "grad_norm": 1.658488392829895,
8304
+ "learning_rate": 9.363284910965324e-06,
8305
+ "loss": 0.1095,
8306
+ "step": 10870
8307
+ },
8308
+ {
8309
+ "epoch": 5.098406747891284,
8310
+ "grad_norm": 2.5831501483917236,
8311
+ "learning_rate": 9.362699156513591e-06,
8312
+ "loss": 0.1159,
8313
+ "step": 10880
8314
+ },
8315
+ {
8316
+ "epoch": 5.103092783505154,
8317
+ "grad_norm": 1.3760308027267456,
8318
+ "learning_rate": 9.362113402061856e-06,
8319
+ "loss": 0.1052,
8320
+ "step": 10890
8321
+ },
8322
+ {
8323
+ "epoch": 5.107778819119026,
8324
+ "grad_norm": 1.529236078262329,
8325
+ "learning_rate": 9.361527647610123e-06,
8326
+ "loss": 0.1132,
8327
+ "step": 10900
8328
+ },
8329
+ {
8330
+ "epoch": 5.112464854732896,
8331
+ "grad_norm": 1.9166224002838135,
8332
+ "learning_rate": 9.360941893158388e-06,
8333
+ "loss": 0.1142,
8334
+ "step": 10910
8335
+ },
8336
+ {
8337
+ "epoch": 5.117150890346767,
8338
+ "grad_norm": 2.521615505218506,
8339
+ "learning_rate": 9.360356138706655e-06,
8340
+ "loss": 0.1222,
8341
+ "step": 10920
8342
+ },
8343
+ {
8344
+ "epoch": 5.121836925960637,
8345
+ "grad_norm": 2.1560580730438232,
8346
+ "learning_rate": 9.359770384254922e-06,
8347
+ "loss": 0.1081,
8348
+ "step": 10930
8349
+ },
8350
+ {
8351
+ "epoch": 5.126522961574508,
8352
+ "grad_norm": 1.3674089908599854,
8353
+ "learning_rate": 9.359184629803187e-06,
8354
+ "loss": 0.1198,
8355
+ "step": 10940
8356
+ },
8357
+ {
8358
+ "epoch": 5.131208997188379,
8359
+ "grad_norm": 2.396967649459839,
8360
+ "learning_rate": 9.358598875351454e-06,
8361
+ "loss": 0.1101,
8362
+ "step": 10950
8363
+ },
8364
+ {
8365
+ "epoch": 5.1358950328022495,
8366
+ "grad_norm": 1.6446019411087036,
8367
+ "learning_rate": 9.35801312089972e-06,
8368
+ "loss": 0.096,
8369
+ "step": 10960
8370
+ },
8371
+ {
8372
+ "epoch": 5.14058106841612,
8373
+ "grad_norm": 2.565040349960327,
8374
+ "learning_rate": 9.357427366447985e-06,
8375
+ "loss": 0.1047,
8376
+ "step": 10970
8377
+ },
8378
+ {
8379
+ "epoch": 5.14526710402999,
8380
+ "grad_norm": 1.8515427112579346,
8381
+ "learning_rate": 9.356841611996252e-06,
8382
+ "loss": 0.0955,
8383
+ "step": 10980
8384
+ },
8385
+ {
8386
+ "epoch": 5.149953139643861,
8387
+ "grad_norm": 2.294940233230591,
8388
+ "learning_rate": 9.356255857544518e-06,
8389
+ "loss": 0.1212,
8390
+ "step": 10990
8391
+ },
8392
+ {
8393
+ "epoch": 5.154639175257732,
8394
+ "grad_norm": 1.995937466621399,
8395
+ "learning_rate": 9.355670103092784e-06,
8396
+ "loss": 0.1009,
8397
+ "step": 11000
8398
+ },
8399
+ {
8400
+ "epoch": 5.154639175257732,
8401
+ "eval_loss": 0.04305338114500046,
8402
+ "eval_pearson_cosine": 0.780686355713101,
8403
+ "eval_pearson_dot": 0.6237317182336994,
8404
+ "eval_pearson_euclidean": 0.7427541562137776,
8405
+ "eval_pearson_manhattan": 0.7423336637021691,
8406
+ "eval_runtime": 40.3116,
8407
+ "eval_samples_per_second": 37.21,
8408
+ "eval_spearman_cosine": 0.7834710778584321,
8409
+ "eval_spearman_dot": 0.637744309819412,
8410
+ "eval_spearman_euclidean": 0.7591336014689473,
8411
+ "eval_spearman_manhattan": 0.7590724893258253,
8412
+ "eval_steps_per_second": 37.21,
8413
+ "step": 11000
8414
  }
8415
  ],
8416
  "logging_steps": 10,