CocoRoF commited on
Commit
112afef
·
verified ·
1 Parent(s): 58d0ddc

Training in progress, step 6000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24857bc4b164a0fc3e6179ea07b38593c6c2d12e6d0172627df2d50db6fea93d
3
  size 737580392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bea384ab228f9e4e48eb09750911bf53afac4e27ba8e57f44979634ad866b1c1
3
  size 737580392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd9a1d0b8c63d7b12cdde7d50236f1a919f3ab5b10dee3efa64febf5e045dd28
3
  size 1475248442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a64a6a144fbcea9043ebf95d58cf6dc0de1643977d324340c43b19a2a87324c9
3
  size 1475248442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dce7b39374d406c6e9a2e0a52c3d989921f1d308f3e0a976795e1defff56359
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e59fd29507b0a3f94de60acf1485068cfbd28d4220459a98545dc01f241293d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fed7e74ecc01b85f3fc063ed097046907474fb7f4b9c36a632434be797d10982
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:923c446f373ad2f0ffab5651c7c112ff9f6139581eac3a16834e284a234062e7
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.3430178069353325,
5
  "eval_steps": 250,
6
- "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3827,6 +3827,770 @@
3827
  "eval_spearman_manhattan": 0.807850996089844,
3828
  "eval_steps_per_second": 28.467,
3829
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3830
  }
3831
  ],
3832
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.8116213683223994,
5
  "eval_steps": 250,
6
+ "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3827
  "eval_spearman_manhattan": 0.807850996089844,
3828
  "eval_steps_per_second": 28.467,
3829
  "step": 5000
3830
+ },
3831
+ {
3832
+ "epoch": 2.3477038425492034,
3833
+ "grad_norm": 1.9489047527313232,
3834
+ "learning_rate": 1.8532685098406748e-05,
3835
+ "loss": 0.1478,
3836
+ "step": 5010
3837
+ },
3838
+ {
3839
+ "epoch": 2.352389878163074,
3840
+ "grad_norm": 1.7943733930587769,
3841
+ "learning_rate": 1.852975632614808e-05,
3842
+ "loss": 0.1447,
3843
+ "step": 5020
3844
+ },
3845
+ {
3846
+ "epoch": 2.357075913776945,
3847
+ "grad_norm": 1.9680578708648682,
3848
+ "learning_rate": 1.852682755388941e-05,
3849
+ "loss": 0.158,
3850
+ "step": 5030
3851
+ },
3852
+ {
3853
+ "epoch": 2.3617619493908153,
3854
+ "grad_norm": 1.76882004737854,
3855
+ "learning_rate": 1.8523898781630743e-05,
3856
+ "loss": 0.1616,
3857
+ "step": 5040
3858
+ },
3859
+ {
3860
+ "epoch": 2.3664479850046862,
3861
+ "grad_norm": 1.7545366287231445,
3862
+ "learning_rate": 1.8520970009372073e-05,
3863
+ "loss": 0.1748,
3864
+ "step": 5050
3865
+ },
3866
+ {
3867
+ "epoch": 2.3711340206185567,
3868
+ "grad_norm": 1.8479957580566406,
3869
+ "learning_rate": 1.8518041237113406e-05,
3870
+ "loss": 0.1536,
3871
+ "step": 5060
3872
+ },
3873
+ {
3874
+ "epoch": 2.375820056232427,
3875
+ "grad_norm": 2.2647321224212646,
3876
+ "learning_rate": 1.8515112464854735e-05,
3877
+ "loss": 0.169,
3878
+ "step": 5070
3879
+ },
3880
+ {
3881
+ "epoch": 2.380506091846298,
3882
+ "grad_norm": 2.5361509323120117,
3883
+ "learning_rate": 1.8512183692596065e-05,
3884
+ "loss": 0.1664,
3885
+ "step": 5080
3886
+ },
3887
+ {
3888
+ "epoch": 2.3851921274601686,
3889
+ "grad_norm": 1.6961290836334229,
3890
+ "learning_rate": 1.8509254920337398e-05,
3891
+ "loss": 0.1581,
3892
+ "step": 5090
3893
+ },
3894
+ {
3895
+ "epoch": 2.3898781630740396,
3896
+ "grad_norm": 2.0420143604278564,
3897
+ "learning_rate": 1.8506326148078727e-05,
3898
+ "loss": 0.1761,
3899
+ "step": 5100
3900
+ },
3901
+ {
3902
+ "epoch": 2.39456419868791,
3903
+ "grad_norm": 1.5825088024139404,
3904
+ "learning_rate": 1.8503397375820057e-05,
3905
+ "loss": 0.1462,
3906
+ "step": 5110
3907
+ },
3908
+ {
3909
+ "epoch": 2.3992502343017805,
3910
+ "grad_norm": 1.1550185680389404,
3911
+ "learning_rate": 1.8500468603561386e-05,
3912
+ "loss": 0.1612,
3913
+ "step": 5120
3914
+ },
3915
+ {
3916
+ "epoch": 2.4039362699156515,
3917
+ "grad_norm": 1.1477668285369873,
3918
+ "learning_rate": 1.849753983130272e-05,
3919
+ "loss": 0.168,
3920
+ "step": 5130
3921
+ },
3922
+ {
3923
+ "epoch": 2.408622305529522,
3924
+ "grad_norm": 2.5751688480377197,
3925
+ "learning_rate": 1.849461105904405e-05,
3926
+ "loss": 0.1737,
3927
+ "step": 5140
3928
+ },
3929
+ {
3930
+ "epoch": 2.413308341143393,
3931
+ "grad_norm": 0.8213298916816711,
3932
+ "learning_rate": 1.849168228678538e-05,
3933
+ "loss": 0.1528,
3934
+ "step": 5150
3935
+ },
3936
+ {
3937
+ "epoch": 2.4179943767572634,
3938
+ "grad_norm": 2.1841747760772705,
3939
+ "learning_rate": 1.848875351452671e-05,
3940
+ "loss": 0.1658,
3941
+ "step": 5160
3942
+ },
3943
+ {
3944
+ "epoch": 2.422680412371134,
3945
+ "grad_norm": 1.1756603717803955,
3946
+ "learning_rate": 1.8485824742268044e-05,
3947
+ "loss": 0.1593,
3948
+ "step": 5170
3949
+ },
3950
+ {
3951
+ "epoch": 2.427366447985005,
3952
+ "grad_norm": 1.4406476020812988,
3953
+ "learning_rate": 1.8482895970009373e-05,
3954
+ "loss": 0.1725,
3955
+ "step": 5180
3956
+ },
3957
+ {
3958
+ "epoch": 2.4320524835988753,
3959
+ "grad_norm": 1.0863920450210571,
3960
+ "learning_rate": 1.8479967197750703e-05,
3961
+ "loss": 0.1629,
3962
+ "step": 5190
3963
+ },
3964
+ {
3965
+ "epoch": 2.436738519212746,
3966
+ "grad_norm": 1.736379861831665,
3967
+ "learning_rate": 1.8477038425492036e-05,
3968
+ "loss": 0.1452,
3969
+ "step": 5200
3970
+ },
3971
+ {
3972
+ "epoch": 2.4414245548266167,
3973
+ "grad_norm": 1.3213779926300049,
3974
+ "learning_rate": 1.8474109653233365e-05,
3975
+ "loss": 0.1654,
3976
+ "step": 5210
3977
+ },
3978
+ {
3979
+ "epoch": 2.446110590440487,
3980
+ "grad_norm": 1.8341721296310425,
3981
+ "learning_rate": 1.8471180880974698e-05,
3982
+ "loss": 0.149,
3983
+ "step": 5220
3984
+ },
3985
+ {
3986
+ "epoch": 2.450796626054358,
3987
+ "grad_norm": 1.6373144388198853,
3988
+ "learning_rate": 1.8468252108716028e-05,
3989
+ "loss": 0.1769,
3990
+ "step": 5230
3991
+ },
3992
+ {
3993
+ "epoch": 2.4554826616682286,
3994
+ "grad_norm": 1.9199351072311401,
3995
+ "learning_rate": 1.846532333645736e-05,
3996
+ "loss": 0.1848,
3997
+ "step": 5240
3998
+ },
3999
+ {
4000
+ "epoch": 2.4601686972820995,
4001
+ "grad_norm": 1.5321757793426514,
4002
+ "learning_rate": 1.846239456419869e-05,
4003
+ "loss": 0.166,
4004
+ "step": 5250
4005
+ },
4006
+ {
4007
+ "epoch": 2.4601686972820995,
4008
+ "eval_loss": 0.033459678292274475,
4009
+ "eval_pearson_cosine": 0.81833954439713,
4010
+ "eval_pearson_dot": 0.7450131051603677,
4011
+ "eval_pearson_euclidean": 0.7997398913396002,
4012
+ "eval_pearson_manhattan": 0.8015232151946456,
4013
+ "eval_runtime": 3.3372,
4014
+ "eval_samples_per_second": 449.478,
4015
+ "eval_spearman_cosine": 0.818760866631701,
4016
+ "eval_spearman_dot": 0.7445336159751238,
4017
+ "eval_spearman_euclidean": 0.8090651280572486,
4018
+ "eval_spearman_manhattan": 0.8107108068343578,
4019
+ "eval_steps_per_second": 28.167,
4020
+ "step": 5250
4021
+ },
4022
+ {
4023
+ "epoch": 2.46485473289597,
4024
+ "grad_norm": 2.2415823936462402,
4025
+ "learning_rate": 1.845946579194002e-05,
4026
+ "loss": 0.1606,
4027
+ "step": 5260
4028
+ },
4029
+ {
4030
+ "epoch": 2.4695407685098405,
4031
+ "grad_norm": 0.9711971879005432,
4032
+ "learning_rate": 1.8456537019681352e-05,
4033
+ "loss": 0.1546,
4034
+ "step": 5270
4035
+ },
4036
+ {
4037
+ "epoch": 2.4742268041237114,
4038
+ "grad_norm": 1.7712616920471191,
4039
+ "learning_rate": 1.8453608247422682e-05,
4040
+ "loss": 0.1725,
4041
+ "step": 5280
4042
+ },
4043
+ {
4044
+ "epoch": 2.478912839737582,
4045
+ "grad_norm": 1.9109606742858887,
4046
+ "learning_rate": 1.845067947516401e-05,
4047
+ "loss": 0.175,
4048
+ "step": 5290
4049
+ },
4050
+ {
4051
+ "epoch": 2.483598875351453,
4052
+ "grad_norm": 1.4726054668426514,
4053
+ "learning_rate": 1.8447750702905344e-05,
4054
+ "loss": 0.1432,
4055
+ "step": 5300
4056
+ },
4057
+ {
4058
+ "epoch": 2.4882849109653233,
4059
+ "grad_norm": 1.5022660493850708,
4060
+ "learning_rate": 1.8444821930646674e-05,
4061
+ "loss": 0.1682,
4062
+ "step": 5310
4063
+ },
4064
+ {
4065
+ "epoch": 2.492970946579194,
4066
+ "grad_norm": 2.4403464794158936,
4067
+ "learning_rate": 1.8441893158388003e-05,
4068
+ "loss": 0.1554,
4069
+ "step": 5320
4070
+ },
4071
+ {
4072
+ "epoch": 2.4976569821930648,
4073
+ "grad_norm": 2.6438190937042236,
4074
+ "learning_rate": 1.8438964386129336e-05,
4075
+ "loss": 0.1874,
4076
+ "step": 5330
4077
+ },
4078
+ {
4079
+ "epoch": 2.5023430178069352,
4080
+ "grad_norm": 1.2486696243286133,
4081
+ "learning_rate": 1.8436035613870666e-05,
4082
+ "loss": 0.179,
4083
+ "step": 5340
4084
+ },
4085
+ {
4086
+ "epoch": 2.5070290534208057,
4087
+ "grad_norm": 1.551320195198059,
4088
+ "learning_rate": 1.8433106841612e-05,
4089
+ "loss": 0.1498,
4090
+ "step": 5350
4091
+ },
4092
+ {
4093
+ "epoch": 2.5117150890346767,
4094
+ "grad_norm": 1.7184678316116333,
4095
+ "learning_rate": 1.8430178069353328e-05,
4096
+ "loss": 0.1643,
4097
+ "step": 5360
4098
+ },
4099
+ {
4100
+ "epoch": 2.5164011246485476,
4101
+ "grad_norm": 1.7342448234558105,
4102
+ "learning_rate": 1.842724929709466e-05,
4103
+ "loss": 0.158,
4104
+ "step": 5370
4105
+ },
4106
+ {
4107
+ "epoch": 2.521087160262418,
4108
+ "grad_norm": 1.900806188583374,
4109
+ "learning_rate": 1.842432052483599e-05,
4110
+ "loss": 0.1706,
4111
+ "step": 5380
4112
+ },
4113
+ {
4114
+ "epoch": 2.5257731958762886,
4115
+ "grad_norm": 1.599795937538147,
4116
+ "learning_rate": 1.842139175257732e-05,
4117
+ "loss": 0.1878,
4118
+ "step": 5390
4119
+ },
4120
+ {
4121
+ "epoch": 2.530459231490159,
4122
+ "grad_norm": 1.4454323053359985,
4123
+ "learning_rate": 1.8418462980318653e-05,
4124
+ "loss": 0.1655,
4125
+ "step": 5400
4126
+ },
4127
+ {
4128
+ "epoch": 2.53514526710403,
4129
+ "grad_norm": 1.7434407472610474,
4130
+ "learning_rate": 1.8415534208059982e-05,
4131
+ "loss": 0.1503,
4132
+ "step": 5410
4133
+ },
4134
+ {
4135
+ "epoch": 2.539831302717901,
4136
+ "grad_norm": 2.328422784805298,
4137
+ "learning_rate": 1.8412605435801315e-05,
4138
+ "loss": 0.1702,
4139
+ "step": 5420
4140
+ },
4141
+ {
4142
+ "epoch": 2.5445173383317714,
4143
+ "grad_norm": 1.7260003089904785,
4144
+ "learning_rate": 1.8409676663542645e-05,
4145
+ "loss": 0.1624,
4146
+ "step": 5430
4147
+ },
4148
+ {
4149
+ "epoch": 2.549203373945642,
4150
+ "grad_norm": 1.7711926698684692,
4151
+ "learning_rate": 1.8406747891283974e-05,
4152
+ "loss": 0.1609,
4153
+ "step": 5440
4154
+ },
4155
+ {
4156
+ "epoch": 2.5538894095595124,
4157
+ "grad_norm": 1.963535189628601,
4158
+ "learning_rate": 1.8403819119025307e-05,
4159
+ "loss": 0.1881,
4160
+ "step": 5450
4161
+ },
4162
+ {
4163
+ "epoch": 2.5585754451733833,
4164
+ "grad_norm": 1.5332142114639282,
4165
+ "learning_rate": 1.8400890346766637e-05,
4166
+ "loss": 0.1891,
4167
+ "step": 5460
4168
+ },
4169
+ {
4170
+ "epoch": 2.5632614807872542,
4171
+ "grad_norm": 1.4771735668182373,
4172
+ "learning_rate": 1.8397961574507966e-05,
4173
+ "loss": 0.1455,
4174
+ "step": 5470
4175
+ },
4176
+ {
4177
+ "epoch": 2.5679475164011247,
4178
+ "grad_norm": 1.3587532043457031,
4179
+ "learning_rate": 1.83950328022493e-05,
4180
+ "loss": 0.1579,
4181
+ "step": 5480
4182
+ },
4183
+ {
4184
+ "epoch": 2.572633552014995,
4185
+ "grad_norm": 1.5083823204040527,
4186
+ "learning_rate": 1.839210402999063e-05,
4187
+ "loss": 0.1624,
4188
+ "step": 5490
4189
+ },
4190
+ {
4191
+ "epoch": 2.5773195876288657,
4192
+ "grad_norm": 2.0399646759033203,
4193
+ "learning_rate": 1.8389175257731958e-05,
4194
+ "loss": 0.1572,
4195
+ "step": 5500
4196
+ },
4197
+ {
4198
+ "epoch": 2.5773195876288657,
4199
+ "eval_loss": 0.03521975129842758,
4200
+ "eval_pearson_cosine": 0.8123064687585853,
4201
+ "eval_pearson_dot": 0.7368072149645286,
4202
+ "eval_pearson_euclidean": 0.8002505346194582,
4203
+ "eval_pearson_manhattan": 0.8020724209892114,
4204
+ "eval_runtime": 3.2078,
4205
+ "eval_samples_per_second": 467.604,
4206
+ "eval_spearman_cosine": 0.8135314473340735,
4207
+ "eval_spearman_dot": 0.7336319115548467,
4208
+ "eval_spearman_euclidean": 0.8084064839841847,
4209
+ "eval_spearman_manhattan": 0.8100456053831014,
4210
+ "eval_steps_per_second": 29.303,
4211
+ "step": 5500
4212
+ },
4213
+ {
4214
+ "epoch": 2.5820056232427366,
4215
+ "grad_norm": 1.6468899250030518,
4216
+ "learning_rate": 1.838624648547329e-05,
4217
+ "loss": 0.1884,
4218
+ "step": 5510
4219
+ },
4220
+ {
4221
+ "epoch": 2.5866916588566076,
4222
+ "grad_norm": 1.9936749935150146,
4223
+ "learning_rate": 1.838331771321462e-05,
4224
+ "loss": 0.1692,
4225
+ "step": 5520
4226
+ },
4227
+ {
4228
+ "epoch": 2.591377694470478,
4229
+ "grad_norm": 2.251502513885498,
4230
+ "learning_rate": 1.8380388940955953e-05,
4231
+ "loss": 0.1657,
4232
+ "step": 5530
4233
+ },
4234
+ {
4235
+ "epoch": 2.5960637300843485,
4236
+ "grad_norm": 1.6334820985794067,
4237
+ "learning_rate": 1.8377460168697283e-05,
4238
+ "loss": 0.1692,
4239
+ "step": 5540
4240
+ },
4241
+ {
4242
+ "epoch": 2.600749765698219,
4243
+ "grad_norm": 1.9554181098937988,
4244
+ "learning_rate": 1.8374531396438616e-05,
4245
+ "loss": 0.167,
4246
+ "step": 5550
4247
+ },
4248
+ {
4249
+ "epoch": 2.60543580131209,
4250
+ "grad_norm": 1.866745114326477,
4251
+ "learning_rate": 1.8371602624179945e-05,
4252
+ "loss": 0.1508,
4253
+ "step": 5560
4254
+ },
4255
+ {
4256
+ "epoch": 2.610121836925961,
4257
+ "grad_norm": 1.533691644668579,
4258
+ "learning_rate": 1.8368673851921278e-05,
4259
+ "loss": 0.1726,
4260
+ "step": 5570
4261
+ },
4262
+ {
4263
+ "epoch": 2.6148078725398314,
4264
+ "grad_norm": 1.8526815176010132,
4265
+ "learning_rate": 1.8365745079662608e-05,
4266
+ "loss": 0.1714,
4267
+ "step": 5580
4268
+ },
4269
+ {
4270
+ "epoch": 2.619493908153702,
4271
+ "grad_norm": 1.5112234354019165,
4272
+ "learning_rate": 1.8362816307403937e-05,
4273
+ "loss": 0.1424,
4274
+ "step": 5590
4275
+ },
4276
+ {
4277
+ "epoch": 2.624179943767573,
4278
+ "grad_norm": 1.8528941869735718,
4279
+ "learning_rate": 1.835988753514527e-05,
4280
+ "loss": 0.1677,
4281
+ "step": 5600
4282
+ },
4283
+ {
4284
+ "epoch": 2.6288659793814433,
4285
+ "grad_norm": 1.9312357902526855,
4286
+ "learning_rate": 1.83569587628866e-05,
4287
+ "loss": 0.1505,
4288
+ "step": 5610
4289
+ },
4290
+ {
4291
+ "epoch": 2.633552014995314,
4292
+ "grad_norm": 1.429540753364563,
4293
+ "learning_rate": 1.8354029990627932e-05,
4294
+ "loss": 0.1559,
4295
+ "step": 5620
4296
+ },
4297
+ {
4298
+ "epoch": 2.6382380506091847,
4299
+ "grad_norm": 1.532606840133667,
4300
+ "learning_rate": 1.8351101218369262e-05,
4301
+ "loss": 0.1601,
4302
+ "step": 5630
4303
+ },
4304
+ {
4305
+ "epoch": 2.642924086223055,
4306
+ "grad_norm": 2.127380847930908,
4307
+ "learning_rate": 1.834817244611059e-05,
4308
+ "loss": 0.1992,
4309
+ "step": 5640
4310
+ },
4311
+ {
4312
+ "epoch": 2.647610121836926,
4313
+ "grad_norm": 1.3522067070007324,
4314
+ "learning_rate": 1.834524367385192e-05,
4315
+ "loss": 0.1629,
4316
+ "step": 5650
4317
+ },
4318
+ {
4319
+ "epoch": 2.6522961574507966,
4320
+ "grad_norm": 2.4547903537750244,
4321
+ "learning_rate": 1.8342314901593254e-05,
4322
+ "loss": 0.1817,
4323
+ "step": 5660
4324
+ },
4325
+ {
4326
+ "epoch": 2.6569821930646675,
4327
+ "grad_norm": 1.7367674112319946,
4328
+ "learning_rate": 1.8339386129334583e-05,
4329
+ "loss": 0.1764,
4330
+ "step": 5670
4331
+ },
4332
+ {
4333
+ "epoch": 2.661668228678538,
4334
+ "grad_norm": 1.256063461303711,
4335
+ "learning_rate": 1.8336457357075916e-05,
4336
+ "loss": 0.1532,
4337
+ "step": 5680
4338
+ },
4339
+ {
4340
+ "epoch": 2.6663542642924085,
4341
+ "grad_norm": 1.6752853393554688,
4342
+ "learning_rate": 1.8333528584817246e-05,
4343
+ "loss": 0.2087,
4344
+ "step": 5690
4345
+ },
4346
+ {
4347
+ "epoch": 2.6710402999062794,
4348
+ "grad_norm": 1.8749183416366577,
4349
+ "learning_rate": 1.8330599812558575e-05,
4350
+ "loss": 0.1722,
4351
+ "step": 5700
4352
+ },
4353
+ {
4354
+ "epoch": 2.67572633552015,
4355
+ "grad_norm": 1.9365167617797852,
4356
+ "learning_rate": 1.8327671040299908e-05,
4357
+ "loss": 0.1828,
4358
+ "step": 5710
4359
+ },
4360
+ {
4361
+ "epoch": 2.680412371134021,
4362
+ "grad_norm": 1.8167498111724854,
4363
+ "learning_rate": 1.8324742268041237e-05,
4364
+ "loss": 0.1566,
4365
+ "step": 5720
4366
+ },
4367
+ {
4368
+ "epoch": 2.6850984067478914,
4369
+ "grad_norm": 1.8729602098464966,
4370
+ "learning_rate": 1.832181349578257e-05,
4371
+ "loss": 0.159,
4372
+ "step": 5730
4373
+ },
4374
+ {
4375
+ "epoch": 2.689784442361762,
4376
+ "grad_norm": 1.565158724784851,
4377
+ "learning_rate": 1.83188847235239e-05,
4378
+ "loss": 0.1669,
4379
+ "step": 5740
4380
+ },
4381
+ {
4382
+ "epoch": 2.6944704779756328,
4383
+ "grad_norm": 2.2538883686065674,
4384
+ "learning_rate": 1.8315955951265233e-05,
4385
+ "loss": 0.1353,
4386
+ "step": 5750
4387
+ },
4388
+ {
4389
+ "epoch": 2.6944704779756328,
4390
+ "eval_loss": 0.03330089524388313,
4391
+ "eval_pearson_cosine": 0.8210346883559136,
4392
+ "eval_pearson_dot": 0.7463452164747508,
4393
+ "eval_pearson_euclidean": 0.8023729858931796,
4394
+ "eval_pearson_manhattan": 0.8045441158759452,
4395
+ "eval_runtime": 3.2103,
4396
+ "eval_samples_per_second": 467.241,
4397
+ "eval_spearman_cosine": 0.8210648613938134,
4398
+ "eval_spearman_dot": 0.7463044894785922,
4399
+ "eval_spearman_euclidean": 0.8102763562695625,
4400
+ "eval_spearman_manhattan": 0.8122826147618483,
4401
+ "eval_steps_per_second": 29.28,
4402
+ "step": 5750
4403
+ },
4404
+ {
4405
+ "epoch": 2.6991565135895033,
4406
+ "grad_norm": 1.5002834796905518,
4407
+ "learning_rate": 1.8313027179006562e-05,
4408
+ "loss": 0.1694,
4409
+ "step": 5760
4410
+ },
4411
+ {
4412
+ "epoch": 2.703842549203374,
4413
+ "grad_norm": 1.4530423879623413,
4414
+ "learning_rate": 1.8310098406747895e-05,
4415
+ "loss": 0.1625,
4416
+ "step": 5770
4417
+ },
4418
+ {
4419
+ "epoch": 2.7085285848172447,
4420
+ "grad_norm": 1.4987956285476685,
4421
+ "learning_rate": 1.8307169634489225e-05,
4422
+ "loss": 0.1881,
4423
+ "step": 5780
4424
+ },
4425
+ {
4426
+ "epoch": 2.713214620431115,
4427
+ "grad_norm": 1.6835180521011353,
4428
+ "learning_rate": 1.8304240862230554e-05,
4429
+ "loss": 0.1388,
4430
+ "step": 5790
4431
+ },
4432
+ {
4433
+ "epoch": 2.717900656044986,
4434
+ "grad_norm": 1.752693772315979,
4435
+ "learning_rate": 1.8301312089971887e-05,
4436
+ "loss": 0.1452,
4437
+ "step": 5800
4438
+ },
4439
+ {
4440
+ "epoch": 2.7225866916588566,
4441
+ "grad_norm": 1.5680220127105713,
4442
+ "learning_rate": 1.8298383317713217e-05,
4443
+ "loss": 0.1613,
4444
+ "step": 5810
4445
+ },
4446
+ {
4447
+ "epoch": 2.7272727272727275,
4448
+ "grad_norm": 2.067552328109741,
4449
+ "learning_rate": 1.8295454545454546e-05,
4450
+ "loss": 0.1458,
4451
+ "step": 5820
4452
+ },
4453
+ {
4454
+ "epoch": 2.731958762886598,
4455
+ "grad_norm": 2.0963711738586426,
4456
+ "learning_rate": 1.8292525773195876e-05,
4457
+ "loss": 0.1972,
4458
+ "step": 5830
4459
+ },
4460
+ {
4461
+ "epoch": 2.7366447985004685,
4462
+ "grad_norm": 1.5372573137283325,
4463
+ "learning_rate": 1.828959700093721e-05,
4464
+ "loss": 0.1657,
4465
+ "step": 5840
4466
+ },
4467
+ {
4468
+ "epoch": 2.7413308341143394,
4469
+ "grad_norm": 2.0116796493530273,
4470
+ "learning_rate": 1.8286668228678538e-05,
4471
+ "loss": 0.1576,
4472
+ "step": 5850
4473
+ },
4474
+ {
4475
+ "epoch": 2.74601686972821,
4476
+ "grad_norm": 1.3485506772994995,
4477
+ "learning_rate": 1.828373945641987e-05,
4478
+ "loss": 0.1503,
4479
+ "step": 5860
4480
+ },
4481
+ {
4482
+ "epoch": 2.750702905342081,
4483
+ "grad_norm": 1.7089899778366089,
4484
+ "learning_rate": 1.82808106841612e-05,
4485
+ "loss": 0.1787,
4486
+ "step": 5870
4487
+ },
4488
+ {
4489
+ "epoch": 2.7553889409559513,
4490
+ "grad_norm": 1.6269711256027222,
4491
+ "learning_rate": 1.8277881911902533e-05,
4492
+ "loss": 0.1879,
4493
+ "step": 5880
4494
+ },
4495
+ {
4496
+ "epoch": 2.760074976569822,
4497
+ "grad_norm": 1.6839511394500732,
4498
+ "learning_rate": 1.8274953139643863e-05,
4499
+ "loss": 0.1499,
4500
+ "step": 5890
4501
+ },
4502
+ {
4503
+ "epoch": 2.7647610121836927,
4504
+ "grad_norm": 1.829288363456726,
4505
+ "learning_rate": 1.8272024367385192e-05,
4506
+ "loss": 0.1776,
4507
+ "step": 5900
4508
+ },
4509
+ {
4510
+ "epoch": 2.7694470477975632,
4511
+ "grad_norm": 1.4138745069503784,
4512
+ "learning_rate": 1.8269095595126525e-05,
4513
+ "loss": 0.1612,
4514
+ "step": 5910
4515
+ },
4516
+ {
4517
+ "epoch": 2.774133083411434,
4518
+ "grad_norm": 1.4199497699737549,
4519
+ "learning_rate": 1.8266166822867855e-05,
4520
+ "loss": 0.1717,
4521
+ "step": 5920
4522
+ },
4523
+ {
4524
+ "epoch": 2.7788191190253047,
4525
+ "grad_norm": 1.8683243989944458,
4526
+ "learning_rate": 1.8263238050609187e-05,
4527
+ "loss": 0.1701,
4528
+ "step": 5930
4529
+ },
4530
+ {
4531
+ "epoch": 2.783505154639175,
4532
+ "grad_norm": 1.5344245433807373,
4533
+ "learning_rate": 1.8260309278350517e-05,
4534
+ "loss": 0.1704,
4535
+ "step": 5940
4536
+ },
4537
+ {
4538
+ "epoch": 2.788191190253046,
4539
+ "grad_norm": 1.8963440656661987,
4540
+ "learning_rate": 1.825738050609185e-05,
4541
+ "loss": 0.1876,
4542
+ "step": 5950
4543
+ },
4544
+ {
4545
+ "epoch": 2.7928772258669166,
4546
+ "grad_norm": 2.009709358215332,
4547
+ "learning_rate": 1.825445173383318e-05,
4548
+ "loss": 0.204,
4549
+ "step": 5960
4550
+ },
4551
+ {
4552
+ "epoch": 2.7975632614807875,
4553
+ "grad_norm": 1.5217466354370117,
4554
+ "learning_rate": 1.825152296157451e-05,
4555
+ "loss": 0.1582,
4556
+ "step": 5970
4557
+ },
4558
+ {
4559
+ "epoch": 2.802249297094658,
4560
+ "grad_norm": 1.7628065347671509,
4561
+ "learning_rate": 1.8248594189315842e-05,
4562
+ "loss": 0.1563,
4563
+ "step": 5980
4564
+ },
4565
+ {
4566
+ "epoch": 2.8069353327085285,
4567
+ "grad_norm": 1.8212190866470337,
4568
+ "learning_rate": 1.824566541705717e-05,
4569
+ "loss": 0.1662,
4570
+ "step": 5990
4571
+ },
4572
+ {
4573
+ "epoch": 2.8116213683223994,
4574
+ "grad_norm": 2.4741644859313965,
4575
+ "learning_rate": 1.82427366447985e-05,
4576
+ "loss": 0.1555,
4577
+ "step": 6000
4578
+ },
4579
+ {
4580
+ "epoch": 2.8116213683223994,
4581
+ "eval_loss": 0.03254643455147743,
4582
+ "eval_pearson_cosine": 0.818468145503374,
4583
+ "eval_pearson_dot": 0.7526356355108419,
4584
+ "eval_pearson_euclidean": 0.7938927456366471,
4585
+ "eval_pearson_manhattan": 0.7958636725988697,
4586
+ "eval_runtime": 3.1897,
4587
+ "eval_samples_per_second": 470.262,
4588
+ "eval_spearman_cosine": 0.8183439432602023,
4589
+ "eval_spearman_dot": 0.7537522979367676,
4590
+ "eval_spearman_euclidean": 0.8019040119245933,
4591
+ "eval_spearman_manhattan": 0.8036153637269691,
4592
+ "eval_steps_per_second": 29.47,
4593
+ "step": 6000
4594
  }
4595
  ],
4596
  "logging_steps": 10,