Training in progress, step 18000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 613004648
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ca95a7948bce19be5449f8781c37f0268eecbc454dcb50de5ef8e89c3d9a4e6
|
| 3 |
size 613004648
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1226096954
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:667a4ed3665b904cc6a25c6508c89fb468bc4b1b80e08cd26eb7f6e936a1d8ff
|
| 3 |
size 1226096954
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b67589c462b9c803b3450b2a56b26bd15fd2aad689878137f6e7e3b31569b4d3
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e87f5dbc951603cbe6f4d5f5d51c8e0ef8863d7cd661e58ac58827859c30521
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 250,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -12995,6 +12995,770 @@
|
|
| 12995 |
"eval_spearman_manhattan": 0.745468210963869,
|
| 12996 |
"eval_steps_per_second": 37.654,
|
| 12997 |
"step": 17000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12998 |
}
|
| 12999 |
],
|
| 13000 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 8.434864104967197,
|
| 5 |
"eval_steps": 250,
|
| 6 |
+
"global_step": 18000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 12995 |
"eval_spearman_manhattan": 0.745468210963869,
|
| 12996 |
"eval_steps_per_second": 37.654,
|
| 12997 |
"step": 17000
|
| 12998 |
+
},
|
| 12999 |
+
{
|
| 13000 |
+
"epoch": 7.970946579194002,
|
| 13001 |
+
"grad_norm": 1.712897539138794,
|
| 13002 |
+
"learning_rate": 9.00363167760075e-06,
|
| 13003 |
+
"loss": 0.0638,
|
| 13004 |
+
"step": 17010
|
| 13005 |
+
},
|
| 13006 |
+
{
|
| 13007 |
+
"epoch": 7.975632614807872,
|
| 13008 |
+
"grad_norm": 1.6661624908447266,
|
| 13009 |
+
"learning_rate": 9.003045923149017e-06,
|
| 13010 |
+
"loss": 0.0721,
|
| 13011 |
+
"step": 17020
|
| 13012 |
+
},
|
| 13013 |
+
{
|
| 13014 |
+
"epoch": 7.980318650421744,
|
| 13015 |
+
"grad_norm": 1.8900232315063477,
|
| 13016 |
+
"learning_rate": 9.002460168697282e-06,
|
| 13017 |
+
"loss": 0.0697,
|
| 13018 |
+
"step": 17030
|
| 13019 |
+
},
|
| 13020 |
+
{
|
| 13021 |
+
"epoch": 7.985004686035614,
|
| 13022 |
+
"grad_norm": 1.7580076456069946,
|
| 13023 |
+
"learning_rate": 9.001874414245549e-06,
|
| 13024 |
+
"loss": 0.0829,
|
| 13025 |
+
"step": 17040
|
| 13026 |
+
},
|
| 13027 |
+
{
|
| 13028 |
+
"epoch": 7.989690721649485,
|
| 13029 |
+
"grad_norm": 1.0621994733810425,
|
| 13030 |
+
"learning_rate": 9.001288659793816e-06,
|
| 13031 |
+
"loss": 0.0654,
|
| 13032 |
+
"step": 17050
|
| 13033 |
+
},
|
| 13034 |
+
{
|
| 13035 |
+
"epoch": 7.994376757263355,
|
| 13036 |
+
"grad_norm": 2.382904529571533,
|
| 13037 |
+
"learning_rate": 9.000702905342081e-06,
|
| 13038 |
+
"loss": 0.0832,
|
| 13039 |
+
"step": 17060
|
| 13040 |
+
},
|
| 13041 |
+
{
|
| 13042 |
+
"epoch": 7.9990627928772255,
|
| 13043 |
+
"grad_norm": 2.5280025005340576,
|
| 13044 |
+
"learning_rate": 9.000117150890348e-06,
|
| 13045 |
+
"loss": 0.0671,
|
| 13046 |
+
"step": 17070
|
| 13047 |
+
},
|
| 13048 |
+
{
|
| 13049 |
+
"epoch": 8.003748828491096,
|
| 13050 |
+
"grad_norm": 1.5230673551559448,
|
| 13051 |
+
"learning_rate": 8.999531396438613e-06,
|
| 13052 |
+
"loss": 0.0705,
|
| 13053 |
+
"step": 17080
|
| 13054 |
+
},
|
| 13055 |
+
{
|
| 13056 |
+
"epoch": 8.008434864104967,
|
| 13057 |
+
"grad_norm": 1.430708646774292,
|
| 13058 |
+
"learning_rate": 8.99894564198688e-06,
|
| 13059 |
+
"loss": 0.0469,
|
| 13060 |
+
"step": 17090
|
| 13061 |
+
},
|
| 13062 |
+
{
|
| 13063 |
+
"epoch": 8.013120899718837,
|
| 13064 |
+
"grad_norm": 1.6292754411697388,
|
| 13065 |
+
"learning_rate": 8.998359887535147e-06,
|
| 13066 |
+
"loss": 0.0511,
|
| 13067 |
+
"step": 17100
|
| 13068 |
+
},
|
| 13069 |
+
{
|
| 13070 |
+
"epoch": 8.01780693533271,
|
| 13071 |
+
"grad_norm": 1.5162855386734009,
|
| 13072 |
+
"learning_rate": 8.997774133083412e-06,
|
| 13073 |
+
"loss": 0.0562,
|
| 13074 |
+
"step": 17110
|
| 13075 |
+
},
|
| 13076 |
+
{
|
| 13077 |
+
"epoch": 8.02249297094658,
|
| 13078 |
+
"grad_norm": 1.8024640083312988,
|
| 13079 |
+
"learning_rate": 8.997188378631679e-06,
|
| 13080 |
+
"loss": 0.0591,
|
| 13081 |
+
"step": 17120
|
| 13082 |
+
},
|
| 13083 |
+
{
|
| 13084 |
+
"epoch": 8.02717900656045,
|
| 13085 |
+
"grad_norm": 1.7311487197875977,
|
| 13086 |
+
"learning_rate": 8.996602624179944e-06,
|
| 13087 |
+
"loss": 0.0588,
|
| 13088 |
+
"step": 17130
|
| 13089 |
+
},
|
| 13090 |
+
{
|
| 13091 |
+
"epoch": 8.03186504217432,
|
| 13092 |
+
"grad_norm": 0.8079742193222046,
|
| 13093 |
+
"learning_rate": 8.99601686972821e-06,
|
| 13094 |
+
"loss": 0.0575,
|
| 13095 |
+
"step": 17140
|
| 13096 |
+
},
|
| 13097 |
+
{
|
| 13098 |
+
"epoch": 8.036551077788191,
|
| 13099 |
+
"grad_norm": 1.5149396657943726,
|
| 13100 |
+
"learning_rate": 8.995431115276478e-06,
|
| 13101 |
+
"loss": 0.0631,
|
| 13102 |
+
"step": 17150
|
| 13103 |
+
},
|
| 13104 |
+
{
|
| 13105 |
+
"epoch": 8.041237113402062,
|
| 13106 |
+
"grad_norm": 1.4858596324920654,
|
| 13107 |
+
"learning_rate": 8.994845360824743e-06,
|
| 13108 |
+
"loss": 0.0531,
|
| 13109 |
+
"step": 17160
|
| 13110 |
+
},
|
| 13111 |
+
{
|
| 13112 |
+
"epoch": 8.045923149015932,
|
| 13113 |
+
"grad_norm": 0.9805922508239746,
|
| 13114 |
+
"learning_rate": 8.994259606373009e-06,
|
| 13115 |
+
"loss": 0.0507,
|
| 13116 |
+
"step": 17170
|
| 13117 |
+
},
|
| 13118 |
+
{
|
| 13119 |
+
"epoch": 8.050609184629803,
|
| 13120 |
+
"grad_norm": 1.2604528665542603,
|
| 13121 |
+
"learning_rate": 8.993673851921275e-06,
|
| 13122 |
+
"loss": 0.0552,
|
| 13123 |
+
"step": 17180
|
| 13124 |
+
},
|
| 13125 |
+
{
|
| 13126 |
+
"epoch": 8.055295220243673,
|
| 13127 |
+
"grad_norm": 1.1252182722091675,
|
| 13128 |
+
"learning_rate": 8.99308809746954e-06,
|
| 13129 |
+
"loss": 0.05,
|
| 13130 |
+
"step": 17190
|
| 13131 |
+
},
|
| 13132 |
+
{
|
| 13133 |
+
"epoch": 8.059981255857544,
|
| 13134 |
+
"grad_norm": 2.151175022125244,
|
| 13135 |
+
"learning_rate": 8.992502343017808e-06,
|
| 13136 |
+
"loss": 0.0678,
|
| 13137 |
+
"step": 17200
|
| 13138 |
+
},
|
| 13139 |
+
{
|
| 13140 |
+
"epoch": 8.064667291471416,
|
| 13141 |
+
"grad_norm": 1.468262791633606,
|
| 13142 |
+
"learning_rate": 8.991916588566075e-06,
|
| 13143 |
+
"loss": 0.0542,
|
| 13144 |
+
"step": 17210
|
| 13145 |
+
},
|
| 13146 |
+
{
|
| 13147 |
+
"epoch": 8.069353327085286,
|
| 13148 |
+
"grad_norm": 1.679754376411438,
|
| 13149 |
+
"learning_rate": 8.99133083411434e-06,
|
| 13150 |
+
"loss": 0.0574,
|
| 13151 |
+
"step": 17220
|
| 13152 |
+
},
|
| 13153 |
+
{
|
| 13154 |
+
"epoch": 8.074039362699157,
|
| 13155 |
+
"grad_norm": 2.2520573139190674,
|
| 13156 |
+
"learning_rate": 8.990745079662607e-06,
|
| 13157 |
+
"loss": 0.0517,
|
| 13158 |
+
"step": 17230
|
| 13159 |
+
},
|
| 13160 |
+
{
|
| 13161 |
+
"epoch": 8.078725398313027,
|
| 13162 |
+
"grad_norm": 1.5211695432662964,
|
| 13163 |
+
"learning_rate": 8.990159325210872e-06,
|
| 13164 |
+
"loss": 0.0527,
|
| 13165 |
+
"step": 17240
|
| 13166 |
+
},
|
| 13167 |
+
{
|
| 13168 |
+
"epoch": 8.083411433926898,
|
| 13169 |
+
"grad_norm": 1.7770270109176636,
|
| 13170 |
+
"learning_rate": 8.989573570759139e-06,
|
| 13171 |
+
"loss": 0.0682,
|
| 13172 |
+
"step": 17250
|
| 13173 |
+
},
|
| 13174 |
+
{
|
| 13175 |
+
"epoch": 8.083411433926898,
|
| 13176 |
+
"eval_loss": 0.03860222175717354,
|
| 13177 |
+
"eval_pearson_cosine": 0.7845454144482034,
|
| 13178 |
+
"eval_pearson_dot": 0.6431296048602846,
|
| 13179 |
+
"eval_pearson_euclidean": 0.7311376660170836,
|
| 13180 |
+
"eval_pearson_manhattan": 0.7305724358867849,
|
| 13181 |
+
"eval_runtime": 40.1479,
|
| 13182 |
+
"eval_samples_per_second": 37.362,
|
| 13183 |
+
"eval_spearman_cosine": 0.7869140607349678,
|
| 13184 |
+
"eval_spearman_dot": 0.6613187727914379,
|
| 13185 |
+
"eval_spearman_euclidean": 0.744930207684551,
|
| 13186 |
+
"eval_spearman_manhattan": 0.7446744595094797,
|
| 13187 |
+
"eval_steps_per_second": 37.362,
|
| 13188 |
+
"step": 17250
|
| 13189 |
+
},
|
| 13190 |
+
{
|
| 13191 |
+
"epoch": 8.088097469540768,
|
| 13192 |
+
"grad_norm": 1.6006652116775513,
|
| 13193 |
+
"learning_rate": 8.988987816307406e-06,
|
| 13194 |
+
"loss": 0.0604,
|
| 13195 |
+
"step": 17260
|
| 13196 |
+
},
|
| 13197 |
+
{
|
| 13198 |
+
"epoch": 8.092783505154639,
|
| 13199 |
+
"grad_norm": 1.7531373500823975,
|
| 13200 |
+
"learning_rate": 8.988402061855671e-06,
|
| 13201 |
+
"loss": 0.063,
|
| 13202 |
+
"step": 17270
|
| 13203 |
+
},
|
| 13204 |
+
{
|
| 13205 |
+
"epoch": 8.09746954076851,
|
| 13206 |
+
"grad_norm": 2.294930934906006,
|
| 13207 |
+
"learning_rate": 8.987816307403938e-06,
|
| 13208 |
+
"loss": 0.0568,
|
| 13209 |
+
"step": 17280
|
| 13210 |
+
},
|
| 13211 |
+
{
|
| 13212 |
+
"epoch": 8.10215557638238,
|
| 13213 |
+
"grad_norm": 1.9267457723617554,
|
| 13214 |
+
"learning_rate": 8.987230552952203e-06,
|
| 13215 |
+
"loss": 0.0565,
|
| 13216 |
+
"step": 17290
|
| 13217 |
+
},
|
| 13218 |
+
{
|
| 13219 |
+
"epoch": 8.10684161199625,
|
| 13220 |
+
"grad_norm": 2.1076624393463135,
|
| 13221 |
+
"learning_rate": 8.986644798500468e-06,
|
| 13222 |
+
"loss": 0.05,
|
| 13223 |
+
"step": 17300
|
| 13224 |
+
},
|
| 13225 |
+
{
|
| 13226 |
+
"epoch": 8.111527647610123,
|
| 13227 |
+
"grad_norm": 2.3128514289855957,
|
| 13228 |
+
"learning_rate": 8.986059044048735e-06,
|
| 13229 |
+
"loss": 0.0656,
|
| 13230 |
+
"step": 17310
|
| 13231 |
+
},
|
| 13232 |
+
{
|
| 13233 |
+
"epoch": 8.116213683223993,
|
| 13234 |
+
"grad_norm": 1.6104718446731567,
|
| 13235 |
+
"learning_rate": 8.985473289597002e-06,
|
| 13236 |
+
"loss": 0.0554,
|
| 13237 |
+
"step": 17320
|
| 13238 |
+
},
|
| 13239 |
+
{
|
| 13240 |
+
"epoch": 8.120899718837864,
|
| 13241 |
+
"grad_norm": 1.1439037322998047,
|
| 13242 |
+
"learning_rate": 8.984887535145267e-06,
|
| 13243 |
+
"loss": 0.0588,
|
| 13244 |
+
"step": 17330
|
| 13245 |
+
},
|
| 13246 |
+
{
|
| 13247 |
+
"epoch": 8.125585754451734,
|
| 13248 |
+
"grad_norm": 1.342757225036621,
|
| 13249 |
+
"learning_rate": 8.984301780693534e-06,
|
| 13250 |
+
"loss": 0.0539,
|
| 13251 |
+
"step": 17340
|
| 13252 |
+
},
|
| 13253 |
+
{
|
| 13254 |
+
"epoch": 8.130271790065605,
|
| 13255 |
+
"grad_norm": 0.8630651235580444,
|
| 13256 |
+
"learning_rate": 8.9837160262418e-06,
|
| 13257 |
+
"loss": 0.0602,
|
| 13258 |
+
"step": 17350
|
| 13259 |
+
},
|
| 13260 |
+
{
|
| 13261 |
+
"epoch": 8.134957825679475,
|
| 13262 |
+
"grad_norm": 2.1189727783203125,
|
| 13263 |
+
"learning_rate": 8.983130271790066e-06,
|
| 13264 |
+
"loss": 0.0676,
|
| 13265 |
+
"step": 17360
|
| 13266 |
+
},
|
| 13267 |
+
{
|
| 13268 |
+
"epoch": 8.139643861293345,
|
| 13269 |
+
"grad_norm": 1.943943977355957,
|
| 13270 |
+
"learning_rate": 8.982544517338333e-06,
|
| 13271 |
+
"loss": 0.0564,
|
| 13272 |
+
"step": 17370
|
| 13273 |
+
},
|
| 13274 |
+
{
|
| 13275 |
+
"epoch": 8.144329896907216,
|
| 13276 |
+
"grad_norm": 2.4925365447998047,
|
| 13277 |
+
"learning_rate": 8.981958762886599e-06,
|
| 13278 |
+
"loss": 0.058,
|
| 13279 |
+
"step": 17380
|
| 13280 |
+
},
|
| 13281 |
+
{
|
| 13282 |
+
"epoch": 8.149015932521086,
|
| 13283 |
+
"grad_norm": 0.8549938797950745,
|
| 13284 |
+
"learning_rate": 8.981373008434865e-06,
|
| 13285 |
+
"loss": 0.0583,
|
| 13286 |
+
"step": 17390
|
| 13287 |
+
},
|
| 13288 |
+
{
|
| 13289 |
+
"epoch": 8.153701968134957,
|
| 13290 |
+
"grad_norm": 2.259129762649536,
|
| 13291 |
+
"learning_rate": 8.98078725398313e-06,
|
| 13292 |
+
"loss": 0.064,
|
| 13293 |
+
"step": 17400
|
| 13294 |
+
},
|
| 13295 |
+
{
|
| 13296 |
+
"epoch": 8.15838800374883,
|
| 13297 |
+
"grad_norm": 1.5482234954833984,
|
| 13298 |
+
"learning_rate": 8.980201499531398e-06,
|
| 13299 |
+
"loss": 0.0608,
|
| 13300 |
+
"step": 17410
|
| 13301 |
+
},
|
| 13302 |
+
{
|
| 13303 |
+
"epoch": 8.1630740393627,
|
| 13304 |
+
"grad_norm": 1.5130146741867065,
|
| 13305 |
+
"learning_rate": 8.979615745079663e-06,
|
| 13306 |
+
"loss": 0.0487,
|
| 13307 |
+
"step": 17420
|
| 13308 |
+
},
|
| 13309 |
+
{
|
| 13310 |
+
"epoch": 8.16776007497657,
|
| 13311 |
+
"grad_norm": 2.1378371715545654,
|
| 13312 |
+
"learning_rate": 8.97902999062793e-06,
|
| 13313 |
+
"loss": 0.0687,
|
| 13314 |
+
"step": 17430
|
| 13315 |
+
},
|
| 13316 |
+
{
|
| 13317 |
+
"epoch": 8.17244611059044,
|
| 13318 |
+
"grad_norm": 1.4148082733154297,
|
| 13319 |
+
"learning_rate": 8.978444236176197e-06,
|
| 13320 |
+
"loss": 0.0527,
|
| 13321 |
+
"step": 17440
|
| 13322 |
+
},
|
| 13323 |
+
{
|
| 13324 |
+
"epoch": 8.177132146204311,
|
| 13325 |
+
"grad_norm": 1.621864676475525,
|
| 13326 |
+
"learning_rate": 8.977858481724462e-06,
|
| 13327 |
+
"loss": 0.0541,
|
| 13328 |
+
"step": 17450
|
| 13329 |
+
},
|
| 13330 |
+
{
|
| 13331 |
+
"epoch": 8.181818181818182,
|
| 13332 |
+
"grad_norm": 1.5040533542633057,
|
| 13333 |
+
"learning_rate": 8.977272727272727e-06,
|
| 13334 |
+
"loss": 0.0505,
|
| 13335 |
+
"step": 17460
|
| 13336 |
+
},
|
| 13337 |
+
{
|
| 13338 |
+
"epoch": 8.186504217432052,
|
| 13339 |
+
"grad_norm": 1.7481443881988525,
|
| 13340 |
+
"learning_rate": 8.976686972820994e-06,
|
| 13341 |
+
"loss": 0.0658,
|
| 13342 |
+
"step": 17470
|
| 13343 |
+
},
|
| 13344 |
+
{
|
| 13345 |
+
"epoch": 8.191190253045923,
|
| 13346 |
+
"grad_norm": 1.6171940565109253,
|
| 13347 |
+
"learning_rate": 8.976101218369261e-06,
|
| 13348 |
+
"loss": 0.0545,
|
| 13349 |
+
"step": 17480
|
| 13350 |
+
},
|
| 13351 |
+
{
|
| 13352 |
+
"epoch": 8.195876288659793,
|
| 13353 |
+
"grad_norm": 3.027470350265503,
|
| 13354 |
+
"learning_rate": 8.975515463917526e-06,
|
| 13355 |
+
"loss": 0.0497,
|
| 13356 |
+
"step": 17490
|
| 13357 |
+
},
|
| 13358 |
+
{
|
| 13359 |
+
"epoch": 8.200562324273664,
|
| 13360 |
+
"grad_norm": 1.5770045518875122,
|
| 13361 |
+
"learning_rate": 8.974929709465793e-06,
|
| 13362 |
+
"loss": 0.0526,
|
| 13363 |
+
"step": 17500
|
| 13364 |
+
},
|
| 13365 |
+
{
|
| 13366 |
+
"epoch": 8.200562324273664,
|
| 13367 |
+
"eval_loss": 0.03888610377907753,
|
| 13368 |
+
"eval_pearson_cosine": 0.7824405710209184,
|
| 13369 |
+
"eval_pearson_dot": 0.6370045075889941,
|
| 13370 |
+
"eval_pearson_euclidean": 0.7275292814047258,
|
| 13371 |
+
"eval_pearson_manhattan": 0.7271617935348544,
|
| 13372 |
+
"eval_runtime": 40.0736,
|
| 13373 |
+
"eval_samples_per_second": 37.431,
|
| 13374 |
+
"eval_spearman_cosine": 0.7832356097193793,
|
| 13375 |
+
"eval_spearman_dot": 0.6538613957323862,
|
| 13376 |
+
"eval_spearman_euclidean": 0.7430558622725291,
|
| 13377 |
+
"eval_spearman_manhattan": 0.7430619174369794,
|
| 13378 |
+
"eval_steps_per_second": 37.431,
|
| 13379 |
+
"step": 17500
|
| 13380 |
+
},
|
| 13381 |
+
{
|
| 13382 |
+
"epoch": 8.205248359887536,
|
| 13383 |
+
"grad_norm": 2.2228381633758545,
|
| 13384 |
+
"learning_rate": 8.974343955014058e-06,
|
| 13385 |
+
"loss": 0.0457,
|
| 13386 |
+
"step": 17510
|
| 13387 |
+
},
|
| 13388 |
+
{
|
| 13389 |
+
"epoch": 8.209934395501406,
|
| 13390 |
+
"grad_norm": 2.4519641399383545,
|
| 13391 |
+
"learning_rate": 8.973758200562325e-06,
|
| 13392 |
+
"loss": 0.051,
|
| 13393 |
+
"step": 17520
|
| 13394 |
+
},
|
| 13395 |
+
{
|
| 13396 |
+
"epoch": 8.214620431115277,
|
| 13397 |
+
"grad_norm": 1.8084455728530884,
|
| 13398 |
+
"learning_rate": 8.97317244611059e-06,
|
| 13399 |
+
"loss": 0.0575,
|
| 13400 |
+
"step": 17530
|
| 13401 |
+
},
|
| 13402 |
+
{
|
| 13403 |
+
"epoch": 8.219306466729147,
|
| 13404 |
+
"grad_norm": 1.3803386688232422,
|
| 13405 |
+
"learning_rate": 8.972586691658857e-06,
|
| 13406 |
+
"loss": 0.053,
|
| 13407 |
+
"step": 17540
|
| 13408 |
+
},
|
| 13409 |
+
{
|
| 13410 |
+
"epoch": 8.223992502343018,
|
| 13411 |
+
"grad_norm": 1.3450793027877808,
|
| 13412 |
+
"learning_rate": 8.972000937207124e-06,
|
| 13413 |
+
"loss": 0.0556,
|
| 13414 |
+
"step": 17550
|
| 13415 |
+
},
|
| 13416 |
+
{
|
| 13417 |
+
"epoch": 8.228678537956888,
|
| 13418 |
+
"grad_norm": 2.0758721828460693,
|
| 13419 |
+
"learning_rate": 8.97141518275539e-06,
|
| 13420 |
+
"loss": 0.0521,
|
| 13421 |
+
"step": 17560
|
| 13422 |
+
},
|
| 13423 |
+
{
|
| 13424 |
+
"epoch": 8.233364573570759,
|
| 13425 |
+
"grad_norm": 1.4197956323623657,
|
| 13426 |
+
"learning_rate": 8.970829428303656e-06,
|
| 13427 |
+
"loss": 0.0631,
|
| 13428 |
+
"step": 17570
|
| 13429 |
+
},
|
| 13430 |
+
{
|
| 13431 |
+
"epoch": 8.23805060918463,
|
| 13432 |
+
"grad_norm": 1.497050166130066,
|
| 13433 |
+
"learning_rate": 8.970243673851922e-06,
|
| 13434 |
+
"loss": 0.0614,
|
| 13435 |
+
"step": 17580
|
| 13436 |
+
},
|
| 13437 |
+
{
|
| 13438 |
+
"epoch": 8.2427366447985,
|
| 13439 |
+
"grad_norm": 1.0769314765930176,
|
| 13440 |
+
"learning_rate": 8.969657919400189e-06,
|
| 13441 |
+
"loss": 0.0587,
|
| 13442 |
+
"step": 17590
|
| 13443 |
+
},
|
| 13444 |
+
{
|
| 13445 |
+
"epoch": 8.24742268041237,
|
| 13446 |
+
"grad_norm": 1.9401723146438599,
|
| 13447 |
+
"learning_rate": 8.969072164948455e-06,
|
| 13448 |
+
"loss": 0.0584,
|
| 13449 |
+
"step": 17600
|
| 13450 |
+
},
|
| 13451 |
+
{
|
| 13452 |
+
"epoch": 8.252108716026243,
|
| 13453 |
+
"grad_norm": 0.6708168387413025,
|
| 13454 |
+
"learning_rate": 8.96848641049672e-06,
|
| 13455 |
+
"loss": 0.058,
|
| 13456 |
+
"step": 17610
|
| 13457 |
+
},
|
| 13458 |
+
{
|
| 13459 |
+
"epoch": 8.256794751640113,
|
| 13460 |
+
"grad_norm": 1.555535912513733,
|
| 13461 |
+
"learning_rate": 8.967900656044986e-06,
|
| 13462 |
+
"loss": 0.0623,
|
| 13463 |
+
"step": 17620
|
| 13464 |
+
},
|
| 13465 |
+
{
|
| 13466 |
+
"epoch": 8.261480787253983,
|
| 13467 |
+
"grad_norm": 1.182997703552246,
|
| 13468 |
+
"learning_rate": 8.967314901593253e-06,
|
| 13469 |
+
"loss": 0.0521,
|
| 13470 |
+
"step": 17630
|
| 13471 |
+
},
|
| 13472 |
+
{
|
| 13473 |
+
"epoch": 8.266166822867854,
|
| 13474 |
+
"grad_norm": 1.7748857736587524,
|
| 13475 |
+
"learning_rate": 8.966729147141518e-06,
|
| 13476 |
+
"loss": 0.0573,
|
| 13477 |
+
"step": 17640
|
| 13478 |
+
},
|
| 13479 |
+
{
|
| 13480 |
+
"epoch": 8.270852858481724,
|
| 13481 |
+
"grad_norm": 1.558457851409912,
|
| 13482 |
+
"learning_rate": 8.966143392689785e-06,
|
| 13483 |
+
"loss": 0.0586,
|
| 13484 |
+
"step": 17650
|
| 13485 |
+
},
|
| 13486 |
+
{
|
| 13487 |
+
"epoch": 8.275538894095595,
|
| 13488 |
+
"grad_norm": 2.463069438934326,
|
| 13489 |
+
"learning_rate": 8.965557638238052e-06,
|
| 13490 |
+
"loss": 0.0581,
|
| 13491 |
+
"step": 17660
|
| 13492 |
+
},
|
| 13493 |
+
{
|
| 13494 |
+
"epoch": 8.280224929709465,
|
| 13495 |
+
"grad_norm": 1.325049877166748,
|
| 13496 |
+
"learning_rate": 8.964971883786317e-06,
|
| 13497 |
+
"loss": 0.0526,
|
| 13498 |
+
"step": 17670
|
| 13499 |
+
},
|
| 13500 |
+
{
|
| 13501 |
+
"epoch": 8.284910965323336,
|
| 13502 |
+
"grad_norm": 1.9136682748794556,
|
| 13503 |
+
"learning_rate": 8.964386129334584e-06,
|
| 13504 |
+
"loss": 0.0717,
|
| 13505 |
+
"step": 17680
|
| 13506 |
+
},
|
| 13507 |
+
{
|
| 13508 |
+
"epoch": 8.289597000937206,
|
| 13509 |
+
"grad_norm": 0.9149712920188904,
|
| 13510 |
+
"learning_rate": 8.96380037488285e-06,
|
| 13511 |
+
"loss": 0.0551,
|
| 13512 |
+
"step": 17690
|
| 13513 |
+
},
|
| 13514 |
+
{
|
| 13515 |
+
"epoch": 8.294283036551079,
|
| 13516 |
+
"grad_norm": 1.0004934072494507,
|
| 13517 |
+
"learning_rate": 8.963214620431116e-06,
|
| 13518 |
+
"loss": 0.0552,
|
| 13519 |
+
"step": 17700
|
| 13520 |
+
},
|
| 13521 |
+
{
|
| 13522 |
+
"epoch": 8.29896907216495,
|
| 13523 |
+
"grad_norm": 2.1920504570007324,
|
| 13524 |
+
"learning_rate": 8.962628865979383e-06,
|
| 13525 |
+
"loss": 0.0631,
|
| 13526 |
+
"step": 17710
|
| 13527 |
+
},
|
| 13528 |
+
{
|
| 13529 |
+
"epoch": 8.30365510777882,
|
| 13530 |
+
"grad_norm": 1.7555533647537231,
|
| 13531 |
+
"learning_rate": 8.962043111527648e-06,
|
| 13532 |
+
"loss": 0.0643,
|
| 13533 |
+
"step": 17720
|
| 13534 |
+
},
|
| 13535 |
+
{
|
| 13536 |
+
"epoch": 8.30834114339269,
|
| 13537 |
+
"grad_norm": 1.980637550354004,
|
| 13538 |
+
"learning_rate": 8.961457357075915e-06,
|
| 13539 |
+
"loss": 0.0594,
|
| 13540 |
+
"step": 17730
|
| 13541 |
+
},
|
| 13542 |
+
{
|
| 13543 |
+
"epoch": 8.31302717900656,
|
| 13544 |
+
"grad_norm": 1.4178955554962158,
|
| 13545 |
+
"learning_rate": 8.96087160262418e-06,
|
| 13546 |
+
"loss": 0.0584,
|
| 13547 |
+
"step": 17740
|
| 13548 |
+
},
|
| 13549 |
+
{
|
| 13550 |
+
"epoch": 8.317713214620431,
|
| 13551 |
+
"grad_norm": 1.375645399093628,
|
| 13552 |
+
"learning_rate": 8.960285848172446e-06,
|
| 13553 |
+
"loss": 0.0558,
|
| 13554 |
+
"step": 17750
|
| 13555 |
+
},
|
| 13556 |
+
{
|
| 13557 |
+
"epoch": 8.317713214620431,
|
| 13558 |
+
"eval_loss": 0.03849739581346512,
|
| 13559 |
+
"eval_pearson_cosine": 0.7855877317949194,
|
| 13560 |
+
"eval_pearson_dot": 0.651727283647233,
|
| 13561 |
+
"eval_pearson_euclidean": 0.7376296235813697,
|
| 13562 |
+
"eval_pearson_manhattan": 0.7370097948427539,
|
| 13563 |
+
"eval_runtime": 40.7984,
|
| 13564 |
+
"eval_samples_per_second": 36.766,
|
| 13565 |
+
"eval_spearman_cosine": 0.7865254359033228,
|
| 13566 |
+
"eval_spearman_dot": 0.6678553912046729,
|
| 13567 |
+
"eval_spearman_euclidean": 0.7518223898617357,
|
| 13568 |
+
"eval_spearman_manhattan": 0.7512717468993468,
|
| 13569 |
+
"eval_steps_per_second": 36.766,
|
| 13570 |
+
"step": 17750
|
| 13571 |
+
},
|
| 13572 |
+
{
|
| 13573 |
+
"epoch": 8.322399250234302,
|
| 13574 |
+
"grad_norm": 1.6528228521347046,
|
| 13575 |
+
"learning_rate": 8.959700093720714e-06,
|
| 13576 |
+
"loss": 0.0671,
|
| 13577 |
+
"step": 17760
|
| 13578 |
+
},
|
| 13579 |
+
{
|
| 13580 |
+
"epoch": 8.327085285848172,
|
| 13581 |
+
"grad_norm": 1.526089072227478,
|
| 13582 |
+
"learning_rate": 8.95911433926898e-06,
|
| 13583 |
+
"loss": 0.0661,
|
| 13584 |
+
"step": 17770
|
| 13585 |
+
},
|
| 13586 |
+
{
|
| 13587 |
+
"epoch": 8.331771321462043,
|
| 13588 |
+
"grad_norm": 1.9455267190933228,
|
| 13589 |
+
"learning_rate": 8.958528584817245e-06,
|
| 13590 |
+
"loss": 0.059,
|
| 13591 |
+
"step": 17780
|
| 13592 |
+
},
|
| 13593 |
+
{
|
| 13594 |
+
"epoch": 8.336457357075913,
|
| 13595 |
+
"grad_norm": 2.1176974773406982,
|
| 13596 |
+
"learning_rate": 8.957942830365512e-06,
|
| 13597 |
+
"loss": 0.0628,
|
| 13598 |
+
"step": 17790
|
| 13599 |
+
},
|
| 13600 |
+
{
|
| 13601 |
+
"epoch": 8.341143392689784,
|
| 13602 |
+
"grad_norm": 1.9059792757034302,
|
| 13603 |
+
"learning_rate": 8.957357075913777e-06,
|
| 13604 |
+
"loss": 0.0547,
|
| 13605 |
+
"step": 17800
|
| 13606 |
+
},
|
| 13607 |
+
{
|
| 13608 |
+
"epoch": 8.345829428303656,
|
| 13609 |
+
"grad_norm": 1.9086081981658936,
|
| 13610 |
+
"learning_rate": 8.956771321462044e-06,
|
| 13611 |
+
"loss": 0.0598,
|
| 13612 |
+
"step": 17810
|
| 13613 |
+
},
|
| 13614 |
+
{
|
| 13615 |
+
"epoch": 8.350515463917526,
|
| 13616 |
+
"grad_norm": 1.835897445678711,
|
| 13617 |
+
"learning_rate": 8.95618556701031e-06,
|
| 13618 |
+
"loss": 0.0528,
|
| 13619 |
+
"step": 17820
|
| 13620 |
+
},
|
| 13621 |
+
{
|
| 13622 |
+
"epoch": 8.355201499531397,
|
| 13623 |
+
"grad_norm": 1.4925363063812256,
|
| 13624 |
+
"learning_rate": 8.955599812558576e-06,
|
| 13625 |
+
"loss": 0.054,
|
| 13626 |
+
"step": 17830
|
| 13627 |
+
},
|
| 13628 |
+
{
|
| 13629 |
+
"epoch": 8.359887535145267,
|
| 13630 |
+
"grad_norm": 1.8737494945526123,
|
| 13631 |
+
"learning_rate": 8.955014058106843e-06,
|
| 13632 |
+
"loss": 0.0592,
|
| 13633 |
+
"step": 17840
|
| 13634 |
+
},
|
| 13635 |
+
{
|
| 13636 |
+
"epoch": 8.364573570759138,
|
| 13637 |
+
"grad_norm": 2.0734856128692627,
|
| 13638 |
+
"learning_rate": 8.954428303655108e-06,
|
| 13639 |
+
"loss": 0.0577,
|
| 13640 |
+
"step": 17850
|
| 13641 |
+
},
|
| 13642 |
+
{
|
| 13643 |
+
"epoch": 8.369259606373008,
|
| 13644 |
+
"grad_norm": 1.1876471042633057,
|
| 13645 |
+
"learning_rate": 8.953842549203375e-06,
|
| 13646 |
+
"loss": 0.0529,
|
| 13647 |
+
"step": 17860
|
| 13648 |
+
},
|
| 13649 |
+
{
|
| 13650 |
+
"epoch": 8.373945641986879,
|
| 13651 |
+
"grad_norm": 0.8391751646995544,
|
| 13652 |
+
"learning_rate": 8.953256794751642e-06,
|
| 13653 |
+
"loss": 0.0513,
|
| 13654 |
+
"step": 17870
|
| 13655 |
+
},
|
| 13656 |
+
{
|
| 13657 |
+
"epoch": 8.37863167760075,
|
| 13658 |
+
"grad_norm": 2.0527615547180176,
|
| 13659 |
+
"learning_rate": 8.952671040299907e-06,
|
| 13660 |
+
"loss": 0.0802,
|
| 13661 |
+
"step": 17880
|
| 13662 |
+
},
|
| 13663 |
+
{
|
| 13664 |
+
"epoch": 8.38331771321462,
|
| 13665 |
+
"grad_norm": 1.1670820713043213,
|
| 13666 |
+
"learning_rate": 8.952085285848174e-06,
|
| 13667 |
+
"loss": 0.0567,
|
| 13668 |
+
"step": 17890
|
| 13669 |
+
},
|
| 13670 |
+
{
|
| 13671 |
+
"epoch": 8.388003748828492,
|
| 13672 |
+
"grad_norm": 1.0440400838851929,
|
| 13673 |
+
"learning_rate": 8.95149953139644e-06,
|
| 13674 |
+
"loss": 0.0589,
|
| 13675 |
+
"step": 17900
|
| 13676 |
+
},
|
| 13677 |
+
{
|
| 13678 |
+
"epoch": 8.392689784442362,
|
| 13679 |
+
"grad_norm": 1.3903789520263672,
|
| 13680 |
+
"learning_rate": 8.950913776944704e-06,
|
| 13681 |
+
"loss": 0.0555,
|
| 13682 |
+
"step": 17910
|
| 13683 |
+
},
|
| 13684 |
+
{
|
| 13685 |
+
"epoch": 8.397375820056233,
|
| 13686 |
+
"grad_norm": 2.042224407196045,
|
| 13687 |
+
"learning_rate": 8.950328022492971e-06,
|
| 13688 |
+
"loss": 0.0705,
|
| 13689 |
+
"step": 17920
|
| 13690 |
+
},
|
| 13691 |
+
{
|
| 13692 |
+
"epoch": 8.402061855670103,
|
| 13693 |
+
"grad_norm": 1.8270450830459595,
|
| 13694 |
+
"learning_rate": 8.949742268041238e-06,
|
| 13695 |
+
"loss": 0.0568,
|
| 13696 |
+
"step": 17930
|
| 13697 |
+
},
|
| 13698 |
+
{
|
| 13699 |
+
"epoch": 8.406747891283974,
|
| 13700 |
+
"grad_norm": 1.7498126029968262,
|
| 13701 |
+
"learning_rate": 8.949156513589504e-06,
|
| 13702 |
+
"loss": 0.0584,
|
| 13703 |
+
"step": 17940
|
| 13704 |
+
},
|
| 13705 |
+
{
|
| 13706 |
+
"epoch": 8.411433926897844,
|
| 13707 |
+
"grad_norm": 1.2420893907546997,
|
| 13708 |
+
"learning_rate": 8.94857075913777e-06,
|
| 13709 |
+
"loss": 0.06,
|
| 13710 |
+
"step": 17950
|
| 13711 |
+
},
|
| 13712 |
+
{
|
| 13713 |
+
"epoch": 8.416119962511715,
|
| 13714 |
+
"grad_norm": 1.9896409511566162,
|
| 13715 |
+
"learning_rate": 8.947985004686036e-06,
|
| 13716 |
+
"loss": 0.0505,
|
| 13717 |
+
"step": 17960
|
| 13718 |
+
},
|
| 13719 |
+
{
|
| 13720 |
+
"epoch": 8.420805998125585,
|
| 13721 |
+
"grad_norm": 1.1669880151748657,
|
| 13722 |
+
"learning_rate": 8.947399250234303e-06,
|
| 13723 |
+
"loss": 0.0595,
|
| 13724 |
+
"step": 17970
|
| 13725 |
+
},
|
| 13726 |
+
{
|
| 13727 |
+
"epoch": 8.425492033739456,
|
| 13728 |
+
"grad_norm": 1.2261865139007568,
|
| 13729 |
+
"learning_rate": 8.94681349578257e-06,
|
| 13730 |
+
"loss": 0.0604,
|
| 13731 |
+
"step": 17980
|
| 13732 |
+
},
|
| 13733 |
+
{
|
| 13734 |
+
"epoch": 8.430178069353326,
|
| 13735 |
+
"grad_norm": 1.5421935319900513,
|
| 13736 |
+
"learning_rate": 8.946227741330835e-06,
|
| 13737 |
+
"loss": 0.0621,
|
| 13738 |
+
"step": 17990
|
| 13739 |
+
},
|
| 13740 |
+
{
|
| 13741 |
+
"epoch": 8.434864104967197,
|
| 13742 |
+
"grad_norm": 1.9026983976364136,
|
| 13743 |
+
"learning_rate": 8.945641986879102e-06,
|
| 13744 |
+
"loss": 0.0633,
|
| 13745 |
+
"step": 18000
|
| 13746 |
+
},
|
| 13747 |
+
{
|
| 13748 |
+
"epoch": 8.434864104967197,
|
| 13749 |
+
"eval_loss": 0.039177875965833664,
|
| 13750 |
+
"eval_pearson_cosine": 0.7822495113035757,
|
| 13751 |
+
"eval_pearson_dot": 0.6511666258149553,
|
| 13752 |
+
"eval_pearson_euclidean": 0.7395462188066446,
|
| 13753 |
+
"eval_pearson_manhattan": 0.7387984914454222,
|
| 13754 |
+
"eval_runtime": 42.2768,
|
| 13755 |
+
"eval_samples_per_second": 35.48,
|
| 13756 |
+
"eval_spearman_cosine": 0.7845228935533591,
|
| 13757 |
+
"eval_spearman_dot": 0.6664111108433938,
|
| 13758 |
+
"eval_spearman_euclidean": 0.7541690232038317,
|
| 13759 |
+
"eval_spearman_manhattan": 0.7537307168421792,
|
| 13760 |
+
"eval_steps_per_second": 35.48,
|
| 13761 |
+
"step": 18000
|
| 13762 |
}
|
| 13763 |
],
|
| 13764 |
"logging_steps": 10,
|