Training in progress, step 3200, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 140815952
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ac64ee4e0bd3e418c59a709a7c428ec4b44f8460b46d24fcd19a23cc0c24348
|
| 3 |
size 140815952
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 71878996
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:535c31667b572c7c3bd2e6a0105994a38f0e527ba03bd2d1bde2921d11fed6a7
|
| 3 |
size 71878996
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7a3933da542d8dfc5421dd39329fbc22a08897e0013e8ccf4211b3870efde85
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88b86e2ef4af2ecabfaf8859176e0c352600a43b6f303ea279fbdaec73765bdc
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 100,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -21963,6 +21963,714 @@
|
|
| 21963 |
"eval_samples_per_second": 39.073,
|
| 21964 |
"eval_steps_per_second": 9.768,
|
| 21965 |
"step": 3100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21966 |
}
|
| 21967 |
],
|
| 21968 |
"logging_steps": 1,
|
|
@@ -21982,7 +22690,7 @@
|
|
| 21982 |
"attributes": {}
|
| 21983 |
}
|
| 21984 |
},
|
| 21985 |
-
"total_flos": 1.
|
| 21986 |
"train_batch_size": 4,
|
| 21987 |
"trial_name": null,
|
| 21988 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.8577076423091098,
|
| 5 |
"eval_steps": 100,
|
| 6 |
+
"global_step": 3200,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 21963 |
"eval_samples_per_second": 39.073,
|
| 21964 |
"eval_steps_per_second": 9.768,
|
| 21965 |
"step": 3100
|
| 21966 |
+
},
|
| 21967 |
+
{
|
| 21968 |
+
"epoch": 0.8311723121251717,
|
| 21969 |
+
"grad_norm": 3.765991687774658,
|
| 21970 |
+
"learning_rate": 8.251519339467657e-05,
|
| 21971 |
+
"loss": 3.3596,
|
| 21972 |
+
"step": 3101
|
| 21973 |
+
},
|
| 21974 |
+
{
|
| 21975 |
+
"epoch": 0.8314403457633933,
|
| 21976 |
+
"grad_norm": 2.9936914443969727,
|
| 21977 |
+
"learning_rate": 8.250449951301103e-05,
|
| 21978 |
+
"loss": 2.7545,
|
| 21979 |
+
"step": 3102
|
| 21980 |
+
},
|
| 21981 |
+
{
|
| 21982 |
+
"epoch": 0.8317083794016149,
|
| 21983 |
+
"grad_norm": 2.693584442138672,
|
| 21984 |
+
"learning_rate": 8.249380305552949e-05,
|
| 21985 |
+
"loss": 2.6637,
|
| 21986 |
+
"step": 3103
|
| 21987 |
+
},
|
| 21988 |
+
{
|
| 21989 |
+
"epoch": 0.8319764130398365,
|
| 21990 |
+
"grad_norm": 3.2070119380950928,
|
| 21991 |
+
"learning_rate": 8.248310402307961e-05,
|
| 21992 |
+
"loss": 2.9294,
|
| 21993 |
+
"step": 3104
|
| 21994 |
+
},
|
| 21995 |
+
{
|
| 21996 |
+
"epoch": 0.8322444466780581,
|
| 21997 |
+
"grad_norm": 3.2056386470794678,
|
| 21998 |
+
"learning_rate": 8.247240241650918e-05,
|
| 21999 |
+
"loss": 2.9327,
|
| 22000 |
+
"step": 3105
|
| 22001 |
+
},
|
| 22002 |
+
{
|
| 22003 |
+
"epoch": 0.8325124803162797,
|
| 22004 |
+
"grad_norm": 2.5587897300720215,
|
| 22005 |
+
"learning_rate": 8.24616982366663e-05,
|
| 22006 |
+
"loss": 2.7247,
|
| 22007 |
+
"step": 3106
|
| 22008 |
+
},
|
| 22009 |
+
{
|
| 22010 |
+
"epoch": 0.8327805139545013,
|
| 22011 |
+
"grad_norm": 3.0326316356658936,
|
| 22012 |
+
"learning_rate": 8.24509914843992e-05,
|
| 22013 |
+
"loss": 2.8525,
|
| 22014 |
+
"step": 3107
|
| 22015 |
+
},
|
| 22016 |
+
{
|
| 22017 |
+
"epoch": 0.8330485475927228,
|
| 22018 |
+
"grad_norm": 3.1906213760375977,
|
| 22019 |
+
"learning_rate": 8.244028216055634e-05,
|
| 22020 |
+
"loss": 2.9844,
|
| 22021 |
+
"step": 3108
|
| 22022 |
+
},
|
| 22023 |
+
{
|
| 22024 |
+
"epoch": 0.8333165812309444,
|
| 22025 |
+
"grad_norm": 3.0366783142089844,
|
| 22026 |
+
"learning_rate": 8.242957026598638e-05,
|
| 22027 |
+
"loss": 2.8466,
|
| 22028 |
+
"step": 3109
|
| 22029 |
+
},
|
| 22030 |
+
{
|
| 22031 |
+
"epoch": 0.8335846148691661,
|
| 22032 |
+
"grad_norm": 2.767413377761841,
|
| 22033 |
+
"learning_rate": 8.241885580153818e-05,
|
| 22034 |
+
"loss": 2.773,
|
| 22035 |
+
"step": 3110
|
| 22036 |
+
},
|
| 22037 |
+
{
|
| 22038 |
+
"epoch": 0.8338526485073877,
|
| 22039 |
+
"grad_norm": 2.5532186031341553,
|
| 22040 |
+
"learning_rate": 8.240813876806079e-05,
|
| 22041 |
+
"loss": 2.4941,
|
| 22042 |
+
"step": 3111
|
| 22043 |
+
},
|
| 22044 |
+
{
|
| 22045 |
+
"epoch": 0.8341206821456093,
|
| 22046 |
+
"grad_norm": 2.848140001296997,
|
| 22047 |
+
"learning_rate": 8.239741916640351e-05,
|
| 22048 |
+
"loss": 2.8352,
|
| 22049 |
+
"step": 3112
|
| 22050 |
+
},
|
| 22051 |
+
{
|
| 22052 |
+
"epoch": 0.8343887157838309,
|
| 22053 |
+
"grad_norm": 2.937314987182617,
|
| 22054 |
+
"learning_rate": 8.23866969974158e-05,
|
| 22055 |
+
"loss": 2.6597,
|
| 22056 |
+
"step": 3113
|
| 22057 |
+
},
|
| 22058 |
+
{
|
| 22059 |
+
"epoch": 0.8346567494220525,
|
| 22060 |
+
"grad_norm": 2.6668167114257812,
|
| 22061 |
+
"learning_rate": 8.237597226194733e-05,
|
| 22062 |
+
"loss": 2.6354,
|
| 22063 |
+
"step": 3114
|
| 22064 |
+
},
|
| 22065 |
+
{
|
| 22066 |
+
"epoch": 0.8349247830602741,
|
| 22067 |
+
"grad_norm": 3.1095848083496094,
|
| 22068 |
+
"learning_rate": 8.236524496084801e-05,
|
| 22069 |
+
"loss": 2.8998,
|
| 22070 |
+
"step": 3115
|
| 22071 |
+
},
|
| 22072 |
+
{
|
| 22073 |
+
"epoch": 0.8351928166984957,
|
| 22074 |
+
"grad_norm": 2.8646724224090576,
|
| 22075 |
+
"learning_rate": 8.235451509496789e-05,
|
| 22076 |
+
"loss": 2.7162,
|
| 22077 |
+
"step": 3116
|
| 22078 |
+
},
|
| 22079 |
+
{
|
| 22080 |
+
"epoch": 0.8354608503367172,
|
| 22081 |
+
"grad_norm": 3.2314741611480713,
|
| 22082 |
+
"learning_rate": 8.234378266515727e-05,
|
| 22083 |
+
"loss": 2.963,
|
| 22084 |
+
"step": 3117
|
| 22085 |
+
},
|
| 22086 |
+
{
|
| 22087 |
+
"epoch": 0.8357288839749388,
|
| 22088 |
+
"grad_norm": 2.798070192337036,
|
| 22089 |
+
"learning_rate": 8.233304767226663e-05,
|
| 22090 |
+
"loss": 2.7093,
|
| 22091 |
+
"step": 3118
|
| 22092 |
+
},
|
| 22093 |
+
{
|
| 22094 |
+
"epoch": 0.8359969176131604,
|
| 22095 |
+
"grad_norm": 3.209622383117676,
|
| 22096 |
+
"learning_rate": 8.23223101171467e-05,
|
| 22097 |
+
"loss": 3.0303,
|
| 22098 |
+
"step": 3119
|
| 22099 |
+
},
|
| 22100 |
+
{
|
| 22101 |
+
"epoch": 0.836264951251382,
|
| 22102 |
+
"grad_norm": 3.1647841930389404,
|
| 22103 |
+
"learning_rate": 8.231157000064833e-05,
|
| 22104 |
+
"loss": 2.9435,
|
| 22105 |
+
"step": 3120
|
| 22106 |
+
},
|
| 22107 |
+
{
|
| 22108 |
+
"epoch": 0.8365329848896036,
|
| 22109 |
+
"grad_norm": 3.2297987937927246,
|
| 22110 |
+
"learning_rate": 8.230082732362264e-05,
|
| 22111 |
+
"loss": 3.0019,
|
| 22112 |
+
"step": 3121
|
| 22113 |
+
},
|
| 22114 |
+
{
|
| 22115 |
+
"epoch": 0.8368010185278253,
|
| 22116 |
+
"grad_norm": 2.8666510581970215,
|
| 22117 |
+
"learning_rate": 8.229008208692093e-05,
|
| 22118 |
+
"loss": 2.7266,
|
| 22119 |
+
"step": 3122
|
| 22120 |
+
},
|
| 22121 |
+
{
|
| 22122 |
+
"epoch": 0.8370690521660469,
|
| 22123 |
+
"grad_norm": 2.94901442527771,
|
| 22124 |
+
"learning_rate": 8.227933429139471e-05,
|
| 22125 |
+
"loss": 2.9068,
|
| 22126 |
+
"step": 3123
|
| 22127 |
+
},
|
| 22128 |
+
{
|
| 22129 |
+
"epoch": 0.8373370858042685,
|
| 22130 |
+
"grad_norm": 3.306313991546631,
|
| 22131 |
+
"learning_rate": 8.226858393789571e-05,
|
| 22132 |
+
"loss": 2.9639,
|
| 22133 |
+
"step": 3124
|
| 22134 |
+
},
|
| 22135 |
+
{
|
| 22136 |
+
"epoch": 0.83760511944249,
|
| 22137 |
+
"grad_norm": 3.987175464630127,
|
| 22138 |
+
"learning_rate": 8.22578310272758e-05,
|
| 22139 |
+
"loss": 2.7636,
|
| 22140 |
+
"step": 3125
|
| 22141 |
+
},
|
| 22142 |
+
{
|
| 22143 |
+
"epoch": 0.8378731530807116,
|
| 22144 |
+
"grad_norm": 2.8015623092651367,
|
| 22145 |
+
"learning_rate": 8.22470755603871e-05,
|
| 22146 |
+
"loss": 2.7572,
|
| 22147 |
+
"step": 3126
|
| 22148 |
+
},
|
| 22149 |
+
{
|
| 22150 |
+
"epoch": 0.8381411867189332,
|
| 22151 |
+
"grad_norm": 2.992177724838257,
|
| 22152 |
+
"learning_rate": 8.223631753808195e-05,
|
| 22153 |
+
"loss": 2.8723,
|
| 22154 |
+
"step": 3127
|
| 22155 |
+
},
|
| 22156 |
+
{
|
| 22157 |
+
"epoch": 0.8384092203571548,
|
| 22158 |
+
"grad_norm": 3.115063190460205,
|
| 22159 |
+
"learning_rate": 8.222555696121283e-05,
|
| 22160 |
+
"loss": 3.0286,
|
| 22161 |
+
"step": 3128
|
| 22162 |
+
},
|
| 22163 |
+
{
|
| 22164 |
+
"epoch": 0.8386772539953764,
|
| 22165 |
+
"grad_norm": 3.0285449028015137,
|
| 22166 |
+
"learning_rate": 8.22147938306325e-05,
|
| 22167 |
+
"loss": 2.826,
|
| 22168 |
+
"step": 3129
|
| 22169 |
+
},
|
| 22170 |
+
{
|
| 22171 |
+
"epoch": 0.838945287633598,
|
| 22172 |
+
"grad_norm": 3.2307591438293457,
|
| 22173 |
+
"learning_rate": 8.220402814719387e-05,
|
| 22174 |
+
"loss": 2.8402,
|
| 22175 |
+
"step": 3130
|
| 22176 |
+
},
|
| 22177 |
+
{
|
| 22178 |
+
"epoch": 0.8392133212718196,
|
| 22179 |
+
"grad_norm": 2.8563008308410645,
|
| 22180 |
+
"learning_rate": 8.219325991175009e-05,
|
| 22181 |
+
"loss": 2.5679,
|
| 22182 |
+
"step": 3131
|
| 22183 |
+
},
|
| 22184 |
+
{
|
| 22185 |
+
"epoch": 0.8394813549100412,
|
| 22186 |
+
"grad_norm": 2.7312331199645996,
|
| 22187 |
+
"learning_rate": 8.218248912515442e-05,
|
| 22188 |
+
"loss": 2.6174,
|
| 22189 |
+
"step": 3132
|
| 22190 |
+
},
|
| 22191 |
+
{
|
| 22192 |
+
"epoch": 0.8397493885482628,
|
| 22193 |
+
"grad_norm": 2.7081151008605957,
|
| 22194 |
+
"learning_rate": 8.217171578826046e-05,
|
| 22195 |
+
"loss": 2.9534,
|
| 22196 |
+
"step": 3133
|
| 22197 |
+
},
|
| 22198 |
+
{
|
| 22199 |
+
"epoch": 0.8400174221864845,
|
| 22200 |
+
"grad_norm": 2.9271397590637207,
|
| 22201 |
+
"learning_rate": 8.21609399019219e-05,
|
| 22202 |
+
"loss": 3.0468,
|
| 22203 |
+
"step": 3134
|
| 22204 |
+
},
|
| 22205 |
+
{
|
| 22206 |
+
"epoch": 0.840285455824706,
|
| 22207 |
+
"grad_norm": 3.3061866760253906,
|
| 22208 |
+
"learning_rate": 8.21501614669927e-05,
|
| 22209 |
+
"loss": 3.0003,
|
| 22210 |
+
"step": 3135
|
| 22211 |
+
},
|
| 22212 |
+
{
|
| 22213 |
+
"epoch": 0.8405534894629276,
|
| 22214 |
+
"grad_norm": 3.312804937362671,
|
| 22215 |
+
"learning_rate": 8.213938048432697e-05,
|
| 22216 |
+
"loss": 3.0239,
|
| 22217 |
+
"step": 3136
|
| 22218 |
+
},
|
| 22219 |
+
{
|
| 22220 |
+
"epoch": 0.8408215231011492,
|
| 22221 |
+
"grad_norm": 2.8543965816497803,
|
| 22222 |
+
"learning_rate": 8.212859695477906e-05,
|
| 22223 |
+
"loss": 2.5733,
|
| 22224 |
+
"step": 3137
|
| 22225 |
+
},
|
| 22226 |
+
{
|
| 22227 |
+
"epoch": 0.8410895567393708,
|
| 22228 |
+
"grad_norm": 3.0889506340026855,
|
| 22229 |
+
"learning_rate": 8.211781087920353e-05,
|
| 22230 |
+
"loss": 2.9983,
|
| 22231 |
+
"step": 3138
|
| 22232 |
+
},
|
| 22233 |
+
{
|
| 22234 |
+
"epoch": 0.8413575903775924,
|
| 22235 |
+
"grad_norm": 3.14336895942688,
|
| 22236 |
+
"learning_rate": 8.210702225845511e-05,
|
| 22237 |
+
"loss": 2.8062,
|
| 22238 |
+
"step": 3139
|
| 22239 |
+
},
|
| 22240 |
+
{
|
| 22241 |
+
"epoch": 0.841625624015814,
|
| 22242 |
+
"grad_norm": 2.58722186088562,
|
| 22243 |
+
"learning_rate": 8.209623109338871e-05,
|
| 22244 |
+
"loss": 2.7192,
|
| 22245 |
+
"step": 3140
|
| 22246 |
+
},
|
| 22247 |
+
{
|
| 22248 |
+
"epoch": 0.8418936576540356,
|
| 22249 |
+
"grad_norm": 3.014845371246338,
|
| 22250 |
+
"learning_rate": 8.20854373848595e-05,
|
| 22251 |
+
"loss": 3.005,
|
| 22252 |
+
"step": 3141
|
| 22253 |
+
},
|
| 22254 |
+
{
|
| 22255 |
+
"epoch": 0.8421616912922572,
|
| 22256 |
+
"grad_norm": 2.933680534362793,
|
| 22257 |
+
"learning_rate": 8.207464113372283e-05,
|
| 22258 |
+
"loss": 2.97,
|
| 22259 |
+
"step": 3142
|
| 22260 |
+
},
|
| 22261 |
+
{
|
| 22262 |
+
"epoch": 0.8424297249304787,
|
| 22263 |
+
"grad_norm": 2.8684470653533936,
|
| 22264 |
+
"learning_rate": 8.206384234083427e-05,
|
| 22265 |
+
"loss": 2.5981,
|
| 22266 |
+
"step": 3143
|
| 22267 |
+
},
|
| 22268 |
+
{
|
| 22269 |
+
"epoch": 0.8426977585687003,
|
| 22270 |
+
"grad_norm": 3.119670867919922,
|
| 22271 |
+
"learning_rate": 8.205304100704953e-05,
|
| 22272 |
+
"loss": 2.8613,
|
| 22273 |
+
"step": 3144
|
| 22274 |
+
},
|
| 22275 |
+
{
|
| 22276 |
+
"epoch": 0.8429657922069219,
|
| 22277 |
+
"grad_norm": 2.9081950187683105,
|
| 22278 |
+
"learning_rate": 8.204223713322457e-05,
|
| 22279 |
+
"loss": 2.7478,
|
| 22280 |
+
"step": 3145
|
| 22281 |
+
},
|
| 22282 |
+
{
|
| 22283 |
+
"epoch": 0.8432338258451436,
|
| 22284 |
+
"grad_norm": 2.779106855392456,
|
| 22285 |
+
"learning_rate": 8.203143072021556e-05,
|
| 22286 |
+
"loss": 2.9309,
|
| 22287 |
+
"step": 3146
|
| 22288 |
+
},
|
| 22289 |
+
{
|
| 22290 |
+
"epoch": 0.8435018594833652,
|
| 22291 |
+
"grad_norm": 2.790799379348755,
|
| 22292 |
+
"learning_rate": 8.202062176887883e-05,
|
| 22293 |
+
"loss": 2.7294,
|
| 22294 |
+
"step": 3147
|
| 22295 |
+
},
|
| 22296 |
+
{
|
| 22297 |
+
"epoch": 0.8437698931215868,
|
| 22298 |
+
"grad_norm": 3.0613210201263428,
|
| 22299 |
+
"learning_rate": 8.200981028007095e-05,
|
| 22300 |
+
"loss": 2.876,
|
| 22301 |
+
"step": 3148
|
| 22302 |
+
},
|
| 22303 |
+
{
|
| 22304 |
+
"epoch": 0.8440379267598084,
|
| 22305 |
+
"grad_norm": 2.910881280899048,
|
| 22306 |
+
"learning_rate": 8.199899625464867e-05,
|
| 22307 |
+
"loss": 2.5908,
|
| 22308 |
+
"step": 3149
|
| 22309 |
+
},
|
| 22310 |
+
{
|
| 22311 |
+
"epoch": 0.84430596039803,
|
| 22312 |
+
"grad_norm": 2.8150923252105713,
|
| 22313 |
+
"learning_rate": 8.198817969346894e-05,
|
| 22314 |
+
"loss": 2.7145,
|
| 22315 |
+
"step": 3150
|
| 22316 |
+
},
|
| 22317 |
+
{
|
| 22318 |
+
"epoch": 0.8445739940362516,
|
| 22319 |
+
"grad_norm": 2.830840587615967,
|
| 22320 |
+
"learning_rate": 8.197736059738894e-05,
|
| 22321 |
+
"loss": 2.74,
|
| 22322 |
+
"step": 3151
|
| 22323 |
+
},
|
| 22324 |
+
{
|
| 22325 |
+
"epoch": 0.8448420276744731,
|
| 22326 |
+
"grad_norm": 2.8052470684051514,
|
| 22327 |
+
"learning_rate": 8.196653896726601e-05,
|
| 22328 |
+
"loss": 2.6825,
|
| 22329 |
+
"step": 3152
|
| 22330 |
+
},
|
| 22331 |
+
{
|
| 22332 |
+
"epoch": 0.8451100613126947,
|
| 22333 |
+
"grad_norm": 2.8668699264526367,
|
| 22334 |
+
"learning_rate": 8.19557148039577e-05,
|
| 22335 |
+
"loss": 2.9702,
|
| 22336 |
+
"step": 3153
|
| 22337 |
+
},
|
| 22338 |
+
{
|
| 22339 |
+
"epoch": 0.8453780949509163,
|
| 22340 |
+
"grad_norm": 2.7870476245880127,
|
| 22341 |
+
"learning_rate": 8.194488810832179e-05,
|
| 22342 |
+
"loss": 2.951,
|
| 22343 |
+
"step": 3154
|
| 22344 |
+
},
|
| 22345 |
+
{
|
| 22346 |
+
"epoch": 0.8456461285891379,
|
| 22347 |
+
"grad_norm": 3.0203659534454346,
|
| 22348 |
+
"learning_rate": 8.193405888121622e-05,
|
| 22349 |
+
"loss": 2.8264,
|
| 22350 |
+
"step": 3155
|
| 22351 |
+
},
|
| 22352 |
+
{
|
| 22353 |
+
"epoch": 0.8459141622273595,
|
| 22354 |
+
"grad_norm": 3.251181125640869,
|
| 22355 |
+
"learning_rate": 8.192322712349917e-05,
|
| 22356 |
+
"loss": 3.0736,
|
| 22357 |
+
"step": 3156
|
| 22358 |
+
},
|
| 22359 |
+
{
|
| 22360 |
+
"epoch": 0.8461821958655811,
|
| 22361 |
+
"grad_norm": 2.867091417312622,
|
| 22362 |
+
"learning_rate": 8.1912392836029e-05,
|
| 22363 |
+
"loss": 2.5752,
|
| 22364 |
+
"step": 3157
|
| 22365 |
+
},
|
| 22366 |
+
{
|
| 22367 |
+
"epoch": 0.8464502295038028,
|
| 22368 |
+
"grad_norm": 3.3381619453430176,
|
| 22369 |
+
"learning_rate": 8.190155601966427e-05,
|
| 22370 |
+
"loss": 2.9668,
|
| 22371 |
+
"step": 3158
|
| 22372 |
+
},
|
| 22373 |
+
{
|
| 22374 |
+
"epoch": 0.8467182631420244,
|
| 22375 |
+
"grad_norm": 3.1242899894714355,
|
| 22376 |
+
"learning_rate": 8.189071667526373e-05,
|
| 22377 |
+
"loss": 2.8379,
|
| 22378 |
+
"step": 3159
|
| 22379 |
+
},
|
| 22380 |
+
{
|
| 22381 |
+
"epoch": 0.846986296780246,
|
| 22382 |
+
"grad_norm": 3.4567618370056152,
|
| 22383 |
+
"learning_rate": 8.187987480368637e-05,
|
| 22384 |
+
"loss": 3.0123,
|
| 22385 |
+
"step": 3160
|
| 22386 |
+
},
|
| 22387 |
+
{
|
| 22388 |
+
"epoch": 0.8472543304184675,
|
| 22389 |
+
"grad_norm": 2.864577293395996,
|
| 22390 |
+
"learning_rate": 8.186903040579131e-05,
|
| 22391 |
+
"loss": 2.5651,
|
| 22392 |
+
"step": 3161
|
| 22393 |
+
},
|
| 22394 |
+
{
|
| 22395 |
+
"epoch": 0.8475223640566891,
|
| 22396 |
+
"grad_norm": 3.081716775894165,
|
| 22397 |
+
"learning_rate": 8.185818348243796e-05,
|
| 22398 |
+
"loss": 2.775,
|
| 22399 |
+
"step": 3162
|
| 22400 |
+
},
|
| 22401 |
+
{
|
| 22402 |
+
"epoch": 0.8477903976949107,
|
| 22403 |
+
"grad_norm": 2.9339306354522705,
|
| 22404 |
+
"learning_rate": 8.184733403448585e-05,
|
| 22405 |
+
"loss": 2.4889,
|
| 22406 |
+
"step": 3163
|
| 22407 |
+
},
|
| 22408 |
+
{
|
| 22409 |
+
"epoch": 0.8480584313331323,
|
| 22410 |
+
"grad_norm": 3.1311564445495605,
|
| 22411 |
+
"learning_rate": 8.183648206279475e-05,
|
| 22412 |
+
"loss": 2.7713,
|
| 22413 |
+
"step": 3164
|
| 22414 |
+
},
|
| 22415 |
+
{
|
| 22416 |
+
"epoch": 0.8483264649713539,
|
| 22417 |
+
"grad_norm": 3.117518663406372,
|
| 22418 |
+
"learning_rate": 8.182562756822464e-05,
|
| 22419 |
+
"loss": 2.8101,
|
| 22420 |
+
"step": 3165
|
| 22421 |
+
},
|
| 22422 |
+
{
|
| 22423 |
+
"epoch": 0.8485944986095755,
|
| 22424 |
+
"grad_norm": 3.273800849914551,
|
| 22425 |
+
"learning_rate": 8.181477055163567e-05,
|
| 22426 |
+
"loss": 2.7604,
|
| 22427 |
+
"step": 3166
|
| 22428 |
+
},
|
| 22429 |
+
{
|
| 22430 |
+
"epoch": 0.8488625322477971,
|
| 22431 |
+
"grad_norm": 3.348430871963501,
|
| 22432 |
+
"learning_rate": 8.18039110138882e-05,
|
| 22433 |
+
"loss": 2.7476,
|
| 22434 |
+
"step": 3167
|
| 22435 |
+
},
|
| 22436 |
+
{
|
| 22437 |
+
"epoch": 0.8491305658860187,
|
| 22438 |
+
"grad_norm": 3.052398204803467,
|
| 22439 |
+
"learning_rate": 8.17930489558428e-05,
|
| 22440 |
+
"loss": 2.8796,
|
| 22441 |
+
"step": 3168
|
| 22442 |
+
},
|
| 22443 |
+
{
|
| 22444 |
+
"epoch": 0.8493985995242402,
|
| 22445 |
+
"grad_norm": 3.4873738288879395,
|
| 22446 |
+
"learning_rate": 8.178218437836023e-05,
|
| 22447 |
+
"loss": 2.7686,
|
| 22448 |
+
"step": 3169
|
| 22449 |
+
},
|
| 22450 |
+
{
|
| 22451 |
+
"epoch": 0.8496666331624619,
|
| 22452 |
+
"grad_norm": 3.0493202209472656,
|
| 22453 |
+
"learning_rate": 8.177131728230148e-05,
|
| 22454 |
+
"loss": 2.8739,
|
| 22455 |
+
"step": 3170
|
| 22456 |
+
},
|
| 22457 |
+
{
|
| 22458 |
+
"epoch": 0.8499346668006835,
|
| 22459 |
+
"grad_norm": 2.8752858638763428,
|
| 22460 |
+
"learning_rate": 8.176044766852766e-05,
|
| 22461 |
+
"loss": 2.7826,
|
| 22462 |
+
"step": 3171
|
| 22463 |
+
},
|
| 22464 |
+
{
|
| 22465 |
+
"epoch": 0.8502027004389051,
|
| 22466 |
+
"grad_norm": 3.091642379760742,
|
| 22467 |
+
"learning_rate": 8.174957553790014e-05,
|
| 22468 |
+
"loss": 2.8947,
|
| 22469 |
+
"step": 3172
|
| 22470 |
+
},
|
| 22471 |
+
{
|
| 22472 |
+
"epoch": 0.8504707340771267,
|
| 22473 |
+
"grad_norm": 3.901031255722046,
|
| 22474 |
+
"learning_rate": 8.173870089128053e-05,
|
| 22475 |
+
"loss": 3.2836,
|
| 22476 |
+
"step": 3173
|
| 22477 |
+
},
|
| 22478 |
+
{
|
| 22479 |
+
"epoch": 0.8507387677153483,
|
| 22480 |
+
"grad_norm": 3.0523319244384766,
|
| 22481 |
+
"learning_rate": 8.172782372953055e-05,
|
| 22482 |
+
"loss": 3.0609,
|
| 22483 |
+
"step": 3174
|
| 22484 |
+
},
|
| 22485 |
+
{
|
| 22486 |
+
"epoch": 0.8510068013535699,
|
| 22487 |
+
"grad_norm": 2.8478736877441406,
|
| 22488 |
+
"learning_rate": 8.171694405351216e-05,
|
| 22489 |
+
"loss": 2.7312,
|
| 22490 |
+
"step": 3175
|
| 22491 |
+
},
|
| 22492 |
+
{
|
| 22493 |
+
"epoch": 0.8512748349917915,
|
| 22494 |
+
"grad_norm": 3.260422706604004,
|
| 22495 |
+
"learning_rate": 8.170606186408755e-05,
|
| 22496 |
+
"loss": 3.0813,
|
| 22497 |
+
"step": 3176
|
| 22498 |
+
},
|
| 22499 |
+
{
|
| 22500 |
+
"epoch": 0.851542868630013,
|
| 22501 |
+
"grad_norm": 3.0320775508880615,
|
| 22502 |
+
"learning_rate": 8.169517716211902e-05,
|
| 22503 |
+
"loss": 2.6246,
|
| 22504 |
+
"step": 3177
|
| 22505 |
+
},
|
| 22506 |
+
{
|
| 22507 |
+
"epoch": 0.8518109022682346,
|
| 22508 |
+
"grad_norm": 3.255002021789551,
|
| 22509 |
+
"learning_rate": 8.168428994846919e-05,
|
| 22510 |
+
"loss": 2.6883,
|
| 22511 |
+
"step": 3178
|
| 22512 |
+
},
|
| 22513 |
+
{
|
| 22514 |
+
"epoch": 0.8520789359064562,
|
| 22515 |
+
"grad_norm": 2.799616813659668,
|
| 22516 |
+
"learning_rate": 8.167340022400078e-05,
|
| 22517 |
+
"loss": 2.4905,
|
| 22518 |
+
"step": 3179
|
| 22519 |
+
},
|
| 22520 |
+
{
|
| 22521 |
+
"epoch": 0.8523469695446778,
|
| 22522 |
+
"grad_norm": 3.230668067932129,
|
| 22523 |
+
"learning_rate": 8.166250798957676e-05,
|
| 22524 |
+
"loss": 2.7647,
|
| 22525 |
+
"step": 3180
|
| 22526 |
+
},
|
| 22527 |
+
{
|
| 22528 |
+
"epoch": 0.8526150031828994,
|
| 22529 |
+
"grad_norm": 2.8910818099975586,
|
| 22530 |
+
"learning_rate": 8.165161324606026e-05,
|
| 22531 |
+
"loss": 3.0522,
|
| 22532 |
+
"step": 3181
|
| 22533 |
+
},
|
| 22534 |
+
{
|
| 22535 |
+
"epoch": 0.8528830368211211,
|
| 22536 |
+
"grad_norm": 2.608095407485962,
|
| 22537 |
+
"learning_rate": 8.164071599431467e-05,
|
| 22538 |
+
"loss": 2.9134,
|
| 22539 |
+
"step": 3182
|
| 22540 |
+
},
|
| 22541 |
+
{
|
| 22542 |
+
"epoch": 0.8531510704593427,
|
| 22543 |
+
"grad_norm": 3.173567533493042,
|
| 22544 |
+
"learning_rate": 8.162981623520352e-05,
|
| 22545 |
+
"loss": 3.1328,
|
| 22546 |
+
"step": 3183
|
| 22547 |
+
},
|
| 22548 |
+
{
|
| 22549 |
+
"epoch": 0.8534191040975643,
|
| 22550 |
+
"grad_norm": 3.0655722618103027,
|
| 22551 |
+
"learning_rate": 8.161891396959057e-05,
|
| 22552 |
+
"loss": 2.6358,
|
| 22553 |
+
"step": 3184
|
| 22554 |
+
},
|
| 22555 |
+
{
|
| 22556 |
+
"epoch": 0.8536871377357859,
|
| 22557 |
+
"grad_norm": 2.8257834911346436,
|
| 22558 |
+
"learning_rate": 8.160800919833973e-05,
|
| 22559 |
+
"loss": 2.8067,
|
| 22560 |
+
"step": 3185
|
| 22561 |
+
},
|
| 22562 |
+
{
|
| 22563 |
+
"epoch": 0.8539551713740074,
|
| 22564 |
+
"grad_norm": 3.129920721054077,
|
| 22565 |
+
"learning_rate": 8.15971019223152e-05,
|
| 22566 |
+
"loss": 2.9928,
|
| 22567 |
+
"step": 3186
|
| 22568 |
+
},
|
| 22569 |
+
{
|
| 22570 |
+
"epoch": 0.854223205012229,
|
| 22571 |
+
"grad_norm": 3.318128824234009,
|
| 22572 |
+
"learning_rate": 8.15861921423813e-05,
|
| 22573 |
+
"loss": 2.6948,
|
| 22574 |
+
"step": 3187
|
| 22575 |
+
},
|
| 22576 |
+
{
|
| 22577 |
+
"epoch": 0.8544912386504506,
|
| 22578 |
+
"grad_norm": 2.9098756313323975,
|
| 22579 |
+
"learning_rate": 8.157527985940258e-05,
|
| 22580 |
+
"loss": 2.833,
|
| 22581 |
+
"step": 3188
|
| 22582 |
+
},
|
| 22583 |
+
{
|
| 22584 |
+
"epoch": 0.8547592722886722,
|
| 22585 |
+
"grad_norm": 3.071049690246582,
|
| 22586 |
+
"learning_rate": 8.15643650742438e-05,
|
| 22587 |
+
"loss": 2.7623,
|
| 22588 |
+
"step": 3189
|
| 22589 |
+
},
|
| 22590 |
+
{
|
| 22591 |
+
"epoch": 0.8550273059268938,
|
| 22592 |
+
"grad_norm": 3.198852300643921,
|
| 22593 |
+
"learning_rate": 8.155344778776987e-05,
|
| 22594 |
+
"loss": 2.496,
|
| 22595 |
+
"step": 3190
|
| 22596 |
+
},
|
| 22597 |
+
{
|
| 22598 |
+
"epoch": 0.8552953395651154,
|
| 22599 |
+
"grad_norm": 2.8775556087493896,
|
| 22600 |
+
"learning_rate": 8.154252800084595e-05,
|
| 22601 |
+
"loss": 2.991,
|
| 22602 |
+
"step": 3191
|
| 22603 |
+
},
|
| 22604 |
+
{
|
| 22605 |
+
"epoch": 0.855563373203337,
|
| 22606 |
+
"grad_norm": 3.092869281768799,
|
| 22607 |
+
"learning_rate": 8.153160571433737e-05,
|
| 22608 |
+
"loss": 2.8313,
|
| 22609 |
+
"step": 3192
|
| 22610 |
+
},
|
| 22611 |
+
{
|
| 22612 |
+
"epoch": 0.8558314068415586,
|
| 22613 |
+
"grad_norm": 2.7808609008789062,
|
| 22614 |
+
"learning_rate": 8.152068092910967e-05,
|
| 22615 |
+
"loss": 2.8599,
|
| 22616 |
+
"step": 3193
|
| 22617 |
+
},
|
| 22618 |
+
{
|
| 22619 |
+
"epoch": 0.8560994404797803,
|
| 22620 |
+
"grad_norm": 2.8283629417419434,
|
| 22621 |
+
"learning_rate": 8.150975364602857e-05,
|
| 22622 |
+
"loss": 2.8706,
|
| 22623 |
+
"step": 3194
|
| 22624 |
+
},
|
| 22625 |
+
{
|
| 22626 |
+
"epoch": 0.8563674741180018,
|
| 22627 |
+
"grad_norm": 2.813171863555908,
|
| 22628 |
+
"learning_rate": 8.149882386596002e-05,
|
| 22629 |
+
"loss": 2.8007,
|
| 22630 |
+
"step": 3195
|
| 22631 |
+
},
|
| 22632 |
+
{
|
| 22633 |
+
"epoch": 0.8566355077562234,
|
| 22634 |
+
"grad_norm": 2.916139602661133,
|
| 22635 |
+
"learning_rate": 8.148789158977013e-05,
|
| 22636 |
+
"loss": 2.973,
|
| 22637 |
+
"step": 3196
|
| 22638 |
+
},
|
| 22639 |
+
{
|
| 22640 |
+
"epoch": 0.856903541394445,
|
| 22641 |
+
"grad_norm": 2.494389533996582,
|
| 22642 |
+
"learning_rate": 8.147695681832523e-05,
|
| 22643 |
+
"loss": 2.702,
|
| 22644 |
+
"step": 3197
|
| 22645 |
+
},
|
| 22646 |
+
{
|
| 22647 |
+
"epoch": 0.8571715750326666,
|
| 22648 |
+
"grad_norm": 2.7285983562469482,
|
| 22649 |
+
"learning_rate": 8.146601955249188e-05,
|
| 22650 |
+
"loss": 2.757,
|
| 22651 |
+
"step": 3198
|
| 22652 |
+
},
|
| 22653 |
+
{
|
| 22654 |
+
"epoch": 0.8574396086708882,
|
| 22655 |
+
"grad_norm": 3.0228183269500732,
|
| 22656 |
+
"learning_rate": 8.145507979313675e-05,
|
| 22657 |
+
"loss": 2.8903,
|
| 22658 |
+
"step": 3199
|
| 22659 |
+
},
|
| 22660 |
+
{
|
| 22661 |
+
"epoch": 0.8577076423091098,
|
| 22662 |
+
"grad_norm": 2.881899118423462,
|
| 22663 |
+
"learning_rate": 8.14441375411268e-05,
|
| 22664 |
+
"loss": 2.89,
|
| 22665 |
+
"step": 3200
|
| 22666 |
+
},
|
| 22667 |
+
{
|
| 22668 |
+
"epoch": 0.8577076423091098,
|
| 22669 |
+
"eval_loss": 2.834207057952881,
|
| 22670 |
+
"eval_runtime": 15.3558,
|
| 22671 |
+
"eval_samples_per_second": 39.073,
|
| 22672 |
+
"eval_steps_per_second": 9.768,
|
| 22673 |
+
"step": 3200
|
| 22674 |
}
|
| 22675 |
],
|
| 22676 |
"logging_steps": 1,
|
|
|
|
| 22690 |
"attributes": {}
|
| 22691 |
}
|
| 22692 |
},
|
| 22693 |
+
"total_flos": 1.236557189087232e+17,
|
| 22694 |
"train_batch_size": 4,
|
| 22695 |
"trial_name": null,
|
| 22696 |
"trial_params": null
|