Training in progress, epoch 1, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step9900/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step9900/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step9900/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step9900/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step9900/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +316 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80d57dd1f2aa0bde9f3bd55de9a262b8de6b8609d2e7c1343bf3751d42242354
|
| 3 |
size 1037269336
|
last-checkpoint/global_step9900/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d04fea83d85a84f793425ad94dd0b5eabd724b841c0109c2f5dfdd72f0429f15
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step9900/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:724d23b792de6b58f7369f916910aedbdb64d653cae44381266f59e3251da219
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step9900/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f615a7cbf6fa0b3bc685fe9e2f265dfd87c265b0db5cb00b804400ef20670f8
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step9900/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5462fbd5dd6265ec39a6a030b1dcfff55166fdbc452d28cdc03c5103b4835fed
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step9900/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:126e49f2ceba5214fc20df2deca8ca69a6fee6bcb0ef9f50375b18efba3677f0
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step9900
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d91dc1dd14f8f32c0f2217452eabdba7d9d5c72d5834c18f2d9a544844a06ea2
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb9b54face23724bdaca5ec09618f36e5c2b8f499be332a0f9475dbaf3eefc21
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f28748778c6ae6a9269ab98073eb87225303dfa4aad70ad7fd421f531885ed96
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae8077b24d1ab7d135f5d3fd1b77e547df789862744ffa297b2d183e7403fce2
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0bb734018af63817744f06e0b869d778449a4d39f667f516ea0fba502652490
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -15140,6 +15140,318 @@
|
|
| 15140 |
"eval_samples_per_second": 173.601,
|
| 15141 |
"eval_steps_per_second": 10.886,
|
| 15142 |
"step": 9700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15143 |
}
|
| 15144 |
],
|
| 15145 |
"logging_steps": 5,
|
|
@@ -15168,7 +15480,7 @@
|
|
| 15168 |
"attributes": {}
|
| 15169 |
}
|
| 15170 |
},
|
| 15171 |
-
"total_flos": 2.
|
| 15172 |
"train_batch_size": 4,
|
| 15173 |
"trial_name": null,
|
| 15174 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.5213963985443115,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.4391626689925863,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 9900,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 15140 |
"eval_samples_per_second": 173.601,
|
| 15141 |
"eval_steps_per_second": 10.886,
|
| 15142 |
"step": 9700
|
| 15143 |
+
},
|
| 15144 |
+
{
|
| 15145 |
+
"epoch": 1.410815525512429,
|
| 15146 |
+
"grad_norm": 2.4059064388275146,
|
| 15147 |
+
"learning_rate": 2.20844720632845e-07,
|
| 15148 |
+
"loss": 1.6476,
|
| 15149 |
+
"step": 9705
|
| 15150 |
+
},
|
| 15151 |
+
{
|
| 15152 |
+
"epoch": 1.4115423753452536,
|
| 15153 |
+
"grad_norm": 2.4945123195648193,
|
| 15154 |
+
"learning_rate": 2.134520232126146e-07,
|
| 15155 |
+
"loss": 1.6076,
|
| 15156 |
+
"step": 9710
|
| 15157 |
+
},
|
| 15158 |
+
{
|
| 15159 |
+
"epoch": 1.4122692251780782,
|
| 15160 |
+
"grad_norm": 2.8330612182617188,
|
| 15161 |
+
"learning_rate": 2.0618491100792133e-07,
|
| 15162 |
+
"loss": 1.5924,
|
| 15163 |
+
"step": 9715
|
| 15164 |
+
},
|
| 15165 |
+
{
|
| 15166 |
+
"epoch": 1.4129960750109027,
|
| 15167 |
+
"grad_norm": 2.243333339691162,
|
| 15168 |
+
"learning_rate": 1.990434023137036e-07,
|
| 15169 |
+
"loss": 1.5927,
|
| 15170 |
+
"step": 9720
|
| 15171 |
+
},
|
| 15172 |
+
{
|
| 15173 |
+
"epoch": 1.4137229248437273,
|
| 15174 |
+
"grad_norm": 2.5298187732696533,
|
| 15175 |
+
"learning_rate": 1.9202751510870365e-07,
|
| 15176 |
+
"loss": 1.5937,
|
| 15177 |
+
"step": 9725
|
| 15178 |
+
},
|
| 15179 |
+
{
|
| 15180 |
+
"epoch": 1.4144497746765519,
|
| 15181 |
+
"grad_norm": 2.333787679672241,
|
| 15182 |
+
"learning_rate": 1.851372670554175e-07,
|
| 15183 |
+
"loss": 1.5879,
|
| 15184 |
+
"step": 9730
|
| 15185 |
+
},
|
| 15186 |
+
{
|
| 15187 |
+
"epoch": 1.4151766245093764,
|
| 15188 |
+
"grad_norm": 2.3179080486297607,
|
| 15189 |
+
"learning_rate": 1.7837267550002254e-07,
|
| 15190 |
+
"loss": 1.3676,
|
| 15191 |
+
"step": 9735
|
| 15192 |
+
},
|
| 15193 |
+
{
|
| 15194 |
+
"epoch": 1.4159034743422008,
|
| 15195 |
+
"grad_norm": 2.2587573528289795,
|
| 15196 |
+
"learning_rate": 1.7173375747237766e-07,
|
| 15197 |
+
"loss": 1.6639,
|
| 15198 |
+
"step": 9740
|
| 15199 |
+
},
|
| 15200 |
+
{
|
| 15201 |
+
"epoch": 1.4166303241750255,
|
| 15202 |
+
"grad_norm": 2.4788784980773926,
|
| 15203 |
+
"learning_rate": 1.6522052968595648e-07,
|
| 15204 |
+
"loss": 1.6174,
|
| 15205 |
+
"step": 9745
|
| 15206 |
+
},
|
| 15207 |
+
{
|
| 15208 |
+
"epoch": 1.4173571740078499,
|
| 15209 |
+
"grad_norm": 2.38806414604187,
|
| 15210 |
+
"learning_rate": 1.5883300853778604e-07,
|
| 15211 |
+
"loss": 1.7383,
|
| 15212 |
+
"step": 9750
|
| 15213 |
+
},
|
| 15214 |
+
{
|
| 15215 |
+
"epoch": 1.4173571740078499,
|
| 15216 |
+
"eval_loss": 1.52242112159729,
|
| 15217 |
+
"eval_runtime": 19.1417,
|
| 15218 |
+
"eval_samples_per_second": 172.451,
|
| 15219 |
+
"eval_steps_per_second": 10.814,
|
| 15220 |
+
"step": 9750
|
| 15221 |
+
},
|
| 15222 |
+
{
|
| 15223 |
+
"epoch": 1.4180840238406744,
|
| 15224 |
+
"grad_norm": 2.2191879749298096,
|
| 15225 |
+
"learning_rate": 1.5257121010846365e-07,
|
| 15226 |
+
"loss": 1.5705,
|
| 15227 |
+
"step": 9755
|
| 15228 |
+
},
|
| 15229 |
+
{
|
| 15230 |
+
"epoch": 1.418810873673499,
|
| 15231 |
+
"grad_norm": 2.3617305755615234,
|
| 15232 |
+
"learning_rate": 1.464351501620456e-07,
|
| 15233 |
+
"loss": 1.5469,
|
| 15234 |
+
"step": 9760
|
| 15235 |
+
},
|
| 15236 |
+
{
|
| 15237 |
+
"epoch": 1.4195377235063236,
|
| 15238 |
+
"grad_norm": 2.5067806243896484,
|
| 15239 |
+
"learning_rate": 1.404248441460582e-07,
|
| 15240 |
+
"loss": 1.7018,
|
| 15241 |
+
"step": 9765
|
| 15242 |
+
},
|
| 15243 |
+
{
|
| 15244 |
+
"epoch": 1.4202645733391481,
|
| 15245 |
+
"grad_norm": 2.475242853164673,
|
| 15246 |
+
"learning_rate": 1.3454030719143674e-07,
|
| 15247 |
+
"loss": 1.6947,
|
| 15248 |
+
"step": 9770
|
| 15249 |
+
},
|
| 15250 |
+
{
|
| 15251 |
+
"epoch": 1.4209914231719727,
|
| 15252 |
+
"grad_norm": 2.2841944694519043,
|
| 15253 |
+
"learning_rate": 1.2878155411250307e-07,
|
| 15254 |
+
"loss": 1.6853,
|
| 15255 |
+
"step": 9775
|
| 15256 |
+
},
|
| 15257 |
+
{
|
| 15258 |
+
"epoch": 1.4217182730047973,
|
| 15259 |
+
"grad_norm": 2.1120128631591797,
|
| 15260 |
+
"learning_rate": 1.231485994069046e-07,
|
| 15261 |
+
"loss": 1.6585,
|
| 15262 |
+
"step": 9780
|
| 15263 |
+
},
|
| 15264 |
+
{
|
| 15265 |
+
"epoch": 1.4224451228376218,
|
| 15266 |
+
"grad_norm": 2.586662769317627,
|
| 15267 |
+
"learning_rate": 1.1764145725560866e-07,
|
| 15268 |
+
"loss": 1.699,
|
| 15269 |
+
"step": 9785
|
| 15270 |
+
},
|
| 15271 |
+
{
|
| 15272 |
+
"epoch": 1.4231719726704464,
|
| 15273 |
+
"grad_norm": 2.748775005340576,
|
| 15274 |
+
"learning_rate": 1.1226014152282453e-07,
|
| 15275 |
+
"loss": 1.5495,
|
| 15276 |
+
"step": 9790
|
| 15277 |
+
},
|
| 15278 |
+
{
|
| 15279 |
+
"epoch": 1.4238988225032707,
|
| 15280 |
+
"grad_norm": 2.5237104892730713,
|
| 15281 |
+
"learning_rate": 1.0700466575602029e-07,
|
| 15282 |
+
"loss": 1.5464,
|
| 15283 |
+
"step": 9795
|
| 15284 |
+
},
|
| 15285 |
+
{
|
| 15286 |
+
"epoch": 1.4246256723360955,
|
| 15287 |
+
"grad_norm": 2.8664605617523193,
|
| 15288 |
+
"learning_rate": 1.018750431858393e-07,
|
| 15289 |
+
"loss": 1.6628,
|
| 15290 |
+
"step": 9800
|
| 15291 |
+
},
|
| 15292 |
+
{
|
| 15293 |
+
"epoch": 1.4246256723360955,
|
| 15294 |
+
"eval_loss": 1.5222878456115723,
|
| 15295 |
+
"eval_runtime": 19.036,
|
| 15296 |
+
"eval_samples_per_second": 173.408,
|
| 15297 |
+
"eval_steps_per_second": 10.874,
|
| 15298 |
+
"step": 9800
|
| 15299 |
+
},
|
| 15300 |
+
{
|
| 15301 |
+
"epoch": 1.4253525221689198,
|
| 15302 |
+
"grad_norm": 2.546454668045044,
|
| 15303 |
+
"learning_rate": 9.687128672611134e-08,
|
| 15304 |
+
"loss": 1.7066,
|
| 15305 |
+
"step": 9805
|
| 15306 |
+
},
|
| 15307 |
+
{
|
| 15308 |
+
"epoch": 1.4260793720017444,
|
| 15309 |
+
"grad_norm": 2.584137201309204,
|
| 15310 |
+
"learning_rate": 9.199340897378033e-08,
|
| 15311 |
+
"loss": 1.6069,
|
| 15312 |
+
"step": 9810
|
| 15313 |
+
},
|
| 15314 |
+
{
|
| 15315 |
+
"epoch": 1.426806221834569,
|
| 15316 |
+
"grad_norm": 2.591409683227539,
|
| 15317 |
+
"learning_rate": 8.724142220889871e-08,
|
| 15318 |
+
"loss": 1.5393,
|
| 15319 |
+
"step": 9815
|
| 15320 |
+
},
|
| 15321 |
+
{
|
| 15322 |
+
"epoch": 1.4275330716673935,
|
| 15323 |
+
"grad_norm": 2.2875685691833496,
|
| 15324 |
+
"learning_rate": 8.261533839458856e-08,
|
| 15325 |
+
"loss": 1.4082,
|
| 15326 |
+
"step": 9820
|
| 15327 |
+
},
|
| 15328 |
+
{
|
| 15329 |
+
"epoch": 1.428259921500218,
|
| 15330 |
+
"grad_norm": 2.495056390762329,
|
| 15331 |
+
"learning_rate": 7.811516917700819e-08,
|
| 15332 |
+
"loss": 1.6082,
|
| 15333 |
+
"step": 9825
|
| 15334 |
+
},
|
| 15335 |
+
{
|
| 15336 |
+
"epoch": 1.4289867713330426,
|
| 15337 |
+
"grad_norm": 2.618781328201294,
|
| 15338 |
+
"learning_rate": 7.374092588532993e-08,
|
| 15339 |
+
"loss": 1.7317,
|
| 15340 |
+
"step": 9830
|
| 15341 |
+
},
|
| 15342 |
+
{
|
| 15343 |
+
"epoch": 1.4297136211658672,
|
| 15344 |
+
"grad_norm": 2.6624369621276855,
|
| 15345 |
+
"learning_rate": 6.949261953171231e-08,
|
| 15346 |
+
"loss": 1.6049,
|
| 15347 |
+
"step": 9835
|
| 15348 |
+
},
|
| 15349 |
+
{
|
| 15350 |
+
"epoch": 1.4304404709986915,
|
| 15351 |
+
"grad_norm": 3.055304527282715,
|
| 15352 |
+
"learning_rate": 6.537026081124995e-08,
|
| 15353 |
+
"loss": 1.6846,
|
| 15354 |
+
"step": 9840
|
| 15355 |
+
},
|
| 15356 |
+
{
|
| 15357 |
+
"epoch": 1.4311673208315163,
|
| 15358 |
+
"grad_norm": 2.291666269302368,
|
| 15359 |
+
"learning_rate": 6.137386010197918e-08,
|
| 15360 |
+
"loss": 1.5199,
|
| 15361 |
+
"step": 9845
|
| 15362 |
+
},
|
| 15363 |
+
{
|
| 15364 |
+
"epoch": 1.4318941706643407,
|
| 15365 |
+
"grad_norm": 2.2404119968414307,
|
| 15366 |
+
"learning_rate": 5.75034274648391e-08,
|
| 15367 |
+
"loss": 1.6049,
|
| 15368 |
+
"step": 9850
|
| 15369 |
+
},
|
| 15370 |
+
{
|
| 15371 |
+
"epoch": 1.4318941706643407,
|
| 15372 |
+
"eval_loss": 1.522445797920227,
|
| 15373 |
+
"eval_runtime": 18.961,
|
| 15374 |
+
"eval_samples_per_second": 174.094,
|
| 15375 |
+
"eval_steps_per_second": 10.917,
|
| 15376 |
+
"step": 9850
|
| 15377 |
+
},
|
| 15378 |
+
{
|
| 15379 |
+
"epoch": 1.4326210204971652,
|
| 15380 |
+
"grad_norm": 2.296211004257202,
|
| 15381 |
+
"learning_rate": 5.37589726436382e-08,
|
| 15382 |
+
"loss": 1.6874,
|
| 15383 |
+
"step": 9855
|
| 15384 |
+
},
|
| 15385 |
+
{
|
| 15386 |
+
"epoch": 1.4333478703299898,
|
| 15387 |
+
"grad_norm": 2.5468204021453857,
|
| 15388 |
+
"learning_rate": 5.014050506503209e-08,
|
| 15389 |
+
"loss": 1.6244,
|
| 15390 |
+
"step": 9860
|
| 15391 |
+
},
|
| 15392 |
+
{
|
| 15393 |
+
"epoch": 1.4340747201628143,
|
| 15394 |
+
"grad_norm": 2.6297662258148193,
|
| 15395 |
+
"learning_rate": 4.664803383851241e-08,
|
| 15396 |
+
"loss": 1.574,
|
| 15397 |
+
"step": 9865
|
| 15398 |
+
},
|
| 15399 |
+
{
|
| 15400 |
+
"epoch": 1.434801569995639,
|
| 15401 |
+
"grad_norm": 2.9105236530303955,
|
| 15402 |
+
"learning_rate": 4.328156775637343e-08,
|
| 15403 |
+
"loss": 1.6189,
|
| 15404 |
+
"step": 9870
|
| 15405 |
+
},
|
| 15406 |
+
{
|
| 15407 |
+
"epoch": 1.4355284198284635,
|
| 15408 |
+
"grad_norm": 2.4492199420928955,
|
| 15409 |
+
"learning_rate": 4.004111529368426e-08,
|
| 15410 |
+
"loss": 1.5159,
|
| 15411 |
+
"step": 9875
|
| 15412 |
+
},
|
| 15413 |
+
{
|
| 15414 |
+
"epoch": 1.436255269661288,
|
| 15415 |
+
"grad_norm": 2.902602195739746,
|
| 15416 |
+
"learning_rate": 3.6926684608283267e-08,
|
| 15417 |
+
"loss": 1.6313,
|
| 15418 |
+
"step": 9880
|
| 15419 |
+
},
|
| 15420 |
+
{
|
| 15421 |
+
"epoch": 1.4369821194941126,
|
| 15422 |
+
"grad_norm": 2.4516420364379883,
|
| 15423 |
+
"learning_rate": 3.393828354074474e-08,
|
| 15424 |
+
"loss": 1.6638,
|
| 15425 |
+
"step": 9885
|
| 15426 |
+
},
|
| 15427 |
+
{
|
| 15428 |
+
"epoch": 1.4377089693269371,
|
| 15429 |
+
"grad_norm": 2.564882516860962,
|
| 15430 |
+
"learning_rate": 3.107591961436216e-08,
|
| 15431 |
+
"loss": 1.5475,
|
| 15432 |
+
"step": 9890
|
| 15433 |
+
},
|
| 15434 |
+
{
|
| 15435 |
+
"epoch": 1.4384358191597615,
|
| 15436 |
+
"grad_norm": 2.7581264972686768,
|
| 15437 |
+
"learning_rate": 2.8339600035137093e-08,
|
| 15438 |
+
"loss": 1.6877,
|
| 15439 |
+
"step": 9895
|
| 15440 |
+
},
|
| 15441 |
+
{
|
| 15442 |
+
"epoch": 1.4391626689925863,
|
| 15443 |
+
"grad_norm": 2.6273791790008545,
|
| 15444 |
+
"learning_rate": 2.5729331691756963e-08,
|
| 15445 |
+
"loss": 1.7434,
|
| 15446 |
+
"step": 9900
|
| 15447 |
+
},
|
| 15448 |
+
{
|
| 15449 |
+
"epoch": 1.4391626689925863,
|
| 15450 |
+
"eval_loss": 1.5213963985443115,
|
| 15451 |
+
"eval_runtime": 18.8476,
|
| 15452 |
+
"eval_samples_per_second": 175.142,
|
| 15453 |
+
"eval_steps_per_second": 10.983,
|
| 15454 |
+
"step": 9900
|
| 15455 |
}
|
| 15456 |
],
|
| 15457 |
"logging_steps": 5,
|
|
|
|
| 15480 |
"attributes": {}
|
| 15481 |
}
|
| 15482 |
},
|
| 15483 |
+
"total_flos": 2.5848701774263747e+18,
|
| 15484 |
"train_batch_size": 4,
|
| 15485 |
"trial_name": null,
|
| 15486 |
"trial_params": null
|