Training in progress, step 310000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b00071efc84beb2feace3b5a3f0a50851e6aef7cfc9f54978cffaf5df2df0e7
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7e2ee92a6eaf05a81d8cc01dee92eda2af9b205d2ac738942206a8dc6fa3a0e
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa0bf92a765d09c7474a24e0b91454931a94402222241ea8b1d6ef7d1cfb2a2f
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7f1efcee6a815bf5e7abad62d9857979801ac6926e6361e60063a1830c0e0cf
|
| 3 |
+
size 14567
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e56ee00cae3dff0125b532f391da7e095ad5ffe308a7da8b824c5f69e3852d28
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:063d140296abe43f5c05bc7e1492ad16b8cd4d6e4e44bfa11ee5374ad0035817
|
| 3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:510bbb6396dcecf907e43c584a6f575f85c197777553938b74c3d9882298be6b
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 4.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -2226,11 +2226,85 @@
|
|
| 2226 |
"eval_samples_per_second": 994.612,
|
| 2227 |
"eval_steps_per_second": 15.914,
|
| 2228 |
"step": 300000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2229 |
}
|
| 2230 |
],
|
| 2231 |
"max_steps": 1000000,
|
| 2232 |
"num_train_epochs": 16,
|
| 2233 |
-
"total_flos": 2.
|
| 2234 |
"trial_name": null,
|
| 2235 |
"trial_params": null
|
| 2236 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 4.733763953151007,
|
| 5 |
+
"global_step": 310000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 2226 |
"eval_samples_per_second": 994.612,
|
| 2227 |
"eval_steps_per_second": 15.914,
|
| 2228 |
"step": 300000
|
| 2229 |
+
},
|
| 2230 |
+
{
|
| 2231 |
+
"epoch": 4.6,
|
| 2232 |
+
"learning_rate": 0.00012723914147245663,
|
| 2233 |
+
"loss": 0.2906,
|
| 2234 |
+
"step": 301000
|
| 2235 |
+
},
|
| 2236 |
+
{
|
| 2237 |
+
"epoch": 4.61,
|
| 2238 |
+
"learning_rate": 0.00012706805633142863,
|
| 2239 |
+
"loss": 0.2906,
|
| 2240 |
+
"step": 302000
|
| 2241 |
+
},
|
| 2242 |
+
{
|
| 2243 |
+
"epoch": 4.63,
|
| 2244 |
+
"learning_rate": 0.00012689645646167755,
|
| 2245 |
+
"loss": 0.2902,
|
| 2246 |
+
"step": 303000
|
| 2247 |
+
},
|
| 2248 |
+
{
|
| 2249 |
+
"epoch": 4.64,
|
| 2250 |
+
"learning_rate": 0.00012672434373979207,
|
| 2251 |
+
"loss": 0.291,
|
| 2252 |
+
"step": 304000
|
| 2253 |
+
},
|
| 2254 |
+
{
|
| 2255 |
+
"epoch": 4.66,
|
| 2256 |
+
"learning_rate": 0.00012655172004796936,
|
| 2257 |
+
"loss": 0.2899,
|
| 2258 |
+
"step": 305000
|
| 2259 |
+
},
|
| 2260 |
+
{
|
| 2261 |
+
"epoch": 4.66,
|
| 2262 |
+
"eval_runtime": 1.0975,
|
| 2263 |
+
"eval_samples_per_second": 911.158,
|
| 2264 |
+
"eval_steps_per_second": 14.579,
|
| 2265 |
+
"step": 305000
|
| 2266 |
+
},
|
| 2267 |
+
{
|
| 2268 |
+
"epoch": 4.67,
|
| 2269 |
+
"learning_rate": 0.00012637858727399448,
|
| 2270 |
+
"loss": 0.2898,
|
| 2271 |
+
"step": 306000
|
| 2272 |
+
},
|
| 2273 |
+
{
|
| 2274 |
+
"epoch": 4.69,
|
| 2275 |
+
"learning_rate": 0.00012620494731121966,
|
| 2276 |
+
"loss": 0.2896,
|
| 2277 |
+
"step": 307000
|
| 2278 |
+
},
|
| 2279 |
+
{
|
| 2280 |
+
"epoch": 4.7,
|
| 2281 |
+
"learning_rate": 0.00012603080205854372,
|
| 2282 |
+
"loss": 0.2894,
|
| 2283 |
+
"step": 308000
|
| 2284 |
+
},
|
| 2285 |
+
{
|
| 2286 |
+
"epoch": 4.72,
|
| 2287 |
+
"learning_rate": 0.00012585615342039126,
|
| 2288 |
+
"loss": 0.2894,
|
| 2289 |
+
"step": 309000
|
| 2290 |
+
},
|
| 2291 |
+
{
|
| 2292 |
+
"epoch": 4.73,
|
| 2293 |
+
"learning_rate": 0.0001256810033066918,
|
| 2294 |
+
"loss": 0.2894,
|
| 2295 |
+
"step": 310000
|
| 2296 |
+
},
|
| 2297 |
+
{
|
| 2298 |
+
"epoch": 4.73,
|
| 2299 |
+
"eval_runtime": 1.0481,
|
| 2300 |
+
"eval_samples_per_second": 954.11,
|
| 2301 |
+
"eval_steps_per_second": 15.266,
|
| 2302 |
+
"step": 310000
|
| 2303 |
}
|
| 2304 |
],
|
| 2305 |
"max_steps": 1000000,
|
| 2306 |
"num_train_epochs": 16,
|
| 2307 |
+
"total_flos": 2.1731054636681665e+22,
|
| 2308 |
"trial_name": null,
|
| 2309 |
"trial_params": null
|
| 2310 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7e2ee92a6eaf05a81d8cc01dee92eda2af9b205d2ac738942206a8dc6fa3a0e
|
| 3 |
size 449471589
|