Training in progress, step 710000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8b4f5708667fda381a1ab66f80f980e40daa1a707f100921fdae99c2db5e334
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c7dafd574b6c18c8b0838dfe7c6fd825d5c841584d2d916d404b3af2a78c999
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d7fee820bb752afb8f603542eaf0336c5e7c41a61ac1c1147dd536354714a47
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2561ae8bb02e359a9713b9977fa63ca1b6854d76fd4510a159042ccdda7aff7
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:330a4501421948528f860e170d41c93583e955ea4b60c53d7ee73adbfd6aa9e2
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:014d339be1f5943d92bce27af7e1e9d7ced53c7dfd7c34412b3de983c10ddb04
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:099db39f4d942e6817b283451a401c36d6ba4059eea01842b742770347fceca3
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 10.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5186,11 +5186,85 @@
|
|
| 5186 |
"eval_samples_per_second": 1282.444,
|
| 5187 |
"eval_steps_per_second": 20.519,
|
| 5188 |
"step": 700000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5189 |
}
|
| 5190 |
],
|
| 5191 |
"max_steps": 1000000,
|
| 5192 |
"num_train_epochs": 16,
|
| 5193 |
-
"total_flos": 4.
|
| 5194 |
"trial_name": null,
|
| 5195 |
"trial_params": null
|
| 5196 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.841846473345855,
|
| 5 |
+
"global_step": 710000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5186 |
"eval_samples_per_second": 1282.444,
|
| 5187 |
"eval_steps_per_second": 20.519,
|
| 5188 |
"step": 700000
|
| 5189 |
+
},
|
| 5190 |
+
{
|
| 5191 |
+
"epoch": 10.7,
|
| 5192 |
+
"learning_rate": 4.1520046500868384e-05,
|
| 5193 |
+
"loss": 0.2442,
|
| 5194 |
+
"step": 701000
|
| 5195 |
+
},
|
| 5196 |
+
{
|
| 5197 |
+
"epoch": 10.72,
|
| 5198 |
+
"learning_rate": 4.1326884880913074e-05,
|
| 5199 |
+
"loss": 0.2454,
|
| 5200 |
+
"step": 702000
|
| 5201 |
+
},
|
| 5202 |
+
{
|
| 5203 |
+
"epoch": 10.73,
|
| 5204 |
+
"learning_rate": 4.1134146183948724e-05,
|
| 5205 |
+
"loss": 0.2445,
|
| 5206 |
+
"step": 703000
|
| 5207 |
+
},
|
| 5208 |
+
{
|
| 5209 |
+
"epoch": 10.75,
|
| 5210 |
+
"learning_rate": 4.0941832517734885e-05,
|
| 5211 |
+
"loss": 0.2448,
|
| 5212 |
+
"step": 704000
|
| 5213 |
+
},
|
| 5214 |
+
{
|
| 5215 |
+
"epoch": 10.77,
|
| 5216 |
+
"learning_rate": 4.0749945985382915e-05,
|
| 5217 |
+
"loss": 0.2445,
|
| 5218 |
+
"step": 705000
|
| 5219 |
+
},
|
| 5220 |
+
{
|
| 5221 |
+
"epoch": 10.77,
|
| 5222 |
+
"eval_runtime": 0.7458,
|
| 5223 |
+
"eval_samples_per_second": 1340.853,
|
| 5224 |
+
"eval_steps_per_second": 21.454,
|
| 5225 |
+
"step": 705000
|
| 5226 |
+
},
|
| 5227 |
+
{
|
| 5228 |
+
"epoch": 10.78,
|
| 5229 |
+
"learning_rate": 4.0558488685333235e-05,
|
| 5230 |
+
"loss": 0.253,
|
| 5231 |
+
"step": 706000
|
| 5232 |
+
},
|
| 5233 |
+
{
|
| 5234 |
+
"epoch": 10.8,
|
| 5235 |
+
"learning_rate": 4.036746271133223e-05,
|
| 5236 |
+
"loss": 0.2533,
|
| 5237 |
+
"step": 707000
|
| 5238 |
+
},
|
| 5239 |
+
{
|
| 5240 |
+
"epoch": 10.81,
|
| 5241 |
+
"learning_rate": 4.0176870152409324e-05,
|
| 5242 |
+
"loss": 0.2547,
|
| 5243 |
+
"step": 708000
|
| 5244 |
+
},
|
| 5245 |
+
{
|
| 5246 |
+
"epoch": 10.83,
|
| 5247 |
+
"learning_rate": 3.998671309285417e-05,
|
| 5248 |
+
"loss": 0.2529,
|
| 5249 |
+
"step": 709000
|
| 5250 |
+
},
|
| 5251 |
+
{
|
| 5252 |
+
"epoch": 10.84,
|
| 5253 |
+
"learning_rate": 3.979699361219395e-05,
|
| 5254 |
+
"loss": 0.2457,
|
| 5255 |
+
"step": 710000
|
| 5256 |
+
},
|
| 5257 |
+
{
|
| 5258 |
+
"epoch": 10.84,
|
| 5259 |
+
"eval_runtime": 0.7472,
|
| 5260 |
+
"eval_samples_per_second": 1338.326,
|
| 5261 |
+
"eval_steps_per_second": 21.413,
|
| 5262 |
+
"step": 710000
|
| 5263 |
}
|
| 5264 |
],
|
| 5265 |
"max_steps": 1000000,
|
| 5266 |
"num_train_epochs": 16,
|
| 5267 |
+
"total_flos": 4.97711196247316e+22,
|
| 5268 |
"trial_name": null,
|
| 5269 |
"trial_params": null
|
| 5270 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c7dafd574b6c18c8b0838dfe7c6fd825d5c841584d2d916d404b3af2a78c999
|
| 3 |
size 449471589
|