Training in progress, step 840000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc81750a4eb225fa74ef3e834e447b2cf3b9d46b04a3dcc0606a474798d1e20b
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61d539fc493053cb9a04c81161d0492689b9ab7fcaaea2c1a24a3e6ce4acc990
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a81aaecde25369e5575d31d6c4641f897f68348ded1792ee668fac75f81b865d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91bed33973e1e6566de030884af5ce2f52f782d9e7ad79ccceffa9ead0f4b212
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83dc738e3825b6749be56158669ca941276cf2897108d6fb521bf33692ea02ee
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ad21b9ae0990cf2f16f8fe417227b8f259cc3ef9a1ae3ddbfa629ee1f04f4cc
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b75da63b821a4c72c4b37f39fc301b88ce6e4d7dc37edf4f078b7f5706f736e3
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 12.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -6148,11 +6148,85 @@
|
|
| 6148 |
"eval_samples_per_second": 1364.889,
|
| 6149 |
"eval_steps_per_second": 21.838,
|
| 6150 |
"step": 830000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6151 |
}
|
| 6152 |
],
|
| 6153 |
"max_steps": 1000000,
|
| 6154 |
"num_train_epochs": 16,
|
| 6155 |
-
"total_flos": 5.
|
| 6156 |
"trial_name": null,
|
| 6157 |
"trial_params": null
|
| 6158 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 12.82697329240918,
|
| 5 |
+
"global_step": 840000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 6148 |
"eval_samples_per_second": 1364.889,
|
| 6149 |
"eval_steps_per_second": 21.838,
|
| 6150 |
"step": 830000
|
| 6151 |
+
},
|
| 6152 |
+
{
|
| 6153 |
+
"epoch": 12.69,
|
| 6154 |
+
"learning_rate": 2.0650267139558772e-05,
|
| 6155 |
+
"loss": 0.2339,
|
| 6156 |
+
"step": 831000
|
| 6157 |
+
},
|
| 6158 |
+
{
|
| 6159 |
+
"epoch": 12.7,
|
| 6160 |
+
"learning_rate": 2.052785098775293e-05,
|
| 6161 |
+
"loss": 0.2339,
|
| 6162 |
+
"step": 832000
|
| 6163 |
+
},
|
| 6164 |
+
{
|
| 6165 |
+
"epoch": 12.72,
|
| 6166 |
+
"learning_rate": 2.04060852138404e-05,
|
| 6167 |
+
"loss": 0.234,
|
| 6168 |
+
"step": 833000
|
| 6169 |
+
},
|
| 6170 |
+
{
|
| 6171 |
+
"epoch": 12.74,
|
| 6172 |
+
"learning_rate": 2.028497114943219e-05,
|
| 6173 |
+
"loss": 0.234,
|
| 6174 |
+
"step": 834000
|
| 6175 |
+
},
|
| 6176 |
+
{
|
| 6177 |
+
"epoch": 12.75,
|
| 6178 |
+
"learning_rate": 2.0164510119012263e-05,
|
| 6179 |
+
"loss": 0.2338,
|
| 6180 |
+
"step": 835000
|
| 6181 |
+
},
|
| 6182 |
+
{
|
| 6183 |
+
"epoch": 12.75,
|
| 6184 |
+
"eval_runtime": 0.7099,
|
| 6185 |
+
"eval_samples_per_second": 1408.578,
|
| 6186 |
+
"eval_steps_per_second": 22.537,
|
| 6187 |
+
"step": 835000
|
| 6188 |
+
},
|
| 6189 |
+
{
|
| 6190 |
+
"epoch": 12.77,
|
| 6191 |
+
"learning_rate": 2.0044703439923217e-05,
|
| 6192 |
+
"loss": 0.2336,
|
| 6193 |
+
"step": 836000
|
| 6194 |
+
},
|
| 6195 |
+
{
|
| 6196 |
+
"epoch": 12.78,
|
| 6197 |
+
"learning_rate": 1.9925552422351654e-05,
|
| 6198 |
+
"loss": 0.2338,
|
| 6199 |
+
"step": 837000
|
| 6200 |
+
},
|
| 6201 |
+
{
|
| 6202 |
+
"epoch": 12.8,
|
| 6203 |
+
"learning_rate": 1.9807058369314016e-05,
|
| 6204 |
+
"loss": 0.2335,
|
| 6205 |
+
"step": 838000
|
| 6206 |
+
},
|
| 6207 |
+
{
|
| 6208 |
+
"epoch": 12.81,
|
| 6209 |
+
"learning_rate": 1.968922257664231e-05,
|
| 6210 |
+
"loss": 0.2337,
|
| 6211 |
+
"step": 839000
|
| 6212 |
+
},
|
| 6213 |
+
{
|
| 6214 |
+
"epoch": 12.83,
|
| 6215 |
+
"learning_rate": 1.9572046332969825e-05,
|
| 6216 |
+
"loss": 0.2335,
|
| 6217 |
+
"step": 840000
|
| 6218 |
+
},
|
| 6219 |
+
{
|
| 6220 |
+
"epoch": 12.83,
|
| 6221 |
+
"eval_runtime": 0.7491,
|
| 6222 |
+
"eval_samples_per_second": 1334.897,
|
| 6223 |
+
"eval_steps_per_second": 21.358,
|
| 6224 |
+
"step": 840000
|
| 6225 |
}
|
| 6226 |
],
|
| 6227 |
"max_steps": 1000000,
|
| 6228 |
"num_train_epochs": 16,
|
| 6229 |
+
"total_flos": 5.888414041731375e+22,
|
| 6230 |
"trial_name": null,
|
| 6231 |
"trial_params": null
|
| 6232 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61d539fc493053cb9a04c81161d0492689b9ab7fcaaea2c1a24a3e6ce4acc990
|
| 3 |
size 449471589
|