Training in progress, step 820000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ece7b379c7a5a58ecf06bad11ca4cf9bd52de11683f133c698a0cd46d58e7f3f
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83c02b97525bdc0170d693b11906b5064b3417228255adea84f3fccd20f429d1
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:471be9073ca16931b802d54b255e23b3102b1be6c0e243832ecda5d9de213243
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c26d9707c692819517328265ab4fd028c1ec91f8e531a3899c963382192856e1
|
| 3 |
+
size 14567
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f7ef808d3e0c55b070bb63a51ac31108dc6594c932f2329575c64f0ec5d34a6
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3915824192442b74d4af5e3ad4160d4a09adeeec3fcac9fa02d181a6969e8279
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c63a65d51252613e1cd5f3ab255f2a8e56d55631776ee22be37789c5802ebbf2
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 12.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -6000,11 +6000,85 @@
|
|
| 6000 |
"eval_samples_per_second": 1341.078,
|
| 6001 |
"eval_steps_per_second": 21.457,
|
| 6002 |
"step": 810000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6003 |
}
|
| 6004 |
],
|
| 6005 |
"max_steps": 1000000,
|
| 6006 |
"num_train_epochs": 16,
|
| 6007 |
-
"total_flos": 5.
|
| 6008 |
"trial_name": null,
|
| 6009 |
"trial_params": null
|
| 6010 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 12.521569166399438,
|
| 5 |
+
"global_step": 820000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 6000 |
"eval_samples_per_second": 1341.078,
|
| 6001 |
"eval_steps_per_second": 21.457,
|
| 6002 |
"step": 810000
|
| 6003 |
+
},
|
| 6004 |
+
{
|
| 6005 |
+
"epoch": 12.38,
|
| 6006 |
+
"learning_rate": 2.32330563722056e-05,
|
| 6007 |
+
"loss": 0.2357,
|
| 6008 |
+
"step": 811000
|
| 6009 |
+
},
|
| 6010 |
+
{
|
| 6011 |
+
"epoch": 12.4,
|
| 6012 |
+
"learning_rate": 2.309792314485815e-05,
|
| 6013 |
+
"loss": 0.2356,
|
| 6014 |
+
"step": 812000
|
| 6015 |
+
},
|
| 6016 |
+
{
|
| 6017 |
+
"epoch": 12.41,
|
| 6018 |
+
"learning_rate": 2.2963412189506695e-05,
|
| 6019 |
+
"loss": 0.2358,
|
| 6020 |
+
"step": 813000
|
| 6021 |
+
},
|
| 6022 |
+
{
|
| 6023 |
+
"epoch": 12.43,
|
| 6024 |
+
"learning_rate": 2.282952497714145e-05,
|
| 6025 |
+
"loss": 0.2356,
|
| 6026 |
+
"step": 814000
|
| 6027 |
+
},
|
| 6028 |
+
{
|
| 6029 |
+
"epoch": 12.45,
|
| 6030 |
+
"learning_rate": 2.2696262971931538e-05,
|
| 6031 |
+
"loss": 0.2357,
|
| 6032 |
+
"step": 815000
|
| 6033 |
+
},
|
| 6034 |
+
{
|
| 6035 |
+
"epoch": 12.45,
|
| 6036 |
+
"eval_runtime": 0.7163,
|
| 6037 |
+
"eval_samples_per_second": 1396.105,
|
| 6038 |
+
"eval_steps_per_second": 22.338,
|
| 6039 |
+
"step": 815000
|
| 6040 |
+
},
|
| 6041 |
+
{
|
| 6042 |
+
"epoch": 12.46,
|
| 6043 |
+
"learning_rate": 2.2563627631208887e-05,
|
| 6044 |
+
"loss": 0.2355,
|
| 6045 |
+
"step": 816000
|
| 6046 |
+
},
|
| 6047 |
+
{
|
| 6048 |
+
"epoch": 12.48,
|
| 6049 |
+
"learning_rate": 2.2431620405452336e-05,
|
| 6050 |
+
"loss": 0.2351,
|
| 6051 |
+
"step": 817000
|
| 6052 |
+
},
|
| 6053 |
+
{
|
| 6054 |
+
"epoch": 12.49,
|
| 6055 |
+
"learning_rate": 2.230024273827179e-05,
|
| 6056 |
+
"loss": 0.2357,
|
| 6057 |
+
"step": 818000
|
| 6058 |
+
},
|
| 6059 |
+
{
|
| 6060 |
+
"epoch": 12.51,
|
| 6061 |
+
"learning_rate": 2.216949606639231e-05,
|
| 6062 |
+
"loss": 0.2353,
|
| 6063 |
+
"step": 819000
|
| 6064 |
+
},
|
| 6065 |
+
{
|
| 6066 |
+
"epoch": 12.52,
|
| 6067 |
+
"learning_rate": 2.2039381819638596e-05,
|
| 6068 |
+
"loss": 0.2351,
|
| 6069 |
+
"step": 820000
|
| 6070 |
+
},
|
| 6071 |
+
{
|
| 6072 |
+
"epoch": 12.52,
|
| 6073 |
+
"eval_runtime": 0.6211,
|
| 6074 |
+
"eval_samples_per_second": 1609.99,
|
| 6075 |
+
"eval_steps_per_second": 25.76,
|
| 6076 |
+
"step": 820000
|
| 6077 |
}
|
| 6078 |
],
|
| 6079 |
"max_steps": 1000000,
|
| 6080 |
"num_train_epochs": 16,
|
| 6081 |
+
"total_flos": 5.748213519670681e+22,
|
| 6082 |
"trial_name": null,
|
| 6083 |
"trial_params": null
|
| 6084 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83c02b97525bdc0170d693b11906b5064b3417228255adea84f3fccd20f429d1
|
| 3 |
size 449471589
|