Training in progress, step 960000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d42f08cee5e5899e421c92c3ab8339aaf83a7d5a71e0f986946de70dd25827b8
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4d19d365ff7d0b903b4bd79b61ced5f6aeb7b3ebb2cae5d9721a7be356fbe96
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1eb140f3b501b632da9f0e177d93dfa92f96327fc83b8ab3f8543beea312596
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae02e1b16438a09fd7c39d026b7058dcba2dd8135cc32faa7d3ef32101cb552c
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f7bfdfb2e7526898925192623454d203e22c96e04a99fdbcc255d8c59ba2ffd
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19bc56ae136ac8d5c6f4be4ce221d0a363428f1eb15ee73fbd5e9c04b3ecc844
|
| 3 |
+
size 14439
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:351338e637aa543d98ac6400f2e05e86270a6a5900e20a3e790dbfa3cb26dbef
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 14.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7036,11 +7036,85 @@
|
|
| 7036 |
"eval_samples_per_second": 1300.007,
|
| 7037 |
"eval_steps_per_second": 20.8,
|
| 7038 |
"step": 950000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7039 |
}
|
| 7040 |
],
|
| 7041 |
"max_steps": 1000000,
|
| 7042 |
"num_train_epochs": 16,
|
| 7043 |
-
"total_flos": 6.
|
| 7044 |
"trial_name": null,
|
| 7045 |
"trial_params": null
|
| 7046 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 14.659398048467635,
|
| 5 |
+
"global_step": 960000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7036 |
"eval_samples_per_second": 1300.007,
|
| 7037 |
"eval_steps_per_second": 20.8,
|
| 7038 |
"step": 950000
|
| 7039 |
+
},
|
| 7040 |
+
{
|
| 7041 |
+
"epoch": 14.52,
|
| 7042 |
+
"learning_rate": 1.091698505917036e-05,
|
| 7043 |
+
"loss": 0.2281,
|
| 7044 |
+
"step": 951000
|
| 7045 |
+
},
|
| 7046 |
+
{
|
| 7047 |
+
"epoch": 14.54,
|
| 7048 |
+
"learning_rate": 1.0880016837314599e-05,
|
| 7049 |
+
"loss": 0.2283,
|
| 7050 |
+
"step": 952000
|
| 7051 |
+
},
|
| 7052 |
+
{
|
| 7053 |
+
"epoch": 14.55,
|
| 7054 |
+
"learning_rate": 1.084380450052071e-05,
|
| 7055 |
+
"loss": 0.2281,
|
| 7056 |
+
"step": 953000
|
| 7057 |
+
},
|
| 7058 |
+
{
|
| 7059 |
+
"epoch": 14.57,
|
| 7060 |
+
"learning_rate": 1.0808348444801e-05,
|
| 7061 |
+
"loss": 0.2278,
|
| 7062 |
+
"step": 954000
|
| 7063 |
+
},
|
| 7064 |
+
{
|
| 7065 |
+
"epoch": 14.58,
|
| 7066 |
+
"learning_rate": 1.0773649057897206e-05,
|
| 7067 |
+
"loss": 0.2283,
|
| 7068 |
+
"step": 955000
|
| 7069 |
+
},
|
| 7070 |
+
{
|
| 7071 |
+
"epoch": 14.58,
|
| 7072 |
+
"eval_runtime": 0.7689,
|
| 7073 |
+
"eval_samples_per_second": 1300.511,
|
| 7074 |
+
"eval_steps_per_second": 20.808,
|
| 7075 |
+
"step": 955000
|
| 7076 |
+
},
|
| 7077 |
+
{
|
| 7078 |
+
"epoch": 14.6,
|
| 7079 |
+
"learning_rate": 1.073970671927628e-05,
|
| 7080 |
+
"loss": 0.2277,
|
| 7081 |
+
"step": 956000
|
| 7082 |
+
},
|
| 7083 |
+
{
|
| 7084 |
+
"epoch": 14.61,
|
| 7085 |
+
"learning_rate": 1.0706521800126198e-05,
|
| 7086 |
+
"loss": 0.2279,
|
| 7087 |
+
"step": 957000
|
| 7088 |
+
},
|
| 7089 |
+
{
|
| 7090 |
+
"epoch": 14.63,
|
| 7091 |
+
"learning_rate": 1.0674094663351906e-05,
|
| 7092 |
+
"loss": 0.2278,
|
| 7093 |
+
"step": 958000
|
| 7094 |
+
},
|
| 7095 |
+
{
|
| 7096 |
+
"epoch": 14.64,
|
| 7097 |
+
"learning_rate": 1.0642425663571383e-05,
|
| 7098 |
+
"loss": 0.2279,
|
| 7099 |
+
"step": 959000
|
| 7100 |
+
},
|
| 7101 |
+
{
|
| 7102 |
+
"epoch": 14.66,
|
| 7103 |
+
"learning_rate": 1.0611515147111736e-05,
|
| 7104 |
+
"loss": 0.2279,
|
| 7105 |
+
"step": 960000
|
| 7106 |
+
},
|
| 7107 |
+
{
|
| 7108 |
+
"epoch": 14.66,
|
| 7109 |
+
"eval_runtime": 0.8148,
|
| 7110 |
+
"eval_samples_per_second": 1227.238,
|
| 7111 |
+
"eval_steps_per_second": 19.636,
|
| 7112 |
+
"step": 960000
|
| 7113 |
}
|
| 7114 |
],
|
| 7115 |
"max_steps": 1000000,
|
| 7116 |
"num_train_epochs": 16,
|
| 7117 |
+
"total_flos": 6.729615859959243e+22,
|
| 7118 |
"trial_name": null,
|
| 7119 |
"trial_params": null
|
| 7120 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4d19d365ff7d0b903b4bd79b61ced5f6aeb7b3ebb2cae5d9721a7be356fbe96
|
| 3 |
size 449471589
|