Training in progress, step 960000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04f217d4f2435c53b27b3ffb23b807fa09f40f06e34bfe7a070589d6890dd66f
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b5376abe825ff6d04d360831bbfd37e2e2d959d6e490763218e7e38b5c10ca1
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:351338e637aa543d98ac6400f2e05e86270a6a5900e20a3e790dbfa3cb26dbef
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 10.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -19006,11 +19006,211 @@
|
|
| 19006 |
"eval_samples_per_second": 878.965,
|
| 19007 |
"eval_steps_per_second": 13.776,
|
| 19008 |
"step": 950000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19009 |
}
|
| 19010 |
],
|
| 19011 |
"max_steps": 1000000,
|
| 19012 |
"num_train_epochs": 12,
|
| 19013 |
-
"total_flos": 6.
|
| 19014 |
"trial_name": null,
|
| 19015 |
"trial_params": null
|
| 19016 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.349620244694021,
|
| 5 |
+
"global_step": 960000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 19006 |
"eval_samples_per_second": 878.965,
|
| 19007 |
"eval_steps_per_second": 13.776,
|
| 19008 |
"step": 950000
|
| 19009 |
+
},
|
| 19010 |
+
{
|
| 19011 |
+
"epoch": 10.24,
|
| 19012 |
+
"learning_rate": 1.0935752500982175e-05,
|
| 19013 |
+
"loss": 0.1805,
|
| 19014 |
+
"step": 950500
|
| 19015 |
+
},
|
| 19016 |
+
{
|
| 19017 |
+
"epoch": 10.25,
|
| 19018 |
+
"learning_rate": 1.091698505917036e-05,
|
| 19019 |
+
"loss": 0.1804,
|
| 19020 |
+
"step": 951000
|
| 19021 |
+
},
|
| 19022 |
+
{
|
| 19023 |
+
"epoch": 10.25,
|
| 19024 |
+
"eval_loss": 0.1698637306690216,
|
| 19025 |
+
"eval_runtime": 2.5965,
|
| 19026 |
+
"eval_samples_per_second": 884.656,
|
| 19027 |
+
"eval_steps_per_second": 13.865,
|
| 19028 |
+
"step": 951000
|
| 19029 |
+
},
|
| 19030 |
+
{
|
| 19031 |
+
"epoch": 10.25,
|
| 19032 |
+
"learning_rate": 1.0898406487683472e-05,
|
| 19033 |
+
"loss": 0.1805,
|
| 19034 |
+
"step": 951500
|
| 19035 |
+
},
|
| 19036 |
+
{
|
| 19037 |
+
"epoch": 10.26,
|
| 19038 |
+
"learning_rate": 1.0880016837314599e-05,
|
| 19039 |
+
"loss": 0.1803,
|
| 19040 |
+
"step": 952000
|
| 19041 |
+
},
|
| 19042 |
+
{
|
| 19043 |
+
"epoch": 10.26,
|
| 19044 |
+
"eval_loss": 0.17085076868534088,
|
| 19045 |
+
"eval_runtime": 2.596,
|
| 19046 |
+
"eval_samples_per_second": 884.806,
|
| 19047 |
+
"eval_steps_per_second": 13.867,
|
| 19048 |
+
"step": 952000
|
| 19049 |
+
},
|
| 19050 |
+
{
|
| 19051 |
+
"epoch": 10.27,
|
| 19052 |
+
"learning_rate": 1.0861816158340365e-05,
|
| 19053 |
+
"loss": 0.1807,
|
| 19054 |
+
"step": 952500
|
| 19055 |
+
},
|
| 19056 |
+
{
|
| 19057 |
+
"epoch": 10.27,
|
| 19058 |
+
"learning_rate": 1.084380450052071e-05,
|
| 19059 |
+
"loss": 0.1803,
|
| 19060 |
+
"step": 953000
|
| 19061 |
+
},
|
| 19062 |
+
{
|
| 19063 |
+
"epoch": 10.27,
|
| 19064 |
+
"eval_loss": 0.17190536856651306,
|
| 19065 |
+
"eval_runtime": 2.595,
|
| 19066 |
+
"eval_samples_per_second": 885.153,
|
| 19067 |
+
"eval_steps_per_second": 13.873,
|
| 19068 |
+
"step": 953000
|
| 19069 |
+
},
|
| 19070 |
+
{
|
| 19071 |
+
"epoch": 10.28,
|
| 19072 |
+
"learning_rate": 1.0825981913098828e-05,
|
| 19073 |
+
"loss": 0.1799,
|
| 19074 |
+
"step": 953500
|
| 19075 |
+
},
|
| 19076 |
+
{
|
| 19077 |
+
"epoch": 10.28,
|
| 19078 |
+
"learning_rate": 1.0808348444801e-05,
|
| 19079 |
+
"loss": 0.1802,
|
| 19080 |
+
"step": 954000
|
| 19081 |
+
},
|
| 19082 |
+
{
|
| 19083 |
+
"epoch": 10.28,
|
| 19084 |
+
"eval_loss": 0.16949187219142914,
|
| 19085 |
+
"eval_runtime": 2.6166,
|
| 19086 |
+
"eval_samples_per_second": 877.869,
|
| 19087 |
+
"eval_steps_per_second": 13.758,
|
| 19088 |
+
"step": 954000
|
| 19089 |
+
},
|
| 19090 |
+
{
|
| 19091 |
+
"epoch": 10.29,
|
| 19092 |
+
"learning_rate": 1.0790904143836438e-05,
|
| 19093 |
+
"loss": 0.1804,
|
| 19094 |
+
"step": 954500
|
| 19095 |
+
},
|
| 19096 |
+
{
|
| 19097 |
+
"epoch": 10.29,
|
| 19098 |
+
"learning_rate": 1.0773649057897206e-05,
|
| 19099 |
+
"loss": 0.1802,
|
| 19100 |
+
"step": 955000
|
| 19101 |
+
},
|
| 19102 |
+
{
|
| 19103 |
+
"epoch": 10.29,
|
| 19104 |
+
"eval_loss": 0.16995471715927124,
|
| 19105 |
+
"eval_runtime": 2.6165,
|
| 19106 |
+
"eval_samples_per_second": 877.886,
|
| 19107 |
+
"eval_steps_per_second": 13.759,
|
| 19108 |
+
"step": 955000
|
| 19109 |
+
},
|
| 19110 |
+
{
|
| 19111 |
+
"epoch": 10.3,
|
| 19112 |
+
"learning_rate": 1.0756583234158057e-05,
|
| 19113 |
+
"loss": 0.1799,
|
| 19114 |
+
"step": 955500
|
| 19115 |
+
},
|
| 19116 |
+
{
|
| 19117 |
+
"epoch": 10.31,
|
| 19118 |
+
"learning_rate": 1.073970671927628e-05,
|
| 19119 |
+
"loss": 0.1802,
|
| 19120 |
+
"step": 956000
|
| 19121 |
+
},
|
| 19122 |
+
{
|
| 19123 |
+
"epoch": 10.31,
|
| 19124 |
+
"eval_loss": 0.17191793024539948,
|
| 19125 |
+
"eval_runtime": 2.6164,
|
| 19126 |
+
"eval_samples_per_second": 877.931,
|
| 19127 |
+
"eval_steps_per_second": 13.759,
|
| 19128 |
+
"step": 956000
|
| 19129 |
+
},
|
| 19130 |
+
{
|
| 19131 |
+
"epoch": 10.31,
|
| 19132 |
+
"learning_rate": 1.0723019559391643e-05,
|
| 19133 |
+
"loss": 0.1804,
|
| 19134 |
+
"step": 956500
|
| 19135 |
+
},
|
| 19136 |
+
{
|
| 19137 |
+
"epoch": 10.32,
|
| 19138 |
+
"learning_rate": 1.0706521800126198e-05,
|
| 19139 |
+
"loss": 0.18,
|
| 19140 |
+
"step": 957000
|
| 19141 |
+
},
|
| 19142 |
+
{
|
| 19143 |
+
"epoch": 10.32,
|
| 19144 |
+
"eval_loss": 0.17065568268299103,
|
| 19145 |
+
"eval_runtime": 2.6326,
|
| 19146 |
+
"eval_samples_per_second": 872.521,
|
| 19147 |
+
"eval_steps_per_second": 13.675,
|
| 19148 |
+
"step": 957000
|
| 19149 |
+
},
|
| 19150 |
+
{
|
| 19151 |
+
"epoch": 10.32,
|
| 19152 |
+
"learning_rate": 1.0690213486584175e-05,
|
| 19153 |
+
"loss": 0.18,
|
| 19154 |
+
"step": 957500
|
| 19155 |
+
},
|
| 19156 |
+
{
|
| 19157 |
+
"epoch": 10.33,
|
| 19158 |
+
"learning_rate": 1.0674094663351906e-05,
|
| 19159 |
+
"loss": 0.18,
|
| 19160 |
+
"step": 958000
|
| 19161 |
+
},
|
| 19162 |
+
{
|
| 19163 |
+
"epoch": 10.33,
|
| 19164 |
+
"eval_loss": 0.1698225736618042,
|
| 19165 |
+
"eval_runtime": 2.6744,
|
| 19166 |
+
"eval_samples_per_second": 858.898,
|
| 19167 |
+
"eval_steps_per_second": 13.461,
|
| 19168 |
+
"step": 958000
|
| 19169 |
+
},
|
| 19170 |
+
{
|
| 19171 |
+
"epoch": 10.33,
|
| 19172 |
+
"learning_rate": 1.0658165374497611e-05,
|
| 19173 |
+
"loss": 0.1804,
|
| 19174 |
+
"step": 958500
|
| 19175 |
+
},
|
| 19176 |
+
{
|
| 19177 |
+
"epoch": 10.34,
|
| 19178 |
+
"learning_rate": 1.0642425663571383e-05,
|
| 19179 |
+
"loss": 0.1802,
|
| 19180 |
+
"step": 959000
|
| 19181 |
+
},
|
| 19182 |
+
{
|
| 19183 |
+
"epoch": 10.34,
|
| 19184 |
+
"eval_loss": 0.17188780009746552,
|
| 19185 |
+
"eval_runtime": 2.6352,
|
| 19186 |
+
"eval_samples_per_second": 871.663,
|
| 19187 |
+
"eval_steps_per_second": 13.661,
|
| 19188 |
+
"step": 959000
|
| 19189 |
+
},
|
| 19190 |
+
{
|
| 19191 |
+
"epoch": 10.34,
|
| 19192 |
+
"learning_rate": 1.062687557360497e-05,
|
| 19193 |
+
"loss": 0.1802,
|
| 19194 |
+
"step": 959500
|
| 19195 |
+
},
|
| 19196 |
+
{
|
| 19197 |
+
"epoch": 10.35,
|
| 19198 |
+
"learning_rate": 1.0611515147111736e-05,
|
| 19199 |
+
"loss": 0.1802,
|
| 19200 |
+
"step": 960000
|
| 19201 |
+
},
|
| 19202 |
+
{
|
| 19203 |
+
"epoch": 10.35,
|
| 19204 |
+
"eval_loss": 0.16846837103366852,
|
| 19205 |
+
"eval_runtime": 2.7425,
|
| 19206 |
+
"eval_samples_per_second": 837.549,
|
| 19207 |
+
"eval_steps_per_second": 13.127,
|
| 19208 |
+
"step": 960000
|
| 19209 |
}
|
| 19210 |
],
|
| 19211 |
"max_steps": 1000000,
|
| 19212 |
"num_train_epochs": 12,
|
| 19213 |
+
"total_flos": 6.72957029443817e+22,
|
| 19214 |
"trial_name": null,
|
| 19215 |
"trial_params": null
|
| 19216 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b5376abe825ff6d04d360831bbfd37e2e2d959d6e490763218e7e38b5c10ca1
|
| 3 |
size 449471589
|