Training in progress, step 320, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 936503576
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2e4c22cf9e06580af30dce4f279974ede0ee3634a0dd139bd26cb4e25b25ed7
|
| 3 |
size 936503576
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 936544523
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7344bdd1e274ca01246f02556985f7a2cd03b4f3e5340ec3a06f3c587c4caa39
|
| 3 |
size 936544523
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d97c0d3c8cfa82dd1ce5510efad605477e606178221dbf394aa018e5e13a0c32
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2178,6 +2178,76 @@
|
|
| 2178 |
"learning_rate": 1.9999998545088225e-05,
|
| 2179 |
"loss": 4.3453,
|
| 2180 |
"step": 310
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2181 |
}
|
| 2182 |
],
|
| 2183 |
"logging_steps": 1,
|
|
@@ -2197,7 +2267,7 @@
|
|
| 2197 |
"attributes": {}
|
| 2198 |
}
|
| 2199 |
},
|
| 2200 |
-
"total_flos": 1.
|
| 2201 |
"train_batch_size": 1,
|
| 2202 |
"trial_name": null,
|
| 2203 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.0014225415037597328,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 320,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2178 |
"learning_rate": 1.9999998545088225e-05,
|
| 2179 |
"loss": 4.3453,
|
| 2180 |
"step": 310
|
| 2181 |
+
},
|
| 2182 |
+
{
|
| 2183 |
+
"epoch": 0.0013825325239664903,
|
| 2184 |
+
"grad_norm": 13.375,
|
| 2185 |
+
"learning_rate": 1.999999853565608e-05,
|
| 2186 |
+
"loss": 4.3769,
|
| 2187 |
+
"step": 311
|
| 2188 |
+
},
|
| 2189 |
+
{
|
| 2190 |
+
"epoch": 0.0013869779661657394,
|
| 2191 |
+
"grad_norm": 12.75,
|
| 2192 |
+
"learning_rate": 1.999999852619346e-05,
|
| 2193 |
+
"loss": 4.2999,
|
| 2194 |
+
"step": 312
|
| 2195 |
+
},
|
| 2196 |
+
{
|
| 2197 |
+
"epoch": 0.0013914234083649887,
|
| 2198 |
+
"grad_norm": 13.1875,
|
| 2199 |
+
"learning_rate": 1.9999998516700373e-05,
|
| 2200 |
+
"loss": 4.4593,
|
| 2201 |
+
"step": 313
|
| 2202 |
+
},
|
| 2203 |
+
{
|
| 2204 |
+
"epoch": 0.0013958688505642377,
|
| 2205 |
+
"grad_norm": 12.75,
|
| 2206 |
+
"learning_rate": 1.9999998507176803e-05,
|
| 2207 |
+
"loss": 4.4465,
|
| 2208 |
+
"step": 314
|
| 2209 |
+
},
|
| 2210 |
+
{
|
| 2211 |
+
"epoch": 0.001400314292763487,
|
| 2212 |
+
"grad_norm": 12.5625,
|
| 2213 |
+
"learning_rate": 1.9999998497622755e-05,
|
| 2214 |
+
"loss": 4.301,
|
| 2215 |
+
"step": 315
|
| 2216 |
+
},
|
| 2217 |
+
{
|
| 2218 |
+
"epoch": 0.001404759734962736,
|
| 2219 |
+
"grad_norm": 11.625,
|
| 2220 |
+
"learning_rate": 1.999999848803824e-05,
|
| 2221 |
+
"loss": 4.7077,
|
| 2222 |
+
"step": 316
|
| 2223 |
+
},
|
| 2224 |
+
{
|
| 2225 |
+
"epoch": 0.0014092051771619853,
|
| 2226 |
+
"grad_norm": 10.75,
|
| 2227 |
+
"learning_rate": 1.9999998478423243e-05,
|
| 2228 |
+
"loss": 4.3979,
|
| 2229 |
+
"step": 317
|
| 2230 |
+
},
|
| 2231 |
+
{
|
| 2232 |
+
"epoch": 0.0014136506193612344,
|
| 2233 |
+
"grad_norm": 8.1875,
|
| 2234 |
+
"learning_rate": 1.9999998468777773e-05,
|
| 2235 |
+
"loss": 4.4527,
|
| 2236 |
+
"step": 318
|
| 2237 |
+
},
|
| 2238 |
+
{
|
| 2239 |
+
"epoch": 0.0014180960615604835,
|
| 2240 |
+
"grad_norm": 12.5,
|
| 2241 |
+
"learning_rate": 1.9999998459101828e-05,
|
| 2242 |
+
"loss": 4.3529,
|
| 2243 |
+
"step": 319
|
| 2244 |
+
},
|
| 2245 |
+
{
|
| 2246 |
+
"epoch": 0.0014225415037597328,
|
| 2247 |
+
"grad_norm": 13.6875,
|
| 2248 |
+
"learning_rate": 1.9999998449395407e-05,
|
| 2249 |
+
"loss": 4.2385,
|
| 2250 |
+
"step": 320
|
| 2251 |
}
|
| 2252 |
],
|
| 2253 |
"logging_steps": 1,
|
|
|
|
| 2267 |
"attributes": {}
|
| 2268 |
}
|
| 2269 |
},
|
| 2270 |
+
"total_flos": 1.26679523524608e+16,
|
| 2271 |
"train_batch_size": 1,
|
| 2272 |
"trial_name": null,
|
| 2273 |
"trial_params": null
|