Instructions to use Team-PIXEL/pixel-base-bigrams with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Team-PIXEL/pixel-base-bigrams with Transformers:
# Load model directly
from transformers import AutoModelForPreTraining
model = AutoModelForPreTraining.from_pretrained("Team-PIXEL/pixel-base-bigrams", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
Training in progress, step 820000
Browse files
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6d93c3e982e0579f40b8abbb458ec9e37e56aeab75677fc246cecb087804c2e
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41345120e0d1385984c4967bb7df3bbe42ffb08d61340ff50f089fccaf2a5880
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c63a65d51252613e1cd5f3ab255f2a8e56d55631776ee22be37789c5802ebbf2
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 8.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -16206,11 +16206,211 @@
|
|
| 16206 |
"eval_samples_per_second": 876.614,
|
| 16207 |
"eval_steps_per_second": 13.739,
|
| 16208 |
"step": 810000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16209 |
}
|
| 16210 |
],
|
| 16211 |
"max_steps": 1000000,
|
| 16212 |
"num_train_epochs": 12,
|
| 16213 |
-
"total_flos": 5.
|
| 16214 |
"trial_name": null,
|
| 16215 |
"trial_params": null
|
| 16216 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 8.78821810557309,
|
| 5 |
+
"global_step": 820000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 16206 |
"eval_samples_per_second": 876.614,
|
| 16207 |
"eval_steps_per_second": 13.739,
|
| 16208 |
"step": 810000
|
| 16209 |
+
},
|
| 16210 |
+
{
|
| 16211 |
+
"epoch": 8.68,
|
| 16212 |
+
"learning_rate": 2.3300855876332162e-05,
|
| 16213 |
+
"loss": 0.1868,
|
| 16214 |
+
"step": 810500
|
| 16215 |
+
},
|
| 16216 |
+
{
|
| 16217 |
+
"epoch": 8.69,
|
| 16218 |
+
"learning_rate": 2.32330563722056e-05,
|
| 16219 |
+
"loss": 0.1864,
|
| 16220 |
+
"step": 811000
|
| 16221 |
+
},
|
| 16222 |
+
{
|
| 16223 |
+
"epoch": 8.69,
|
| 16224 |
+
"eval_loss": 0.1779273897409439,
|
| 16225 |
+
"eval_runtime": 2.7339,
|
| 16226 |
+
"eval_samples_per_second": 840.196,
|
| 16227 |
+
"eval_steps_per_second": 13.168,
|
| 16228 |
+
"step": 811000
|
| 16229 |
+
},
|
| 16230 |
+
{
|
| 16231 |
+
"epoch": 8.69,
|
| 16232 |
+
"learning_rate": 2.316541206673529e-05,
|
| 16233 |
+
"loss": 0.1865,
|
| 16234 |
+
"step": 811500
|
| 16235 |
+
},
|
| 16236 |
+
{
|
| 16237 |
+
"epoch": 8.7,
|
| 16238 |
+
"learning_rate": 2.309792314485815e-05,
|
| 16239 |
+
"loss": 0.1866,
|
| 16240 |
+
"step": 812000
|
| 16241 |
+
},
|
| 16242 |
+
{
|
| 16243 |
+
"epoch": 8.7,
|
| 16244 |
+
"eval_loss": 0.17686782777309418,
|
| 16245 |
+
"eval_runtime": 2.8998,
|
| 16246 |
+
"eval_samples_per_second": 792.124,
|
| 16247 |
+
"eval_steps_per_second": 12.415,
|
| 16248 |
+
"step": 812000
|
| 16249 |
+
},
|
| 16250 |
+
{
|
| 16251 |
+
"epoch": 8.7,
|
| 16252 |
+
"learning_rate": 2.3030589791086353e-05,
|
| 16253 |
+
"loss": 0.1865,
|
| 16254 |
+
"step": 812500
|
| 16255 |
+
},
|
| 16256 |
+
{
|
| 16257 |
+
"epoch": 8.71,
|
| 16258 |
+
"learning_rate": 2.2963412189506695e-05,
|
| 16259 |
+
"loss": 0.1869,
|
| 16260 |
+
"step": 813000
|
| 16261 |
+
},
|
| 16262 |
+
{
|
| 16263 |
+
"epoch": 8.71,
|
| 16264 |
+
"eval_loss": 0.1769571304321289,
|
| 16265 |
+
"eval_runtime": 2.6694,
|
| 16266 |
+
"eval_samples_per_second": 860.483,
|
| 16267 |
+
"eval_steps_per_second": 13.486,
|
| 16268 |
+
"step": 813000
|
| 16269 |
+
},
|
| 16270 |
+
{
|
| 16271 |
+
"epoch": 8.72,
|
| 16272 |
+
"learning_rate": 2.2896390523780156e-05,
|
| 16273 |
+
"loss": 0.1865,
|
| 16274 |
+
"step": 813500
|
| 16275 |
+
},
|
| 16276 |
+
{
|
| 16277 |
+
"epoch": 8.72,
|
| 16278 |
+
"learning_rate": 2.282952497714145e-05,
|
| 16279 |
+
"loss": 0.186,
|
| 16280 |
+
"step": 814000
|
| 16281 |
+
},
|
| 16282 |
+
{
|
| 16283 |
+
"epoch": 8.72,
|
| 16284 |
+
"eval_loss": 0.17855176329612732,
|
| 16285 |
+
"eval_runtime": 2.6874,
|
| 16286 |
+
"eval_samples_per_second": 854.735,
|
| 16287 |
+
"eval_steps_per_second": 13.396,
|
| 16288 |
+
"step": 814000
|
| 16289 |
+
},
|
| 16290 |
+
{
|
| 16291 |
+
"epoch": 8.73,
|
| 16292 |
+
"learning_rate": 2.2762815732398387e-05,
|
| 16293 |
+
"loss": 0.1862,
|
| 16294 |
+
"step": 814500
|
| 16295 |
+
},
|
| 16296 |
+
{
|
| 16297 |
+
"epoch": 8.73,
|
| 16298 |
+
"learning_rate": 2.2696262971931538e-05,
|
| 16299 |
+
"loss": 0.1863,
|
| 16300 |
+
"step": 815000
|
| 16301 |
+
},
|
| 16302 |
+
{
|
| 16303 |
+
"epoch": 8.73,
|
| 16304 |
+
"eval_loss": 0.17720898985862732,
|
| 16305 |
+
"eval_runtime": 2.6031,
|
| 16306 |
+
"eval_samples_per_second": 882.4,
|
| 16307 |
+
"eval_steps_per_second": 13.83,
|
| 16308 |
+
"step": 815000
|
| 16309 |
+
},
|
| 16310 |
+
{
|
| 16311 |
+
"epoch": 8.74,
|
| 16312 |
+
"learning_rate": 2.2629866877693577e-05,
|
| 16313 |
+
"loss": 0.1865,
|
| 16314 |
+
"step": 815500
|
| 16315 |
+
},
|
| 16316 |
+
{
|
| 16317 |
+
"epoch": 8.74,
|
| 16318 |
+
"learning_rate": 2.2563627631208887e-05,
|
| 16319 |
+
"loss": 0.1869,
|
| 16320 |
+
"step": 816000
|
| 16321 |
+
},
|
| 16322 |
+
{
|
| 16323 |
+
"epoch": 8.74,
|
| 16324 |
+
"eval_loss": 0.17861302196979523,
|
| 16325 |
+
"eval_runtime": 2.6611,
|
| 16326 |
+
"eval_samples_per_second": 863.188,
|
| 16327 |
+
"eval_steps_per_second": 13.528,
|
| 16328 |
+
"step": 816000
|
| 16329 |
+
},
|
| 16330 |
+
{
|
| 16331 |
+
"epoch": 8.75,
|
| 16332 |
+
"learning_rate": 2.2497545413573065e-05,
|
| 16333 |
+
"loss": 0.1863,
|
| 16334 |
+
"step": 816500
|
| 16335 |
+
},
|
| 16336 |
+
{
|
| 16337 |
+
"epoch": 8.75,
|
| 16338 |
+
"learning_rate": 2.2431620405452336e-05,
|
| 16339 |
+
"loss": 0.1859,
|
| 16340 |
+
"step": 817000
|
| 16341 |
+
},
|
| 16342 |
+
{
|
| 16343 |
+
"epoch": 8.75,
|
| 16344 |
+
"eval_loss": 0.17606213688850403,
|
| 16345 |
+
"eval_runtime": 2.6726,
|
| 16346 |
+
"eval_samples_per_second": 859.474,
|
| 16347 |
+
"eval_steps_per_second": 13.47,
|
| 16348 |
+
"step": 817000
|
| 16349 |
+
},
|
| 16350 |
+
{
|
| 16351 |
+
"epoch": 8.76,
|
| 16352 |
+
"learning_rate": 2.23658527870832e-05,
|
| 16353 |
+
"loss": 0.1863,
|
| 16354 |
+
"step": 817500
|
| 16355 |
+
},
|
| 16356 |
+
{
|
| 16357 |
+
"epoch": 8.77,
|
| 16358 |
+
"learning_rate": 2.230024273827179e-05,
|
| 16359 |
+
"loss": 0.1862,
|
| 16360 |
+
"step": 818000
|
| 16361 |
+
},
|
| 16362 |
+
{
|
| 16363 |
+
"epoch": 8.77,
|
| 16364 |
+
"eval_loss": 0.17465642094612122,
|
| 16365 |
+
"eval_runtime": 2.6831,
|
| 16366 |
+
"eval_samples_per_second": 856.102,
|
| 16367 |
+
"eval_steps_per_second": 13.417,
|
| 16368 |
+
"step": 818000
|
| 16369 |
+
},
|
| 16370 |
+
{
|
| 16371 |
+
"epoch": 8.77,
|
| 16372 |
+
"learning_rate": 2.223479043839345e-05,
|
| 16373 |
+
"loss": 0.1867,
|
| 16374 |
+
"step": 818500
|
| 16375 |
+
},
|
| 16376 |
+
{
|
| 16377 |
+
"epoch": 8.78,
|
| 16378 |
+
"learning_rate": 2.216949606639231e-05,
|
| 16379 |
+
"loss": 0.1863,
|
| 16380 |
+
"step": 819000
|
| 16381 |
+
},
|
| 16382 |
+
{
|
| 16383 |
+
"epoch": 8.78,
|
| 16384 |
+
"eval_loss": 0.17773117125034332,
|
| 16385 |
+
"eval_runtime": 2.6871,
|
| 16386 |
+
"eval_samples_per_second": 854.84,
|
| 16387 |
+
"eval_steps_per_second": 13.398,
|
| 16388 |
+
"step": 819000
|
| 16389 |
+
},
|
| 16390 |
+
{
|
| 16391 |
+
"epoch": 8.78,
|
| 16392 |
+
"learning_rate": 2.2104359800780665e-05,
|
| 16393 |
+
"loss": 0.1859,
|
| 16394 |
+
"step": 819500
|
| 16395 |
+
},
|
| 16396 |
+
{
|
| 16397 |
+
"epoch": 8.79,
|
| 16398 |
+
"learning_rate": 2.2039381819638596e-05,
|
| 16399 |
+
"loss": 0.186,
|
| 16400 |
+
"step": 820000
|
| 16401 |
+
},
|
| 16402 |
+
{
|
| 16403 |
+
"epoch": 8.79,
|
| 16404 |
+
"eval_loss": 0.1770503968000412,
|
| 16405 |
+
"eval_runtime": 2.6357,
|
| 16406 |
+
"eval_samples_per_second": 871.505,
|
| 16407 |
+
"eval_steps_per_second": 13.659,
|
| 16408 |
+
"step": 820000
|
| 16409 |
}
|
| 16410 |
],
|
| 16411 |
"max_steps": 1000000,
|
| 16412 |
"num_train_epochs": 12,
|
| 16413 |
+
"total_flos": 5.748177592908341e+22,
|
| 16414 |
"trial_name": null,
|
| 16415 |
"trial_params": null
|
| 16416 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41345120e0d1385984c4967bb7df3bbe42ffb08d61340ff50f089fccaf2a5880
|
| 3 |
size 449471589
|