Instructions to use Team-PIXEL/pixel-base-bigrams with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Team-PIXEL/pixel-base-bigrams with Transformers:
```python
# Load model directly
from transformers import AutoModelForPreTraining

model = AutoModelForPreTraining.from_pretrained("Team-PIXEL/pixel-base-bigrams", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
Training in progress, step 810000
Browse files
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20271fe04ea74ef6c1f5d2a4d1320cc4e3cdd71160c77649afb9825f61cfb447
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5cd0bb237b46c76aefe15f9280bc4734a0b83f8b031790d8bd933f10088a4b40
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80ff99717fc66d7e2670093ac4b787c0d4e68c8bb6b50d5d8a0a59479daaf2a3
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 8.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -16006,11 +16006,211 @@
|
|
| 16006 |
"eval_samples_per_second": 853.408,
|
| 16007 |
"eval_steps_per_second": 13.375,
|
| 16008 |
"step": 800000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16009 |
}
|
| 16010 |
],
|
| 16011 |
"max_steps": 1000000,
|
| 16012 |
"num_train_epochs": 12,
|
| 16013 |
-
"total_flos": 5.
|
| 16014 |
"trial_name": null,
|
| 16015 |
"trial_params": null
|
| 16016 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 8.676689381350167,
|
| 5 |
+
"global_step": 810000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 16006 |
"eval_samples_per_second": 853.408,
|
| 16007 |
"eval_steps_per_second": 13.375,
|
| 16008 |
"step": 800000
|
| 16009 |
+
},
|
| 16010 |
+
{
|
| 16011 |
+
"epoch": 8.57,
|
| 16012 |
+
"learning_rate": 2.4689149133701672e-05,
|
| 16013 |
+
"loss": 0.1867,
|
| 16014 |
+
"step": 800500
|
| 16015 |
+
},
|
| 16016 |
+
{
|
| 16017 |
+
"epoch": 8.58,
|
| 16018 |
+
"learning_rate": 2.461828514290513e-05,
|
| 16019 |
+
"loss": 0.1869,
|
| 16020 |
+
"step": 801000
|
| 16021 |
+
},
|
| 16022 |
+
{
|
| 16023 |
+
"epoch": 8.58,
|
| 16024 |
+
"eval_loss": 0.17704518139362335,
|
| 16025 |
+
"eval_runtime": 2.7376,
|
| 16026 |
+
"eval_samples_per_second": 839.048,
|
| 16027 |
+
"eval_steps_per_second": 13.15,
|
| 16028 |
+
"step": 801000
|
| 16029 |
+
},
|
| 16030 |
+
{
|
| 16031 |
+
"epoch": 8.58,
|
| 16032 |
+
"learning_rate": 2.4547572563602267e-05,
|
| 16033 |
+
"loss": 0.1872,
|
| 16034 |
+
"step": 801500
|
| 16035 |
+
},
|
| 16036 |
+
{
|
| 16037 |
+
"epoch": 8.59,
|
| 16038 |
+
"learning_rate": 2.447701158911855e-05,
|
| 16039 |
+
"loss": 0.1868,
|
| 16040 |
+
"step": 802000
|
| 16041 |
+
},
|
| 16042 |
+
{
|
| 16043 |
+
"epoch": 8.59,
|
| 16044 |
+
"eval_loss": 0.17794357240200043,
|
| 16045 |
+
"eval_runtime": 2.6487,
|
| 16046 |
+
"eval_samples_per_second": 867.219,
|
| 16047 |
+
"eval_steps_per_second": 13.592,
|
| 16048 |
+
"step": 802000
|
| 16049 |
+
},
|
| 16050 |
+
{
|
| 16051 |
+
"epoch": 8.59,
|
| 16052 |
+
"learning_rate": 2.4406602412365027e-05,
|
| 16053 |
+
"loss": 0.187,
|
| 16054 |
+
"step": 802500
|
| 16055 |
+
},
|
| 16056 |
+
{
|
| 16057 |
+
"epoch": 8.6,
|
| 16058 |
+
"learning_rate": 2.4336345225837658e-05,
|
| 16059 |
+
"loss": 0.1872,
|
| 16060 |
+
"step": 803000
|
| 16061 |
+
},
|
| 16062 |
+
{
|
| 16063 |
+
"epoch": 8.6,
|
| 16064 |
+
"eval_loss": 0.1776154637336731,
|
| 16065 |
+
"eval_runtime": 2.7206,
|
| 16066 |
+
"eval_samples_per_second": 844.313,
|
| 16067 |
+
"eval_steps_per_second": 13.233,
|
| 16068 |
+
"step": 803000
|
| 16069 |
+
},
|
| 16070 |
+
{
|
| 16071 |
+
"epoch": 8.6,
|
| 16072 |
+
"learning_rate": 2.4266240221616956e-05,
|
| 16073 |
+
"loss": 0.1873,
|
| 16074 |
+
"step": 803500
|
| 16075 |
+
},
|
| 16076 |
+
{
|
| 16077 |
+
"epoch": 8.61,
|
| 16078 |
+
"learning_rate": 2.4196287591367296e-05,
|
| 16079 |
+
"loss": 0.1868,
|
| 16080 |
+
"step": 804000
|
| 16081 |
+
},
|
| 16082 |
+
{
|
| 16083 |
+
"epoch": 8.61,
|
| 16084 |
+
"eval_loss": 0.176628977060318,
|
| 16085 |
+
"eval_runtime": 2.6337,
|
| 16086 |
+
"eval_samples_per_second": 872.144,
|
| 16087 |
+
"eval_steps_per_second": 13.669,
|
| 16088 |
+
"step": 804000
|
| 16089 |
+
},
|
| 16090 |
+
{
|
| 16091 |
+
"epoch": 8.62,
|
| 16092 |
+
"learning_rate": 2.412648752633649e-05,
|
| 16093 |
+
"loss": 0.1869,
|
| 16094 |
+
"step": 804500
|
| 16095 |
+
},
|
| 16096 |
+
{
|
| 16097 |
+
"epoch": 8.62,
|
| 16098 |
+
"learning_rate": 2.405684021735527e-05,
|
| 16099 |
+
"loss": 0.1866,
|
| 16100 |
+
"step": 805000
|
| 16101 |
+
},
|
| 16102 |
+
{
|
| 16103 |
+
"epoch": 8.62,
|
| 16104 |
+
"eval_loss": 0.1774420291185379,
|
| 16105 |
+
"eval_runtime": 2.6036,
|
| 16106 |
+
"eval_samples_per_second": 882.223,
|
| 16107 |
+
"eval_steps_per_second": 13.827,
|
| 16108 |
+
"step": 805000
|
| 16109 |
+
},
|
| 16110 |
+
{
|
| 16111 |
+
"epoch": 8.63,
|
| 16112 |
+
"learning_rate": 2.39873458548367e-05,
|
| 16113 |
+
"loss": 0.1871,
|
| 16114 |
+
"step": 805500
|
| 16115 |
+
},
|
| 16116 |
+
{
|
| 16117 |
+
"epoch": 8.63,
|
| 16118 |
+
"learning_rate": 2.3918004628775736e-05,
|
| 16119 |
+
"loss": 0.1871,
|
| 16120 |
+
"step": 806000
|
| 16121 |
+
},
|
| 16122 |
+
{
|
| 16123 |
+
"epoch": 8.63,
|
| 16124 |
+
"eval_loss": 0.1766408383846283,
|
| 16125 |
+
"eval_runtime": 2.6553,
|
| 16126 |
+
"eval_samples_per_second": 865.055,
|
| 16127 |
+
"eval_steps_per_second": 13.558,
|
| 16128 |
+
"step": 806000
|
| 16129 |
+
},
|
| 16130 |
+
{
|
| 16131 |
+
"epoch": 8.64,
|
| 16132 |
+
"learning_rate": 2.3848816728748643e-05,
|
| 16133 |
+
"loss": 0.187,
|
| 16134 |
+
"step": 806500
|
| 16135 |
+
},
|
| 16136 |
+
{
|
| 16137 |
+
"epoch": 8.64,
|
| 16138 |
+
"learning_rate": 2.3779782343912463e-05,
|
| 16139 |
+
"loss": 0.1871,
|
| 16140 |
+
"step": 807000
|
| 16141 |
+
},
|
| 16142 |
+
{
|
| 16143 |
+
"epoch": 8.64,
|
| 16144 |
+
"eval_loss": 0.17578239738941193,
|
| 16145 |
+
"eval_runtime": 2.6261,
|
| 16146 |
+
"eval_samples_per_second": 874.674,
|
| 16147 |
+
"eval_steps_per_second": 13.708,
|
| 16148 |
+
"step": 807000
|
| 16149 |
+
},
|
| 16150 |
+
{
|
| 16151 |
+
"epoch": 8.65,
|
| 16152 |
+
"learning_rate": 2.3710901663004604e-05,
|
| 16153 |
+
"loss": 0.1867,
|
| 16154 |
+
"step": 807500
|
| 16155 |
+
},
|
| 16156 |
+
{
|
| 16157 |
+
"epoch": 8.65,
|
| 16158 |
+
"learning_rate": 2.364217487434221e-05,
|
| 16159 |
+
"loss": 0.1867,
|
| 16160 |
+
"step": 808000
|
| 16161 |
+
},
|
| 16162 |
+
{
|
| 16163 |
+
"epoch": 8.65,
|
| 16164 |
+
"eval_loss": 0.17686133086681366,
|
| 16165 |
+
"eval_runtime": 2.5799,
|
| 16166 |
+
"eval_samples_per_second": 890.356,
|
| 16167 |
+
"eval_steps_per_second": 13.954,
|
| 16168 |
+
"step": 808000
|
| 16169 |
+
},
|
| 16170 |
+
{
|
| 16171 |
+
"epoch": 8.66,
|
| 16172 |
+
"learning_rate": 2.3573602165821668e-05,
|
| 16173 |
+
"loss": 0.187,
|
| 16174 |
+
"step": 808500
|
| 16175 |
+
},
|
| 16176 |
+
{
|
| 16177 |
+
"epoch": 8.67,
|
| 16178 |
+
"learning_rate": 2.3505183724918196e-05,
|
| 16179 |
+
"loss": 0.1867,
|
| 16180 |
+
"step": 809000
|
| 16181 |
+
},
|
| 16182 |
+
{
|
| 16183 |
+
"epoch": 8.67,
|
| 16184 |
+
"eval_loss": 0.1787070780992508,
|
| 16185 |
+
"eval_runtime": 2.6677,
|
| 16186 |
+
"eval_samples_per_second": 861.032,
|
| 16187 |
+
"eval_steps_per_second": 13.495,
|
| 16188 |
+
"step": 809000
|
| 16189 |
+
},
|
| 16190 |
+
{
|
| 16191 |
+
"epoch": 8.67,
|
| 16192 |
+
"learning_rate": 2.3436919738685132e-05,
|
| 16193 |
+
"loss": 0.1864,
|
| 16194 |
+
"step": 809500
|
| 16195 |
+
},
|
| 16196 |
+
{
|
| 16197 |
+
"epoch": 8.68,
|
| 16198 |
+
"learning_rate": 2.3368810393753687e-05,
|
| 16199 |
+
"loss": 0.1866,
|
| 16200 |
+
"step": 810000
|
| 16201 |
+
},
|
| 16202 |
+
{
|
| 16203 |
+
"epoch": 8.68,
|
| 16204 |
+
"eval_loss": 0.17782823741436005,
|
| 16205 |
+
"eval_runtime": 2.6203,
|
| 16206 |
+
"eval_samples_per_second": 876.614,
|
| 16207 |
+
"eval_steps_per_second": 13.739,
|
| 16208 |
+
"step": 810000
|
| 16209 |
}
|
| 16210 |
],
|
| 16211 |
"max_steps": 1000000,
|
| 16212 |
"num_train_epochs": 12,
|
| 16213 |
+
"total_flos": 5.678077331877994e+22,
|
| 16214 |
"trial_name": null,
|
| 16215 |
"trial_params": null
|
| 16216 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5cd0bb237b46c76aefe15f9280bc4734a0b83f8b031790d8bd933f10088a4b40
|
| 3 |
size 449471589
|