Instructions to use Team-PIXEL/pixel-base-bigrams with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Team-PIXEL/pixel-base-bigrams with Transformers:
```python
# Load model directly
from transformers import AutoModelForPreTraining
model = AutoModelForPreTraining.from_pretrained("Team-PIXEL/pixel-base-bigrams", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
Training in progress, step 790000
Browse files
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee6d0b8731469184859b6e2af2323dc331e9f7e709ceb8418eca6fab2f75e9cb
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3c99a6d8856f7a728dbbbf96bf0c858122cdeb2ae96a80fcc6876c29d8e2666
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83d1297302d20060e31d476195b98906c23904815e65152eb2d3ffb7dd074183
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 8.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -15606,11 +15606,211 @@
|
|
| 15606 |
"eval_samples_per_second": 840.721,
|
| 15607 |
"eval_steps_per_second": 13.176,
|
| 15608 |
"step": 780000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15609 |
}
|
| 15610 |
],
|
| 15611 |
"max_steps": 1000000,
|
| 15612 |
"num_train_epochs": 12,
|
| 15613 |
-
"total_flos": 5.
|
| 15614 |
"trial_name": null,
|
| 15615 |
"trial_params": null
|
| 15616 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 8.453631932904319,
|
| 5 |
+
"global_step": 790000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 15606 |
"eval_samples_per_second": 840.721,
|
| 15607 |
"eval_steps_per_second": 13.176,
|
| 15608 |
"step": 780000
|
| 15609 |
+
},
|
| 15610 |
+
{
|
| 15611 |
+
"epoch": 8.35,
|
| 15612 |
+
"learning_rate": 2.7645596244470935e-05,
|
| 15613 |
+
"loss": 0.1885,
|
| 15614 |
+
"step": 780500
|
| 15615 |
+
},
|
| 15616 |
+
{
|
| 15617 |
+
"epoch": 8.35,
|
| 15618 |
+
"learning_rate": 2.7568839012773365e-05,
|
| 15619 |
+
"loss": 0.1884,
|
| 15620 |
+
"step": 781000
|
| 15621 |
+
},
|
| 15622 |
+
{
|
| 15623 |
+
"epoch": 8.35,
|
| 15624 |
+
"eval_loss": 0.1805545538663864,
|
| 15625 |
+
"eval_runtime": 2.6452,
|
| 15626 |
+
"eval_samples_per_second": 868.354,
|
| 15627 |
+
"eval_steps_per_second": 13.609,
|
| 15628 |
+
"step": 781000
|
| 15629 |
+
},
|
| 15630 |
+
{
|
| 15631 |
+
"epoch": 8.36,
|
| 15632 |
+
"learning_rate": 2.7492225125867825e-05,
|
| 15633 |
+
"loss": 0.1889,
|
| 15634 |
+
"step": 781500
|
| 15635 |
+
},
|
| 15636 |
+
{
|
| 15637 |
+
"epoch": 8.36,
|
| 15638 |
+
"learning_rate": 2.7415754793213826e-05,
|
| 15639 |
+
"loss": 0.1886,
|
| 15640 |
+
"step": 782000
|
| 15641 |
+
},
|
| 15642 |
+
{
|
| 15643 |
+
"epoch": 8.36,
|
| 15644 |
+
"eval_loss": 0.1767302304506302,
|
| 15645 |
+
"eval_runtime": 2.6579,
|
| 15646 |
+
"eval_samples_per_second": 864.226,
|
| 15647 |
+
"eval_steps_per_second": 13.545,
|
| 15648 |
+
"step": 782000
|
| 15649 |
+
},
|
| 15650 |
+
{
|
| 15651 |
+
"epoch": 8.37,
|
| 15652 |
+
"learning_rate": 2.7339428223878283e-05,
|
| 15653 |
+
"loss": 0.1883,
|
| 15654 |
+
"step": 782500
|
| 15655 |
+
},
|
| 15656 |
+
{
|
| 15657 |
+
"epoch": 8.38,
|
| 15658 |
+
"learning_rate": 2.7263245626535116e-05,
|
| 15659 |
+
"loss": 0.1884,
|
| 15660 |
+
"step": 783000
|
| 15661 |
+
},
|
| 15662 |
+
{
|
| 15663 |
+
"epoch": 8.38,
|
| 15664 |
+
"eval_loss": 0.1802656203508377,
|
| 15665 |
+
"eval_runtime": 2.7131,
|
| 15666 |
+
"eval_samples_per_second": 846.621,
|
| 15667 |
+
"eval_steps_per_second": 13.269,
|
| 15668 |
+
"step": 783000
|
| 15669 |
+
},
|
| 15670 |
+
{
|
| 15671 |
+
"epoch": 8.38,
|
| 15672 |
+
"learning_rate": 2.7187207209464687e-05,
|
| 15673 |
+
"loss": 0.1883,
|
| 15674 |
+
"step": 783500
|
| 15675 |
+
},
|
| 15676 |
+
{
|
| 15677 |
+
"epoch": 8.39,
|
| 15678 |
+
"learning_rate": 2.7111313180553077e-05,
|
| 15679 |
+
"loss": 0.1882,
|
| 15680 |
+
"step": 784000
|
| 15681 |
+
},
|
| 15682 |
+
{
|
| 15683 |
+
"epoch": 8.39,
|
| 15684 |
+
"eval_loss": 0.17976997792720795,
|
| 15685 |
+
"eval_runtime": 2.668,
|
| 15686 |
+
"eval_samples_per_second": 860.932,
|
| 15687 |
+
"eval_steps_per_second": 13.493,
|
| 15688 |
+
"step": 784000
|
| 15689 |
+
},
|
| 15690 |
+
{
|
| 15691 |
+
"epoch": 8.39,
|
| 15692 |
+
"learning_rate": 2.703556374729169e-05,
|
| 15693 |
+
"loss": 0.1885,
|
| 15694 |
+
"step": 784500
|
| 15695 |
+
},
|
| 15696 |
+
{
|
| 15697 |
+
"epoch": 8.4,
|
| 15698 |
+
"learning_rate": 2.6959959116776587e-05,
|
| 15699 |
+
"loss": 0.188,
|
| 15700 |
+
"step": 785000
|
| 15701 |
+
},
|
| 15702 |
+
{
|
| 15703 |
+
"epoch": 8.4,
|
| 15704 |
+
"eval_loss": 0.1783231794834137,
|
| 15705 |
+
"eval_runtime": 2.6459,
|
| 15706 |
+
"eval_samples_per_second": 868.123,
|
| 15707 |
+
"eval_steps_per_second": 13.606,
|
| 15708 |
+
"step": 785000
|
| 15709 |
+
},
|
| 15710 |
+
{
|
| 15711 |
+
"epoch": 8.4,
|
| 15712 |
+
"learning_rate": 2.68844994957079e-05,
|
| 15713 |
+
"loss": 0.1881,
|
| 15714 |
+
"step": 785500
|
| 15715 |
+
},
|
| 15716 |
+
{
|
| 15717 |
+
"epoch": 8.41,
|
| 15718 |
+
"learning_rate": 2.6809185090389406e-05,
|
| 15719 |
+
"loss": 0.1884,
|
| 15720 |
+
"step": 786000
|
| 15721 |
+
},
|
| 15722 |
+
{
|
| 15723 |
+
"epoch": 8.41,
|
| 15724 |
+
"eval_loss": 0.18017184734344482,
|
| 15725 |
+
"eval_runtime": 2.6671,
|
| 15726 |
+
"eval_samples_per_second": 861.233,
|
| 15727 |
+
"eval_steps_per_second": 13.498,
|
| 15728 |
+
"step": 786000
|
| 15729 |
+
},
|
| 15730 |
+
{
|
| 15731 |
+
"epoch": 8.41,
|
| 15732 |
+
"learning_rate": 2.6734016106727777e-05,
|
| 15733 |
+
"loss": 0.1881,
|
| 15734 |
+
"step": 786500
|
| 15735 |
+
},
|
| 15736 |
+
{
|
| 15737 |
+
"epoch": 8.42,
|
| 15738 |
+
"learning_rate": 2.6658992750232167e-05,
|
| 15739 |
+
"loss": 0.188,
|
| 15740 |
+
"step": 787000
|
| 15741 |
+
},
|
| 15742 |
+
{
|
| 15743 |
+
"epoch": 8.42,
|
| 15744 |
+
"eval_loss": 0.17710144817829132,
|
| 15745 |
+
"eval_runtime": 2.6667,
|
| 15746 |
+
"eval_samples_per_second": 861.367,
|
| 15747 |
+
"eval_steps_per_second": 13.5,
|
| 15748 |
+
"step": 787000
|
| 15749 |
+
},
|
| 15750 |
+
{
|
| 15751 |
+
"epoch": 8.43,
|
| 15752 |
+
"learning_rate": 2.6584115226013553e-05,
|
| 15753 |
+
"loss": 0.1883,
|
| 15754 |
+
"step": 787500
|
| 15755 |
+
},
|
| 15756 |
+
{
|
| 15757 |
+
"epoch": 8.43,
|
| 15758 |
+
"learning_rate": 2.6509383738784218e-05,
|
| 15759 |
+
"loss": 0.188,
|
| 15760 |
+
"step": 788000
|
| 15761 |
+
},
|
| 15762 |
+
{
|
| 15763 |
+
"epoch": 8.43,
|
| 15764 |
+
"eval_loss": 0.1786525398492813,
|
| 15765 |
+
"eval_runtime": 2.6579,
|
| 15766 |
+
"eval_samples_per_second": 864.232,
|
| 15767 |
+
"eval_steps_per_second": 13.545,
|
| 15768 |
+
"step": 788000
|
| 15769 |
+
},
|
| 15770 |
+
{
|
| 15771 |
+
"epoch": 8.44,
|
| 15772 |
+
"learning_rate": 2.6434798492857228e-05,
|
| 15773 |
+
"loss": 0.1881,
|
| 15774 |
+
"step": 788500
|
| 15775 |
+
},
|
| 15776 |
+
{
|
| 15777 |
+
"epoch": 8.44,
|
| 15778 |
+
"learning_rate": 2.6360359692145757e-05,
|
| 15779 |
+
"loss": 0.1882,
|
| 15780 |
+
"step": 789000
|
| 15781 |
+
},
|
| 15782 |
+
{
|
| 15783 |
+
"epoch": 8.44,
|
| 15784 |
+
"eval_loss": 0.17897970974445343,
|
| 15785 |
+
"eval_runtime": 2.6253,
|
| 15786 |
+
"eval_samples_per_second": 874.933,
|
| 15787 |
+
"eval_steps_per_second": 13.712,
|
| 15788 |
+
"step": 789000
|
| 15789 |
+
},
|
| 15790 |
+
{
|
| 15791 |
+
"epoch": 8.45,
|
| 15792 |
+
"learning_rate": 2.6286067540162677e-05,
|
| 15793 |
+
"loss": 0.1882,
|
| 15794 |
+
"step": 789500
|
| 15795 |
+
},
|
| 15796 |
+
{
|
| 15797 |
+
"epoch": 8.45,
|
| 15798 |
+
"learning_rate": 2.6211922240019883e-05,
|
| 15799 |
+
"loss": 0.1883,
|
| 15800 |
+
"step": 790000
|
| 15801 |
+
},
|
| 15802 |
+
{
|
| 15803 |
+
"epoch": 8.45,
|
| 15804 |
+
"eval_loss": 0.17872017621994019,
|
| 15805 |
+
"eval_runtime": 2.5868,
|
| 15806 |
+
"eval_samples_per_second": 887.972,
|
| 15807 |
+
"eval_steps_per_second": 13.917,
|
| 15808 |
+
"step": 790000
|
| 15809 |
}
|
| 15810 |
],
|
| 15811 |
"max_steps": 1000000,
|
| 15812 |
"num_train_epochs": 12,
|
| 15813 |
+
"total_flos": 5.5378768098172995e+22,
|
| 15814 |
"trial_name": null,
|
| 15815 |
"trial_params": null
|
| 15816 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3c99a6d8856f7a728dbbbf96bf0c858122cdeb2ae96a80fcc6876c29d8e2666
|
| 3 |
size 449471589
|