Commit
·
ffe42be
1
Parent(s):
1a17623
huggingartists
Browse files
- README.md +4 -4
- config.json +4 -2
- evaluation.txt +1 -1
- flax_model.msgpack +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- tokenizer.json +0 -0
- trainer_state.json +87 -7
- training_args.bin +2 -2
README.md
CHANGED
|
@@ -14,7 +14,7 @@ widget:
|
|
| 14 |
<div class="inline-flex flex-col" style="line-height: 1.5;">
|
| 15 |
<div class="flex">
|
| 16 |
<div
|
| 17 |
-
style="display:DISPLAY_1; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url('https://images.genius.com/
|
| 18 |
</div>
|
| 19 |
</div>
|
| 20 |
<div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
|
|
@@ -45,15 +45,15 @@ from datasets import load_dataset
|
|
| 45 |
dataset = load_dataset("huggingartists/queen")
|
| 46 |
```
|
| 47 |
|
| 48 |
-
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/
|
| 49 |
|
| 50 |
## Training procedure
|
| 51 |
|
| 52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Queen's lyrics.
|
| 53 |
|
| 54 |
-
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/
|
| 55 |
|
| 56 |
-
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/
|
| 57 |
|
| 58 |
## How to use
|
| 59 |
|
|
|
|
| 14 |
<div class="inline-flex flex-col" style="line-height: 1.5;">
|
| 15 |
<div class="flex">
|
| 16 |
<div
|
| 17 |
+
style="display:DISPLAY_1; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url('https://images.genius.com/97bcb5755cb9780d76b37726a0ce4bef.1000x1000x1.jpg')">
|
| 18 |
</div>
|
| 19 |
</div>
|
| 20 |
<div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
|
|
|
|
| 45 |
dataset = load_dataset("huggingartists/queen")
|
| 46 |
```
|
| 47 |
|
| 48 |
+
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1ddcmutf/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
|
| 49 |
|
| 50 |
## Training procedure
|
| 51 |
|
| 52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Queen's lyrics.
|
| 53 |
|
| 54 |
+
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1dhu8z5q) for full transparency and reproducibility.
|
| 55 |
|
| 56 |
+
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1dhu8z5q/artifacts) is logged and versioned.
|
| 57 |
|
| 58 |
## How to use
|
| 59 |
|
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "
|
| 3 |
"activation_function": "gelu_new",
|
| 4 |
"architectures": [
|
| 5 |
"GPT2LMHeadModel"
|
|
@@ -18,7 +18,9 @@
|
|
| 18 |
"n_inner": null,
|
| 19 |
"n_layer": 12,
|
| 20 |
"n_positions": 1024,
|
|
|
|
| 21 |
"resid_pdrop": 0.1,
|
|
|
|
| 22 |
"scale_attn_weights": true,
|
| 23 |
"summary_activation": null,
|
| 24 |
"summary_first_dropout": 0.1,
|
|
@@ -35,7 +37,7 @@
|
|
| 35 |
}
|
| 36 |
},
|
| 37 |
"torch_dtype": "float32",
|
| 38 |
-
"transformers_version": "4.
|
| 39 |
"use_cache": true,
|
| 40 |
"vocab_size": 50257
|
| 41 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "queen",
|
| 3 |
"activation_function": "gelu_new",
|
| 4 |
"architectures": [
|
| 5 |
"GPT2LMHeadModel"
|
|
|
|
| 18 |
"n_inner": null,
|
| 19 |
"n_layer": 12,
|
| 20 |
"n_positions": 1024,
|
| 21 |
+
"reorder_and_upcast_attn": false,
|
| 22 |
"resid_pdrop": 0.1,
|
| 23 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 24 |
"scale_attn_weights": true,
|
| 25 |
"summary_activation": null,
|
| 26 |
"summary_first_dropout": 0.1,
|
|
|
|
| 37 |
}
|
| 38 |
},
|
| 39 |
"torch_dtype": "float32",
|
| 40 |
+
"transformers_version": "4.16.2",
|
| 41 |
"use_cache": true,
|
| 42 |
"vocab_size": 50257
|
| 43 |
}
|
evaluation.txt
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"eval_loss": 1.
|
|
|
|
| 1 |
+
{"eval_loss": 1.0593422651290894, "eval_runtime": 1.2351, "eval_samples_per_second": 74.489, "eval_steps_per_second": 9.716, "epoch": 10.0}
|
flax_model.msgpack
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 497764120
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f71c5d51cdf52a930cd7e68b3903da1eb422d09bd06f91080d5d80355a54ba3c
|
| 3 |
size 497764120
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 995604017
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b8a2f7accefebd6dee4b8dea20bafd47f1f9acf8efc62324b4a2287db757050
|
| 3 |
size 995604017
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 510403817
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d37c527c1768f42033ebf3f95e514029cb59df6fe857c02fe5b6c2968b0e7dd
|
| 3 |
size 510403817
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f49bf0f2acadddffcdc0a4e3487fd525abbe1771468cbd89c3052c1ed8d61d77
|
| 3 |
size 14503
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90d90db3c01e5ca255a7e75446ed55bd62e0e6b8c2052a1a18ccb2f8838ed976
|
| 3 |
size 623
|
tokenizer.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 1.
|
| 3 |
-
"best_model_checkpoint": "output/queen/checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -830,11 +830,91 @@
|
|
| 830 |
"eval_samples_per_second": 22.699,
|
| 831 |
"eval_steps_per_second": 2.87,
|
| 832 |
"step": 621
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 833 |
}
|
| 834 |
],
|
| 835 |
-
"max_steps":
|
| 836 |
-
"num_train_epochs":
|
| 837 |
-
"total_flos":
|
| 838 |
"trial_name": null,
|
| 839 |
"trial_params": null
|
| 840 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 1.0593422651290894,
|
| 3 |
+
"best_model_checkpoint": "output/queen/checkpoint-680",
|
| 4 |
+
"epoch": 10.0,
|
| 5 |
+
"global_step": 680,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 830 |
"eval_samples_per_second": 22.699,
|
| 831 |
"eval_steps_per_second": 2.87,
|
| 832 |
"step": 621
|
| 833 |
+
},
|
| 834 |
+
{
|
| 835 |
+
"epoch": 9.19,
|
| 836 |
+
"learning_rate": 1.2005173242556402e-05,
|
| 837 |
+
"loss": 1.3691,
|
| 838 |
+
"step": 625
|
| 839 |
+
},
|
| 840 |
+
{
|
| 841 |
+
"epoch": 9.26,
|
| 842 |
+
"learning_rate": 2.2384478845846314e-05,
|
| 843 |
+
"loss": 1.5005,
|
| 844 |
+
"step": 630
|
| 845 |
+
},
|
| 846 |
+
{
|
| 847 |
+
"epoch": 9.34,
|
| 848 |
+
"learning_rate": 3.5218932770313436e-05,
|
| 849 |
+
"loss": 1.4065,
|
| 850 |
+
"step": 635
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"epoch": 9.41,
|
| 854 |
+
"learning_rate": 4.982671888105515e-05,
|
| 855 |
+
"loss": 1.3225,
|
| 856 |
+
"step": 640
|
| 857 |
+
},
|
| 858 |
+
{
|
| 859 |
+
"epoch": 9.49,
|
| 860 |
+
"learning_rate": 6.543181473690211e-05,
|
| 861 |
+
"loss": 1.0367,
|
| 862 |
+
"step": 645
|
| 863 |
+
},
|
| 864 |
+
{
|
| 865 |
+
"epoch": 9.56,
|
| 866 |
+
"learning_rate": 8.120521692221662e-05,
|
| 867 |
+
"loss": 1.1784,
|
| 868 |
+
"step": 650
|
| 869 |
+
},
|
| 870 |
+
{
|
| 871 |
+
"epoch": 9.63,
|
| 872 |
+
"learning_rate": 9.630898093421192e-05,
|
| 873 |
+
"loss": 1.2778,
|
| 874 |
+
"step": 655
|
| 875 |
+
},
|
| 876 |
+
{
|
| 877 |
+
"epoch": 9.71,
|
| 878 |
+
"learning_rate": 0.00010994073605561679,
|
| 879 |
+
"loss": 1.3754,
|
| 880 |
+
"step": 660
|
| 881 |
+
},
|
| 882 |
+
{
|
| 883 |
+
"epoch": 9.78,
|
| 884 |
+
"learning_rate": 0.00012137631040942535,
|
| 885 |
+
"loss": 1.1983,
|
| 886 |
+
"step": 665
|
| 887 |
+
},
|
| 888 |
+
{
|
| 889 |
+
"epoch": 9.85,
|
| 890 |
+
"learning_rate": 0.00013000820178695732,
|
| 891 |
+
"loss": 1.3077,
|
| 892 |
+
"step": 670
|
| 893 |
+
},
|
| 894 |
+
{
|
| 895 |
+
"epoch": 9.93,
|
| 896 |
+
"learning_rate": 0.0001353778505291435,
|
| 897 |
+
"loss": 1.4969,
|
| 898 |
+
"step": 675
|
| 899 |
+
},
|
| 900 |
+
{
|
| 901 |
+
"epoch": 10.0,
|
| 902 |
+
"learning_rate": 0.0001372,
|
| 903 |
+
"loss": 1.3263,
|
| 904 |
+
"step": 680
|
| 905 |
+
},
|
| 906 |
+
{
|
| 907 |
+
"epoch": 10.0,
|
| 908 |
+
"eval_loss": 1.0593422651290894,
|
| 909 |
+
"eval_runtime": 1.2161,
|
| 910 |
+
"eval_samples_per_second": 75.649,
|
| 911 |
+
"eval_steps_per_second": 9.867,
|
| 912 |
+
"step": 680
|
| 913 |
}
|
| 914 |
],
|
| 915 |
+
"max_steps": 680,
|
| 916 |
+
"num_train_epochs": 10,
|
| 917 |
+
"total_flos": 704182026240000.0,
|
| 918 |
"trial_name": null,
|
| 919 |
"trial_params": null
|
| 920 |
}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9bfe077b70ca57958721fcbf6c5a404f931dd10af1a32aa26767d45cbe093bfa
|
| 3 |
+
size 3055
|