Commit
·
b2b0ec7
1
Parent(s):
9205123
huggingartists
Browse files- README.md +3 -3
- config.json +2 -2
- evaluation.txt +1 -1
- flax_model.msgpack +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +143 -7
- training_args.bin +1 -1
README.md
CHANGED
|
@@ -45,15 +45,15 @@ from datasets import load_dataset
|
|
| 45 |
dataset = load_dataset("huggingartists/headie-one")
|
| 46 |
```
|
| 47 |
|
| 48 |
-
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/
|
| 49 |
|
| 50 |
## Training procedure
|
| 51 |
|
| 52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Headie One's lyrics.
|
| 53 |
|
| 54 |
-
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/
|
| 55 |
|
| 56 |
-
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/
|
| 57 |
|
| 58 |
## How to use
|
| 59 |
|
|
|
|
| 45 |
dataset = load_dataset("huggingartists/headie-one")
|
| 46 |
```
|
| 47 |
|
| 48 |
+
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3fzj7qkl/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
|
| 49 |
|
| 50 |
## Training procedure
|
| 51 |
|
| 52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Headie One's lyrics.
|
| 53 |
|
| 54 |
+
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1d1n36x9) for full transparency and reproducibility.
|
| 55 |
|
| 56 |
+
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1d1n36x9/artifacts) is logged and versioned.
|
| 57 |
|
| 58 |
## How to use
|
| 59 |
|
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "
|
| 3 |
"activation_function": "gelu_new",
|
| 4 |
"architectures": [
|
| 5 |
"GPT2LMHeadModel"
|
|
@@ -36,7 +36,7 @@
|
|
| 36 |
}
|
| 37 |
},
|
| 38 |
"torch_dtype": "float32",
|
| 39 |
-
"transformers_version": "4.20.
|
| 40 |
"use_cache": true,
|
| 41 |
"vocab_size": 50257
|
| 42 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "headie-one",
|
| 3 |
"activation_function": "gelu_new",
|
| 4 |
"architectures": [
|
| 5 |
"GPT2LMHeadModel"
|
|
|
|
| 36 |
}
|
| 37 |
},
|
| 38 |
"torch_dtype": "float32",
|
| 39 |
+
"transformers_version": "4.20.1",
|
| 40 |
"use_cache": true,
|
| 41 |
"vocab_size": 50257
|
| 42 |
}
|
evaluation.txt
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"eval_loss":
|
|
|
|
| 1 |
+
{"eval_loss": 0.8823016285896301, "eval_runtime": 2.4753, "eval_samples_per_second": 42.419, "eval_steps_per_second": 5.656, "epoch": 70.0}
|
flax_model.msgpack
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 497764120
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e97c2b39904c2125990372bcd2c0af3ae009d0f37cd2567d4ed60579e6209d63
|
| 3 |
size 497764120
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 995604017
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f62d82420e69209e064b44c6ac85956e0739f6dc942fcf17f0200b7f3f0b24c
|
| 3 |
size 995604017
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 510396521
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1313c47fb885aa521ff58af331fb5d870ec7e29197ced87841b3265ac00a9ea
|
| 3 |
size 510396521
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:edd9978b73bc8d959cf1091d4e60a8b7eea3426d8eb1a224bb815d849bd77207
|
| 3 |
size 14503
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b1b7cf6b6c584836674e51b2bffb4f225e602280b8da1987fbd27d41d1f5b41
|
| 3 |
size 623
|
trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric":
|
| 3 |
-
"best_model_checkpoint": "output/headie-one/checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -4966,11 +4966,147 @@
|
|
| 4966 |
"eval_samples_per_second": 42.971,
|
| 4967 |
"eval_steps_per_second": 5.477,
|
| 4968 |
"step": 3800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4969 |
}
|
| 4970 |
],
|
| 4971 |
-
"max_steps":
|
| 4972 |
-
"num_train_epochs":
|
| 4973 |
-
"total_flos":
|
| 4974 |
"trial_name": null,
|
| 4975 |
"trial_params": null
|
| 4976 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.8823016285896301,
|
| 3 |
+
"best_model_checkpoint": "output/headie-one/checkpoint-3900",
|
| 4 |
+
"epoch": 52.0,
|
| 5 |
+
"global_step": 3900,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 4966 |
"eval_samples_per_second": 42.971,
|
| 4967 |
"eval_steps_per_second": 5.477,
|
| 4968 |
"step": 3800
|
| 4969 |
+
},
|
| 4970 |
+
{
|
| 4971 |
+
"epoch": 50.73,
|
| 4972 |
+
"learning_rate": 2.2697640403783063e-05,
|
| 4973 |
+
"loss": 0.9066,
|
| 4974 |
+
"step": 3805
|
| 4975 |
+
},
|
| 4976 |
+
{
|
| 4977 |
+
"epoch": 50.8,
|
| 4978 |
+
"learning_rate": 1.3101434185879145e-05,
|
| 4979 |
+
"loss": 0.6969,
|
| 4980 |
+
"step": 3810
|
| 4981 |
+
},
|
| 4982 |
+
{
|
| 4983 |
+
"epoch": 50.87,
|
| 4984 |
+
"learning_rate": 5.930781605717916e-06,
|
| 4985 |
+
"loss": 1.0504,
|
| 4986 |
+
"step": 3815
|
| 4987 |
+
},
|
| 4988 |
+
{
|
| 4989 |
+
"epoch": 50.93,
|
| 4990 |
+
"learning_rate": 1.4990745896610897e-06,
|
| 4991 |
+
"loss": 1.4716,
|
| 4992 |
+
"step": 3820
|
| 4993 |
+
},
|
| 4994 |
+
{
|
| 4995 |
+
"epoch": 51.0,
|
| 4996 |
+
"learning_rate": 0.0,
|
| 4997 |
+
"loss": 1.2765,
|
| 4998 |
+
"step": 3825
|
| 4999 |
+
},
|
| 5000 |
+
{
|
| 5001 |
+
"epoch": 51.0,
|
| 5002 |
+
"eval_loss": 1.2292253971099854,
|
| 5003 |
+
"eval_runtime": 2.6668,
|
| 5004 |
+
"eval_samples_per_second": 39.373,
|
| 5005 |
+
"eval_steps_per_second": 5.25,
|
| 5006 |
+
"step": 3825
|
| 5007 |
+
},
|
| 5008 |
+
{
|
| 5009 |
+
"epoch": 51.07,
|
| 5010 |
+
"learning_rate": 1.499074589660808e-06,
|
| 5011 |
+
"loss": 1.2797,
|
| 5012 |
+
"step": 3830
|
| 5013 |
+
},
|
| 5014 |
+
{
|
| 5015 |
+
"epoch": 51.13,
|
| 5016 |
+
"learning_rate": 5.9307816057173676e-06,
|
| 5017 |
+
"loss": 1.9281,
|
| 5018 |
+
"step": 3835
|
| 5019 |
+
},
|
| 5020 |
+
{
|
| 5021 |
+
"epoch": 51.2,
|
| 5022 |
+
"learning_rate": 1.3101434185878354e-05,
|
| 5023 |
+
"loss": 1.4825,
|
| 5024 |
+
"step": 3840
|
| 5025 |
+
},
|
| 5026 |
+
{
|
| 5027 |
+
"epoch": 51.27,
|
| 5028 |
+
"learning_rate": 2.2697640403782067e-05,
|
| 5029 |
+
"loss": 1.4677,
|
| 5030 |
+
"step": 3845
|
| 5031 |
+
},
|
| 5032 |
+
{
|
| 5033 |
+
"epoch": 51.33,
|
| 5034 |
+
"learning_rate": 3.429999999999976e-05,
|
| 5035 |
+
"loss": 1.0241,
|
| 5036 |
+
"step": 3850
|
| 5037 |
+
},
|
| 5038 |
+
{
|
| 5039 |
+
"epoch": 51.4,
|
| 5040 |
+
"learning_rate": 4.740143418587843e-05,
|
| 5041 |
+
"loss": 1.0786,
|
| 5042 |
+
"step": 3855
|
| 5043 |
+
},
|
| 5044 |
+
{
|
| 5045 |
+
"epoch": 51.47,
|
| 5046 |
+
"learning_rate": 6.142934741983887e-05,
|
| 5047 |
+
"loss": 1.4638,
|
| 5048 |
+
"step": 3860
|
| 5049 |
+
},
|
| 5050 |
+
{
|
| 5051 |
+
"epoch": 51.53,
|
| 5052 |
+
"learning_rate": 7.5770652580161e-05,
|
| 5053 |
+
"loss": 1.0569,
|
| 5054 |
+
"step": 3865
|
| 5055 |
+
},
|
| 5056 |
+
{
|
| 5057 |
+
"epoch": 51.6,
|
| 5058 |
+
"learning_rate": 8.979856581412144e-05,
|
| 5059 |
+
"loss": 1.0274,
|
| 5060 |
+
"step": 3870
|
| 5061 |
+
},
|
| 5062 |
+
{
|
| 5063 |
+
"epoch": 51.67,
|
| 5064 |
+
"learning_rate": 0.00010289999999999844,
|
| 5065 |
+
"loss": 0.9631,
|
| 5066 |
+
"step": 3875
|
| 5067 |
+
},
|
| 5068 |
+
{
|
| 5069 |
+
"epoch": 51.73,
|
| 5070 |
+
"learning_rate": 0.00011450235959621783,
|
| 5071 |
+
"loss": 0.7978,
|
| 5072 |
+
"step": 3880
|
| 5073 |
+
},
|
| 5074 |
+
{
|
| 5075 |
+
"epoch": 51.8,
|
| 5076 |
+
"learning_rate": 0.00012409856581412041,
|
| 5077 |
+
"loss": 0.838,
|
| 5078 |
+
"step": 3885
|
| 5079 |
+
},
|
| 5080 |
+
{
|
| 5081 |
+
"epoch": 51.87,
|
| 5082 |
+
"learning_rate": 0.00013126921839428258,
|
| 5083 |
+
"loss": 0.6216,
|
| 5084 |
+
"step": 3890
|
| 5085 |
+
},
|
| 5086 |
+
{
|
| 5087 |
+
"epoch": 51.93,
|
| 5088 |
+
"learning_rate": 0.00013570092541033876,
|
| 5089 |
+
"loss": 1.2543,
|
| 5090 |
+
"step": 3895
|
| 5091 |
+
},
|
| 5092 |
+
{
|
| 5093 |
+
"epoch": 52.0,
|
| 5094 |
+
"learning_rate": 0.0001372,
|
| 5095 |
+
"loss": 1.0127,
|
| 5096 |
+
"step": 3900
|
| 5097 |
+
},
|
| 5098 |
+
{
|
| 5099 |
+
"epoch": 52.0,
|
| 5100 |
+
"eval_loss": 0.8823016285896301,
|
| 5101 |
+
"eval_runtime": 2.4876,
|
| 5102 |
+
"eval_samples_per_second": 42.209,
|
| 5103 |
+
"eval_steps_per_second": 5.628,
|
| 5104 |
+
"step": 3900
|
| 5105 |
}
|
| 5106 |
],
|
| 5107 |
+
"max_steps": 5250,
|
| 5108 |
+
"num_train_epochs": 70,
|
| 5109 |
+
"total_flos": 4029907009536000.0,
|
| 5110 |
"trial_name": null,
|
| 5111 |
"trial_params": null
|
| 5112 |
}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3311
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:202761b0e853ed0912633dc56b6e49fb1661b0995bfb2fd6aec7da7426b6f24c
|
| 3 |
size 3311
|