huggingartists

Browse files

Files changed (11) hide show

README.md +3 -3
config.json +1 -1
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +2 -2
pytorch_model.bin +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
tokenizer_config.json +1 -1
trainer_state.json +239 -7
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/lil-peep")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/skb4bt7q/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Lil Peep's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2yk68ca1) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2yk68ca1/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/lil-peep")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/39q6kspr/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Lil Peep's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/g0nxk974) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/g0nxk974/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "gpt2",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

 {
+  "_name_or_path": "huggingartists/lil-peep",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 2.7529633045196533, "eval_runtime": 5.342, "eval_samples_per_second": 21.715, "eval_steps_per_second": 2.808, "epoch": 1.0}


1	+ {"eval_loss": 2.5420846939086914, "eval_runtime": 5.9616, "eval_samples_per_second": 20.632, "eval_steps_per_second": 2.684, "epoch": 3.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a454493c1c6c01a60fc37ae52c071e78dc5c96002a9a482dfd4fd21ba48ee6e
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:8386316df11e411103a10462a9832f1e8219035e210b74b0fdd9e3b942385132
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0036879c3f12d01591826f96697ce515f289ddef07e5a8680de0846a6cded110
-size 995603825

 version https://git-lfs.github.com/spec/v1
+oid sha256:ddc28d4d1be7b354635653967c4539865af914bcd1b0473439c1ca8f69340698
+size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:149aeb7596cc7ea581826b074304d820e38e3a185826b8c50bdc332288edd5f1
 size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:193c961169b042aa775f0595fbf1817184251bfd44b7d569b3d634fcf04b633d
 size 510403817

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:813fa83a7b81f5bcfe540d59048a81fd01cd9734cd427897d8a5036e30efc049
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:216bd8b6a2ee475e4f6d25ab307f7287c7d8e799ef357fe50ed0f1ab918f2686
 size 14503

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfcdeab6af2cc2dca6b32132fe0c7004c830f6a37b933747e14164b30d1228cb
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8970c66116bea18a63ae8f760c2b726160c1083513d6d6a12f0d62c2df987a2
 size 623

tokenizer_config.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}


1	+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/lil-peep", "tokenizer_class": "GPT2Tokenizer"}

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 2.7529633045196533,
-  "best_model_checkpoint": "output/lil-peep/checkpoint-92",
-  "epoch": 1.0,
-  "global_step": 92,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -122,11 +122,243 @@
       "eval_samples_per_second": 22.05,
       "eval_steps_per_second": 2.851,
       "step": 92
     }
   ],
-  "max_steps": 92,
-  "num_train_epochs": 1,
-  "total_flos": 95240945664000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 2.5420846939086914,
+  "best_model_checkpoint": "output/lil-peep/checkpoint-273",
+  "epoch": 3.0,
+  "global_step": 273,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.05,
       "eval_steps_per_second": 2.851,
       "step": 92
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 6.530415424531654e-07,
+      "loss": 2.6804,
+      "step": 95
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 3.2847281185250116e-06,
+      "loss": 2.8371,
+      "step": 100
+    },
+    {
+      "epoch": 1.15,
+      "learning_rate": 7.857716640189785e-06,
+      "loss": 2.912,
+      "step": 105
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 1.4236088642155179e-05,
+      "loss": 2.9544,
+      "step": 110
+    },
+    {
+      "epoch": 1.26,
+      "learning_rate": 2.223026601166984e-05,
+      "loss": 2.6108,
+      "step": 115
+    },
+    {
+      "epoch": 1.32,
+      "learning_rate": 3.1602645633354905e-05,
+      "loss": 2.743,
+      "step": 120
+    },
+    {
+      "epoch": 1.37,
+      "learning_rate": 4.207466143421867e-05,
+      "loss": 2.7849,
+      "step": 125
+    },
+    {
+      "epoch": 1.43,
+      "learning_rate": 5.333506393059682e-05,
+      "loss": 2.7864,
+      "step": 130
+    },
+    {
+      "epoch": 1.48,
+      "learning_rate": 6.504917119214327e-05,
+      "loss": 2.7238,
+      "step": 135
+    },
+    {
+      "epoch": 1.54,
+      "learning_rate": 7.686881626551516e-05,
+      "loss": 2.7156,
+      "step": 140
+    },
+    {
+      "epoch": 1.59,
+      "learning_rate": 8.844269540020853e-05,
+      "loss": 2.739,
+      "step": 145
+    },
+    {
+      "epoch": 1.65,
+      "learning_rate": 9.942680950634723e-05,
+      "loss": 2.8584,
+      "step": 150
+    },
+    {
+      "epoch": 1.7,
+      "learning_rate": 0.00010949468850318882,
+      "loss": 2.7199,
+      "step": 155
+    },
+    {
+      "epoch": 1.76,
+      "learning_rate": 0.00011834709467003562,
+      "loss": 2.4955,
+      "step": 160
+    },
+    {
+      "epoch": 1.81,
+      "learning_rate": 0.00012572091659634235,
+      "loss": 2.878,
+      "step": 165
+    },
+    {
+      "epoch": 1.87,
+      "learning_rate": 0.00013139698938484013,
+      "loss": 2.8072,
+      "step": 170
+    },
+    {
+      "epoch": 1.92,
+      "learning_rate": 0.00013520660867542716,
+      "loss": 2.8227,
+      "step": 175
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 0.00013703654487986559,
+      "loss": 2.5274,
+      "step": 180
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 2.6319327354431152,
+      "eval_runtime": 5.9403,
+      "eval_samples_per_second": 20.706,
+      "eval_steps_per_second": 2.693,
+      "step": 182
+    },
+    {
+      "epoch": 2.03,
+      "learning_rate": 0.0001368324085940902,
+      "loss": 2.7572,
+      "step": 185
+    },
+    {
+      "epoch": 2.09,
+      "learning_rate": 0.000134600267161271,
+      "loss": 2.5108,
+      "step": 190
+    },
+    {
+      "epoch": 2.14,
+      "learning_rate": 0.00013040646433810595,
+      "loss": 2.5831,
+      "step": 195
+    },
+    {
+      "epoch": 2.2,
+      "learning_rate": 0.00012437564842422732,
+      "loss": 2.5252,
+      "step": 200
+    },
+    {
+      "epoch": 2.25,
+      "learning_rate": 0.00011668706746270184,
+      "loss": 2.5344,
+      "step": 205
+    },
+    {
+      "epoch": 2.31,
+      "learning_rate": 0.00010756924162575734,
+      "loss": 2.4713,
+      "step": 210
+    },
+    {
+      "epoch": 2.36,
+      "learning_rate": 9.72931711332038e-05,
+      "loss": 2.791,
+      "step": 215
+    },
+    {
+      "epoch": 2.42,
+      "learning_rate": 8.616428157794779e-05,
+      "loss": 2.4278,
+      "step": 220
+    },
+    {
+      "epoch": 2.47,
+      "learning_rate": 7.451334605981051e-05,
+      "loss": 2.5167,
+      "step": 225
+    },
+    {
+      "epoch": 2.53,
+      "learning_rate": 6.268665394018953e-05,
+      "loss": 2.3747,
+      "step": 230
+    },
+    {
+      "epoch": 2.58,
+      "learning_rate": 5.103571842205231e-05,
+      "loss": 2.4532,
+      "step": 235
+    },
+    {
+      "epoch": 2.64,
+      "learning_rate": 3.990682886679629e-05,
+      "loss": 2.4794,
+      "step": 240
+    },
+    {
+      "epoch": 2.69,
+      "learning_rate": 2.9630758374242683e-05,
+      "loss": 2.4218,
+      "step": 245
+    },
+    {
+      "epoch": 2.75,
+      "learning_rate": 2.051293253729814e-05,
+      "loss": 2.4528,
+      "step": 250
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 1.2824351575772677e-05,
+      "loss": 2.6209,
+      "step": 255
+    },
+    {
+      "epoch": 2.86,
+      "learning_rate": 6.793535661894062e-06,
+      "loss": 2.4639,
+      "step": 260
+    },
+    {
+      "epoch": 2.91,
+      "learning_rate": 2.599732838729015e-06,
+      "loss": 2.4412,
+      "step": 265
+    },
+    {
+      "epoch": 2.97,
+      "learning_rate": 3.6759140590977833e-07,
+      "loss": 2.487,
+      "step": 270
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 2.5420846939086914,
+      "eval_runtime": 5.9485,
+      "eval_samples_per_second": 20.677,
+      "eval_steps_per_second": 2.69,
+      "step": 273
     }
   ],
+  "max_steps": 273,
+  "num_train_epochs": 3,
+  "total_flos": 282587332608000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f065406dd6c37950be071a66a2160e6c3cef5908390735decfe6e3274e4f895b
 size 2671

 version https://git-lfs.github.com/spec/v1
+oid sha256:349d1748d6060582917b308b9a3979d579e6461af49162f5eb43fd204b03fe2f
 size 2671