huggingartists

Browse files

Files changed (11) hide show

README.md +3 -3
config.json +4 -2
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +2 -2
pytorch_model.bin +1 -1
rng_state.pth +2 -2
scheduler.pt +1 -1
tokenizer.json +0 -0
trainer_state.json +191 -55
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/death-grips")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1z4yxb78/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Death Grips's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/18v4x337) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/18v4x337/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/death-grips")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2hmeenl7/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Death Grips's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/226ak5bw) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/226ak5bw/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "huggingartists/death-grips",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
@@ -17,7 +17,9 @@
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
   "resid_pdrop": 0.1,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
@@ -34,7 +36,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.11.3",
   "use_cache": true,
   "vocab_size": 50257
 }

 {
+  "_name_or_path": "death-grips",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.16.2",
   "use_cache": true,
   "vocab_size": 50257
 }

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 3.1992340087890625, "eval_runtime": 0.5011, "eval_samples_per_second": 75.835, "eval_steps_per_second": 9.978, "epoch": 61.0}


1	+ {"eval_loss": 3.3310177326202393, "eval_runtime": 0.4983, "eval_samples_per_second": 74.249, "eval_steps_per_second": 10.034, "epoch": 5.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93ea9acbaf03e5cd973f67cc943630d2e10e1ec2156b1a368c852eef0b80c447
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:f7ad0b6c7d0a03b637b1ac65ce7a101961bf799cbb7df8c58e8abe5b67edfe54
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91885fb2d0a23add8ab65706c01695bf0d25c6bdb6fa85982f61c7a4403d7562
-size 995603825

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a89cc3dc4bc5bf5749225580f6d1a68f68cb3efcf28ac360ebc78691d00b0d4
+size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45a4d5fb1bb4bc74f1d211806c05b5291969db23c26535e118f8d45dd0493b88
 size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:769a68d1b17288adfefbbdfc899d1c4145d9b39899d6716aa50dc0f324d71194
 size 510403817

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80bc2e05cd33aba954c4fa5280f46e5f77924d62e7982e05ecf36aff4194107e
-size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:4bc5aa03ef94446dcbc452e8552895048e148b2563e9a24308c35cffc6028de6
+size 14567

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2222362d80f6c4a5547283104536d5c4564191fb641a9b7f7b9e346e9a58c2ee
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:0aadcbf640a1905dd2dacb92f6567732433456c8a12337128489f033e5db42b7
 size 623

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

trainer_state.json CHANGED Viewed

@@ -1,118 +1,254 @@
 {
-  "best_metric": 3.1992340087890625,
-  "best_model_checkpoint": "output/death-grips/checkpoint-66",
-  "epoch": 2.0,
-  "global_step": 66,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.16,
-      "learning_rate": 0.00012909979873429716,
-      "loss": 3.5364,
       "step": 5
     },
     {
-      "epoch": 0.31,
-      "learning_rate": 0.00010671211798514472,
-      "loss": 3.4591,
       "step": 10
     },
     {
-      "epoch": 0.47,
-      "learning_rate": 7.532397582660788e-05,
-      "loss": 3.5388,
       "step": 15
     },
     {
-      "epoch": 0.62,
-      "learning_rate": 4.2347916539754844e-05,
-      "loss": 3.3558,
       "step": 20
     },
     {
-      "epoch": 0.78,
-      "learning_rate": 1.557148289931624e-05,
-      "loss": 3.3211,
       "step": 25
     },
     {
-      "epoch": 0.94,
-      "learning_rate": 1.3181297643383925e-06,
-      "loss": 3.2263,
       "step": 30
     },
     {
       "epoch": 1.0,
-      "eval_loss": 3.4941582679748535,
-      "eval_runtime": 0.5841,
-      "eval_samples_per_second": 75.33,
-      "eval_steps_per_second": 10.272,
-      "step": 32
-    },
-    {
-      "epoch": 1.0,
-      "eval_loss": 3.25174617767334,
-      "eval_runtime": 0.4993,
-      "eval_samples_per_second": 76.107,
-      "eval_steps_per_second": 10.014,
       "step": 33
     },
     {
       "epoch": 1.06,
-      "learning_rate": 1.2396913677783124e-06,
-      "loss": 3.1933,
       "step": 35
     },
     {
       "epoch": 1.21,
-      "learning_rate": 1.4676757700644785e-05,
-      "loss": 3.1172,
       "step": 40
     },
     {
       "epoch": 1.36,
-      "learning_rate": 4.0102530108070535e-05,
-      "loss": 3.4529,
       "step": 45
     },
     {
       "epoch": 1.52,
-      "learning_rate": 7.186411942550872e-05,
-      "loss": 3.2121,
       "step": 50
     },
     {
       "epoch": 1.67,
-      "learning_rate": 0.00010290000000000001,
-      "loss": 3.1379,
       "step": 55
     },
     {
       "epoch": 1.82,
-      "learning_rate": 0.000126309992352219,
-      "loss": 3.1008,
       "step": 60
     },
     {
       "epoch": 1.97,
-      "learning_rate": 0.0001368893738885136,
-      "loss": 3.1177,
       "step": 65
     },
     {
       "epoch": 2.0,
-      "eval_loss": 3.1992340087890625,
-      "eval_runtime": 0.4988,
-      "eval_samples_per_second": 76.185,
-      "eval_steps_per_second": 10.024,
       "step": 66
     }
   ],
-  "max_steps": 2013,
-  "num_train_epochs": 61,
-  "total_flos": 66890760192000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 3.3310177326202393,
+  "best_model_checkpoint": "output/death-grips/checkpoint-165",
+  "epoch": 5.0,
+  "global_step": 165,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.15,
+      "learning_rate": 7.625888222272212e-06,
+      "loss": 2.8634,
       "step": 5
     },
     {
+      "epoch": 0.3,
+      "learning_rate": 2.8808096003415784e-05,
+      "loss": 2.5791,
       "step": 10
     },
     {
+      "epoch": 0.45,
+      "learning_rate": 5.8837202094452676e-05,
+      "loss": 2.7462,
       "step": 15
     },
     {
+      "epoch": 0.61,
+      "learning_rate": 9.103686228357511e-05,
+      "loss": 2.8049,
       "step": 20
     },
     {
+      "epoch": 0.76,
+      "learning_rate": 0.00011824815501400784,
+      "loss": 2.9692,
       "step": 25
     },
     {
+      "epoch": 0.91,
+      "learning_rate": 0.0001344212179899545,
+      "loss": 2.6618,
       "step": 30
     },
     {
       "epoch": 1.0,
+      "eval_loss": 3.3408315181732178,
+      "eval_runtime": 0.4984,
+      "eval_samples_per_second": 74.235,
+      "eval_steps_per_second": 10.032,
       "step": 33
     },
     {
       "epoch": 1.06,
+      "learning_rate": 0.00013596030863222171,
+      "loss": 2.7889,
       "step": 35
     },
     {
       "epoch": 1.21,
+      "learning_rate": 0.00012252324229935523,
+      "loss": 2.8071,
       "step": 40
     },
     {
       "epoch": 1.36,
+      "learning_rate": 9.709746989192948e-05,
+      "loss": 2.7665,
       "step": 45
     },
     {
       "epoch": 1.52,
+      "learning_rate": 6.533588057449117e-05,
+      "loss": 2.4805,
       "step": 50
     },
     {
       "epoch": 1.67,
+      "learning_rate": 3.429999999999996e-05,
+      "loss": 2.509,
       "step": 55
     },
     {
       "epoch": 1.82,
+      "learning_rate": 1.0890007647780984e-05,
+      "loss": 2.5541,
       "step": 60
     },
     {
       "epoch": 1.97,
+      "learning_rate": 3.106261114864048e-07,
+      "loss": 2.4372,
       "step": 65
     },
     {
       "epoch": 2.0,
+      "eval_loss": 3.340346097946167,
+      "eval_runtime": 0.4997,
+      "eval_samples_per_second": 74.05,
+      "eval_steps_per_second": 10.007,
       "step": 66
+    },
+    {
+      "epoch": 2.12,
+      "learning_rate": 4.913959795097369e-06,
+      "loss": 2.7443,
+      "step": 70
+    },
+    {
+      "epoch": 2.27,
+      "learning_rate": 2.3676553651353298e-05,
+      "loss": 2.3401,
+      "step": 75
+    },
+    {
+      "epoch": 2.42,
+      "learning_rate": 5.242693702405328e-05,
+      "loss": 2.6265,
+      "step": 80
+    },
+    {
+      "epoch": 2.58,
+      "learning_rate": 8.477306297594662e-05,
+      "loss": 2.5077,
+      "step": 85
+    },
+    {
+      "epoch": 2.73,
+      "learning_rate": 0.00011352344634864663,
+      "loss": 2.2708,
+      "step": 90
+    },
+    {
+      "epoch": 2.88,
+      "learning_rate": 0.0001322860402049026,
+      "loss": 2.6378,
+      "step": 95
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 3.3379859924316406,
+      "eval_runtime": 0.5006,
+      "eval_samples_per_second": 73.905,
+      "eval_steps_per_second": 9.987,
+      "step": 99
+    },
+    {
+      "epoch": 3.03,
+      "learning_rate": 0.0001368893738885136,
+      "loss": 2.4859,
+      "step": 100
+    },
+    {
+      "epoch": 3.18,
+      "learning_rate": 0.00012630999235221907,
+      "loss": 2.3826,
+      "step": 105
+    },
+    {
+      "epoch": 3.33,
+      "learning_rate": 0.00010290000000000012,
+      "loss": 2.3622,
+      "step": 110
+    },
+    {
+      "epoch": 3.48,
+      "learning_rate": 7.186411942550891e-05,
+      "loss": 2.3022,
+      "step": 115
+    },
+    {
+      "epoch": 3.64,
+      "learning_rate": 4.0102530108070596e-05,
+      "loss": 2.3944,
+      "step": 120
+    },
+    {
+      "epoch": 3.79,
+      "learning_rate": 1.4676757700644823e-05,
+      "loss": 2.3517,
+      "step": 125
+    },
+    {
+      "epoch": 3.94,
+      "learning_rate": 1.2396913677783427e-06,
+      "loss": 2.2771,
+      "step": 130
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 3.3588767051696777,
+      "eval_runtime": 0.497,
+      "eval_samples_per_second": 74.445,
+      "eval_steps_per_second": 10.06,
+      "step": 132
+    },
+    {
+      "epoch": 4.09,
+      "learning_rate": 2.7787820100454274e-06,
+      "loss": 2.2086,
+      "step": 135
+    },
+    {
+      "epoch": 4.24,
+      "learning_rate": 1.8951844985992186e-05,
+      "loss": 2.3738,
+      "step": 140
+    },
+    {
+      "epoch": 4.39,
+      "learning_rate": 4.616313771642481e-05,
+      "loss": 2.0153,
+      "step": 145
+    },
+    {
+      "epoch": 4.55,
+      "learning_rate": 7.836279790554748e-05,
+      "loss": 2.2206,
+      "step": 150
+    },
+    {
+      "epoch": 4.7,
+      "learning_rate": 0.00010839190399658423,
+      "loss": 2.1647,
+      "step": 155
+    },
+    {
+      "epoch": 4.85,
+      "learning_rate": 0.00012957411177772773,
+      "loss": 2.1136,
+      "step": 160
+    },
+    {
+      "epoch": 5.0,
+      "learning_rate": 0.0001372,
+      "loss": 2.4947,
+      "step": 165
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 3.3310177326202393,
+      "eval_runtime": 0.4984,
+      "eval_samples_per_second": 74.234,
+      "eval_steps_per_second": 10.032,
+      "step": 165
     }
   ],
+  "max_steps": 165,
+  "num_train_epochs": 5,
+  "total_flos": 169186590720000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:820c88075277a4ce2d8bfc6da26d7fd133ba368b384db802a6a4afafc3e34877
-size 2863

 version https://git-lfs.github.com/spec/v1
+oid sha256:bb348a771651919e39ddf44cbd2248983e52d34f3c89662cce1a833faf738e5d
+size 3055