huggingartists

Browse files

Files changed (11) hide show

README.md +3 -3
config.json +1 -1
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +2 -2
pytorch_model.bin +1 -1
rng_state.pth +2 -2
scheduler.pt +1 -1
tokenizer_config.json +1 -1
trainer_state.json +321 -7
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/hyuna")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/11k7ux75/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on HyunA (현아)'s lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2adombki) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2adombki/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/hyuna")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3uo94mxd/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on HyunA (현아)'s lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1o8t0mq0) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1o8t0mq0/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "gpt2",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

 {
+  "_name_or_path": "huggingartists/hyuna",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 2.4857912063598633, "eval_runtime": 0.9974, "eval_samples_per_second": 75.193, "eval_steps_per_second": 10.026, "epoch": 1.0}


1	+ {"eval_loss": 1.948645830154419, "eval_runtime": 1.0716, "eval_samples_per_second": 74.654, "eval_steps_per_second": 9.332, "epoch": 21.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eea24b5a87ba581a31b68f6feb9d8984cefc4193561631b2a0c37eb533abd099
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c6c5f3002aa8553ef3d1076f8c241aea7e67dd9a061ae400fb1f73dd0af518e
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fad55fc0023f5af751d53fae38afce0ad829438474c0eb5ca992b913ed0c7cbc
-size 995603825

 version https://git-lfs.github.com/spec/v1
+oid sha256:b98fd9abc79219d8710076f00fa144a25a95234e0af1cef66fee27c4a7e2f212
+size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:acef724cc857ea36533b962cf392ab1ce9f14179d814b19d7dcda36abc7dcc3d
 size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:a147ed0181a141745e94a33aca89bd326c03f5d7245235b650749fb53eeb5c03
 size 510403817

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:07a059709928e7c9750812147dd6bb5220fbe286a930ef0174e5e4fa47c45f0f
-size 14567

 version https://git-lfs.github.com/spec/v1
+oid sha256:85e11aaedead41a17f8b2fd14bbca2c2a93cce3573b1c09a78b5f4f2e0da4ded
+size 14503

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e450cb9c3dd5d5e299310845f6d7ab8731c0d1d1d427cd56769b403ce862ce4
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2193380bd6253a6f966c23f88da8618a1e1fec5e54a292b0a7960495f5972fa
 size 623

tokenizer_config.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}


1	+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/hyuna", "tokenizer_class": "GPT2Tokenizer"}

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 2.4857912063598633,
-  "best_model_checkpoint": "output/hyuna/checkpoint-58",
-  "epoch": 1.0,
-  "global_step": 58,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -80,11 +80,325 @@
       "eval_samples_per_second": 75.519,
       "eval_steps_per_second": 10.069,
       "step": 58
     }
   ],
-  "max_steps": 58,
-  "num_train_epochs": 1,
-  "total_flos": 60489105408000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 1.948645830154419,
+  "best_model_checkpoint": "output/hyuna/checkpoint-290",
+  "epoch": 5.0,
+  "global_step": 290,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 75.519,
       "eval_steps_per_second": 10.069,
       "step": 58
+    },
+    {
+      "epoch": 1.03,
+      "learning_rate": 4.0213613921093164e-07,
+      "loss": 2.3666,
+      "step": 60
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 4.8721970205680935e-06,
+      "loss": 2.4303,
+      "step": 65
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 1.3988015692592823e-05,
+      "loss": 2.3856,
+      "step": 70
+    },
+    {
+      "epoch": 1.29,
+      "learning_rate": 2.708504883770769e-05,
+      "loss": 2.1811,
+      "step": 75
+    },
+    {
+      "epoch": 1.38,
+      "learning_rate": 4.320852254368187e-05,
+      "loss": 2.4049,
+      "step": 80
+    },
+    {
+      "epoch": 1.47,
+      "learning_rate": 6.118303533611755e-05,
+      "loss": 2.2865,
+      "step": 85
+    },
+    {
+      "epoch": 1.55,
+      "learning_rate": 7.969824496351964e-05,
+      "loss": 2.1265,
+      "step": 90
+    },
+    {
+      "epoch": 1.64,
+      "learning_rate": 9.740439236703416e-05,
+      "loss": 2.1689,
+      "step": 95
+    },
+    {
+      "epoch": 1.72,
+      "learning_rate": 0.00011301069913603334,
+      "loss": 2.4769,
+      "step": 100
+    },
+    {
+      "epoch": 1.81,
+      "learning_rate": 0.00012537946527356269,
+      "loss": 2.282,
+      "step": 105
+    },
+    {
+      "epoch": 1.9,
+      "learning_rate": 0.00013360900754314024,
+      "loss": 2.2972,
+      "step": 110
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 0.0001370993921901871,
+      "loss": 2.3702,
+      "step": 115
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 2.0102086067199707,
+      "eval_runtime": 1.0576,
+      "eval_samples_per_second": 75.643,
+      "eval_steps_per_second": 9.455,
+      "step": 116
+    },
+    {
+      "epoch": 2.07,
+      "learning_rate": 0.00013559617012171197,
+      "loss": 2.0597,
+      "step": 120
+    },
+    {
+      "epoch": 2.16,
+      "learning_rate": 0.00012920892624899717,
+      "loss": 2.1937,
+      "step": 125
+    },
+    {
+      "epoch": 2.24,
+      "learning_rate": 0.00011840329074592684,
+      "loss": 2.1953,
+      "step": 130
+    },
+    {
+      "epoch": 2.33,
+      "learning_rate": 0.00010396699460234374,
+      "loss": 2.0115,
+      "step": 135
+    },
+    {
+      "epoch": 2.41,
+      "learning_rate": 8.69524440231046e-05,
+      "loss": 2.0135,
+      "step": 140
+    },
+    {
+      "epoch": 2.5,
+      "learning_rate": 6.860000000000001e-05,
+      "loss": 1.9796,
+      "step": 145
+    },
+    {
+      "epoch": 2.59,
+      "learning_rate": 5.024755597689551e-05,
+      "loss": 2.0685,
+      "step": 150
+    },
+    {
+      "epoch": 2.67,
+      "learning_rate": 3.3233005397656285e-05,
+      "loss": 2.075,
+      "step": 155
+    },
+    {
+      "epoch": 2.76,
+      "learning_rate": 1.8796709254073232e-05,
+      "loss": 2.0578,
+      "step": 160
+    },
+    {
+      "epoch": 2.84,
+      "learning_rate": 7.99107375100285e-06,
+      "loss": 1.9644,
+      "step": 165
+    },
+    {
+      "epoch": 2.93,
+      "learning_rate": 1.6038298782880706e-06,
+      "loss": 2.0836,
+      "step": 170
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 1.9711263179779053,
+      "eval_runtime": 1.056,
+      "eval_samples_per_second": 75.756,
+      "eval_steps_per_second": 9.469,
+      "step": 174
+    },
+    {
+      "epoch": 3.02,
+      "learning_rate": 1.0060780981290602e-07,
+      "loss": 2.0553,
+      "step": 175
+    },
+    {
+      "epoch": 3.1,
+      "learning_rate": 3.5909924568597365e-06,
+      "loss": 1.9319,
+      "step": 180
+    },
+    {
+      "epoch": 3.19,
+      "learning_rate": 1.182053472643733e-05,
+      "loss": 2.0587,
+      "step": 185
+    },
+    {
+      "epoch": 3.28,
+      "learning_rate": 2.418930086396662e-05,
+      "loss": 2.1358,
+      "step": 190
+    },
+    {
+      "epoch": 3.36,
+      "learning_rate": 3.9795607632965815e-05,
+      "loss": 1.9452,
+      "step": 195
+    },
+    {
+      "epoch": 3.45,
+      "learning_rate": 5.750175503648027e-05,
+      "loss": 1.9591,
+      "step": 200
+    },
+    {
+      "epoch": 3.53,
+      "learning_rate": 7.601696466388229e-05,
+      "loss": 1.8235,
+      "step": 205
+    },
+    {
+      "epoch": 3.62,
+      "learning_rate": 9.39914774563181e-05,
+      "loss": 1.8749,
+      "step": 210
+    },
+    {
+      "epoch": 3.71,
+      "learning_rate": 0.00011011495116229225,
+      "loss": 2.0178,
+      "step": 215
+    },
+    {
+      "epoch": 3.79,
+      "learning_rate": 0.00012321198430740717,
+      "loss": 1.9038,
+      "step": 220
+    },
+    {
+      "epoch": 3.88,
+      "learning_rate": 0.0001323278029794319,
+      "loss": 2.1059,
+      "step": 225
+    },
+    {
+      "epoch": 3.97,
+      "learning_rate": 0.00013679786386078908,
+      "loss": 1.946,
+      "step": 230
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 1.9748159646987915,
+      "eval_runtime": 1.0572,
+      "eval_samples_per_second": 75.673,
+      "eval_steps_per_second": 9.459,
+      "step": 232
+    },
+    {
+      "epoch": 4.05,
+      "learning_rate": 0.0001362962994463487,
+      "loss": 1.8662,
+      "step": 235
+    },
+    {
+      "epoch": 4.14,
+      "learning_rate": 0.00013085967378942767,
+      "loss": 1.9053,
+      "step": 240
+    },
+    {
+      "epoch": 4.22,
+      "learning_rate": 0.00012088431698175582,
+      "loss": 1.8573,
+      "step": 245
+    },
+    {
+      "epoch": 4.31,
+      "learning_rate": 0.00010709743268385941,
+      "loss": 1.9869,
+      "step": 250
+    },
+    {
+      "epoch": 4.4,
+      "learning_rate": 9.050408496732835e-05,
+      "loss": 1.7389,
+      "step": 255
+    },
+    {
+      "epoch": 4.48,
+      "learning_rate": 7.231392912895982e-05,
+      "loss": 2.0663,
+      "step": 260
+    },
+    {
+      "epoch": 4.57,
+      "learning_rate": 5.385302780152384e-05,
+      "loss": 1.8779,
+      "step": 265
+    },
+    {
+      "epoch": 4.66,
+      "learning_rate": 3.646718096799452e-05,
+      "loss": 1.7718,
+      "step": 270
+    },
+    {
+      "epoch": 4.74,
+      "learning_rate": 2.14238171226552e-05,
+      "loss": 1.7325,
+      "step": 275
+    },
+    {
+      "epoch": 4.83,
+      "learning_rate": 9.819597714903422e-06,
+      "loss": 1.7267,
+      "step": 280
+    },
+    {
+      "epoch": 4.91,
+      "learning_rate": 2.5004705131813084e-06,
+      "loss": 1.8411,
+      "step": 285
+    },
+    {
+      "epoch": 5.0,
+      "learning_rate": 0.0,
+      "loss": 1.6194,
+      "step": 290
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 1.948645830154419,
+      "eval_runtime": 1.0612,
+      "eval_samples_per_second": 75.387,
+      "eval_steps_per_second": 9.423,
+      "step": 290
     }
   ],
+  "max_steps": 1218,
+  "num_train_epochs": 21,
+  "total_flos": 299832606720000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5fd667400197eea16b356e51e788c416df8d872cfb807cbde9f2de566162e9e1
 size 2863

 version https://git-lfs.github.com/spec/v1
+oid sha256:a24bd115b15d961e04c04be0e6fe76f00bfddd0ce2e3a83546da90037c6341b8
 size 2863