huggingartists

Browse files

Files changed (13) hide show

README.md +3 -3
config.json +1 -1
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +1 -1
pytorch_model.bin +2 -2
rng_state.pth +1 -1
scheduler.pt +1 -1
special_tokens_map.json +5 -1
tokenizer.json +8 -5
tokenizer_config.json +10 -1
trainer_state.json +113 -7
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/queen")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1ddcmutf/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Queen's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1dhu8z5q) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1dhu8z5q/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/queen")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/33biqcks/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Queen's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1k3htq7p) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1k3htq7p/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -37,7 +37,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.16.2",
   "use_cache": true,
   "vocab_size": 50257
 }

     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.20.0",
   "use_cache": true,
   "vocab_size": 50257
 }

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 1.0593422651290894, "eval_runtime": 1.2351, "eval_samples_per_second": 74.489, "eval_steps_per_second": 9.716, "epoch": 10.0}


1	+ {"eval_loss": 0.9721857309341431, "eval_runtime": 1.902, "eval_samples_per_second": 44.689, "eval_steps_per_second": 5.783, "epoch": 11.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f71c5d51cdf52a930cd7e68b3903da1eb422d09bd06f91080d5d80355a54ba3c
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:800bffdd4c52f619bf17474c0d9f571b39795cdad6155abd76ab3d0e828edbee
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b8a2f7accefebd6dee4b8dea20bafd47f1f9acf8efc62324b4a2287db757050
 size 995604017

 version https://git-lfs.github.com/spec/v1
+oid sha256:d44e3f3b76fe08f524c40d07198c2501597e28b7eaa180a40a79abc32258dba5
 size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8d37c527c1768f42033ebf3f95e514029cb59df6fe857c02fe5b6c2968b0e7dd
-size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:56180aae9f0dfc24b3d1cdbb07dc2eb5acaab5a5b5e60892d48dca7feaf72ef9
+size 510396521

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f49bf0f2acadddffcdc0a4e3487fd525abbe1771468cbd89c3052c1ed8d61d77
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:f441152599d4ecfcd40f2f229153cf77289fd381959e35ff164226f90d545b2b
 size 14503

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90d90db3c01e5ca255a7e75446ed55bd62e0e6b8c2052a1a18ccb2f8838ed976
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae81c5f3c8df017bd46e14a116a6befbfbd6eda859addceb4a11946640558415
 size 623

special_tokens_map.json CHANGED Viewed

	@@ -1 +1,5 @@
1	- {"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

tokenizer.json CHANGED Viewed

@@ -5,29 +5,32 @@
   "added_tokens": [
     {
       "id": 50256,
-      "special": true,
       "content": "<|endoftext|>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     }
   ],
   "normalizer": null,
   "pre_tokenizer": {
     "type": "ByteLevel",
     "add_prefix_space": false,
-    "trim_offsets": true
   },
   "post_processor": {
     "type": "ByteLevel",
     "add_prefix_space": true,
-    "trim_offsets": false
   },
   "decoder": {
     "type": "ByteLevel",
     "add_prefix_space": true,
-    "trim_offsets": true
   },
   "model": {
     "type": "BPE",

   "added_tokens": [
     {
       "id": 50256,
       "content": "<|endoftext|>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
   "pre_tokenizer": {
     "type": "ByteLevel",
     "add_prefix_space": false,
+    "trim_offsets": true,
+    "use_regex": true
   },
   "post_processor": {
     "type": "ByteLevel",
     "add_prefix_space": true,
+    "trim_offsets": false,
+    "use_regex": true
   },
   "decoder": {
     "type": "ByteLevel",
     "add_prefix_space": true,
+    "trim_offsets": true,
+    "use_regex": true
   },
   "model": {
     "type": "BPE",

tokenizer_config.json CHANGED Viewed

	@@ -1 +1,10 @@
1	- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/queen", "tokenizer_class": "GPT2Tokenizer"}

+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "name_or_path": "huggingartists/queen",
+  "special_tokens_map_file": null,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 1.0593422651290894,
-  "best_model_checkpoint": "output/queen/checkpoint-680",
-  "epoch": 10.0,
-  "global_step": 680,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -910,11 +910,117 @@
       "eval_samples_per_second": 75.649,
       "eval_steps_per_second": 9.867,
       "step": 680
     }
   ],
-  "max_steps": 680,
-  "num_train_epochs": 10,
-  "total_flos": 704182026240000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 0.9721857309341431,
+  "best_model_checkpoint": "output/queen/checkpoint-759",
+  "epoch": 11.0,
+  "global_step": 759,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 75.649,
       "eval_steps_per_second": 9.867,
       "step": 680
+    },
+    {
+      "epoch": 9.93,
+      "learning_rate": 0.00013543005707140072,
+      "loss": 1.2301,
+      "step": 685
+    },
+    {
+      "epoch": 10.0,
+      "learning_rate": 0.0001372,
+      "loss": 1.2039,
+      "step": 690
+    },
+    {
+      "epoch": 10.0,
+      "eval_loss": 0.9929937720298767,
+      "eval_runtime": 1.7672,
+      "eval_samples_per_second": 48.098,
+      "eval_steps_per_second": 6.224,
+      "step": 690
+    },
+    {
+      "epoch": 10.07,
+      "learning_rate": 0.00013543005707140074,
+      "loss": 1.1894,
+      "step": 695
+    },
+    {
+      "epoch": 10.14,
+      "learning_rate": 0.0001302115605879495,
+      "loss": 1.2106,
+      "step": 700
+    },
+    {
+      "epoch": 10.22,
+      "learning_rate": 0.00012181379454232323,
+      "loss": 1.144,
+      "step": 705
+    },
+    {
+      "epoch": 10.29,
+      "learning_rate": 0.00011067009906946301,
+      "loss": 1.3006,
+      "step": 710
+    },
+    {
+      "epoch": 10.36,
+      "learning_rate": 9.735550930058323e-05,
+      "loss": 1.2147,
+      "step": 715
+    },
+    {
+      "epoch": 10.43,
+      "learning_rate": 8.255708249541075e-05,
+      "loss": 1.1931,
+      "step": 720
+    },
+    {
+      "epoch": 10.51,
+      "learning_rate": 6.703844462707122e-05,
+      "loss": 1.2735,
+      "step": 725
+    },
+    {
+      "epoch": 10.58,
+      "learning_rate": 5.160038588088712e-05,
+      "loss": 1.1695,
+      "step": 730
+    },
+    {
+      "epoch": 10.65,
+      "learning_rate": 3.703953841164302e-05,
+      "loss": 1.2219,
+      "step": 735
+    },
+    {
+      "epoch": 10.72,
+      "learning_rate": 2.4107268662854985e-05,
+      "loss": 1.1271,
+      "step": 740
+    },
+    {
+      "epoch": 10.8,
+      "learning_rate": 1.3470905479788523e-05,
+      "loss": 1.2963,
+      "step": 745
+    },
+    {
+      "epoch": 10.87,
+      "learning_rate": 5.679304716725944e-06,
+      "loss": 1.0029,
+      "step": 750
+    },
+    {
+      "epoch": 10.94,
+      "learning_rate": 1.1345272679252971e-06,
+      "loss": 1.2789,
+      "step": 755
+    },
+    {
+      "epoch": 11.0,
+      "eval_loss": 0.9721857309341431,
+      "eval_runtime": 1.8495,
+      "eval_samples_per_second": 45.957,
+      "eval_steps_per_second": 5.947,
+      "step": 759
     }
   ],
+  "max_steps": 759,
+  "num_train_epochs": 11,
+  "total_flos": 785966432256000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9bfe077b70ca57958721fcbf6c5a404f931dd10af1a32aa26767d45cbe093bfa
-size 3055

 version https://git-lfs.github.com/spec/v1
+oid sha256:04747b0b5177cd34b6949ed0755471acbad69a771c0daf9bdbba80e2464114ac
+size 3311