huggingartists

Browse files

Files changed (10) hide show

README.md +3 -3
config.json +4 -2
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +1 -1
pytorch_model.bin +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +309 -7
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/50-cent")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1awg3ygb/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on 50 Cent's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/ld8pvc1j) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/ld8pvc1j/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/50-cent")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2iekmvbo/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on 50 Cent's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1zrybuom) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1zrybuom/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "huggingartists/50-cent",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
@@ -18,7 +18,9 @@
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
   "resid_pdrop": 0.1,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
@@ -35,7 +37,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.10.2",
   "use_cache": true,
   "vocab_size": 50257
 }

 {
+  "_name_or_path": "50-cent",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.15.0",
   "use_cache": true,
   "vocab_size": 50257
 }

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 3.3637726306915283, "eval_runtime": 16.4999, "eval_samples_per_second": 20.727, "eval_steps_per_second": 2.606, "epoch": 2.0}


1	+ {"eval_loss": 3.1829922199249268, "eval_runtime": 18.7316, "eval_samples_per_second": 20.82, "eval_steps_per_second": 2.616, "epoch": 3.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:358a6f342b56f9de0136e070ea234ddaa8e96700df6535caddb90f83533ad73a
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:5dc7b1e66dd4804ccfb9dc1714f49153dd394cb37828b4d409f91f1c2b62adc8
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54d8c0438e44a5c817661dde8d80235759dd9bf6fd31e60b82390ac67a903164
 size 995604017

 version https://git-lfs.github.com/spec/v1
+oid sha256:9948f38cc1a6abe44b5a4dee4a5cf93797f4b56eee3fb384dcc06fc944da98f9
 size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39028db0b9207c67d95d8bc7b1634c498f86342adff06c8b1c9d2d4614a45efe
 size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a35c4f858facf554a582294b1f58c98b5633f4416d25525aad47c8b1d043cf4
 size 510403817

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1962bee387c556be33f8635e4ecc47b5f5d760f1525f5cf294369a7e3bac39b
 size 14439

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8c270da9fce7276d12ff26b0a4bcaa95748084c5a24bfdf380b2b8c8b5bdff2
 size 14439

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:56a03ee9c4f7d46c39bb609d5652834cf98daba2d29c096120b392bb7cba62b1
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1916ec374b6c0a9df0adb0631f08e8ad04238433d3a103302e9d6c65c1db806
 size 623

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 3.3637726306915283,
-  "best_model_checkpoint": "output/50-cent/checkpoint-522",
-  "epoch": 2.0,
-  "global_step": 522,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -654,11 +654,313 @@
       "eval_samples_per_second": 20.748,
       "eval_steps_per_second": 2.609,
       "step": 522
     }
   ],
-  "max_steps": 522,
-  "num_train_epochs": 2,
-  "total_flos": 544271302656000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 3.1829922199249268,
+  "best_model_checkpoint": "output/50-cent/checkpoint-765",
+  "epoch": 3.0,
+  "global_step": 765,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 20.748,
       "eval_steps_per_second": 2.609,
       "step": 522
+    },
+    {
+      "epoch": 2.06,
+      "learning_rate": 0.00013603195463831566,
+      "loss": 3.4382,
+      "step": 525
+    },
+    {
+      "epoch": 2.08,
+      "learning_rate": 0.00013512806981200165,
+      "loss": 3.3096,
+      "step": 530
+    },
+    {
+      "epoch": 2.1,
+      "learning_rate": 0.00013397182122930294,
+      "loss": 3.2608,
+      "step": 535
+    },
+    {
+      "epoch": 2.12,
+      "learning_rate": 0.00013256759493713883,
+      "loss": 3.3047,
+      "step": 540
+    },
+    {
+      "epoch": 2.14,
+      "learning_rate": 0.00013092071764681933,
+      "loss": 3.3901,
+      "step": 545
+    },
+    {
+      "epoch": 2.16,
+      "learning_rate": 0.00012903743652800486,
+      "loss": 3.3488,
+      "step": 550
+    },
+    {
+      "epoch": 2.18,
+      "learning_rate": 0.00012692489551105156,
+      "loss": 3.3604,
+      "step": 555
+    },
+    {
+      "epoch": 2.2,
+      "learning_rate": 0.0001245911081876368,
+      "loss": 3.2288,
+      "step": 560
+    },
+    {
+      "epoch": 2.22,
+      "learning_rate": 0.00012204492741246097,
+      "loss": 3.3826,
+      "step": 565
+    },
+    {
+      "epoch": 2.24,
+      "learning_rate": 0.0001192960117213372,
+      "loss": 3.3225,
+      "step": 570
+    },
+    {
+      "epoch": 2.25,
+      "learning_rate": 0.0001163547886930568,
+      "loss": 3.39,
+      "step": 575
+    },
+    {
+      "epoch": 2.27,
+      "learning_rate": 0.00011323241539401106,
+      "loss": 3.2293,
+      "step": 580
+    },
+    {
+      "epoch": 2.29,
+      "learning_rate": 0.00010994073605561706,
+      "loss": 3.24,
+      "step": 585
+    },
+    {
+      "epoch": 2.31,
+      "learning_rate": 0.00010649223714509067,
+      "loss": 3.3242,
+      "step": 590
+    },
+    {
+      "epoch": 2.33,
+      "learning_rate": 0.00010289999999999998,
+      "loss": 3.4037,
+      "step": 595
+    },
+    {
+      "epoch": 2.35,
+      "learning_rate": 9.917765120627052e-05,
+      "loss": 3.4103,
+      "step": 600
+    },
+    {
+      "epoch": 2.37,
+      "learning_rate": 9.53393109078778e-05,
+      "loss": 3.2591,
+      "step": 605
+    },
+    {
+      "epoch": 2.39,
+      "learning_rate": 9.139953924430467e-05,
+      "loss": 3.2654,
+      "step": 610
+    },
+    {
+      "epoch": 2.41,
+      "learning_rate": 8.737328111894491e-05,
+      "loss": 3.1585,
+      "step": 615
+    },
+    {
+      "epoch": 2.43,
+      "learning_rate": 8.327580950796576e-05,
+      "loss": 3.257,
+      "step": 620
+    },
+    {
+      "epoch": 2.45,
+      "learning_rate": 7.912266752467782e-05,
+      "loss": 3.2541,
+      "step": 625
+    },
+    {
+      "epoch": 2.47,
+      "learning_rate": 7.492960945918252e-05,
+      "loss": 3.2145,
+      "step": 630
+    },
+    {
+      "epoch": 2.49,
+      "learning_rate": 7.071254101695329e-05,
+      "loss": 3.3335,
+      "step": 635
+    },
+    {
+      "epoch": 2.51,
+      "learning_rate": 6.648745898304675e-05,
+      "loss": 3.3844,
+      "step": 640
+    },
+    {
+      "epoch": 2.53,
+      "learning_rate": 6.227039054081752e-05,
+      "loss": 3.3846,
+      "step": 645
+    },
+    {
+      "epoch": 2.55,
+      "learning_rate": 5.807733247532229e-05,
+      "loss": 3.3111,
+      "step": 650
+    },
+    {
+      "epoch": 2.57,
+      "learning_rate": 5.392419049203428e-05,
+      "loss": 3.1908,
+      "step": 655
+    },
+    {
+      "epoch": 2.59,
+      "learning_rate": 4.9826718881055135e-05,
+      "loss": 3.3639,
+      "step": 660
+    },
+    {
+      "epoch": 2.61,
+      "learning_rate": 4.580046075569537e-05,
+      "loss": 3.0676,
+      "step": 665
+    },
+    {
+      "epoch": 2.63,
+      "learning_rate": 4.1860689092122226e-05,
+      "loss": 3.2607,
+      "step": 670
+    },
+    {
+      "epoch": 2.65,
+      "learning_rate": 3.8022348793729525e-05,
+      "loss": 3.249,
+      "step": 675
+    },
+    {
+      "epoch": 2.67,
+      "learning_rate": 3.4300000000000054e-05,
+      "loss": 3.2803,
+      "step": 680
+    },
+    {
+      "epoch": 2.69,
+      "learning_rate": 3.0707762854909365e-05,
+      "loss": 3.292,
+      "step": 685
+    },
+    {
+      "epoch": 2.71,
+      "learning_rate": 2.7259263944382986e-05,
+      "loss": 3.3944,
+      "step": 690
+    },
+    {
+      "epoch": 2.73,
+      "learning_rate": 2.3967584605988973e-05,
+      "loss": 3.3062,
+      "step": 695
+    },
+    {
+      "epoch": 2.75,
+      "learning_rate": 2.0845211306943224e-05,
+      "loss": 3.2958,
+      "step": 700
+    },
+    {
+      "epoch": 2.76,
+      "learning_rate": 1.7903988278662788e-05,
+      "loss": 3.0765,
+      "step": 705
+    },
+    {
+      "epoch": 2.78,
+      "learning_rate": 1.515507258753902e-05,
+      "loss": 3.2299,
+      "step": 710
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 1.2608891812363205e-05,
+      "loss": 3.3755,
+      "step": 715
+    },
+    {
+      "epoch": 2.82,
+      "learning_rate": 1.0275104488948488e-05,
+      "loss": 3.1482,
+      "step": 720
+    },
+    {
+      "epoch": 2.84,
+      "learning_rate": 8.162563471995208e-06,
+      "loss": 3.1407,
+      "step": 725
+    },
+    {
+      "epoch": 2.86,
+      "learning_rate": 6.279282353180702e-06,
+      "loss": 3.3813,
+      "step": 730
+    },
+    {
+      "epoch": 2.88,
+      "learning_rate": 4.6324050628612214e-06,
+      "loss": 3.0962,
+      "step": 735
+    },
+    {
+      "epoch": 2.9,
+      "learning_rate": 3.228178770697088e-06,
+      "loss": 3.2479,
+      "step": 740
+    },
+    {
+      "epoch": 2.92,
+      "learning_rate": 2.0719301879983714e-06,
+      "loss": 3.0949,
+      "step": 745
+    },
+    {
+      "epoch": 2.94,
+      "learning_rate": 1.1680453616843376e-06,
+      "loss": 3.2467,
+      "step": 750
+    },
+    {
+      "epoch": 2.96,
+      "learning_rate": 5.199530365052086e-07,
+      "loss": 3.1761,
+      "step": 755
+    },
+    {
+      "epoch": 2.98,
+      "learning_rate": 1.3011164863877445e-07,
+      "loss": 3.1651,
+      "step": 760
+    },
+    {
+      "epoch": 3.0,
+      "learning_rate": 0.0,
+      "loss": 3.2256,
+      "step": 765
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 3.1829922199249268,
+      "eval_runtime": 18.683,
+      "eval_samples_per_second": 20.875,
+      "eval_steps_per_second": 2.623,
+      "step": 765
     }
   ],
+  "max_steps": 765,
+  "num_train_epochs": 3,
+  "total_flos": 797985865728000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6f290bf43c68f84d50f113a11b20c55809e90aac8aa7e58b408f5dbe3f578ab
-size 2671

 version https://git-lfs.github.com/spec/v1
+oid sha256:0953bfb72e20182c57ba68197da351f78842856132e9f560521cdd9c2df2cb65
+size 2991