huggingartists

Browse files

Files changed (9) hide show

README.md +3 -3
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +1 -1
pytorch_model.bin +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +129 -7
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/morgenshtern")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2g9p829k/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3cvafvz3) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3cvafvz3/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/morgenshtern")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/11daksqo/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3hprs98u) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3hprs98u/artifacts) is logged and versioned.
 ## How to use

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 1.0789222717285156, "eval_runtime": 7.246, "eval_samples_per_second": 20.701, "eval_steps_per_second": 2.622, "epoch": 16.0}


1	+ {"eval_loss": 0.9612834453582764, "eval_runtime": 6.7467, "eval_samples_per_second": 22.53, "eval_steps_per_second": 2.816, "epoch": 2.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9654bfbbbc16c001bf3ca7f1bde7b170d24b36f207e71c7d64f5e319546120e9
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2c263070fbf5f7efe3e75dd578e814f49ec64bc199e43a3f45edb236d61fb34
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d310145beb326e0a9d566eb5622927be8d76bfb9430b33c32dd51b5c7a06790
 size 995604017

 version https://git-lfs.github.com/spec/v1
+oid sha256:51f82cb2b14e8b08c2a0932e2579488a4a9bec1b0a08c7c5585837660c35aff9
 size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec149b6dfd697bf23788055aefe01a7ddb91b8831442e474a5c68c182988271b
 size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:65bad4addbe634edd37474cda521dc103ca5eb49f77dbe70541dd72d3250de98
 size 510403817

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:160d145d45171c3680d1a0f735e7dba3ead27f25402db68bfdfe08b789e7823a
 size 14567

 version https://git-lfs.github.com/spec/v1
+oid sha256:862a8301ff1a624ea0fb015628a4060abb8cdf66d8bd2a773af256e1cef63c1e
 size 14567

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c82347c45c6ff6d74c3b0b79bef318683f5527c56da1c685e3b3ba8d34edddd5
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c6d0bf6916b2b0bc8b9baa1851e61696a7abf151886205b1aec182b9abcf338
 size 623

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 1.0789222717285156,
-  "best_model_checkpoint": "output/morgenshtern/checkpoint-97",
-  "epoch": 1.0,
-  "global_step": 97,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -128,11 +128,133 @@
       "eval_samples_per_second": 21.035,
       "eval_steps_per_second": 2.664,
       "step": 97
     }
   ],
-  "max_steps": 1552,
-  "num_train_epochs": 16,
-  "total_flos": 101381308416000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 0.9612834453582764,
+  "best_model_checkpoint": "output/morgenshtern/checkpoint-194",
+  "epoch": 2.0,
+  "global_step": 194,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.035,
       "eval_steps_per_second": 2.664,
       "step": 97
+    },
+    {
+      "epoch": 1.03,
+      "learning_rate": 3.2355730797025283e-07,
+      "loss": 1.094,
+      "step": 100
+    },
+    {
+      "epoch": 1.08,
+      "learning_rate": 2.289809668554777e-06,
+      "loss": 1.173,
+      "step": 105
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 5.991173097174171e-06,
+      "loss": 1.1401,
+      "step": 110
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 1.1330795562191737e-05,
+      "loss": 1.1561,
+      "step": 115
+    },
+    {
+      "epoch": 1.24,
+      "learning_rate": 1.816895739168569e-05,
+      "loss": 1.103,
+      "step": 120
+    },
+    {
+      "epoch": 1.29,
+      "learning_rate": 2.6326727259776298e-05,
+      "loss": 1.0156,
+      "step": 125
+    },
+    {
+      "epoch": 1.34,
+      "learning_rate": 3.559064420793063e-05,
+      "loss": 1.047,
+      "step": 130
+    },
+    {
+      "epoch": 1.39,
+      "learning_rate": 4.571830318875349e-05,
+      "loss": 1.0605,
+      "step": 135
+    },
+    {
+      "epoch": 1.44,
+      "learning_rate": 5.644469797766504e-05,
+      "loss": 1.0732,
+      "step": 140
+    },
+    {
+      "epoch": 1.49,
+      "learning_rate": 6.748915548014806e-05,
+      "loss": 1.0798,
+      "step": 145
+    },
+    {
+      "epoch": 1.55,
+      "learning_rate": 7.856267998744024e-05,
+      "loss": 1.132,
+      "step": 150
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 8.937551520659945e-05,
+      "loss": 1.097,
+      "step": 155
+    },
+    {
+      "epoch": 1.65,
+      "learning_rate": 9.964472619245533e-05,
+      "loss": 1.1685,
+      "step": 160
+    },
+    {
+      "epoch": 1.7,
+      "learning_rate": 0.00010910160278820622,
+      "loss": 1.0965,
+      "step": 165
+    },
+    {
+      "epoch": 1.75,
+      "learning_rate": 0.00011749869085191239,
+      "loss": 1.0409,
+      "step": 170
+    },
+    {
+      "epoch": 1.8,
+      "learning_rate": 0.00012461626728572453,
+      "loss": 1.2079,
+      "step": 175
+    },
+    {
+      "epoch": 1.86,
+      "learning_rate": 0.000130268089438458,
+      "loss": 1.109,
+      "step": 180
+    },
+    {
+      "epoch": 1.91,
+      "learning_rate": 0.00013430626843929596,
+      "loss": 1.1506,
+      "step": 185
+    },
+    {
+      "epoch": 1.96,
+      "learning_rate": 0.00013662513894413276,
+      "loss": 1.074,
+      "step": 190
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.9612834453582764,
+      "eval_runtime": 6.602,
+      "eval_samples_per_second": 23.023,
+      "eval_steps_per_second": 2.878,
+      "step": 194
     }
   ],
+  "max_steps": 194,
+  "num_train_epochs": 2,
+  "total_flos": 202501324800000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5497b3aaf0bdb0eca71885ab53b83d8d3468893d10b8e506f44d16425afcbf44
 size 2671

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4e57e184be42952f12edd861103c10f3a3cdd1a9bd01f214495daf9092e23e9
 size 2671