huggingartists

Browse files

Files changed (11) hide show

README.md +3 -3
config.json +1 -1
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +1 -1
pytorch_model.bin +1 -1
rng_state.pth +2 -2
scheduler.pt +1 -1
tokenizer_config.json +1 -1
trainer_state.json +375 -7
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/scriptonite")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3iyoxg0v/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Скриптонит (Scriptonite)'s lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/di1clo79) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/di1clo79/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/scriptonite")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/13pxeww0/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Скриптонит (Scriptonite)'s lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1itfp830) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1itfp830/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "gpt2",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

 {
+  "_name_or_path": "huggingartists/scriptonite",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 1.727362871170044, "eval_runtime": 20.7773, "eval_samples_per_second": 21.369, "eval_steps_per_second": 2.695, "epoch": 1.0}


1	+ {"eval_loss": 1.5933494567871094, "eval_runtime": 22.2071, "eval_samples_per_second": 20.939, "eval_steps_per_second": 2.657, "epoch": 2.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b5c46ac5fd3298d2a01f9b7915560bc450f3997bc18b7dada5a182b6fbdeffa
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:c11bfa3837c0edca55ca9f7ccdb4f3d86d19b9ba23fe6cf05c899732f878fa76
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8246c465fe86028eab455a564decb982d3c8b4cc33c5a236fa65250065f2dfa
 size 995604017

 version https://git-lfs.github.com/spec/v1
+oid sha256:bde59a917e4907642c41b1ba1a0796fe83190743077f4abc55b70f6d855f3a67
 size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4642ea9beb4bc34749ebce90968edfe32be19e17efd0ba3eb969a39d1b3c9fd3
 size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:218d35bc6715a739b952792ffa8615837479989eea7021decc404a768b2fed37
 size 510403817

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:361696374a33a1ba4cfa105bbdd0232fc91f20b020d2290b0ed59f96e83efc13
-size 14439

 version https://git-lfs.github.com/spec/v1
+oid sha256:34f8219694a6e795ffa0539af4f5ebbbfd0956e15ebc61431bdc1cbffbd00148
+size 14567

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5643af0d2d309ea9f8d4e15ffbc8ebd3faca59b9cbbd8bf85f08e202e280408b
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc022d3bfdc6685b60396d3e5ef01fcd639162e46eae2acfcd548c1b5128e6c9
 size 623

tokenizer_config.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}


1	+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/scriptonite", "tokenizer_class": "GPT2Tokenizer"}

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 1.727362871170044,
-  "best_model_checkpoint": "output/scriptonite/checkpoint-306",
-  "epoch": 1.0,
-  "global_step": 306,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -380,11 +380,379 @@
       "eval_samples_per_second": 21.336,
       "eval_steps_per_second": 2.691,
       "step": 306
     }
   ],
-  "max_steps": 306,
-  "num_train_epochs": 1,
-  "total_flos": 319560155136000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 1.5933494567871094,
+  "best_model_checkpoint": "output/scriptonite/checkpoint-608",
+  "epoch": 2.0,
+  "global_step": 608,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.336,
       "eval_steps_per_second": 2.691,
       "step": 306
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 1.3182872524144475e-07,
+      "loss": 1.7503,
+      "step": 310
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 4.427558891872417e-07,
+      "loss": 1.7186,
+      "step": 315
+    },
+    {
+      "epoch": 1.05,
+      "learning_rate": 9.35614586573241e-07,
+      "loss": 1.7355,
+      "step": 320
+    },
+    {
+      "epoch": 1.07,
+      "learning_rate": 1.6090892340046806e-06,
+      "loss": 1.7325,
+      "step": 325
+    },
+    {
+      "epoch": 1.09,
+      "learning_rate": 2.461382131516746e-06,
+      "loss": 1.5602,
+      "step": 330
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 3.4902182611592783e-06,
+      "loss": 1.5739,
+      "step": 335
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 4.692851359682261e-06,
+      "loss": 1.7024,
+      "step": 340
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 6.066071249112177e-06,
+      "loss": 1.7912,
+      "step": 345
+    },
+    {
+      "epoch": 1.15,
+      "learning_rate": 7.606212405651899e-06,
+      "loss": 1.7776,
+      "step": 350
+    },
+    {
+      "epoch": 1.17,
+      "learning_rate": 9.309163744031197e-06,
+      "loss": 1.8068,
+      "step": 355
+    },
+    {
+      "epoch": 1.18,
+      "learning_rate": 1.1170379591190527e-05,
+      "loss": 1.7059,
+      "step": 360
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 1.3184891820006337e-05,
+      "loss": 1.7498,
+      "step": 365
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 1.5347323110669033e-05,
+      "loss": 1.7696,
+      "step": 370
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 1.7651901304315657e-05,
+      "loss": 1.703,
+      "step": 375
+    },
+    {
+      "epoch": 1.25,
+      "learning_rate": 2.009247481060283e-05,
+      "loss": 1.6793,
+      "step": 380
+    },
+    {
+      "epoch": 1.27,
+      "learning_rate": 2.2662529028092886e-05,
+      "loss": 1.7064,
+      "step": 385
+    },
+    {
+      "epoch": 1.28,
+      "learning_rate": 2.5355203733622272e-05,
+      "loss": 1.6889,
+      "step": 390
+    },
+    {
+      "epoch": 1.3,
+      "learning_rate": 2.816331139423508e-05,
+      "loss": 1.7357,
+      "step": 395
+    },
+    {
+      "epoch": 1.32,
+      "learning_rate": 3.1079356352801514e-05,
+      "loss": 1.6548,
+      "step": 400
+    },
+    {
+      "epoch": 1.33,
+      "learning_rate": 3.4095554836109936e-05,
+      "loss": 1.7133,
+      "step": 405
+    },
+    {
+      "epoch": 1.35,
+      "learning_rate": 3.7203855732024184e-05,
+      "loss": 1.6095,
+      "step": 410
+    },
+    {
+      "epoch": 1.37,
+      "learning_rate": 4.039596208024663e-05,
+      "loss": 1.6151,
+      "step": 415
+    },
+    {
+      "epoch": 1.38,
+      "learning_rate": 4.3663353219321276e-05,
+      "loss": 1.6988,
+      "step": 420
+    },
+    {
+      "epoch": 1.4,
+      "learning_rate": 4.6997307530760796e-05,
+      "loss": 1.8323,
+      "step": 425
+    },
+    {
+      "epoch": 1.41,
+      "learning_rate": 5.038892571958626e-05,
+      "loss": 1.6735,
+      "step": 430
+    },
+    {
+      "epoch": 1.43,
+      "learning_rate": 5.382915456913676e-05,
+      "loss": 1.6212,
+      "step": 435
+    },
+    {
+      "epoch": 1.45,
+      "learning_rate": 5.7308811106741675e-05,
+      "loss": 1.7788,
+      "step": 440
+    },
+    {
+      "epoch": 1.46,
+      "learning_rate": 6.0818607115748475e-05,
+      "loss": 1.6847,
+      "step": 445
+    },
+    {
+      "epoch": 1.48,
+      "learning_rate": 6.434917392847842e-05,
+      "loss": 1.6746,
+      "step": 450
+    },
+    {
+      "epoch": 1.5,
+      "learning_rate": 6.78910874339285e-05,
+      "loss": 1.7122,
+      "step": 455
+    },
+    {
+      "epoch": 1.51,
+      "learning_rate": 7.143489323346855e-05,
+      "loss": 1.6207,
+      "step": 460
+    },
+    {
+      "epoch": 1.53,
+      "learning_rate": 7.497113187738444e-05,
+      "loss": 1.719,
+      "step": 465
+    },
+    {
+      "epoch": 1.55,
+      "learning_rate": 7.849036411490396e-05,
+      "loss": 1.814,
+      "step": 470
+    },
+    {
+      "epoch": 1.56,
+      "learning_rate": 8.19831960903064e-05,
+      "loss": 1.7822,
+      "step": 475
+    },
+    {
+      "epoch": 1.58,
+      "learning_rate": 8.54403044178588e-05,
+      "loss": 1.7602,
+      "step": 480
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 8.885246106864805e-05,
+      "loss": 1.6178,
+      "step": 485
+    },
+    {
+      "epoch": 1.61,
+      "learning_rate": 9.221055800287699e-05,
+      "loss": 1.7397,
+      "step": 490
+    },
+    {
+      "epoch": 1.63,
+      "learning_rate": 9.550563148187563e-05,
+      "loss": 1.6071,
+      "step": 495
+    },
+    {
+      "epoch": 1.64,
+      "learning_rate": 9.87288859949296e-05,
+      "loss": 1.66,
+      "step": 500
+    },
+    {
+      "epoch": 1.66,
+      "learning_rate": 0.00010187171773706003,
+      "loss": 1.6028,
+      "step": 505
+    },
+    {
+      "epoch": 1.68,
+      "learning_rate": 0.00010492573757508462,
+      "loss": 1.697,
+      "step": 510
+    },
+    {
+      "epoch": 1.69,
+      "learning_rate": 0.0001078827934406569,
+      "loss": 1.5973,
+      "step": 515
+    },
+    {
+      "epoch": 1.71,
+      "learning_rate": 0.00011073499209051121,
+      "loss": 1.6779,
+      "step": 520
+    },
+    {
+      "epoch": 1.73,
+      "learning_rate": 0.00011347472017582718,
+      "loss": 1.639,
+      "step": 525
+    },
+    {
+      "epoch": 1.74,
+      "learning_rate": 0.00011609466456447531,
+      "loss": 1.7209,
+      "step": 530
+    },
+    {
+      "epoch": 1.76,
+      "learning_rate": 0.00011858783186189604,
+      "loss": 1.7896,
+      "step": 535
+    },
+    {
+      "epoch": 1.78,
+      "learning_rate": 0.00012094756707850673,
+      "loss": 1.7093,
+      "step": 540
+    },
+    {
+      "epoch": 1.79,
+      "learning_rate": 0.00012316757139380674,
+      "loss": 1.614,
+      "step": 545
+    },
+    {
+      "epoch": 1.81,
+      "learning_rate": 0.0001252419189697637,
+      "loss": 1.7043,
+      "step": 550
+    },
+    {
+      "epoch": 1.83,
+      "learning_rate": 0.0001271650727686013,
+      "loss": 1.6544,
+      "step": 555
+    },
+    {
+      "epoch": 1.84,
+      "learning_rate": 0.00012893189933276512,
+      "loss": 1.6355,
+      "step": 560
+    },
+    {
+      "epoch": 1.86,
+      "learning_rate": 0.00013053768248761616,
+      "loss": 1.7327,
+      "step": 565
+    },
+    {
+      "epoch": 1.88,
+      "learning_rate": 0.00013197813593027427,
+      "loss": 1.5672,
+      "step": 570
+    },
+    {
+      "epoch": 1.89,
+      "learning_rate": 0.00013324941467100882,
+      "loss": 1.6036,
+      "step": 575
+    },
+    {
+      "epoch": 1.91,
+      "learning_rate": 0.00013434812529663614,
+      "loss": 1.5748,
+      "step": 580
+    },
+    {
+      "epoch": 1.92,
+      "learning_rate": 0.00013527133502852737,
+      "loss": 1.6181,
+      "step": 585
+    },
+    {
+      "epoch": 1.94,
+      "learning_rate": 0.00013601657955104887,
+      "loss": 1.6947,
+      "step": 590
+    },
+    {
+      "epoch": 1.96,
+      "learning_rate": 0.00013658186958953817,
+      "loss": 1.6579,
+      "step": 595
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 0.00013696569622025754,
+      "loss": 1.6297,
+      "step": 600
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 0.00013716703489815072,
+      "loss": 1.6272,
+      "step": 605
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 1.5933494567871094,
+      "eval_runtime": 22.1769,
+      "eval_samples_per_second": 20.968,
+      "eval_steps_per_second": 2.66,
+      "step": 608
     }
   ],
+  "max_steps": 608,
+  "num_train_epochs": 2,
+  "total_flos": 634286407680000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aaaf6708f9300a70cac38b2d9556974db0fe2981edabe48909dbaf43501c3dca
 size 2671

 version https://git-lfs.github.com/spec/v1
+oid sha256:9edd5597a8f5fd166f6b738b741bd4ca0054d7d9cca4c5ca6734b697c0e34cda
 size 2671