Training in progress, epoch 1

Browse files

Files changed (10) hide show

README.md +36 -50
adapter_model.safetensors +2 -2
added_tokens.json +3 -2
all_results.json +11 -11
eval_results.json +6 -6
special_tokens_map.json +1 -14
tokenizer.json +12 -3
tokenizer_config.json +13 -6
train_results.json +6 -6
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -5,18 +5,18 @@ base_model: gpt2
 tags:
 - generated_from_trainer
 model-index:
-- name: Se124M10KInfPrompt_endtoken
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# Se124M10KInfPrompt_endtoken
 This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.7552
 ## Model description
@@ -46,53 +46,39 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 0.6082        | 1.0   | 153  | 1.4745          |
-| 0.3175        | 2.0   | 306  | 1.0258          |
-| 0.2798        | 3.0   | 459  | 0.9099          |
-| 0.244         | 4.0   | 612  | 0.8688          |
-| 0.2373        | 5.0   | 765  | 0.8479          |
-| 0.2252        | 6.0   | 918  | 0.8343          |
-| 0.2289        | 7.0   | 1071 | 0.8216          |
-| 0.2209        | 8.0   | 1224 | 0.8143          |
-| 0.2211        | 9.0   | 1377 | 0.8082          |
-| 0.2176        | 10.0  | 1530 | 0.8029          |
-| 0.2157        | 11.0  | 1683 | 0.7990          |
-| 0.2097        | 12.0  | 1836 | 0.7945          |
-| 0.2113        | 13.0  | 1989 | 0.7921          |
-| 0.2099        | 14.0  | 2142 | 0.7891          |
-| 0.2073        | 15.0  | 2295 | 0.7863          |
-| 0.2055        | 16.0  | 2448 | 0.7805          |
-| 0.2051        | 17.0  | 2601 | 0.7806          |
-| 0.2031        | 18.0  | 2754 | 0.7776          |
-| 0.2046        | 19.0  | 2907 | 0.7760          |
-| 0.206         | 20.0  | 3060 | 0.7720          |
-| 0.2043        | 21.0  | 3213 | 0.7725          |
-| 0.204         | 22.0  | 3366 | 0.7707          |
-| 0.2032        | 23.0  | 3519 | 0.7681          |
-| 0.2026        | 24.0  | 3672 | 0.7678          |
-| 0.1991        | 25.0  | 3825 | 0.7665          |
-| 0.2037        | 26.0  | 3978 | 0.7660          |
-| 0.2011        | 27.0  | 4131 | 0.7634          |
-| 0.2015        | 28.0  | 4284 | 0.7635          |
-| 0.2006        | 29.0  | 4437 | 0.7620          |
-| 0.2014        | 30.0  | 4590 | 0.7640          |
-| 0.2           | 31.0  | 4743 | 0.7609          |
-| 0.202         | 32.0  | 4896 | 0.7606          |
-| 0.1989        | 33.0  | 5049 | 0.7599          |
-| 0.1983        | 34.0  | 5202 | 0.7594          |
-| 0.2           | 35.0  | 5355 | 0.7596          |
-| 0.1991        | 36.0  | 5508 | 0.7588          |
-| 0.1978        | 37.0  | 5661 | 0.7576          |
-| 0.1975        | 38.0  | 5814 | 0.7572          |
-| 0.2007        | 39.0  | 5967 | 0.7569          |
-| 0.1987        | 40.0  | 6120 | 0.7563          |
-| 0.2002        | 41.0  | 6273 | 0.7561          |
-| 0.1961        | 42.0  | 6426 | 0.7563          |
-| 0.201         | 43.0  | 6579 | 0.7552          |
-| 0.1993        | 44.0  | 6732 | 0.7553          |
-| 0.1969        | 45.0  | 6885 | 0.7553          |
 ### Framework versions

 tags:
 - generated_from_trainer
 model-index:
+- name: Se124M500KInfPrompt_endtoken
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# Se124M500KInfPrompt_endtoken
 This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.6716
 ## Model description
 ### Training results
+| Training Loss | Epoch | Step   | Validation Loss |
+|:-------------:|:-----:|:------:|:---------------:|
+| 0.1898        | 1.0   | 5427   | 0.7433          |
+| 0.1857        | 2.0   | 10854  | 0.7238          |
+| 0.1843        | 3.0   | 16281  | 0.7118          |
+| 0.1813        | 4.0   | 21708  | 0.7045          |
+| 0.1802        | 5.0   | 27135  | 0.6990          |
+| 0.1785        | 6.0   | 32562  | 0.6944          |
+| 0.1769        | 7.0   | 37989  | 0.6918          |
+| 0.1743        | 8.0   | 43416  | 0.6875          |
+| 0.1752        | 9.0   | 48843  | 0.6854          |
+| 0.1756        | 10.0  | 54270  | 0.6854          |
+| 0.1736        | 11.0  | 59697  | 0.6837          |
+| 0.1756        | 12.0  | 65124  | 0.6812          |
+| 0.173         | 13.0  | 70551  | 0.6798          |
+| 0.1737        | 14.0  | 75978  | 0.6791          |
+| 0.1741        | 15.0  | 81405  | 0.6783          |
+| 0.177         | 16.0  | 86832  | 0.6771          |
+| 0.1734        | 17.0  | 92259  | 0.6765          |
+| 0.1719        | 18.0  | 97686  | 0.6760          |
+| 0.1737        | 19.0  | 103113 | 0.6763          |
+| 0.1716        | 20.0  | 108540 | 0.6747          |
+| 0.1713        | 21.0  | 113967 | 0.6741          |
+| 0.1739        | 22.0  | 119394 | 0.6738          |
+| 0.1694        | 23.0  | 124821 | 0.6737          |
+| 0.1703        | 24.0  | 130248 | 0.6743          |
+| 0.1697        | 25.0  | 135675 | 0.6730          |
+| 0.172         | 26.0  | 141102 | 0.6731          |
+| 0.1711        | 27.0  | 146529 | 0.6720          |
+| 0.1726        | 28.0  | 151956 | 0.6720          |
+| 0.1703        | 29.0  | 157383 | 0.6716          |
+| 0.1732        | 30.0  | 162810 | 0.6716          |
+| 0.171         | 31.0  | 168237 | 0.6719          |
 ### Framework versions

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:becdfda6ffd9bb761a7db78b383bdc2364d7d7864f0e26a424ad9e0dcbd174e2
-size 309974336

 version https://git-lfs.github.com/spec/v1
+oid sha256:97369058fd0481fa20cb01f0f83e4c65dc6f4ae1a380505ea9622d50bc4a9660
+size 309980480

added_tokens.json CHANGED Viewed

@@ -1,4 +1,5 @@
 {
-  "<endofex>": 50258,
-  "<startofex>": 50257
 }

 {
+  "<endofex>": 50257,
+  "<pad>": 50258,
+  "<startofex>": 50259
 }

all_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
-    "epoch": 45.0,
-    "eval_loss": 0.7551774382591248,
-    "eval_runtime": 6.5379,
-    "eval_samples_per_second": 161.672,
-    "eval_steps_per_second": 5.2,
-    "perplexity": 2.127989076535295,
-    "total_flos": 1.438582110879744e+16,
-    "train_loss": 0.2186263353822884,
-    "train_runtime": 769.5598,
-    "train_samples_per_second": 316.869,
-    "train_steps_per_second": 9.941
 }

 {
+    "epoch": 31.0,
+    "eval_loss": 0.6716023087501526,
+    "eval_runtime": 232.3059,
+    "eval_samples_per_second": 160.323,
+    "eval_steps_per_second": 5.011,
+    "perplexity": 1.9573711221830141,
+    "total_flos": 3.528546650263388e+17,
+    "train_loss": 0.17647521918996942,
+    "train_runtime": 17128.7716,
+    "train_samples_per_second": 506.884,
+    "train_steps_per_second": 15.842
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 45.0,
-    "eval_loss": 0.7551774382591248,
-    "eval_runtime": 6.5379,
-    "eval_samples_per_second": 161.672,
-    "eval_steps_per_second": 5.2,
-    "perplexity": 2.127989076535295
 }

 {
+    "epoch": 31.0,
+    "eval_loss": 0.6716023087501526,
+    "eval_runtime": 232.3059,
+    "eval_samples_per_second": 160.323,
+    "eval_steps_per_second": 5.011,
+    "perplexity": 1.9573711221830141
 }

special_tokens_map.json CHANGED Viewed

@@ -6,23 +6,10 @@
       "normalized": false,
       "rstrip": false,
       "single_word": false
-    },
-    {
-      "content": "<endofex>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false
     }
   ],
   "bos_token": "<|endoftext|>",
   "eos_token": "<endofex>",
-  "pad_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
   "unk_token": "<|endoftext|>"
 }

       "normalized": false,
       "rstrip": false,
       "single_word": false
     }
   ],
   "bos_token": "<|endoftext|>",
   "eos_token": "<endofex>",
+  "pad_token": "<pad>",
   "unk_token": "<|endoftext|>"
 }

tokenizer.json CHANGED Viewed

@@ -14,12 +14,12 @@
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false,
       "special": true
     },
     {
       "id": 50257,
-      "content": "<startofex>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
@@ -28,7 +28,16 @@
     },
     {
       "id": 50258,
-      "content": "<endofex>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,

       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": true,
       "special": true
     },
     {
       "id": 50257,
+      "content": "<endofex>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
     },
     {
       "id": 50258,
+      "content": "<pad>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 50259,
+      "content": "<startofex>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,

tokenizer_config.json CHANGED Viewed

@@ -4,13 +4,13 @@
     "50256": {
       "content": "<|endoftext|>",
       "lstrip": false,
-      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
     "50257": {
-      "content": "<startofex>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -18,7 +18,15 @@
       "special": true
     },
     "50258": {
-      "content": "<endofex>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -27,15 +35,14 @@
     }
   },
   "additional_special_tokens": [
-    "<startofex>",
-    "<endofex>"
   ],
   "bos_token": "<|endoftext|>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<endofex>",
   "extra_special_tokens": {},
   "model_max_length": 1024,
-  "pad_token": "<|endoftext|>",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
 }

     "50256": {
       "content": "<|endoftext|>",
       "lstrip": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
     "50257": {
+      "content": "<endofex>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "special": true
     },
     "50258": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50259": {
+      "content": "<startofex>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
     }
   },
   "additional_special_tokens": [
+    "<startofex>"
   ],
   "bos_token": "<|endoftext|>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<endofex>",
   "extra_special_tokens": {},
   "model_max_length": 1024,
+  "pad_token": "<pad>",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 45.0,
-    "total_flos": 1.438582110879744e+16,
-    "train_loss": 0.2186263353822884,
-    "train_runtime": 769.5598,
-    "train_samples_per_second": 316.869,
-    "train_steps_per_second": 9.941
 }

 {
+    "epoch": 31.0,
+    "total_flos": 3.528546650263388e+17,
+    "train_loss": 0.17647521918996942,
+    "train_runtime": 17128.7716,
+    "train_samples_per_second": 506.884,
+    "train_steps_per_second": 15.842
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4cb8d07c46dde0176d6d1431dfbd1cc46896dd403ee33fdaae0d352f111ea564
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a96b81c7f50a7f675785b2ef6fe69a21a57d6c26d5a1dbe61b36bcfb02febcf
 size 5432