ninagroot/GPT2-705Mtest

Files changed (10) hide show

README.md CHANGED Viewed

@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 3.6555
 ## Model description
@@ -48,9 +48,9 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 3.073         | 1.0   | 194  | 3.6424          |
-| 2.0148        | 2.0   | 389  | 3.6091          |
-| 1.3653        | 2.99  | 582  | 3.6555          |
 ### Framework versions

 This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 3.4932
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 5.4196        | 0.99  | 45   | 4.4846          |
+| 3.5716        | 2.0   | 91   | 3.7327          |
+| 2.7534        | 2.97  | 135  | 3.4932          |
 ### Framework versions

config.json CHANGED Viewed

@@ -14,7 +14,7 @@
   "n_head": 8,
   "n_inner": null,
   "n_layer": 24,
-  "n_positions": 60,
   "pad_token_id": 50256,
   "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.0,

   "n_head": 8,
   "n_inner": null,
   "n_layer": 24,
+  "n_positions": 256,
   "pad_token_id": 50256,
   "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.0,

merges.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2abf6ffbb842f8f7acca2c6c3bd5c2c8b0897130820c1844f28256896bf9f235
-size 3029016352

 version https://git-lfs.github.com/spec/v1
+oid sha256:4552a5b5466615ad92a7df8601aeb12b396b0e729bd38657c99ae7ee2b2117a4
+size 3030220576

runs/Apr05_15-45-27_gcn47.local.snellius.surf.nl/events.out.tfevents.1712324738.gcn47.local.snellius.surf.nl.3130322.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c22649c732c1d458597e4f8e9240209867b171175fa347cc541ae0a92e53121
+size 7082

special_tokens_map.json CHANGED Viewed

@@ -2,11 +2,5 @@
   "bos_token": "<s>",
   "eos_token": "</s>",
   "pad_token": "<pad>",
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
 }

   "bos_token": "<s>",
   "eos_token": "</s>",
   "pad_token": "<pad>",
+  "unk_token": "<|endoftext|>"
 }

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -1,42 +1,20 @@
 {
-  "add_bos_token": true,
-  "add_eos_token": false,
-  "add_prefix_space": true,
   "added_tokens_decoder": {
-    "0": {
-      "content": "<unk>",
       "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "1": {
-      "content": "<s>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
-      "content": "</s>",
-      "lstrip": false,
-      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "bos_token": "<s>",
-  "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
-  "legacy": true,
-  "model_max_length": 128,
   "pad_token": "<pad>",
-  "sp_model_kwargs": {},
-  "spaces_between_special_tokens": false,
-  "tokenizer_class": "LlamaTokenizer",
-  "unk_token": "<unk>",
-  "use_default_system_prompt": false
 }

 {
+  "add_prefix_space": false,
   "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
       "lstrip": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
+  "model_max_length": 30,
   "pad_token": "<pad>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c40e2376c3eb0011bb960b01f5109ca28ebed0d38154671666787999fe15f16c
 size 4984

 version https://git-lfs.github.com/spec/v1
+oid sha256:ebd58b9a0e5a6daadd81f963d70b620afd0fec2098be85f765ad1702c6773750
 size 4984

vocab.json CHANGED Viewed

The diff for this file is too large to render. See raw diff