ninagroot/Llama-360Mtest

Files changed (9)

README.md CHANGED Viewed

@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 4.3345
 ## Model description
@@ -48,21 +48,21 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 8.5386        | 0.99  | 34   | 8.3354          |
-| 7.896         | 2.0   | 69   | 7.4834          |
-| 6.8868        | 2.99  | 103  | 6.7625          |
-| 6.4658        | 4.0   | 138  | 6.1839          |
-| 5.8471        | 4.99  | 172  | 5.7833          |
-| 5.2893        | 6.0   | 207  | 5.2802          |
-| 4.6612        | 6.99  | 241  | 4.8722          |
-| 4.5265        | 8.0   | 276  | 4.6448          |
-| 4.093         | 8.99  | 310  | 4.5199          |
-| 3.8628        | 10.0  | 345  | 4.4178          |
-| 3.7924        | 10.99 | 379  | 4.3819          |
-| 3.5135        | 12.0  | 414  | 4.3379          |
-| 3.2653        | 12.99 | 448  | 4.3451          |
-| 3.2222        | 14.0  | 483  | 4.3393          |
-| 3.2136        | 14.78 | 510  | 4.3345          |
 ### Framework versions

 This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 4.3475
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 8.4018        | 0.99  | 44   | 8.2779          |
+| 7.5564        | 1.99  | 88   | 7.2124          |
+| 6.742         | 2.98  | 132  | 6.4726          |
+| 6.0531        | 4.0   | 177  | 5.8848          |
+| 5.1195        | 4.99  | 221  | 5.2837          |
+| 4.5893        | 5.99  | 265  | 4.8101          |
+| 4.3185        | 6.98  | 309  | 4.6188          |
+| 4.0957        | 8.0   | 354  | 4.4767          |
+| 3.7674        | 8.99  | 398  | 4.4084          |
+| 3.6238        | 9.99  | 442  | 4.3695          |
+| 3.5106        | 10.98 | 486  | 4.3419          |
+| 3.2515        | 12.0  | 531  | 4.3291          |
+| 3.0916        | 12.99 | 575  | 4.3472          |
+| 3.0072        | 13.99 | 619  | 4.3490          |
+| 3.0306        | 14.92 | 660  | 4.3475          |
 ### Framework versions

config.json CHANGED Viewed

@@ -10,7 +10,7 @@
   "hidden_size": 1024,
   "initializer_range": 0.02,
   "intermediate_size": 3072,
-  "max_position_embeddings": 256,
   "model_type": "llama",
   "num_attention_heads": 8,
   "num_hidden_layers": 24,

   "hidden_size": 1024,
   "initializer_range": 0.02,
   "intermediate_size": 3072,
+  "max_position_embeddings": 200,
   "model_type": "llama",
   "num_attention_heads": 8,
   "num_hidden_layers": 24,

merges.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4df6aa3aecda4f15a5dfe56935f69dca228660de335ada3018f6e3224cb03fdc
 size 1344172280

 version https://git-lfs.github.com/spec/v1
+oid sha256:70a56446823a6b9000e94343024eda185ec2de87f84c064ed04df281a8c996f0
 size 1344172280

runs/Mar20_15-08-12_gcn7.local.snellius.surf.nl/events.out.tfevents.1710943700.gcn7.local.snellius.surf.nl.1412407.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:9dc3297b0ea3dea3e05d5d18476c75834522822754d613f7668124b6799ca3f1
+size 13889

special_tokens_map.json CHANGED Viewed

@@ -2,11 +2,5 @@
   "bos_token": "<s>",
   "eos_token": "</s>",
   "pad_token": "<pad>",
-  "unk_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
 }

   "bos_token": "<s>",
   "eos_token": "</s>",
   "pad_token": "<pad>",
+  "unk_token": "<|endoftext|>"
 }

tokenizer_config.json CHANGED Viewed

@@ -1,11 +1,34 @@
 {
-  "add_bos_token": false,
   "add_prefix_space": false,
   "added_tokens_decoder": {
-    "50256": {
       "content": "<|endoftext|>",
       "lstrip": false,
-      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
@@ -14,7 +37,6 @@
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
-  "errors": "replace",
   "model_max_length": 128,
   "pad_token": "<pad>",
   "tokenizer_class": "GPT2Tokenizer",

 {
   "add_prefix_space": false,
   "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4312": {
       "content": "<|endoftext|>",
       "lstrip": false,
+      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
   "model_max_length": 128,
   "pad_token": "<pad>",
   "tokenizer_class": "GPT2Tokenizer",

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c18414f50a1a1d06089eb32903b401e02fe3d9906ae6e5586de44ccbeda6707a
 size 4728

 version https://git-lfs.github.com/spec/v1
+oid sha256:438d09597f6c502b8b3134429dc8a3ce76adbac05dab665a370b5b862e1699ee
 size 4728

vocab.json CHANGED Viewed

The diff for this file is too large to render. See raw diff