ninagroot committed on
Commit e0212c7 · verified · 1 Parent(s): 13c128c

ninagroot/Llama-360Mtest

README.md CHANGED
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
  
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
  It achieves the following results on the evaluation set:
- - Loss: 4.7211
+ - Loss: 4.4066
  
  ## Model description
  
@@ -32,7 +32,7 @@ More information needed
  ### Training hyperparameters
  
  The following hyperparameters were used during training:
- - learning_rate: 3e-05
+ - learning_rate: 0.0003
  - train_batch_size: 1
  - eval_batch_size: 8
  - seed: 42
@@ -48,21 +48,21 @@ The following hyperparameters were used during training:
  
  | Training Loss | Epoch | Step | Validation Loss |
  |:-------------:|:-----:|:----:|:---------------:|
- | 10.8127 | 0.98 | 41 | 10.5984 |
- | 9.6409 | 1.98 | 83 | 9.0771 |
- | 8.3725 | 2.99 | 125 | 8.0192 |
- | 7.2983 | 3.99 | 167 | 7.0163 |
- | 6.408 | 4.99 | 209 | 6.2115 |
- | 5.4886 | 5.99 | 251 | 5.4663 |
- | 4.8987 | 7.0 | 293 | 5.1240 |
- | 4.4918 | 8.0 | 335 | 4.9146 |
- | 4.2381 | 8.98 | 376 | 4.8076 |
- | 4.0984 | 9.98 | 418 | 4.7645 |
- | 3.9135 | 10.99 | 460 | 4.7195 |
- | 3.6736 | 11.99 | 502 | 4.7150 |
- | 3.467 | 12.99 | 544 | 4.7175 |
- | 3.4323 | 13.99 | 586 | 4.7194 |
- | 3.443 | 14.69 | 615 | 4.7211 |
+ | 7.1842 | 0.99 | 44 | 6.6664 |
+ | 5.6972 | 1.99 | 88 | 5.4155 |
+ | 4.8124 | 2.98 | 132 | 4.6722 |
+ | 4.1773 | 4.0 | 177 | 4.4451 |
+ | 3.6907 | 4.99 | 221 | 4.3309 |
+ | 3.5861 | 5.99 | 265 | 4.2794 |
+ | 3.3727 | 6.98 | 309 | 4.2343 |
+ | 3.1369 | 8.0 | 354 | 4.2810 |
+ | 2.7116 | 8.99 | 398 | 4.2828 |
+ | 2.3491 | 9.99 | 442 | 4.3183 |
+ | 2.0411 | 10.98 | 486 | 4.3084 |
+ | 1.5302 | 12.0 | 531 | 4.3548 |
+ | 1.1022 | 12.99 | 575 | 4.3852 |
+ | 0.8591 | 13.99 | 619 | 4.4058 |
+ | 0.7952 | 14.92 | 660 | 4.4066 |
  
  
  ### Framework versions
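
For anyone reproducing the run, the hyperparameter hunk above maps onto `transformers.TrainingArguments` roughly as follows. This is a minimal sketch: only `learning_rate`, the batch sizes, and `seed` come from the diff; `output_dir` is a placeholder and `num_train_epochs=15` is inferred from the eval table (epochs 0.99 through 14.92), neither shown in the hunk.

```python
from transformers import TrainingArguments

# Sketch of the updated run configuration; only learning_rate,
# train_batch_size, eval_batch_size, and seed appear in the diff above.
args = TrainingArguments(
    output_dir="Llama-360Mtest",     # placeholder, not in the diff
    learning_rate=3e-4,              # raised from 3e-05 in this commit
    per_device_train_batch_size=1,   # "train_batch_size: 1"
    per_device_eval_batch_size=8,    # "eval_batch_size: 8"
    seed=42,
    num_train_epochs=15,             # inferred from the eval table
)
```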
config.json CHANGED
@@ -15,7 +15,7 @@
  "num_attention_heads": 8,
  "num_hidden_layers": 24,
  "num_key_value_heads": 8,
- "pad_token_id": 50256,
+ "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
@@ -24,5 +24,5 @@
  "torch_dtype": "float32",
  "transformers_version": "4.37.2",
  "use_cache": true,
- "vocab_size": 50257
+ "vocab_size": 4312
  }
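
A quick consistency check one might run against the edited config; a sketch only, with the repo id taken from the commit header:

```python
from transformers import AutoConfig, AutoModelForCausalLM

# After this commit, config.json declares a 4312-token vocabulary and
# pad_token_id 0, so the embedding matrix must have 4312 rows.
config = AutoConfig.from_pretrained("ninagroot/Llama-360Mtest")
assert config.vocab_size == 4312 and config.pad_token_id == 0

model = AutoModelForCausalLM.from_pretrained("ninagroot/Llama-360Mtest")
assert model.get_input_embeddings().weight.shape[0] == config.vocab_size
```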
generation_config.json CHANGED
@@ -2,6 +2,6 @@
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
- "pad_token_id": 50256,
+ "pad_token_id": 0,
  "transformers_version": "4.37.2"
  }
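
The same pad-token change lands here because `generate()` reads padding from the generation config rather than from `config.json`; a one-line check (sketch, same assumed repo id):

```python
from transformers import GenerationConfig

gen = GenerationConfig.from_pretrained("ninagroot/Llama-360Mtest")
assert gen.pad_token_id == 0  # must agree with config.json after this commit
```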
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:52c8d6589ac866b90c44e5b803d259a7ee96f9f9029f707aa7db6e82b4515797
- size 1720553864
+ oid sha256:78c1fe6f7d2437c2bafc14f73fde089a7b787a9de39c6d9b251e0f45d3dad955
+ size 1344172280
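
The roughly 376 MB drop in checkpoint size is consistent with the vocabulary shrink in `config.json`, assuming a hidden size of 1024 and untied input/output embeddings; neither assumption appears in the hunks above:

```python
# Two float32 vocab-sized matrices (embed_tokens and lm_head) shrink
# by 50257 - 4312 = 45945 rows each; hidden_size=1024 is an assumption.
delta = (50257 - 4312) * 1024 * 4 * 2
print(delta)                     # 376381440
print(1720553864 - 1344172280)   # 376381584, i.e. delta plus header slack
```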
runs/Mar21_14-19-37_gcn7.local.snellius.surf.nl/events.out.tfevents.1711027186.gcn7.local.snellius.surf.nl.2387174.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e98aa76f66cf91ee9f810b47e7b86d7e7453e0a328b397f5b569aecf19ea7277
+ size 13890
special_tokens_map.json CHANGED
@@ -2,5 +2,11 @@
  "bos_token": "<s>",
  "eos_token": "</s>",
  "pad_token": "<pad>",
- "unk_token": "<|endoftext|>"
+ "unk_token": {
+   "content": "<|endoftext|>",
+   "lstrip": false,
+   "normalized": true,
+   "rstrip": false,
+   "single_word": false
+ }
  }
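
The expanded `unk_token` entry is just the serialized form of an `AddedToken`; a rough code equivalent, sketched with the `tokenizers` library:

```python
from tokenizers import AddedToken

# Mirrors the new special_tokens_map.json entry field for field.
unk = AddedToken(
    "<|endoftext|>",
    lstrip=False,
    normalized=True,   # the flag this commit makes explicit
    rstrip=False,
    single_word=False,
)
```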
tokenizer_config.json CHANGED
@@ -1,34 +1,11 @@
  {
+ "add_bos_token": false,
  "add_prefix_space": false,
  "added_tokens_decoder": {
-   "0": {
-     "content": "<pad>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false,
-     "special": true
-   },
-   "1": {
-     "content": "<s>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false,
-     "special": true
-   },
-   "2": {
-     "content": "</s>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false,
-     "special": true
-   },
-   "4312": {
+   "50256": {
      "content": "<|endoftext|>",
      "lstrip": false,
-     "normalized": false,
+     "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
@@ -37,6 +14,7 @@
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "eos_token": "</s>",
+ "errors": "replace",
  "model_max_length": 100,
  "pad_token": "<pad>",
  "tokenizer_class": "GPT2Tokenizer",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6930c0ab087b6ba724e4cf9b2bcb7b1cd8afe2a296b2b6ad30397ad33891d6cf
+ oid sha256:f13366bfb195d096cd3184a397ce800d616db058f4a3641146de48e5a915eec0
  size 4728
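
`training_args.bin` keeps the same size (4728 bytes) but its hash changes, which fits a scalar field flipping, such as the learning rate above. The file is a pickled `TrainingArguments` object and can be inspected directly (sketch; unpickling requires `transformers` to be importable):

```python
import torch

# weights_only=False is needed on newer torch to unpickle arbitrary objects.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate)  # expect 0.0003 after this commit
```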
vocab.json CHANGED
The diff for this file is too large to render. See raw diff