ninagroot/Llama-360Mtest
Browse files- README.md +10 -42
- added_tokens.json +3 -0
- merges.txt +0 -0
- model.safetensors +1 -1
- runs/Apr18_11-29-14_gcn64.local.snellius.surf.nl/events.out.tfevents.1713432565.gcn64.local.snellius.surf.nl.3701576.0 +3 -0
- runs/Apr18_11-29-42_gcn29.local.snellius.surf.nl/events.out.tfevents.1713432590.gcn29.local.snellius.surf.nl.424529.0 +3 -0
- special_tokens_map.json +6 -0
- tokenizer.json +0 -0
- tokenizer_config.json +44 -0
- training_args.bin +1 -1
- vocab.json +0 -0
README.md
CHANGED
|
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 13 |
|
| 14 |
This model is a fine-tuned version of an unspecified base model on an unknown dataset.
|
| 15 |
It achieves the following results on the evaluation set:
|
| 16 |
-
- Loss: 5.
|
| 17 |
|
| 18 |
## Model description
|
| 19 |
|
|
@@ -41,53 +41,21 @@ The following hyperparameters were used during training:
|
|
| 41 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
| 42 |
- lr_scheduler_type: cosine
|
| 43 |
- lr_scheduler_warmup_steps: 50
|
| 44 |
-
- num_epochs:
|
| 45 |
- mixed_precision_training: Native AMP
|
| 46 |
|
| 47 |
### Training results
|
| 48 |
|
| 49 |
| Training Loss | Epoch | Step | Validation Loss |
|
| 50 |
|:-------------:|:-----:|:----:|:---------------:|
|
| 51 |
-
| 8.
|
| 52 |
-
| 7.
|
| 53 |
-
| 6.
|
| 54 |
-
| 5.
|
| 55 |
-
| 4.
|
| 56 |
-
| 3.
|
| 57 |
-
| 3.
|
| 58 |
-
| 2.
|
| 59 |
-
| 1.9961 | 8.98 | 64 | 5.1621 |
|
| 60 |
-
| 1.4468 | 9.96 | 71 | 5.2455 |
|
| 61 |
-
| 1.0269 | 10.95 | 78 | 5.3081 |
|
| 62 |
-
| 0.7106 | 11.93 | 85 | 5.2484 |
|
| 63 |
-
| 0.4967 | 12.91 | 92 | 5.3469 |
|
| 64 |
-
| 0.3478 | 13.89 | 99 | 5.3402 |
|
| 65 |
-
| 0.2494 | 14.88 | 106 | 5.4144 |
|
| 66 |
-
| 0.1696 | 16.0 | 114 | 5.4190 |
|
| 67 |
-
| 0.1245 | 16.98 | 121 | 5.4780 |
|
| 68 |
-
| 0.0799 | 17.96 | 128 | 5.5194 |
|
| 69 |
-
| 0.0618 | 18.95 | 135 | 5.5302 |
|
| 70 |
-
| 0.0375 | 19.93 | 142 | 5.5205 |
|
| 71 |
-
| 0.032 | 20.91 | 149 | 5.5534 |
|
| 72 |
-
| 0.0275 | 21.89 | 156 | 5.5555 |
|
| 73 |
-
| 0.0218 | 22.88 | 163 | 5.6052 |
|
| 74 |
-
| 0.0196 | 24.0 | 171 | 5.6138 |
|
| 75 |
-
| 0.0203 | 24.98 | 178 | 5.6179 |
|
| 76 |
-
| 0.018 | 25.96 | 185 | 5.6200 |
|
| 77 |
-
| 0.0189 | 26.95 | 192 | 5.6299 |
|
| 78 |
-
| 0.0181 | 27.93 | 199 | 5.6347 |
|
| 79 |
-
| 0.016 | 28.91 | 206 | 5.6402 |
|
| 80 |
-
| 0.018 | 29.89 | 213 | 5.6432 |
|
| 81 |
-
| 0.016 | 30.88 | 220 | 5.6474 |
|
| 82 |
-
| 0.0166 | 32.0 | 228 | 5.6500 |
|
| 83 |
-
| 0.0169 | 32.98 | 235 | 5.6515 |
|
| 84 |
-
| 0.0166 | 33.96 | 242 | 5.6531 |
|
| 85 |
-
| 0.0159 | 34.95 | 249 | 5.6547 |
|
| 86 |
-
| 0.0164 | 35.93 | 256 | 5.6556 |
|
| 87 |
-
| 0.0159 | 36.91 | 263 | 5.6561 |
|
| 88 |
-
| 0.0144 | 37.89 | 270 | 5.6562 |
|
| 89 |
-
| 0.0142 | 38.88 | 277 | 5.6562 |
|
| 90 |
-
| 0.016 | 39.3 | 280 | 5.6562 |
|
| 91 |
|
| 92 |
|
| 93 |
### Framework versions
|
|
|
|
| 13 |
|
| 14 |
This model is a fine-tuned version of an unspecified base model on an unknown dataset.
|
| 15 |
It achieves the following results on the evaluation set:
|
| 16 |
+
- Loss: 5.1779
|
| 17 |
|
| 18 |
## Model description
|
| 19 |
|
|
|
|
| 41 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
| 42 |
- lr_scheduler_type: cosine
|
| 43 |
- lr_scheduler_warmup_steps: 50
|
| 44 |
+
- num_epochs: 8
|
| 45 |
- mixed_precision_training: Native AMP
|
| 46 |
|
| 47 |
### Training results
|
| 48 |
|
| 49 |
| Training Loss | Epoch | Step | Validation Loss |
|
| 50 |
|:-------------:|:-----:|:----:|:---------------:|
|
| 51 |
+
| 8.4314 | 0.98 | 7 | 8.5119 |
|
| 52 |
+
| 7.2356 | 1.96 | 14 | 7.6465 |
|
| 53 |
+
| 6.2886 | 2.95 | 21 | 6.9648 |
|
| 54 |
+
| 5.4784 | 3.93 | 28 | 6.3689 |
|
| 55 |
+
| 4.6903 | 4.91 | 35 | 5.8731 |
|
| 56 |
+
| 3.7605 | 5.89 | 42 | 5.4457 |
|
| 57 |
+
| 3.1642 | 6.88 | 49 | 5.2128 |
|
| 58 |
+
| 2.5642 | 7.86 | 56 | 5.1779 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
|
| 61 |
### Framework versions
|
added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<|endoftext|>": 12198
|
| 3 |
+
}
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1408774432
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01d6d12c843483a0a4db6355a59e50ded6fe4efdd47de1ec7ef6bc007e12c82f
|
| 3 |
size 1408774432
|
runs/Apr18_11-29-14_gcn64.local.snellius.surf.nl/events.out.tfevents.1713432565.gcn64.local.snellius.surf.nl.3701576.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ff796cf33b2d0dd70fd49288da73f9a8131df27463fb5d065d7c7fa8905c1f7
|
| 3 |
+
size 18629
|
runs/Apr18_11-29-42_gcn29.local.snellius.surf.nl/events.out.tfevents.1713432590.gcn29.local.snellius.surf.nl.424529.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:570b9f2dfcc75adfe9c8b38453fe7cf0d3bc6960463b4a61c8dd191aa5a9e58f
|
| 3 |
+
size 6276
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"eos_token": "</s>",
|
| 4 |
+
"pad_token": "<pad>",
|
| 5 |
+
"unk_token": "<|endoftext|>"
|
| 6 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"0": {
|
| 5 |
+
"content": "<pad>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": false,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"1": {
|
| 13 |
+
"content": "<s>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": true
|
| 19 |
+
},
|
| 20 |
+
"2": {
|
| 21 |
+
"content": "</s>",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": false,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false,
|
| 26 |
+
"special": true
|
| 27 |
+
},
|
| 28 |
+
"12198": {
|
| 29 |
+
"content": "<|endoftext|>",
|
| 30 |
+
"lstrip": false,
|
| 31 |
+
"normalized": false,
|
| 32 |
+
"rstrip": false,
|
| 33 |
+
"single_word": false,
|
| 34 |
+
"special": true
|
| 35 |
+
}
|
| 36 |
+
},
|
| 37 |
+
"bos_token": "<s>",
|
| 38 |
+
"clean_up_tokenization_spaces": true,
|
| 39 |
+
"eos_token": "</s>",
|
| 40 |
+
"model_max_length": 128,
|
| 41 |
+
"pad_token": "<pad>",
|
| 42 |
+
"tokenizer_class": "GPT2Tokenizer",
|
| 43 |
+
"unk_token": "<|endoftext|>"
|
| 44 |
+
}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48cba8e54c48f085335f15c64978dc87d647a46edb2b350432a13b072d3fcc1e
|
| 3 |
size 4984
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|