ninagroot/Llama-360Mtest
Browse files
- README.md +8 -8
- config.json +2 -2
- model.safetensors +2 -2
- runs/Apr10_10-13-45_gcn68.local.snellius.surf.nl/events.out.tfevents.1712736835.gcn68.local.snellius.surf.nl.3597541.0 +3 -0
- runs/Apr10_10-19-18_gcn16.local.snellius.surf.nl/events.out.tfevents.1712737168.gcn16.local.snellius.surf.nl.150583.0 +3 -0
- training_args.bin +1 -1
README.md
CHANGED
|
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 13 |
|
| 14 |
This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
|
| 15 |
It achieves the following results on the evaluation set:
|
| 16 |
-
- Loss:
|
| 17 |
|
| 18 |
## Model description
|
| 19 |
|
|
@@ -48,13 +48,13 @@ The following hyperparameters were used during training:
|
|
| 48 |
|
| 49 |
| Training Loss | Epoch | Step | Validation Loss |
|
| 50 |
|:-------------:|:-----:|:----:|:---------------:|
|
| 51 |
-
|
|
| 52 |
-
|
|
| 53 |
-
|
|
| 54 |
-
| 0.
|
| 55 |
-
| 0.
|
| 56 |
-
| 0.
|
| 57 |
-
| 0.
|
| 58 |
|
| 59 |
|
| 60 |
### Framework versions
|
|
|
|
| 13 |
|
| 14 |
This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
|
| 15 |
It achieves the following results on the evaluation set:
|
| 16 |
+
- Loss: 3.4944
|
| 17 |
|
| 18 |
## Model description
|
| 19 |
|
|
|
|
| 48 |
|
| 49 |
| Training Loss | Epoch | Step | Validation Loss |
|
| 50 |
|:-------------:|:-----:|:----:|:---------------:|
|
| 51 |
+
| 3.0504 | 1.0 | 197 | 3.3870 |
|
| 52 |
+
| 1.9707 | 2.0 | 394 | 3.2436 |
|
| 53 |
+
| 0.9107 | 3.0 | 591 | 3.3470 |
|
| 54 |
+
| 0.5113 | 4.0 | 788 | 3.4348 |
|
| 55 |
+
| 0.3692 | 5.0 | 985 | 3.4695 |
|
| 56 |
+
| 0.2936 | 6.0 | 1182 | 3.4905 |
|
| 57 |
+
| 0.2342 | 7.0 | 1379 | 3.4944 |
|
| 58 |
|
| 59 |
|
| 60 |
### Framework versions
|
config.json
CHANGED
|
@@ -10,7 +10,7 @@
|
|
| 10 |
"hidden_size": 1024,
|
| 11 |
"initializer_range": 0.02,
|
| 12 |
"intermediate_size": 3072,
|
| 13 |
-
"max_position_embeddings":
|
| 14 |
"model_type": "llama",
|
| 15 |
"num_attention_heads": 8,
|
| 16 |
"num_hidden_layers": 24,
|
|
@@ -24,5 +24,5 @@
|
|
| 24 |
"torch_dtype": "float32",
|
| 25 |
"transformers_version": "4.39.1",
|
| 26 |
"use_cache": true,
|
| 27 |
-
"vocab_size":
|
| 28 |
}
|
|
|
|
| 10 |
"hidden_size": 1024,
|
| 11 |
"initializer_range": 0.02,
|
| 12 |
"intermediate_size": 3072,
|
| 13 |
+
"max_position_embeddings": 64,
|
| 14 |
"model_type": "llama",
|
| 15 |
"num_attention_heads": 8,
|
| 16 |
"num_hidden_layers": 24,
|
|
|
|
| 24 |
"torch_dtype": "float32",
|
| 25 |
"transformers_version": "4.39.1",
|
| 26 |
"use_cache": true,
|
| 27 |
+
"vocab_size": 32000
|
| 28 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59b8a5e4c46e9d9a9f3a4211804b7c30c33ff390779c553649ef74ee8ab7cb36
|
| 3 |
+
size 1570992472
|
runs/Apr10_10-13-45_gcn68.local.snellius.surf.nl/events.out.tfevents.1712736835.gcn68.local.snellius.surf.nl.3597541.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d70e0bdd4cd3a06c0b23d8ea797205d2805135b31f5cc2e59c4d81aec544723b
|
| 3 |
+
size 13007
|
runs/Apr10_10-19-18_gcn16.local.snellius.surf.nl/events.out.tfevents.1712737168.gcn16.local.snellius.surf.nl.150583.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d91c1da480cb97fc52467549542d5313f88afaff7f18fdfee41499544f3a13d9
|
| 3 |
+
size 21137
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c909776f3d4c8ae287c4e7b022d707381710ed10e5e8ad0394e3fe7325f879d8
|
| 3 |
size 4984
|