ninagroot committed on
Commit
86bd94d
·
verified ·
1 Parent(s): f2aa3fb

ninagroot/Llama-360Mtest

Browse files
README.md CHANGED
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - Loss: 3.2009
17
 
18
  ## Model description
19
 
@@ -48,9 +48,9 @@ The following hyperparameters were used during training:
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
- | 2.9075 | 1.0 | 210 | 3.3783 |
52
- | 1.7841 | 2.0 | 420 | 3.2031 |
53
- | 0.9611 | 3.0 | 630 | 3.2009 |
54
 
55
 
56
  ### Framework versions
 
13
 
14
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - Loss: 3.3555
17
 
18
  ## Model description
19
 
 
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
+ | 6.5087 | 0.99 | 45 | 5.0172 |
52
+ | 3.5388 | 2.0 | 91 | 3.5244 |
53
+ | 2.4645 | 2.97 | 135 | 3.3555 |
54
 
55
 
56
  ### Framework versions
config.json CHANGED
@@ -10,12 +10,12 @@
10
  "hidden_size": 1024,
11
  "initializer_range": 0.02,
12
  "intermediate_size": 3072,
13
- "max_position_embeddings": 60,
14
  "model_type": "llama",
15
  "num_attention_heads": 8,
16
  "num_hidden_layers": 24,
17
  "num_key_value_heads": 8,
18
- "pad_token_id": 0,
19
  "pretraining_tp": 1,
20
  "rms_norm_eps": 1e-06,
21
  "rope_scaling": null,
@@ -24,5 +24,5 @@
24
  "torch_dtype": "float32",
25
  "transformers_version": "4.39.1",
26
  "use_cache": true,
27
- "vocab_size": 32000
28
  }
 
10
  "hidden_size": 1024,
11
  "initializer_range": 0.02,
12
  "intermediate_size": 3072,
13
+ "max_position_embeddings": 256,
14
  "model_type": "llama",
15
  "num_attention_heads": 8,
16
  "num_hidden_layers": 24,
17
  "num_key_value_heads": 8,
18
+ "pad_token_id": 50256,
19
  "pretraining_tp": 1,
20
  "rms_norm_eps": 1e-06,
21
  "rope_scaling": null,
 
24
  "torch_dtype": "float32",
25
  "transformers_version": "4.39.1",
26
  "use_cache": true,
27
+ "vocab_size": 50257
28
  }
generation_config.json CHANGED
@@ -2,6 +2,6 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
- "pad_token_id": 0,
6
  "transformers_version": "4.39.1"
7
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
+ "pad_token_id": 50256,
6
  "transformers_version": "4.39.1"
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59ad553e073602086265868ee976391542d2c79bd6b9a676bda55cc75035c179
3
- size 1570992472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8edaec246532c355a71eed5d1e21f98b73538fff9497fab79efab02e9806d1d6
3
+ size 1720553864
runs/Apr10_09-53-11_gcn37.local.snellius.surf.nl/events.out.tfevents.1712735600.gcn37.local.snellius.surf.nl.1085902.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c918c79e787df1bdd8c95a29b516042be44267a89a726a65f5a830048330fb4b
3
+ size 6966
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:391459dd6269ecbb83dba9c883903f96777586da6a2d64ed6471804569bbd266
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ece1839304b089bc2ce12ceb44e6cc28d7b73192701f0dd19aa3fa12c7b00a73
3
  size 4984