ninagroot committed on
Commit
81de0d3
·
verified ·
1 Parent(s): 982540f

ninagroot/Llama-360Mtest

Browse files
README.md CHANGED
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - Loss: 4.6344
17
 
18
  ## Model description
19
 
@@ -48,13 +48,13 @@ The following hyperparameters were used during training:
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
- | 4.5524 | 1.0 | 42 | 4.7852 |
52
- | 2.1679 | 2.0 | 84 | 4.1345 |
53
- | 1.4522 | 3.0 | 126 | 4.2880 |
54
- | 0.9328 | 4.0 | 168 | 4.4565 |
55
- | 0.5606 | 5.0 | 210 | 4.6031 |
56
- | 0.3514 | 6.0 | 252 | 4.6564 |
57
- | 0.2839 | 7.0 | 294 | 4.6344 |
58
 
59
 
60
  ### Framework versions
 
13
 
14
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - Loss: 3.4944
17
 
18
  ## Model description
19
 
 
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
+ | 3.0504 | 1.0 | 197 | 3.3870 |
52
+ | 1.9707 | 2.0 | 394 | 3.2436 |
53
+ | 0.9107 | 3.0 | 591 | 3.3470 |
54
+ | 0.5113 | 4.0 | 788 | 3.4348 |
55
+ | 0.3692 | 5.0 | 985 | 3.4695 |
56
+ | 0.2936 | 6.0 | 1182 | 3.4905 |
57
+ | 0.2342 | 7.0 | 1379 | 3.4944 |
58
 
59
 
60
  ### Framework versions
config.json CHANGED
@@ -10,7 +10,7 @@
10
  "hidden_size": 1024,
11
  "initializer_range": 0.02,
12
  "intermediate_size": 3072,
13
- "max_position_embeddings": 256,
14
  "model_type": "llama",
15
  "num_attention_heads": 8,
16
  "num_hidden_layers": 24,
@@ -24,5 +24,5 @@
24
  "torch_dtype": "float32",
25
  "transformers_version": "4.39.1",
26
  "use_cache": true,
27
- "vocab_size": 12214
28
  }
 
10
  "hidden_size": 1024,
11
  "initializer_range": 0.02,
12
  "intermediate_size": 3072,
13
+ "max_position_embeddings": 64,
14
  "model_type": "llama",
15
  "num_attention_heads": 8,
16
  "num_hidden_layers": 24,
 
24
  "torch_dtype": "float32",
25
  "transformers_version": "4.39.1",
26
  "use_cache": true,
27
+ "vocab_size": 32000
28
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:581986c8393aab378ba562c2e1515606e5f06fcb1b3cd14bf03d7ba824cd0bce
3
- size 1408905504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59b8a5e4c46e9d9a9f3a4211804b7c30c33ff390779c553649ef74ee8ab7cb36
3
+ size 1570992472
runs/Apr10_10-13-45_gcn68.local.snellius.surf.nl/events.out.tfevents.1712736835.gcn68.local.snellius.surf.nl.3597541.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d70e0bdd4cd3a06c0b23d8ea797205d2805135b31f5cc2e59c4d81aec544723b
3
+ size 13007
runs/Apr10_10-19-18_gcn16.local.snellius.surf.nl/events.out.tfevents.1712737168.gcn16.local.snellius.surf.nl.150583.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d91c1da480cb97fc52467549542d5313f88afaff7f18fdfee41499544f3a13d9
3
+ size 21137
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ed61a658032aee43721b02ee38be59a2bf2b6b2a5447552875f86900ad7932e
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c909776f3d4c8ae287c4e7b022d707381710ed10e5e8ad0394e3fe7325f879d8
3
  size 4984