ninagroot committed on
Commit
4594b26
·
verified ·
1 Parent(s): 1735bfb

ninagroot/Llama-360Mtest

Browse files
README.md CHANGED
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - Loss: 4.1886
17
 
18
  ## Model description
19
 
@@ -41,24 +41,23 @@ The following hyperparameters were used during training:
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: cosine
43
  - lr_scheduler_warmup_steps: 100
44
- - num_epochs: 6
45
  - mixed_precision_training: Native AMP
46
 
47
  ### Training results
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
- | 5.9345 | 1.0 | 138 | 5.6878 |
52
- | 4.7674 | 2.0 | 276 | 4.7003 |
53
- | 3.6914 | 3.0 | 414 | 4.3374 |
54
- | 3.6076 | 4.0 | 552 | 4.2433 |
55
- | 3.3436 | 5.0 | 690 | 4.1851 |
56
- | 2.939 | 6.0 | 828 | 4.1886 |
57
 
58
 
59
  ### Framework versions
60
 
61
- - Transformers 4.37.2
62
  - Pytorch 2.1.2+cu121
63
  - Datasets 2.16.1
64
  - Tokenizers 0.15.0
 
13
 
14
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - Loss: 4.1441
17
 
18
  ## Model description
19
 
 
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: cosine
43
  - lr_scheduler_warmup_steps: 100
44
+ - num_epochs: 5
45
  - mixed_precision_training: Native AMP
46
 
47
  ### Training results
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
+ | 5.7623 | 1.0 | 145 | 5.6148 |
52
+ | 4.6318 | 2.0 | 290 | 4.6321 |
53
+ | 3.8186 | 3.0 | 435 | 4.2714 |
54
+ | 3.447 | 4.0 | 580 | 4.1596 |
55
+ | 3.2664 | 5.0 | 725 | 4.1441 |
 
56
 
57
 
58
  ### Framework versions
59
 
60
+ - Transformers 4.39.1
61
  - Pytorch 2.1.2+cu121
62
  - Datasets 2.16.1
63
  - Tokenizers 0.15.0
config.json CHANGED
@@ -22,7 +22,7 @@
22
  "rope_theta": 10000.0,
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "float32",
25
- "transformers_version": "4.37.2",
26
  "use_cache": true,
27
- "vocab_size": 4312
28
  }
 
22
  "rope_theta": 10000.0,
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "float32",
25
+ "transformers_version": "4.39.1",
26
  "use_cache": true,
27
+ "vocab_size": 4425
28
  }
generation_config.json CHANGED
@@ -3,5 +3,5 @@
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
  "pad_token_id": 0,
6
- "transformers_version": "4.37.2"
7
  }
 
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
  "pad_token_id": 0,
6
+ "transformers_version": "4.39.1"
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b4bcbb5356dde8a15329ab11495572f2d66bb3dabc8f7a980ddf651d1e4cd90
3
- size 1344172280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b0b6f38c404c54d4e5c7875184e8efb93d17e41df03afd29e8128cbb901267c
3
+ size 1345097976
runs/Apr02_11-45-25_gcn21.local.snellius.surf.nl/events.out.tfevents.1712051134.gcn21.local.snellius.surf.nl.4193788.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:667deadef273bc1072c516ddc78e9711e561dbcd34fa9c98bf64726ef75d9252
3
+ size 13842
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95beafee183dd8c0f8967da1e54d6cb38ab5fe23f886d744fcaba66038c60105
3
- size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdc9b49c944ad1d600b4c700b9096b7cb8e099f01d17b84e8484cdb0a927b2fc
3
+ size 4984