simon-muenker committed on
Commit
14b2b75
·
verified ·
1 Parent(s): 264314f

Model save

Browse files
README.md CHANGED
@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  This model is a fine-tuned version of [meta-llama/Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 3.0378
24
 
25
  ## Model description
26
 
@@ -51,12 +51,12 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:-----:|:----:|:---------------:|
54
- | 3.3261 | 1.25 | 5 | 3.3720 |
55
- | 3.1698 | 2.5 | 10 | 3.2575 |
56
- | 3.0862 | 3.75 | 15 | 3.1654 |
57
- | 3.0221 | 5.0 | 20 | 3.1012 |
58
- | 2.9626 | 6.25 | 25 | 3.0603 |
59
- | 2.9274 | 7.5 | 30 | 3.0402 |
60
 
61
 
62
  ### Framework versions
 
20
 
21
  This model is a fine-tuned version of [meta-llama/Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 2.9401
24
 
25
  ## Model description
26
 
 
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:-----:|:----:|:---------------:|
54
+ | 3.3716 | 1.25 | 5 | 3.2267 |
55
+ | 3.2374 | 2.5 | 10 | 3.1263 |
56
+ | 3.1718 | 3.75 | 15 | 3.0495 |
57
+ | 3.0543 | 5.0 | 20 | 2.9950 |
58
+ | 3.0153 | 6.25 | 25 | 2.9599 |
59
+ | 2.9787 | 7.5 | 30 | 2.9422 |
60
 
61
 
62
  ### Framework versions
adapter_config.json CHANGED
@@ -20,8 +20,8 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "q_proj",
24
- "v_proj"
25
  ],
26
  "task_type": "CAUSAL_LM",
27
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "v_proj",
24
+ "q_proj"
25
  ],
26
  "task_type": "CAUSAL_LM",
27
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8414f3e1180a6191a58a108bcb0e1c601d5426f6a10b1de97e9944310fe3769e
3
  size 9189792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f783db783113281c507bd79b87f05ed4f94fde52397f4b9b91d9aa5c5af0c82
3
  size 9189792
runs/Jun25_12-23-58_twon/events.out.tfevents.1750854249.twon.441941.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a0947599db7eb965b5192f606c19091c534728a53c6fcd3c05d001fcb8bf76b
3
+ size 9026
runs/Jun25_12-23-58_twon/events.out.tfevents.1750854318.twon.441941.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ae8d9a7b7a84173f88364a020a5a074216bc199da09ff6693a2a43c063d8ec
3
+ size 354
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9698b1b4eab3bf1598e5c6d4cc60911af4a7040a405d7da4dbe105a966826dfe
3
  size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3573d5b83134ea6f5c79a2ab577cfe1571a2bf5ac952cdd7147f6472582e0b33
3
  size 5688