baby-dev committed
Commit 195095c · verified · 1 Parent(s): cc8522f

End of training
README.md CHANGED
@@ -114,7 +114,7 @@ xformers_attention: null
 
 This model is a fine-tuned version of [HuggingFaceM4/tiny-random-LlamaForCausalLM](https://huggingface.co/HuggingFaceM4/tiny-random-LlamaForCausalLM) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 10.0460
+- Loss: 10.0458
 
 ## Model description
 
@@ -148,12 +148,12 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| No log        | 0.0017 | 1    | 10.3634         |
-| 10.1375       | 0.0846 | 50   | 10.1435         |
-| 10.0115       | 0.1693 | 100  | 10.0550         |
-| 10.0111       | 0.2539 | 150  | 10.0513         |
-| 10.0157       | 0.3386 | 200  | 10.0494         |
-| 10.0101       | 0.4232 | 250  | 10.0460         |
+| No log        | 0.0017 | 1    | 10.3633         |
+| 10.1286       | 0.0846 | 50   | 10.1285         |
+| 10.0127       | 0.1693 | 100  | 10.0565         |
+| 10.0111       | 0.2539 | 150  | 10.0516         |
+| 10.0149       | 0.3386 | 200  | 10.0488         |
+| 10.0091       | 0.4232 | 250  | 10.0458         |
 
 
 ### Framework versions
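For intuition on the reported number: the evaluation loss is the mean per-token cross-entropy (in nats), so the corresponding perplexity is simply exp(loss). A minimal sketch using the final validation loss from the table above:

```python
# Minimal sketch: convert the reported eval loss to perplexity.
# Assumes the loss is mean per-token cross-entropy in nats.
import math

eval_loss = 10.0458  # final validation loss from the table above
perplexity = math.exp(eval_loss)
print(f"perplexity ~ {perplexity:,.0f}")  # ~ 23,059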
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
     "rank_pattern": {},
     "revision": null,
     "target_modules": [
+        "down_proj",
+        "q_proj",
         "o_proj",
         "gate_proj",
-        "v_proj",
-        "q_proj",
-        "down_proj",
+        "up_proj",
         "k_proj",
-        "up_proj"
+        "v_proj"
     ],
     "task_type": "CAUSAL_LM",
     "use_dora": false,
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f4cc1c911cdb6b75e6ef79b90816dff388e9b440f1518da129c0e75eae58174
+oid sha256:205782a99f62519600cd2b68e7422316b22672b1d147d80e76630f1382c8466e
 size 104322
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2277498183f175e157bfe2a8cb6c913c6815b956b72ce057d1a61e34eaaf0e38
+oid sha256:e4e4fe181e83e6c7c00bc6ec075e87fb77fe72f07a40d0f4551a2e673e9d70c7
 size 97728
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4780c040b65b637eb80c0e56444fb3f2f6c0dba70fe84c9344982277dc43bda0
+oid sha256:2a1ec810967320e3c24d8c7b6696308cae904fc205727ef08a59093620d7caa2
 size 6776
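The adapter_model.bin, adapter_model.safetensors, and training_args.bin changes above each swap a Git LFS pointer: a small text stub recording the sha256 oid and byte size of the real object. A minimal sketch, assuming the object has already been downloaded locally, of checking a file against its pointer:

```python
# Minimal sketch: verify a downloaded LFS object against its pointer's oid.
import hashlib

def sha256_of(path):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# New oid for adapter_model.safetensors, taken from the diff above.
expected = "e4e4fe181e83e6c7c00bc6ec075e87fb77fe72f07a40d0f4551a2e673e9d70c7"
assert sha256_of("adapter_model.safetensors") == expected
```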