baby-dev committed on
Commit
dc3cdc0
·
verified ·
1 Parent(s): cfcffae

End of training

Browse files
README.md CHANGED
@@ -51,7 +51,7 @@ fp16: false
51
  fsdp: null
52
  fsdp_config: null
53
  gradient_accumulation_steps: 4
54
- gradient_checkpointing: false
55
  group_by_length: true
56
  hub_model_id: baby-dev/test-default-01
57
  hub_repo: null
@@ -114,7 +114,7 @@ xformers_attention: null
114
 
115
  This model is a fine-tuned version of [HuggingFaceM4/tiny-random-LlamaForCausalLM](https://huggingface.co/HuggingFaceM4/tiny-random-LlamaForCausalLM) on the None dataset.
116
  It achieves the following results on the evaluation set:
117
- - Loss: 10.0461
118
 
119
  ## Model description
120
 
@@ -148,12 +148,12 @@ The following hyperparameters were used during training:
148
 
149
  | Training Loss | Epoch | Step | Validation Loss |
150
  |:-------------:|:------:|:----:|:---------------:|
151
- | No log | 0.0017 | 1 | 10.3632 |
152
- | 10.1451 | 0.0846 | 50 | 10.1555 |
153
- | 10.0091 | 0.1693 | 100 | 10.0546 |
154
- | 10.0116 | 0.2539 | 150 | 10.0513 |
155
- | 10.0141 | 0.3386 | 200 | 10.0488 |
156
- | 10.0093 | 0.4232 | 250 | 10.0461 |
157
 
158
 
159
  ### Framework versions
 
51
  fsdp: null
52
  fsdp_config: null
53
  gradient_accumulation_steps: 4
54
+ gradient_checkpointing: true
55
  group_by_length: true
56
  hub_model_id: baby-dev/test-default-01
57
  hub_repo: null
 
114
 
115
  This model is a fine-tuned version of [HuggingFaceM4/tiny-random-LlamaForCausalLM](https://huggingface.co/HuggingFaceM4/tiny-random-LlamaForCausalLM) on the None dataset.
116
  It achieves the following results on the evaluation set:
117
+ - Loss: 10.0453
118
 
119
  ## Model description
120
 
 
148
 
149
  | Training Loss | Epoch | Step | Validation Loss |
150
  |:-------------:|:------:|:----:|:---------------:|
151
+ | No log | 0.0017 | 1 | 10.3635 |
152
+ | 10.1304 | 0.0846 | 50 | 10.1295 |
153
+ | 10.0067 | 0.1693 | 100 | 10.0532 |
154
+ | 10.012 | 0.2539 | 150 | 10.0511 |
155
+ | 10.0145 | 0.3386 | 200 | 10.0490 |
156
+ | 10.0083 | 0.4232 | 250 | 10.0453 |
157
 
158
 
159
  ### Framework versions
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "v_proj",
24
  "down_proj",
25
- "o_proj",
26
- "k_proj",
27
  "gate_proj",
28
- "q_proj",
29
- "up_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "q_proj",
24
  "down_proj",
 
 
25
  "gate_proj",
26
+ "up_proj",
27
+ "k_proj",
28
+ "v_proj",
29
+ "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:173082152d06d9881a0ba836787780672922d74b240bc452817a1a9e1dd65a88
3
  size 104322
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e75e617f9e6840d8288418829673005f2fe868ad7d77c870670479a4900aa5e
3
  size 104322
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:787c931a61dcef9a09f4865f585ca9a1676ee6c8557bc725f6b3d862616f39ad
3
  size 97728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a44d204943dcb33adf4a57f7ef22094330b22960900c28db21b98ccba913b9f6
3
  size 97728
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:247807b9fbae7bb66e11564d2bbc1ddd55c45a3a7ad93a60eaea6cb9b7e34d32
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83d8db823c389ea7d7b23fe13e45ed7c229764879e8d6d9de4bcbc82529ea944
3
  size 6776