lukeleeai committed on
Commit
b24f2b2
·
verified ·
1 Parent(s): 4029b7e

Training in progress, step 50

Browse files
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- license: apache-2.0
3
- base_model: mistralai/Mistral-7B-Instruct-v0.1
4
  tags:
 
 
5
  - generated_from_trainer
6
- datasets:
7
- - openwebtext
8
  model-index:
9
  - name: Mistral_Sparse_pretraining_80_percent
10
  results: []
@@ -15,9 +15,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # Mistral_Sparse_pretraining_80_percent
17
 
18
- This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) on the openwebtext dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 2.7862
 
21
 
22
  ## Model description
23
 
@@ -47,7 +48,7 @@ The following hyperparameters were used during training:
47
  - total_eval_batch_size: 64
48
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
  - lr_scheduler_type: linear
50
- - training_steps: 2
51
 
52
  ### Training results
53
 
 
1
  ---
 
 
2
  tags:
3
+ - trl
4
+ - sft
5
  - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
  model-index:
9
  - name: Mistral_Sparse_pretraining_80_percent
10
  results: []
 
15
 
16
  # Mistral_Sparse_pretraining_80_percent
17
 
18
+ This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 3.8189
21
+ - Accuracy: 0.6641
22
 
23
  ## Model description
24
 
 
48
  - total_eval_batch_size: 64
49
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
50
  - lr_scheduler_type: linear
51
+ - training_steps: 1
52
 
53
  ### Training results
54
 
adapter_config.json CHANGED
@@ -20,9 +20,9 @@
20
  "revision": null,
21
  "target_modules": [
22
  "q_proj",
 
23
  "gate_proj",
24
- "v_proj",
25
- "down_proj"
26
  ],
27
  "task_type": "CAUSAL_LM"
28
  }
 
20
  "revision": null,
21
  "target_modules": [
22
  "q_proj",
23
+ "down_proj",
24
  "gate_proj",
25
+ "v_proj"
 
26
  ],
27
  "task_type": "CAUSAL_LM"
28
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
3
- size 48
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:715308f7a1dbf23335a3ffeef5a5716660ef688b147267e8286b194a9ded4feb
3
+ size 205555592
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.1",
3
  "architectures": [
4
  "SparseMistral"
5
  ],
@@ -23,7 +22,7 @@
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.35.2",
25
  "use_cache": true,
26
- "use_sparse_model": false,
27
- "use_sparse_regularization": false,
28
  "vocab_size": 32000
29
  }
 
1
  {
 
2
  "architectures": [
3
  "SparseMistral"
4
  ],
 
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.35.2",
24
  "use_cache": true,
25
+ "use_sparse_model": true,
26
+ "use_sparse_regularization": true,
27
  "vocab_size": 32000
28
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce255e1c678bab67d40e0e7470e0bff12a4cabea3cac6dcb8f41c2adf0b57c9a
3
  size 4943163992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67fcf1345c6603af992dd45400b09d9662edd8e016b463d859d6ac6bf81387da
3
  size 4943163992
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68e21ccc1b9703ae7a6ec4264f248cf3f8f9227b24723d96438e16d265d5443b
3
  size 4999821144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05a6eb0e29e868f5dcdd9627d9efa0d1b37dd47649332dce435dfd55e4a2db40
3
  size 4999821144
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f99e43cac8aa500195dae0c7f6fe4fdc6c1a5353ee99b1a63ab6b5385852461
3
  size 4540517840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e668f826fc812803bfc893d877cb26727328fcc769f0dcbc5c3f174c64b6b1e9
3
  size 4540517840
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0edd9a9aea652105a2d8eed2ea59d50affc6d9013c74ab173064adef41bfe0fb
3
  size 6008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30269d5033b60660e10b05bed7611f596ff0ec82922cffadfa27940bb42cd47
3
  size 6008