Training in progress, step 50

Files changed (8) hide show

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
-license: apache-2.0
-base_model: mistralai/Mistral-7B-Instruct-v0.1
 tags:
 - generated_from_trainer
-datasets:
-- openwebtext
 model-index:
 - name: Mistral_Sparse_pretraining_80_percent
   results: []
@@ -15,9 +15,10 @@ should probably proofread and complete it, then remove this comment. -->
 # Mistral_Sparse_pretraining_80_percent
-This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) on the openwebtext dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.7862
 ## Model description
@@ -47,7 +48,7 @@ The following hyperparameters were used during training:
 - total_eval_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- training_steps: 2
 ### Training results

 ---
 tags:
+- trl
+- sft
 - generated_from_trainer
+metrics:
+- accuracy
 model-index:
 - name: Mistral_Sparse_pretraining_80_percent
   results: []
 # Mistral_Sparse_pretraining_80_percent
+This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 3.8189
+- Accuracy: 0.6641
 ## Model description
 - total_eval_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- training_steps: 1
 ### Training results

adapter_config.json CHANGED Viewed

@@ -20,9 +20,9 @@
   "revision": null,
   "target_modules": [
     "q_proj",
     "gate_proj",
-    "v_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

   "revision": null,
   "target_modules": [
     "q_proj",
+    "down_proj",
     "gate_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
-size 48

 version https://git-lfs.github.com/spec/v1
+oid sha256:715308f7a1dbf23335a3ffeef5a5716660ef688b147267e8286b194a9ded4feb
+size 205555592

config.json CHANGED Viewed

@@ -1,5 +1,4 @@
 {
-  "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.1",
   "architectures": [
     "SparseMistral"
   ],
@@ -23,7 +22,7 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.35.2",
   "use_cache": true,
-  "use_sparse_model": false,
-  "use_sparse_regularization": false,
   "vocab_size": 32000
 }

 {
   "architectures": [
     "SparseMistral"
   ],
   "torch_dtype": "bfloat16",
   "transformers_version": "4.35.2",
   "use_cache": true,
+  "use_sparse_model": true,
+  "use_sparse_regularization": true,
   "vocab_size": 32000
 }

model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce255e1c678bab67d40e0e7470e0bff12a4cabea3cac6dcb8f41c2adf0b57c9a
 size 4943163992

 version https://git-lfs.github.com/spec/v1
+oid sha256:67fcf1345c6603af992dd45400b09d9662edd8e016b463d859d6ac6bf81387da
 size 4943163992

model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68e21ccc1b9703ae7a6ec4264f248cf3f8f9227b24723d96438e16d265d5443b
 size 4999821144

 version https://git-lfs.github.com/spec/v1
+oid sha256:05a6eb0e29e868f5dcdd9627d9efa0d1b37dd47649332dce435dfd55e4a2db40
 size 4999821144

model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f99e43cac8aa500195dae0c7f6fe4fdc6c1a5353ee99b1a63ab6b5385852461
 size 4540517840

 version https://git-lfs.github.com/spec/v1
+oid sha256:e668f826fc812803bfc893d877cb26727328fcc769f0dcbc5c3f174c64b6b1e9
 size 4540517840

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0edd9a9aea652105a2d8eed2ea59d50affc6d9013c74ab173064adef41bfe0fb
 size 6008

 version https://git-lfs.github.com/spec/v1
+oid sha256:b30269d5033b60660e10b05bed7611f596ff0ec82922cffadfa27940bb42cd47
 size 6008