End of training

Files changed (8)

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-base_model: unsloth/deepseek-r1-distill-llama-8b-unsloth-bnb-4bit
 library_name: transformers
 model_name: outputs
 tags:
@@ -12,7 +12,7 @@ licence: license
 # Model Card for outputs
-This model is a fine-tuned version of [unsloth/deepseek-r1-distill-llama-8b-unsloth-bnb-4bit](https://huggingface.co/unsloth/deepseek-r1-distill-llama-8b-unsloth-bnb-4bit).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -36,10 +36,10 @@ This model was trained with SFT.
 ### Framework versions
 - TRL: 0.15.2
-- Transformers: 4.51.1
 - Pytorch: 2.6.0
-- Datasets: 3.5.0
-- Tokenizers: 0.21.0
 ## Citations

 ---
+base_model: unsloth/mistral-7b-instruct-v0.3-bnb-4bit
 library_name: transformers
 model_name: outputs
 tags:
 # Model Card for outputs
+This model is a fine-tuned version of [unsloth/mistral-7b-instruct-v0.3-bnb-4bit](https://huggingface.co/unsloth/mistral-7b-instruct-v0.3-bnb-4bit).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ### Framework versions
 - TRL: 0.15.2
+- Transformers: 4.51.3
 - Pytorch: 2.6.0
+- Datasets: 3.6.0
+- Tokenizers: 0.21.1
 ## Citations

adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "unsloth/deepseek-r1-distill-llama-8b-unsloth-bnb-4bit",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,
@@ -12,23 +12,26 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 16,
   "lora_bias": false,
-  "lora_dropout": 0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 4,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "q_proj",
     "k_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
-  "use_rslora": false
 }

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 32,
   "lora_bias": false,
+  "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "down_proj",
     "k_proj",
+    "q_proj",
+    "v_proj",
+    "up_proj",
+    "o_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
+  "use_rslora": true
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b4c6a4fca556a0f90d60ef92e3d25325dd2721f25d5e8f7b9d151d28b2942b9
-size 6849416

 version https://git-lfs.github.com/spec/v1
+oid sha256:6f3b88e6c06c696a27cfda9303b29ce50aa2861954ab7d4acdee8fa50a2d3363
+size 167832240

special_tokens_map.json CHANGED Viewed

@@ -1,20 +1,27 @@
 {
   "bos_token": {
-    "content": "<｜begin▁of▁sentence｜>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
-    "content": "<｜end▁of▁sentence｜>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
-    "content": "<|finetune_right_pad_id|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

 {
   "bos_token": {
+    "content": "<s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
+    "content": "</s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
+    "content": "[control_768]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b013cffaaca3e8d7e218974014c241d83e5fe33d7ce45c2e8e63c9ecfa149d41
-size 17209807

 version https://git-lfs.github.com/spec/v1
+oid sha256:60c3fc985cbfedcb429d05994efe548bdfecd6a00226fcdc8380c36fd894a3be
+size 3671968

tokenizer.model ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
+size 587404

tokenizer_config.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd1db315c16dbeb1e948e4507cc2d4cc5ff0013282895ffe04545968bb3adc10
 size 5560

 version https://git-lfs.github.com/spec/v1
+oid sha256:ffeeb919aeca24624cd4aa49fce02eb0fb4158a6a9d699fbfbd0979c08e9c9c1
 size 5560