Martinamousa95 committed on
Commit
ffde50b
·
verified ·
1 Parent(s): fe2fbe3

End of training

Browse files
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- base_model: unsloth/deepseek-r1-distill-llama-8b-unsloth-bnb-4bit
3
  library_name: transformers
4
  model_name: outputs
5
  tags:
@@ -12,7 +12,7 @@ licence: license
12
 
13
  # Model Card for outputs
14
 
15
- This model is a fine-tuned version of [unsloth/deepseek-r1-distill-llama-8b-unsloth-bnb-4bit](https://huggingface.co/unsloth/deepseek-r1-distill-llama-8b-unsloth-bnb-4bit).
16
  It has been trained using [TRL](https://github.com/huggingface/trl).
17
 
18
  ## Quick start
@@ -36,10 +36,10 @@ This model was trained with SFT.
36
  ### Framework versions
37
 
38
  - TRL: 0.15.2
39
- - Transformers: 4.51.1
40
  - Pytorch: 2.6.0
41
- - Datasets: 3.5.0
42
- - Tokenizers: 0.21.0
43
 
44
  ## Citations
45
 
 
1
  ---
2
+ base_model: unsloth/mistral-7b-instruct-v0.3-bnb-4bit
3
  library_name: transformers
4
  model_name: outputs
5
  tags:
 
12
 
13
  # Model Card for outputs
14
 
15
+ This model is a fine-tuned version of [unsloth/mistral-7b-instruct-v0.3-bnb-4bit](https://huggingface.co/unsloth/mistral-7b-instruct-v0.3-bnb-4bit).
16
  It has been trained using [TRL](https://github.com/huggingface/trl).
17
 
18
  ## Quick start
 
36
  ### Framework versions
37
 
38
  - TRL: 0.15.2
39
+ - Transformers: 4.51.3
40
  - Pytorch: 2.6.0
41
+ - Datasets: 3.6.0
42
+ - Tokenizers: 0.21.1
43
 
44
  ## Citations
45
 
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "unsloth/deepseek-r1-distill-llama-8b-unsloth-bnb-4bit",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
@@ -12,23 +12,26 @@
12
  "layers_pattern": null,
13
  "layers_to_transform": null,
14
  "loftq_config": {},
15
- "lora_alpha": 16,
16
  "lora_bias": false,
17
- "lora_dropout": 0,
18
  "megatron_config": null,
19
  "megatron_core": "megatron.core",
20
  "modules_to_save": null,
21
  "peft_type": "LORA",
22
- "r": 4,
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "v_proj",
27
- "q_proj",
28
  "k_proj",
29
- "o_proj"
 
 
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
33
- "use_rslora": false
34
  }
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
 
12
  "layers_pattern": null,
13
  "layers_to_transform": null,
14
  "loftq_config": {},
15
+ "lora_alpha": 32,
16
  "lora_bias": false,
17
+ "lora_dropout": 0.05,
18
  "megatron_config": null,
19
  "megatron_core": "megatron.core",
20
  "modules_to_save": null,
21
  "peft_type": "LORA",
22
+ "r": 32,
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "down_proj",
 
27
  "k_proj",
28
+ "q_proj",
29
+ "v_proj",
30
+ "up_proj",
31
+ "o_proj",
32
+ "gate_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
36
+ "use_rslora": true
37
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b4c6a4fca556a0f90d60ef92e3d25325dd2721f25d5e8f7b9d151d28b2942b9
3
- size 6849416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f3b88e6c06c696a27cfda9303b29ce50aa2861954ab7d4acdee8fa50a2d3363
3
+ size 167832240
special_tokens_map.json CHANGED
@@ -1,20 +1,27 @@
1
  {
2
  "bos_token": {
3
- "content": "<|begin▁of▁sentence|>",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|end▁of▁sentence|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
- "content": "<|finetune_right_pad_id|>",
 
 
 
 
 
 
 
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
 
1
  {
2
  "bos_token": {
3
+ "content": "<s>",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "</s>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
+ "content": "[control_768]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
  "lstrip": false,
26
  "normalized": false,
27
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b013cffaaca3e8d7e218974014c241d83e5fe33d7ce45c2e8e63c9ecfa149d41
3
- size 17209807
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60c3fc985cbfedcb429d05994efe548bdfecd6a00226fcdc8380c36fd894a3be
3
+ size 3671968
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
3
+ size 587404
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd1db315c16dbeb1e948e4507cc2d4cc5ff0013282895ffe04545968bb3adc10
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffeeb919aeca24624cd4aa49fce02eb0fb4158a6a9d699fbfbd0979c08e9c9c1
3
  size 5560