ibokajordan committed on
Commit
1c2ab34
·
verified ·
1 Parent(s): ea5cb89

ibokajordan/QWEN_rag

Browse files
README.md CHANGED
@@ -1,7 +1,6 @@
1
  ---
2
- library_name: peft
3
- license: other
4
- base_model: duxx/DeepSeek-R1-Distill-Qwen-1.5B-Turkish
5
  tags:
6
  - generated_from_trainer
7
  model-index:
@@ -14,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # QWEN_rag
16
 
17
- This model is a fine-tuned version of [duxx/DeepSeek-R1-Distill-Qwen-1.5B-Turkish](https://huggingface.co/duxx/DeepSeek-R1-Distill-Qwen-1.5B-Turkish) on the None dataset.
18
 
19
  ## Model description
20
 
@@ -41,7 +40,7 @@ The following hyperparameters were used during training:
41
  - total_train_batch_size: 4
42
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
43
  - lr_scheduler_type: linear
44
- - num_epochs: 1
45
  - mixed_precision_training: Native AMP
46
 
47
  ### Training results
@@ -50,8 +49,7 @@ The following hyperparameters were used during training:
50
 
51
  ### Framework versions
52
 
53
- - PEFT 0.15.2
54
  - Transformers 4.51.3
55
  - Pytorch 2.6.0+cu124
56
  - Datasets 2.14.4
57
- - Tokenizers 0.21.1
 
1
  ---
2
+ library_name: transformers
3
+ base_model: redrussianarmy/gpt2-turkish-cased
 
4
  tags:
5
  - generated_from_trainer
6
  model-index:
 
13
 
14
  # QWEN_rag
15
 
16
+ This model is a fine-tuned version of [redrussianarmy/gpt2-turkish-cased](https://huggingface.co/redrussianarmy/gpt2-turkish-cased) on an unspecified dataset.
17
 
18
  ## Model description
19
 
 
40
  - total_train_batch_size: 4
41
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
42
  - lr_scheduler_type: linear
43
+ - num_epochs: 3
44
  - mixed_precision_training: Native AMP
45
 
46
  ### Training results
 
49
 
50
  ### Framework versions
51
 
 
52
  - Transformers 4.51.3
53
  - Pytorch 2.6.0+cu124
54
  - Datasets 2.14.4
55
+ - Tokenizers 0.21.1
added_tokens.json CHANGED
@@ -1,24 +1,3 @@
1
  {
2
- "</tool_call>": 151658,
3
- "<tool_call>": 151657,
4
- "<|box_end|>": 151649,
5
- "<|box_start|>": 151648,
6
- "<|endoftext|>": 151643,
7
- "<|file_sep|>": 151664,
8
- "<|fim_middle|>": 151660,
9
- "<|fim_pad|>": 151662,
10
- "<|fim_prefix|>": 151659,
11
- "<|fim_suffix|>": 151661,
12
- "<|im_end|>": 151645,
13
- "<|im_start|>": 151644,
14
- "<|image_pad|>": 151655,
15
- "<|object_ref_end|>": 151647,
16
- "<|object_ref_start|>": 151646,
17
- "<|quad_end|>": 151651,
18
- "<|quad_start|>": 151650,
19
- "<|repo_name|>": 151663,
20
- "<|video_pad|>": 151656,
21
- "<|vision_end|>": 151653,
22
- "<|vision_pad|>": 151654,
23
- "<|vision_start|>": 151652
24
  }
 
1
  {
2
+ "<|endoftext|>": 50257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  }
config.json CHANGED
@@ -1,43 +1,39 @@
1
  {
 
2
  "architectures": [
3
- "Qwen2ForCausalLM"
4
  ],
5
- "attention_dropout": 0.0,
6
- "bos_token_id": 151643,
7
- "eos_token_id": 151645,
8
- "hidden_act": "silu",
9
- "hidden_size": 3584,
10
  "initializer_range": 0.02,
11
- "intermediate_size": 18944,
12
- "max_position_embeddings": 32768,
13
- "max_window_layers": 28,
14
- "model_type": "qwen2",
15
- "num_attention_heads": 28,
16
- "num_hidden_layers": 28,
17
- "num_key_value_heads": 4,
18
- "quantization_config": {
19
- "_load_in_4bit": true,
20
- "_load_in_8bit": false,
21
- "bnb_4bit_compute_dtype": "float16",
22
- "bnb_4bit_quant_storage": "uint8",
23
- "bnb_4bit_quant_type": "nf4",
24
- "bnb_4bit_use_double_quant": true,
25
- "llm_int8_enable_fp32_cpu_offload": false,
26
- "llm_int8_has_fp16_weight": false,
27
- "llm_int8_skip_modules": null,
28
- "llm_int8_threshold": 6.0,
29
- "load_in_4bit": true,
30
- "load_in_8bit": false,
31
- "quant_method": "bitsandbytes"
 
32
  },
33
- "rms_norm_eps": 1e-06,
34
- "rope_scaling": null,
35
- "rope_theta": 1000000.0,
36
- "sliding_window": 131072,
37
- "tie_word_embeddings": false,
38
- "torch_dtype": "float16",
39
  "transformers_version": "4.51.3",
40
  "use_cache": true,
41
- "use_sliding_window": false,
42
- "vocab_size": 152064
43
  }
 
1
  {
2
+ "activation_function": "gelu_new",
3
  "architectures": [
4
+ "GPT2LMHeadModel"
5
  ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": 50256,
8
+ "embd_pdrop": 0.1,
9
+ "eos_token_id": 50256,
10
+ "gradient_checkpointing": false,
11
  "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
  },
35
+ "torch_dtype": "float32",
 
 
 
 
 
36
  "transformers_version": "4.51.3",
37
  "use_cache": true,
38
+ "vocab_size": 50258
 
39
  }
generation_config.json CHANGED
@@ -1,14 +1,6 @@
1
  {
2
- "bos_token_id": 151643,
3
- "do_sample": true,
4
- "eos_token_id": [
5
- 151645,
6
- 151643
7
- ],
8
- "pad_token_id": 151643,
9
- "repetition_penalty": 1.05,
10
- "temperature": 0.7,
11
- "top_k": 20,
12
- "top_p": 0.8,
13
  "transformers_version": "4.51.3"
14
  }
 
1
  {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
 
 
 
 
 
 
 
 
5
  "transformers_version": "4.51.3"
6
  }
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09cc0fd4a75fa06aea289d29b073695c2adb2b6224cf5f2f32b7c99477b46370
3
+ size 497777280
special_tokens_map.json CHANGED
@@ -1,20 +1,24 @@
1
  {
2
- "additional_special_tokens": [
3
- "<|im_end|>"
4
- ],
5
  "bos_token": {
6
- "content": "<|begin▁of▁sentence|>",
7
  "lstrip": false,
8
- "normalized": false,
9
  "rstrip": false,
10
  "single_word": false
11
  },
12
  "eos_token": {
13
- "content": "<|end▁of▁sentence|>",
14
  "lstrip": false,
15
- "normalized": false,
16
  "rstrip": false,
17
  "single_word": false
18
  },
19
- "pad_token": "<|end▁of▁sentence|>"
 
 
 
 
 
 
 
20
  }
 
1
  {
 
 
 
2
  "bos_token": {
3
+ "content": "<|endoftext|>",
4
  "lstrip": false,
5
+ "normalized": true,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|endoftext|>",
11
  "lstrip": false,
12
+ "normalized": true,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f532078cf6d42d5b494756c29aec3c35dae85587b6a0802aea13417e405287b
3
- size 11423257
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f457a6d2cff417acee68f3edf76fd0972162ce2317a79cbf7c2e5bd06fe0e29
3
+ size 3831841
tokenizer_config.json CHANGED
@@ -1,208 +1,23 @@
1
  {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
- "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
- "151643": {
7
- "content": "<|end▁of▁sentence|>",
8
  "lstrip": false,
9
- "normalized": false,
10
- "rstrip": false,
11
- "single_word": false,
12
- "special": true
13
- },
14
- "151644": {
15
- "content": "<|User|>",
16
- "lstrip": false,
17
- "normalized": false,
18
- "rstrip": false,
19
- "single_word": false,
20
- "special": false
21
- },
22
- "151645": {
23
- "content": "<|Assistant|>",
24
- "lstrip": false,
25
- "normalized": false,
26
- "rstrip": false,
27
- "single_word": false,
28
- "special": false
29
- },
30
- "151646": {
31
- "content": "<|begin▁of▁sentence|>",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": false,
35
- "single_word": false,
36
- "special": true
37
- },
38
- "151647": {
39
- "content": "<|EOT|>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false,
44
- "special": false
45
- },
46
- "151648": {
47
- "content": "<think>",
48
- "lstrip": false,
49
- "normalized": false,
50
- "rstrip": false,
51
- "single_word": false,
52
- "special": false
53
- },
54
- "151649": {
55
- "content": "</think>",
56
- "lstrip": false,
57
- "normalized": false,
58
- "rstrip": false,
59
- "single_word": false,
60
- "special": false
61
- },
62
- "151650": {
63
- "content": "<|quad_start|>",
64
- "lstrip": false,
65
- "normalized": false,
66
- "rstrip": false,
67
- "single_word": false,
68
- "special": true
69
- },
70
- "151651": {
71
- "content": "<|quad_end|>",
72
- "lstrip": false,
73
- "normalized": false,
74
- "rstrip": false,
75
- "single_word": false,
76
- "special": true
77
- },
78
- "151652": {
79
- "content": "<|vision_start|>",
80
- "lstrip": false,
81
- "normalized": false,
82
- "rstrip": false,
83
- "single_word": false,
84
- "special": true
85
- },
86
- "151653": {
87
- "content": "<|vision_end|>",
88
- "lstrip": false,
89
- "normalized": false,
90
- "rstrip": false,
91
- "single_word": false,
92
- "special": true
93
- },
94
- "151654": {
95
- "content": "<|vision_pad|>",
96
- "lstrip": false,
97
- "normalized": false,
98
- "rstrip": false,
99
- "single_word": false,
100
- "special": true
101
- },
102
- "151655": {
103
- "content": "<|image_pad|>",
104
- "lstrip": false,
105
- "normalized": false,
106
- "rstrip": false,
107
- "single_word": false,
108
- "special": true
109
- },
110
- "151656": {
111
- "content": "<|video_pad|>",
112
- "lstrip": false,
113
- "normalized": false,
114
- "rstrip": false,
115
- "single_word": false,
116
- "special": true
117
- },
118
- "151657": {
119
- "content": "<tool_call>",
120
- "lstrip": false,
121
- "normalized": false,
122
- "rstrip": false,
123
- "single_word": false,
124
- "special": false
125
- },
126
- "151658": {
127
- "content": "</tool_call>",
128
- "lstrip": false,
129
- "normalized": false,
130
- "rstrip": false,
131
- "single_word": false,
132
- "special": false
133
- },
134
- "151659": {
135
- "content": "<|fim_prefix|>",
136
- "lstrip": false,
137
- "normalized": false,
138
- "rstrip": false,
139
- "single_word": false,
140
- "special": false
141
- },
142
- "151660": {
143
- "content": "<|fim_middle|>",
144
- "lstrip": false,
145
- "normalized": false,
146
- "rstrip": false,
147
- "single_word": false,
148
- "special": false
149
- },
150
- "151661": {
151
- "content": "<|fim_suffix|>",
152
- "lstrip": false,
153
- "normalized": false,
154
- "rstrip": false,
155
- "single_word": false,
156
- "special": false
157
- },
158
- "151662": {
159
- "content": "<|fim_pad|>",
160
- "lstrip": false,
161
- "normalized": false,
162
- "rstrip": false,
163
- "single_word": false,
164
- "special": false
165
- },
166
- "151663": {
167
- "content": "<|repo_name|>",
168
- "lstrip": false,
169
- "normalized": false,
170
- "rstrip": false,
171
- "single_word": false,
172
- "special": false
173
- },
174
- "151664": {
175
- "content": "<|file_sep|>",
176
- "lstrip": false,
177
- "normalized": false,
178
- "rstrip": false,
179
- "single_word": false,
180
- "special": false
181
- },
182
- "151665": {
183
- "content": "<|im_end|>",
184
- "lstrip": false,
185
- "normalized": false,
186
  "rstrip": false,
187
  "single_word": false,
188
  "special": true
189
  }
190
  },
191
- "additional_special_tokens": [
192
- "<|im_end|>"
193
- ],
194
- "bos_token": "<|begin▁of▁sentence|>",
195
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
196
  "clean_up_tokenization_spaces": false,
197
- "eos_token": "<|end▁of▁sentence|>",
 
198
  "extra_special_tokens": {},
199
- "legacy": true,
200
- "model_max_length": 4500,
201
- "pad_token": "<|end▁of▁sentence|>",
202
- "padding_side": "right",
203
- "sp_model_kwargs": {},
204
- "split_special_tokens": false,
205
- "tokenizer_class": "LlamaTokenizerFast",
206
- "unk_token": null,
207
- "use_default_system_prompt": false
208
  }
 
1
  {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
 
4
  "added_tokens_decoder": {
5
+ "50257": {
6
+ "content": "<|endoftext|>",
7
  "lstrip": false,
8
+ "normalized": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "rstrip": false,
10
  "single_word": false,
11
  "special": true
12
  }
13
  },
14
+ "bos_token": "<|endoftext|>",
 
 
 
 
15
  "clean_up_tokenization_spaces": false,
16
+ "eos_token": "<|endoftext|>",
17
+ "errors": "replace",
18
  "extra_special_tokens": {},
19
+ "model_max_length": 1024,
20
+ "pad_token": "<|endoftext|>",
21
+ "tokenizer_class": "GPT2Tokenizer",
22
+ "unk_token": "<|endoftext|>"
 
 
 
 
 
23
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d5b910d10e51b8c2eff9236289cb7b884e550b512c0c0eef474d0bcc3eb7e91
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70f97722bc6bf46d878d9ef87014f52650a33fb911c1061c719f255cb5db709f
3
  size 5304
vocab.json CHANGED
The diff for this file is too large to render. See raw diff