Add pipeline tag, link to paper

#1
by nielsr HF Staff - opened
Files changed (1) hide show
  1. README.md +353 -4
README.md CHANGED
@@ -1,12 +1,361 @@
1
  ---
 
2
  library_name: transformers
3
  license: mit
 
 
4
  languages:
5
  - en
6
  - zh
7
- base_model: DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B
8
- tags:
9
- - safe
10
  ---
11
 
12
- # RealSafe-R1-1.5B
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ base_model: DeepSeek/DeepSeek-R1-Distill-Qwen-1.5B
3
  library_name: transformers
4
  license: mit
5
+ tags:
6
+ - safe
7
  languages:
8
  - en
9
  - zh
10
+ pipeline_tag: text-generation
 
 
11
  ---
12
 
13
+ # RealSafe-R1-1.5B
14
+
15
+ This repository contains the model card for RealSafe-R1-1.5B, which is described in the paper [arXiv:2504.10081](https://huggingface.co/papers/2504.10081).
16
+
17
+ # File information
18
+
19
+ The contents of the files in this repository are reproduced below:
20
+
21
+ Filename: tokenizer.json
22
+ Content: "Content of the file is larger than 50 KB, too long to display."
23
+
24
+ Filename: all_results.json
25
+ Content: {
26
+ "epoch": 0.9978021978021978,
27
+ "total_flos": 7339342036992.0,
28
+ "train_loss": 1.2485807309591823,
29
+ "train_runtime": 995.4655,
30
+ "train_samples_per_second": 14.624,
31
+ "train_steps_per_second": 0.228
32
+ }
33
+
34
+ Filename: generation_config.json
35
+ Content: {
36
+ "_from_model_config": true,
37
+ "bos_token_id": 151646,
38
+ "do_sample": true,
39
+ "eos_token_id": 151643,
40
+ "temperature": 0.6,
41
+ "top_p": 0.95,
42
+ "transformers_version": "4.45.2"
43
+ }
44
+
45
+ Filename: train_results.json
46
+ Content: {
47
+ "epoch": 0.9978021978021978,
48
+ "total_flos": 7339342036992.0,
49
+ "train_loss": 1.2485807309591823,
50
+ "train_runtime": 995.4655,
51
+ "train_samples_per_second": 14.624,
52
+ "train_steps_per_second": 0.228
53
+ }
54
+
55
+ Filename: special_tokens_map.json
56
+ Content: {
57
+ "bos_token": {
58
+ "content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
59
+ "lstrip": false,
60
+ "normalized": false,
61
+ "rstrip": false,
62
+ "single_word": false
63
+ },
64
+ "eos_token": {
65
+ "content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
66
+ "lstrip": false,
67
+ "normalized": false,
68
+ "rstrip": false,
69
+ "single_word": false
70
+ },
71
+ "pad_token": {
72
+ "content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
73
+ "lstrip": false,
74
+ "normalized": false,
75
+ "rstrip": false,
76
+ "single_word": false
77
+ }
78
+ }
79
+
80
+ Filename: trainer_state.json
81
+ Content: {
82
+ "best_metric": null,
83
+ "best_model_checkpoint": null,
84
+ "epoch": 0.9978021978021978,
85
+ "eval_steps": 500,
86
+ "global_step": 227,
87
+ "is_hyper_param_search": false,
88
+ "is_local_process_zero": true,
89
+ "is_world_process_zero": true,
90
+ "log_history": [
91
+ {
92
+ "epoch": 0.9978021978021978,
93
+ "step": 227,
94
+ "total_flos": 7339342036992.0,
95
+ "train_loss": 1.2485807309591823,
96
+ "train_runtime": 995.4655,
97
+ "train_samples_per_second": 14.624,
98
+ "train_steps_per_second": 0.228
99
+ }
100
+ ],
101
+ "logging_steps": 500,
102
+ "max_steps": 227,
103
+ "num_input_tokens_seen": 0,
104
+ "num_train_epochs": 1,
105
+ "save_steps": 500,
106
+ "stateful_callbacks": {
107
+ "TrainerControl": {
108
+ "args": {
109
+ "should_epoch_stop": false,
110
+ "should_evaluate": false,
111
+ "should_log": false,
112
+ "should_save": true,
113
+ "should_training_stop": true
114
+ },
115
+ "attributes": {}
116
+ }
117
+ },
118
+ "total_flos": 7339342036992.0,
119
+ "train_batch_size": 2,
120
+ "trial_name": null,
121
+ "trial_params": null
122
+ }
123
+
124
+ Filename: tokenizer_config.json
125
+ Content: {
126
+ "add_bos_token": true,
127
+ "add_eos_token": false,
128
+ "add_prefix_space": null,
129
+ "added_tokens_decoder": {
130
+ "151643": {
131
+ "content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
132
+ "lstrip": false,
133
+ "normalized": false,
134
+ "rstrip": false,
135
+ "single_word": false,
136
+ "special": true
137
+ },
138
+ "151644": {
139
+ "content": "<\uff5cUser\uff5c>",
140
+ "lstrip": false,
141
+ "normalized": false,
142
+ "rstrip": false,
143
+ "single_word": false,
144
+ "special": false
145
+ },
146
+ "151645": {
147
+ "content": "<\uff5cAssistant\uff5c>",
148
+ "lstrip": false,
149
+ "normalized": false,
150
+ "rstrip": false,
151
+ "single_word": false,
152
+ "special": false
153
+ },
154
+ "151646": {
155
+ "content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
156
+ "lstrip": false,
157
+ "normalized": false,
158
+ "rstrip": false,
159
+ "single_word": false,
160
+ "special": true
161
+ },
162
+ "151647": {
163
+ "content": "<|EOT|>",
164
+ "lstrip": false,
165
+ "normalized": false,
166
+ "rstrip": false,
167
+ "single_word": false,
168
+ "special": false
169
+ },
170
+ "151648": {
171
+ "content": "<think>",
172
+ "lstrip": false,
173
+ "normalized": false,
174
+ "rstrip": false,
175
+ "single_word": false,
176
+ "special": false
177
+ },
178
+ "151649": {
179
+ "content": "</think>",
180
+ "lstrip": false,
181
+ "normalized": false,
182
+ "rstrip": false,
183
+ "single_word": false,
184
+ "special": false
185
+ },
186
+ "151650": {
187
+ "content": "<|quad_start|>",
188
+ "lstrip": false,
189
+ "normalized": false,
190
+ "rstrip": false,
191
+ "single_word": false,
192
+ "special": true
193
+ },
194
+ "151651": {
195
+ "content": "<|quad_end|>",
196
+ "lstrip": false,
197
+ "normalized": false,
198
+ "rstrip": false,
199
+ "single_word": false,
200
+ "special": true
201
+ },
202
+ "151652": {
203
+ "content": "<|vision_start|>",
204
+ "lstrip": false,
205
+ "normalized": false,
206
+ "rstrip": false,
207
+ "single_word": false,
208
+ "special": true
209
+ },
210
+ "151653": {
211
+ "content": "<|vision_end|>",
212
+ "lstrip": false,
213
+ "normalized": false,
214
+ "rstrip": false,
215
+ "single_word": false,
216
+ "special": true
217
+ },
218
+ "151654": {
219
+ "content": "<|vision_pad|>",
220
+ "lstrip": false,
221
+ "normalized": false,
222
+ "rstrip": false,
223
+ "single_word": false,
224
+ "special": true
225
+ },
226
+ "151655": {
227
+ "content": "<|image_pad|>",
228
+ "lstrip": false,
229
+ "normalized": false,
230
+ "rstrip": false,
231
+ "single_word": false,
232
+ "special": true
233
+ },
234
+ "151656": {
235
+ "content": "<|video_pad|>",
236
+ "lstrip": false,
237
+ "normalized": false,
238
+ "rstrip": false,
239
+ "single_word": false,
240
+ "special": true
241
+ },
242
+ "151657": {
243
+ "content": "<tool_call>",
244
+ "lstrip": false,
245
+ "normalized": false,
246
+ "rstrip": false,
247
+ "single_word": false,
248
+ "special": false
249
+ },
250
+ "151658": {
251
+ "content": "</tool_call>",
252
+ "lstrip": false,
253
+ "normalized": false,
254
+ "rstrip": false,
255
+ "single_word": false,
256
+ "special": false
257
+ },
258
+ "151659": {
259
+ "content": "<|fim_prefix|>",
260
+ "lstrip": false,
261
+ "normalized": false,
262
+ "rstrip": false,
263
+ "single_word": false,
264
+ "special": false
265
+ },
266
+ "151660": {
267
+ "content": "<|fim_middle|>",
268
+ "lstrip": false,
269
+ "normalized": false,
270
+ "rstrip": false,
271
+ "single_word": false,
272
+ "special": false
273
+ },
274
+ "151661": {
275
+ "content": "<|fim_suffix|>",
276
+ "lstrip": false,
277
+ "normalized": false,
278
+ "rstrip": false,
279
+ "single_word": false,
280
+ "special": false
281
+ },
282
+ "151662": {
283
+ "content": "<|fim_pad|>",
284
+ "lstrip": false,
285
+ "normalized": false,
286
+ "rstrip": false,
287
+ "single_word": false,
288
+ "special": false
289
+ },
290
+ "151663": {
291
+ "content": "<|repo_name|>",
292
+ "lstrip": false,
293
+ "normalized": false,
294
+ "rstrip": false,
295
+ "single_word": false,
296
+ "special": false
297
+ },
298
+ "151664": {
299
+ "content": "<|file_sep|>",
300
+ "lstrip": false,
301
+ "normalized": false,
302
+ "rstrip": false,
303
+ "single_word": false,
304
+ "special": false
305
+ }
306
+ },
307
+ "bos_token": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
308
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<\uff5cUser\uff5c>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<\uff5cAssistant\uff5c><\uff5ctool\u2581calls\u2581begin\uff5c><\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\
309
+ ' + '```json' + '\
310
+ ' + tool['function']['arguments'] + '\
311
+ ' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{%- set ns.is_first = true -%}{%- else %}{{'\
312
+ ' + '<\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\
313
+ ' + '```json' + '\
314
+ ' + tool['function']['arguments'] + '\
315
+ ' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{{'<\uff5ctool\u2581calls\u2581end\uff5c><\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>' + message['content'] + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<\uff5cAssistant\uff5c>' + content + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<\uff5ctool\u2581outputs\u2581begin\uff5c><\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\
316
+ <\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<\uff5cAssistant\uff5c><think>\
317
+ '}}{% endif %}",
318
+ "clean_up_tokenization_spaces": false,
319
+ "eos_token": "<\uff5cend\u2581of\u2581sentence\uff5c>",
320
+ "legacy": true,
321
+ "model_max_length": 4096,
322
+ "pad_token": "<\uff5cend\u2581of\u2581sentence\uff5c>",
323
+ "padding_side": "right",
324
+ "sp_model_kwargs": {},
325
+ "split_special_tokens": false,
326
+ "tokenizer_class": "LlamaTokenizer",
327
+ "unk_token": null,
328
+ "use_default_system_prompt": false
329
+ }
330
+
331
+ Filename: config.json
332
+ Content: {
333
+ "_name_or_path": "/nfs2/models/DeepSeek-R1-Distill-Qwen-1.5B/",
334
+ "architectures": [
335
+ "Qwen2ForCausalLM"
336
+ ],
337
+ "attention_dropout": 0.0,
338
+ "bos_token_id": 151646,
339
+ "eos_token_id": 151643,
340
+ "hidden_act": "silu",
341
+ "hidden_size": 1536,
342
+ "initializer_range": 0.02,
343
+ "intermediate_size": 8960,
344
+ "max_position_embeddings": 131072,
345
+ "max_window_layers": 21,
346
+ "model_type": "qwen2",
347
+ "num_attention_heads": 12,
348
+ "num_hidden_layers": 28,
349
+ "num_key_value_heads": 2,
350
+ "rms_norm_eps": 1e-06,
351
+ "rope_scaling": null,
352
+ "rope_theta": 10000,
353
+ "sliding_window": null,
354
+ "tie_word_embeddings": false,
355
+ "torch_dtype": "bfloat16",
356
+ "transformers_version": "4.45.2",
357
+ "use_cache": false,
358
+ "use_mrope": false,
359
+ "use_sliding_window": false,
360
+ "vocab_size": 151936
361
+ }