|
|
--- |
|
|
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B |
|
|
library_name: transformers |
|
|
license: mit |
|
|
tags: |
|
|
- safe |
|
|
language: |
|
|
- en |
|
|
- zh |
|
|
pipeline_tag: text-generation |
|
|
--- |
|
|
|
|
|
# RealSafe-R1-1.5B |
|
|
|
|
|
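This repository contains the model card for RealSafe-R1-1.5B, introduced in the paper [RealSafe-R1: Safety-Aligned DeepSeek-R1 without Compromising Reasoning Capability](https://huggingface.co/papers/2504.10081). |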
|
|
|
|
|
# File information |
|
|
|
|
|
This repository contains the following files: |
|
|
|
|
|
Filename: tokenizer.json |
|
|
Content: "Content of the file is larger than 50 KB, too long to display." |
|
|
|
|
|
Filename: all_results.json |
|
|
Content: { |
|
|
"epoch": 0.9978021978021978, |
|
|
"total_flos": 7339342036992.0, |
|
|
"train_loss": 1.2485807309591823, |
|
|
"train_runtime": 995.4655, |
|
|
"train_samples_per_second": 14.624, |
|
|
"train_steps_per_second": 0.228 |
|
|
} |
|
|
|
|
|
Filename: generation_config.json |
|
|
Content: { |
|
|
"_from_model_config": true, |
|
|
"bos_token_id": 151646, |
|
|
"do_sample": true, |
|
|
"eos_token_id": 151643, |
|
|
"temperature": 0.6, |
|
|
"top_p": 0.95, |
|
|
"transformers_version": "4.45.2" |
|
|
} |
|
|
|
|
|
Filename: train_results.json |
|
|
Content: { |
|
|
"epoch": 0.9978021978021978, |
|
|
"total_flos": 7339342036992.0, |
|
|
"train_loss": 1.2485807309591823, |
|
|
"train_runtime": 995.4655, |
|
|
"train_samples_per_second": 14.624, |
|
|
"train_steps_per_second": 0.228 |
|
|
} |
|
|
|
|
|
Filename: special_tokens_map.json |
|
|
Content: { |
|
|
"bos_token": { |
|
|
"content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false |
|
|
}, |
|
|
"eos_token": { |
|
|
"content": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false |
|
|
}, |
|
|
"pad_token": { |
|
|
"content": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false |
|
|
} |
|
|
} |
|
|
|
|
|
Filename: trainer_state.json |
|
|
Content: { |
|
|
"best_metric": null, |
|
|
"best_model_checkpoint": null, |
|
|
"epoch": 0.9978021978021978, |
|
|
"eval_steps": 500, |
|
|
"global_step": 227, |
|
|
"is_hyper_param_search": false, |
|
|
"is_local_process_zero": true, |
|
|
"is_world_process_zero": true, |
|
|
"log_history": [ |
|
|
{ |
|
|
"epoch": 0.9978021978021978, |
|
|
"step": 227, |
|
|
"total_flos": 7339342036992.0, |
|
|
"train_loss": 1.2485807309591823, |
|
|
"train_runtime": 995.4655, |
|
|
"train_samples_per_second": 14.624, |
|
|
"train_steps_per_second": 0.228 |
|
|
} |
|
|
], |
|
|
"logging_steps": 500, |
|
|
"max_steps": 227, |
|
|
"num_input_tokens_seen": 0, |
|
|
"num_train_epochs": 1, |
|
|
"save_steps": 500, |
|
|
"stateful_callbacks": { |
|
|
"TrainerControl": { |
|
|
"args": { |
|
|
"should_epoch_stop": false, |
|
|
"should_evaluate": false, |
|
|
"should_log": false, |
|
|
"should_save": true, |
|
|
"should_training_stop": true |
|
|
}, |
|
|
"attributes": {} |
|
|
} |
|
|
}, |
|
|
"total_flos": 7339342036992.0, |
|
|
"train_batch_size": 2, |
|
|
"trial_name": null, |
|
|
"trial_params": null |
|
|
} |
|
|
|
|
|
Filename: tokenizer_config.json |
|
|
Content: { |
|
|
"add_bos_token": true, |
|
|
"add_eos_token": false, |
|
|
"add_prefix_space": null, |
|
|
"added_tokens_decoder": { |
|
|
"151643": { |
|
|
"content": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": true |
|
|
}, |
|
|
"151644": { |
|
|
"content": "<\uff5cUser\uff5c>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": false |
|
|
}, |
|
|
"151645": { |
|
|
"content": "<\uff5cAssistant\uff5c>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": false |
|
|
}, |
|
|
"151646": { |
|
|
"content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": true |
|
|
}, |
|
|
"151647": { |
|
|
"content": "<|EOT|>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": false |
|
|
}, |
|
|
"151648": { |
|
|
"content": "<think>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": false |
|
|
}, |
|
|
"151649": { |
|
|
"content": "</think>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": false |
|
|
}, |
|
|
"151650": { |
|
|
"content": "<|quad_start|>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": true |
|
|
}, |
|
|
"151651": { |
|
|
"content": "<|quad_end|>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": true |
|
|
}, |
|
|
"151652": { |
|
|
"content": "<|vision_start|>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": true |
|
|
}, |
|
|
"151653": { |
|
|
"content": "<|vision_end|>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": true |
|
|
}, |
|
|
"151654": { |
|
|
"content": "<|vision_pad|>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": true |
|
|
}, |
|
|
"151655": { |
|
|
"content": "<|image_pad|>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": true |
|
|
}, |
|
|
"151656": { |
|
|
"content": "<|video_pad|>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": true |
|
|
}, |
|
|
"151657": { |
|
|
"content": "<tool_call>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": false |
|
|
}, |
|
|
"151658": { |
|
|
"content": "</tool_call>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": false |
|
|
}, |
|
|
"151659": { |
|
|
"content": "<|fim_prefix|>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": false |
|
|
}, |
|
|
"151660": { |
|
|
"content": "<|fim_middle|>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": false |
|
|
}, |
|
|
"151661": { |
|
|
"content": "<|fim_suffix|>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": false |
|
|
}, |
|
|
"151662": { |
|
|
"content": "<|fim_pad|>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": false |
|
|
}, |
|
|
"151663": { |
|
|
"content": "<|repo_name|>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": false |
|
|
}, |
|
|
"151664": { |
|
|
"content": "<|file_sep|>", |
|
|
"lstrip": false, |
|
|
"normalized": false, |
|
|
"rstrip": false, |
|
|
"single_word": false, |
|
|
"special": false |
|
|
} |
|
|
}, |
|
|
"bos_token": "<\uff5cbegin\u2581of\u2581sentence\uff5c>", |
|
|
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<\uff5cUser\uff5c>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<\uff5cAssistant\uff5c><\uff5ctool\u2581calls\u2581begin\uff5c><\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{{'<\uff5ctool\u2581calls\u2581end\uff5c><\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>' + message['content'] + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<\uff5cAssistant\uff5c>' + content + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<\uff5ctool\u2581outputs\u2581begin\uff5c><\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<\uff5cAssistant\uff5c><think>\\n'}}{% endif %}", |
|
|
"clean_up_tokenization_spaces": false, |
|
|
"eos_token": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
|
|
"legacy": true, |
|
|
"model_max_length": 4096, |
|
|
"pad_token": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
|
|
"padding_side": "right", |
|
|
"sp_model_kwargs": {}, |
|
|
"split_special_tokens": false, |
|
|
"tokenizer_class": "LlamaTokenizer", |
|
|
"unk_token": null, |
|
|
"use_default_system_prompt": false |
|
|
} |
|
|
|
|
|
Filename: config.json |
|
|
Content: { |
|
|
"_name_or_path": "/nfs2/models/DeepSeek-R1-Distill-Qwen-1.5B/", |
|
|
"architectures": [ |
|
|
"Qwen2ForCausalLM" |
|
|
], |
|
|
"attention_dropout": 0.0, |
|
|
"bos_token_id": 151646, |
|
|
"eos_token_id": 151643, |
|
|
"hidden_act": "silu", |
|
|
"hidden_size": 1536, |
|
|
"initializer_range": 0.02, |
|
|
"intermediate_size": 8960, |
|
|
"max_position_embeddings": 131072, |
|
|
"max_window_layers": 21, |
|
|
"model_type": "qwen2", |
|
|
"num_attention_heads": 12, |
|
|
"num_hidden_layers": 28, |
|
|
"num_key_value_heads": 2, |
|
|
"rms_norm_eps": 1e-06, |
|
|
"rope_scaling": null, |
|
|
"rope_theta": 10000, |
|
|
"sliding_window": null, |
|
|
"tie_word_embeddings": false, |
|
|
"torch_dtype": "bfloat16", |
|
|
"transformers_version": "4.45.2", |
|
|
"use_cache": false, |
|
|
"use_mrope": false, |
|
|
"use_sliding_window": false, |
|
|
"vocab_size": 151936 |
|
|
} |