RealSafe-R1-1.5B / README.md
nielsr's picture
nielsr HF Staff
Add pipeline tag, link to paper
0ce42d3 verified
|
raw
history blame
10.7 kB
---
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
library_name: transformers
license: mit
tags:
- safe
language:
- en
- zh
pipeline_tag: text-generation
---
# RealSafe-R1-1.5B
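This repository contains the model card for RealSafe-R1-1.5B, based on the paper [RealSafe-R1: Safety-Aligned DeepSeek-R1 without Compromising Reasoning Capability](https://huggingface.co/papers/2504.10081).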
# File information
The repository contains the following file information:
Filename: tokenizer.json
Content: "Content of the file is larger than 50 KB, too long to display."
Filename: all_results.json
Content: {
"epoch": 0.9978021978021978,
"total_flos": 7339342036992.0,
"train_loss": 1.2485807309591823,
"train_runtime": 995.4655,
"train_samples_per_second": 14.624,
"train_steps_per_second": 0.228
}
Filename: generation_config.json
Content: {
"_from_model_config": true,
"bos_token_id": 151646,
"do_sample": true,
"eos_token_id": 151643,
"temperature": 0.6,
"top_p": 0.95,
"transformers_version": "4.45.2"
}
Filename: train_results.json
Content: {
"epoch": 0.9978021978021978,
"total_flos": 7339342036992.0,
"train_loss": 1.2485807309591823,
"train_runtime": 995.4655,
"train_samples_per_second": 14.624,
"train_steps_per_second": 0.228
}
Filename: special_tokens_map.json
Content: {
"bos_token": {
"content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}
Filename: trainer_state.json
Content: {
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9978021978021978,
"eval_steps": 500,
"global_step": 227,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9978021978021978,
"step": 227,
"total_flos": 7339342036992.0,
"train_loss": 1.2485807309591823,
"train_runtime": 995.4655,
"train_samples_per_second": 14.624,
"train_steps_per_second": 0.228
}
],
"logging_steps": 500,
"max_steps": 227,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7339342036992.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
Filename: tokenizer_config.json
Content: {
"add_bos_token": true,
"add_eos_token": false,
"add_prefix_space": null,
"added_tokens_decoder": {
"151643": {
"content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151644": {
"content": "<\uff5cUser\uff5c>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151645": {
"content": "<\uff5cAssistant\uff5c>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151646": {
"content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151647": {
"content": "<|EOT|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151648": {
"content": "<think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151649": {
"content": "</think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151650": {
"content": "<|quad_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151651": {
"content": "<|quad_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151652": {
"content": "<|vision_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151653": {
"content": "<|vision_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151654": {
"content": "<|vision_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151655": {
"content": "<|image_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151656": {
"content": "<|video_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151657": {
"content": "<tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151658": {
"content": "</tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151659": {
"content": "<|fim_prefix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151660": {
"content": "<|fim_middle|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151661": {
"content": "<|fim_suffix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151662": {
"content": "<|fim_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151663": {
"content": "<|repo_name|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151664": {
"content": "<|file_sep|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
}
},
"bos_token": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<\uff5cUser\uff5c>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<\uff5cAssistant\uff5c><\uff5ctool\u2581calls\u2581begin\uff5c><\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{{'<\uff5ctool\u2581calls\u2581end\uff5c><\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>' + message['content'] + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<\uff5cAssistant\uff5c>' + content + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<\uff5ctool\u2581outputs\u2581begin\uff5c><\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<\uff5cAssistant\uff5c><think>\\n'}}{% endif %}",
"clean_up_tokenization_spaces": false,
"eos_token": "<\uff5cend\u2581of\u2581sentence\uff5c>",
"legacy": true,
"model_max_length": 4096,
"pad_token": "<\uff5cend\u2581of\u2581sentence\uff5c>",
"padding_side": "right",
"sp_model_kwargs": {},
"split_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"unk_token": null,
"use_default_system_prompt": false
}
Filename: config.json
Content: {
"_name_or_path": "/nfs2/models/DeepSeek-R1-Distill-Qwen-1.5B/",
"architectures": [
"Qwen2ForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 151646,
"eos_token_id": 151643,
"hidden_act": "silu",
"hidden_size": 1536,
"initializer_range": 0.02,
"intermediate_size": 8960,
"max_position_embeddings": 131072,
"max_window_layers": 21,
"model_type": "qwen2",
"num_attention_heads": 12,
"num_hidden_layers": 28,
"num_key_value_heads": 2,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 10000,
"sliding_window": null,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.45.2",
"use_cache": false,
"use_mrope": false,
"use_sliding_window": false,
"vocab_size": 151936
}