Upload folder using huggingface_hub
Browse files- .gitattributes +5 -28
- README.md +312 -0
- added_tokens.json +28 -0
- args.json +112 -0
- chat_demo/README.md +169 -0
- chat_demo/chat_demo.py +536 -0
- chat_demo/coser_scenarios.json +0 -0
- chat_template.jinja +85 -0
- config.json +35 -0
- figure2github.png +3 -0
- generation_config.json +13 -0
- merges.txt +0 -0
- model-00001-of-00014.safetensors +3 -0
- model-00002-of-00014.safetensors +3 -0
- model-00003-of-00014.safetensors +3 -0
- model-00004-of-00014.safetensors +3 -0
- model-00005-of-00014.safetensors +3 -0
- model-00006-of-00014.safetensors +3 -0
- model-00007-of-00014.safetensors +3 -0
- model-00008-of-00014.safetensors +3 -0
- model-00009-of-00014.safetensors +3 -0
- model-00010-of-00014.safetensors +3 -0
- model-00011-of-00014.safetensors +3 -0
- model-00012-of-00014.safetensors +3 -0
- model-00013-of-00014.safetensors +3 -0
- model-00014-of-00014.safetensors +3 -0
- model.safetensors.index.json +714 -0
- special_tokens_map.json +31 -0
- tokenizer.json +3 -0
- tokenizer_config.json +240 -0
- vocab.json +0 -0
.gitattributes
CHANGED
|
@@ -1,35 +1,12 @@
|
|
| 1 |
-
*.
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
*.pkl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 22 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 2 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
| 5 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
| 6 |
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 8 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 9 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
figure2github.png filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language:
|
| 3 |
+
- zh
|
| 4 |
+
- en
|
| 5 |
+
license: apache-2.0
|
| 6 |
+
library_name: transformers
|
| 7 |
+
pipeline_tag: text-generation
|
| 8 |
+
tags:
|
| 9 |
+
- roleplay
|
| 10 |
+
- dialogue
|
| 11 |
+
- multi-turn
|
| 12 |
+
- qwen
|
| 13 |
+
- sft
|
| 14 |
+
- chat
|
| 15 |
+
base_model: Qwen/Qwen3-32B
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
# HER-SFT-Qwen3-32B
|
| 19 |
+
|
| 20 |
+
<p align="center">
|
| 21 |
+
<a href="https://arxiv.org/abs/xxxx.xxxxx"><img src="https://img.shields.io/badge/Paper-arXiv-red?logo=arxiv" alt="Paper"></a>
|
| 22 |
+
<a href="https://huggingface.co/datasets/ADOHAHA123/HER-Dataset"><img src="https://img.shields.io/badge/🤗%20Dataset-HER--Dataset-yellow" alt="Dataset"></a>
|
| 23 |
+
<a href="https://huggingface.co/ADOHAHA123/HER-Qwen3-32B"><img src="https://img.shields.io/badge/🤗%20Model-HER--RL-blue" alt="HER-RL"></a>
|
| 24 |
+
<a href="https://huggingface.co/ADOHAHA123/HER-SFT-Qwen3-32B"><img src="https://img.shields.io/badge/🤗%20Model-HER--SFT-green" alt="HER-SFT"></a>
|
| 25 |
+
<a href="https://github.com/your-username/HER"><img src="https://img.shields.io/badge/GitHub-Code-black?logo=github" alt="GitHub"></a>
|
| 26 |
+
</p>
|
| 27 |
+
|
| 28 |
+
HER-SFT (Human Emulation Reasoning - Supervised Fine-Tuning) is a role-playing language model built upon the Qwen3-32B base model. This is the supervised fine-tuned version of HER, trained on reasoning-augmented role-playing data constructed through reverse-engineering synthesis.
|
| 29 |
+
|
| 30 |
+
HER-SFT serves as the foundation for HER-RL (the reinforcement learning enhanced version) and demonstrates strong role-playing capabilities through **Dual-layer Thinking**:
|
| 31 |
+
- **System Thinking** (third-person): LLM's meta-level planning on how to portray the character
|
| 32 |
+
- **Role Thinking** (first-person): Character's inner thoughts and cognitive processes
|
| 33 |
+
|
| 34 |
+
This model achieves competitive performance on role-playing benchmarks, with HER-RL further improving upon it through preference-aligned reinforcement learning.
|
| 35 |
+
|
| 36 |
+
## Model Information
|
| 37 |
+
|
| 38 |
+
- **Base Model**: Qwen3-32B
|
| 39 |
+
- **Training Method**: Supervised Fine-Tuning (SFT)
|
| 40 |
+
- **Training Data**: Reasoning-augmented role-playing dialogues with dual-layer thinking
|
| 41 |
+
- **Model Size**: 32B parameters
|
| 42 |
+
- **Enhanced Version**: [HER-RL](../her-rl-qwen-32b) (RL-enhanced version available)
|
| 43 |
+
|
| 44 |
+
## Key Features
|
| 45 |
+
|
| 46 |
+
Our model generates responses with rich, interleaved structure:
|
| 47 |
+
|
| 48 |
+
- `<system_thinking>`: Third-person analysis of how to portray the role
|
| 49 |
+
- `<role_thinking>`: Character's inner thoughts (invisible to others)
|
| 50 |
+
- `<role_action>`: Character's physical actions and expressions
|
| 51 |
+
- Speech: Natural dialogue text
|
| 52 |
+
|
| 53 |
+
This hierarchical approach enables more nuanced and authentic character portrayal.
|
| 54 |
+
|
| 55 |
+
## How to Use
|
| 56 |
+
|
| 57 |
+
### Quick Start: Interactive Chat Demo
|
| 58 |
+
|
| 59 |
+
The easiest way to try the model is using our interactive chat demo:
|
| 60 |
+
|
| 61 |
+
```bash
|
| 62 |
+
cd chat_demo
|
| 63 |
+
python chat_demo.py
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
This will start an interactive session where you can:
|
| 67 |
+
1. Choose a scenario from classic literature (Pride and Prejudice, The Great Gatsby, etc.)
|
| 68 |
+
2. Select which character the AI should play
|
| 69 |
+
3. Select which character you want to play
|
| 70 |
+
4. Start chatting with the AI character!
|
| 71 |
+
|
| 72 |
+
**Demo Options:**
|
| 73 |
+
|
| 74 |
+
```bash
|
| 75 |
+
# Show the model's reasoning process (system thinking)
|
| 76 |
+
python chat_demo.py --show-think
|
| 77 |
+
|
| 78 |
+
# Show character's inner thoughts (role thinking)
|
| 79 |
+
python chat_demo.py --show-rolethink
|
| 80 |
+
|
| 81 |
+
# Directly specify scenario and character
|
| 82 |
+
python chat_demo.py --scenario 0 --character 1
|
| 83 |
+
|
| 84 |
+
# Use simple built-in scenarios
|
| 85 |
+
python chat_demo.py --simple
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
**Chat Commands:**
|
| 89 |
+
- `quit` / `exit` / `q` - Exit the chat
|
| 90 |
+
- `clear` - Clear conversation history
|
| 91 |
+
- `history` - View conversation history
|
| 92 |
+
- `prompt` - View the full prompt
|
| 93 |
+
|
| 94 |
+
See `chat_demo/README.md` for detailed instructions.
|
| 95 |
+
|
| 96 |
+
### Programmatic Usage
|
| 97 |
+
|
| 98 |
+
```python
|
| 99 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 100 |
+
|
| 101 |
+
model_name = "your-username/her-sft-qwen-32b"
|
| 102 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 103 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 104 |
+
model_name,
|
| 105 |
+
torch_dtype="auto",
|
| 106 |
+
device_map="auto"
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
+
# Example: Role-playing as Elizabeth Bennet from Pride and Prejudice
|
| 110 |
+
system_prompt = """You are Elizabeth Bennet from Pride and Prejudice.
|
| 111 |
+
|
| 112 |
+
===Elizabeth Bennet's Profile===
|
| 113 |
+
The protagonist, intelligent and strong-willed. Quick-witted with a playful sense of humor. Values honesty and integrity. Maintains composure under pressure while harboring deep emotions beneath the surface.
|
| 114 |
+
|
| 115 |
+
Background: Second of five daughters in the Bennet family. Known for her intelligence, independence, and refusal to conform to societal expectations.
|
| 116 |
+
|
| 117 |
+
Personality: Quick-witted with a playful sense of humor. Values honesty and integrity. Maintains composure under pressure.
|
| 118 |
+
|
| 119 |
+
===Current Scenario===
|
| 120 |
+
The scene is set in Mr. Bennet's private study. Elizabeth has been summoned unexpectedly after Lady Catherine's confrontational visit, where she refused to promise not to marry Mr. Darcy. The tension is palpable as Mr. Bennet holds a mysterious letter.
|
| 121 |
+
|
| 122 |
+
===Output Format===
|
| 123 |
+
Your output should follow this structure:
|
| 124 |
+
1. System Thinking: Wrapped in <system_thinking></system_thinking> tags - third-person analysis of how to portray the role
|
| 125 |
+
2. Role-play Response: Including <role_thinking> for inner thoughts, <role_action> for actions, and plain text for speech"""
|
| 126 |
+
|
| 127 |
+
user_input = "Well, my dear Lizzy, I trust you are not too greatly troubled by recent events?"
|
| 128 |
+
|
| 129 |
+
messages = [
|
| 130 |
+
{"role": "system", "content": system_prompt},
|
| 131 |
+
{"role": "user", "content": user_input}
|
| 132 |
+
]
|
| 133 |
+
|
| 134 |
+
text = tokenizer.apply_chat_template(
|
| 135 |
+
messages,
|
| 136 |
+
tokenize=False,
|
| 137 |
+
add_generation_prompt=True
|
| 138 |
+
)
|
| 139 |
+
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
|
| 140 |
+
|
| 141 |
+
generated_ids = model.generate(
|
| 142 |
+
**model_inputs,
|
| 143 |
+
max_new_tokens=512,
|
| 144 |
+
temperature=0.8,
|
| 145 |
+
top_p=0.9
|
| 146 |
+
)
|
| 147 |
+
generated_ids = [
|
| 148 |
+
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
|
| 149 |
+
]
|
| 150 |
+
|
| 151 |
+
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 152 |
+
print(response)
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
## Framework Overview
|
| 156 |
+
|
| 157 |
+
<p align="center">
|
| 158 |
+
<img src="figure2github.png" alt="HER Framework" width="100%">
|
| 159 |
+
</p>
|
| 160 |
+
|
| 161 |
+
<p align="center">
|
| 162 |
+
<em>HER Framework: Dual-layer Thinking for Cognitive-Level Persona Simulation</em>
|
| 163 |
+
</p>
|
| 164 |
+
|
| 165 |
+
## Training Methodology
|
| 166 |
+
|
| 167 |
+
HER-SFT employs a comprehensive training pipeline:
|
| 168 |
+
|
| 169 |
+
1. **Dual-layer Thinking**: Separates hidden third-person system thinking (how the LLM plans to portray the character) from first-person role thinking (the character's actual inner thoughts). This dual-layer structure enables more authentic and cognitively grounded character simulation.
|
| 170 |
+
|
| 171 |
+
2. **Reverse Engineering Data Synthesis**: We curate reasoning-augmented role-playing data through a three-stage reverse synthesis pipeline:
|
| 172 |
+
- Stage 1: Role thinking enhancement - Enriching characters' psychological activities
|
| 173 |
+
- Stage 2: Pattern diversification - Creating diverse response patterns
|
| 174 |
+
- Stage 3: System thinking generation - Adding meta-level planning analysis
|
| 175 |
+
|
| 176 |
+
3. **Multi-layered Response Generation**: Models learn to generate system thinking (planning), role thinking (inner thoughts), role actions, and speech in an interleaved manner, avoiding monotonous patterns.
|
| 177 |
+
|
| 178 |
+
## Performance
|
| 179 |
+
|
| 180 |
+
### Main Leaderboard Results
|
| 181 |
+
|
| 182 |
+
| Rank | Model | CoSER Avg | CoSER SC | CoSER AN | CoSER CF | CoSER SQ | MiniMax Avg | MiniMax Worlds (50%) | MiniMax Stories (25%) | MiniMax Pref (25%) | 95% CI |
|
| 183 |
+
|------|-------|-----------|----------|----------|----------|----------|-------------|----------------------|----------------------|--------------------|---------|
|
| 184 |
+
| 1 | Claude-4.5-Opus | **62.43** | 63.74 | **64.28** | 58.45 | 63.24 | 76.62 | 67.23 | 82.10 | 89.90 | [75.5, 77.7] |
|
| 185 |
+
| 2 | Gemini-3-Pro | 61.80 | **65.95** | 60.42 | **58.34** | 62.49 | 75.60 | 62.72 | 83.87 | 93.08 | [74.5, 76.7] |
|
| 186 |
+
| 3 | GPT-5.1 | 61.10 | 64.95 | 53.99 | 60.13 | 65.35 | 80.63 | 76.62 | 72.21 | 97.05 | [79.6, 81.6] |
|
| 187 |
+
| 4 | Gemini-2.5-Pro | 60.68 | 61.05 | 60.80 | 57.48 | 63.40 | 68.23 | 52.36 | 82.11 | 86.08 | [67.1, 69.3] |
|
| 188 |
+
| 5 | DeepSeek-v3.2 | 58.68 | 55.85 | 57.07 | 57.44 | 64.35 | 60.27 | 45.81 | 66.64 | 82.83 | [59.2, 61.4] |
|
| 189 |
+
| 6 | MiniMax-M2-RP | 57.30 | 60.03 | 50.11 | 49.30 | **69.77** | **84.65** | **80.55** | 79.97 | **97.51** | [83.6, 85.7] |
|
| 190 |
+
| 7 | DeepSeek-v3.1 | 53.50 | 50.15 | 53.18 | 53.93 | 56.72 | 64.22 | 51.11 | 66.45 | 88.21 | [62.9, 65.5] |
|
| 191 |
+
| 8 | HER-RL | 53.12 | 54.33 | 47.26 | 52.78 | 58.12 | 65.73 | 59.13 | 57.74 | 86.90 | [63.0, 68.4] |
|
| 192 |
+
| **9** | **HER-SFT (this model)** | **50.92** | **50.52** | **45.99** | **49.78** | **57.37** | **58.44** | **47.29** | **52.78** | **86.40** | **[56.5, 60.4]** |
|
| 193 |
+
| 10 | Grok-4.1-Fast | 47.40 | 49.21 | 47.57 | 42.64 | 50.17 | 48.47 | 29.87 | 47.51 | 86.64 | [47.4, 49.5] |
|
| 194 |
+
| 11 | Claude-4.5-Sonnet | 45.21 | 47.18 | 36.02 | 47.55 | 50.09 | 69.35 | 55.72 | 75.66 | 90.28 | [68.2, 70.5] |
|
| 195 |
+
| 12 | Claude-3.7-Think | 39.73 | 44.84 | 31.00 | 42.45 | 40.65 | 61.25 | 50.66 | 59.53 | 84.15 | [58.5, 64.0] |
|
| 196 |
+
| 13 | CoSER-70B | 35.95 | 35.05 | 31.16 | 32.28 | 45.33 | 45.38 | 34.32 | 30.32 | 82.58 | [43.5, 47.2] |
|
| 197 |
+
| 14 | GPT-5-Mini | 32.97 | 38.10 | 24.60 | 27.20 | 42.00 | 57.63 | 43.32 | 50.11 | 93.78 | [55.9, 59.3] |
|
| 198 |
+
| 15 | GPT-4o-240806 | 27.69 | 34.00 | 14.90 | 22.90 | 38.90 | 66.39 | 64.96 | 46.23 | 89.40 | [64.1, 68.7] |
|
| 199 |
+
| 16 | GPT-OSS-120B | 26.12 | 32.80 | 14.80 | 21.50 | 35.40 | 60.72 | 47.27 | 56.65 | 91.71 | [58.0, 63.4] |
|
| 200 |
+
| 17 | Qwen3-32B | 22.86 | 30.56 | 19.61 | 15.52 | 30.56 | 50.76 | 40.38 | 32.82 | 89.48 | [48.4, 53.2] |
|
| 201 |
+
|
| 202 |
+
**CoSER Benchmark**: Evaluates role-playing quality on 0-100 scale across four dimensions:
|
| 203 |
+
- **SC** (Story Consistency): Narrative coherence and plot continuity
|
| 204 |
+
- **AN** (Anthropomorphism): Human-like behavior and natural expression
|
| 205 |
+
- **CF** (Character Fidelity): Consistency with character profile and traits
|
| 206 |
+
- **SQ** (Storyline Quality): Overall narrative quality and engagement
|
| 207 |
+
|
| 208 |
+
**MiniMax Role-Play Bench**: Comprehensive evaluation on 0-100 scale:
|
| 209 |
+
- **Worlds** (50%): Basic text quality and world-building
|
| 210 |
+
- **Stories** (25%): Diversity and content logic
|
| 211 |
+
- **Preferences** (25%): Interaction quality and user satisfaction
|
| 212 |
+
|
| 213 |
+
**HER-SFT** achieves **rank 9** overall, demonstrating strong baseline performance:
|
| 214 |
+
- **Significant improvement over base model**: +28.06 CoSER average vs Qwen3-32B baseline (+122.8% relative improvement)
|
| 215 |
+
- **Strong preference scores (86.40)**: Competitive interaction quality, nearly matching HER-RL (86.90)
|
| 216 |
+
- **Solid foundation for RL**: HER-RL builds upon this SFT model to achieve further gains (+2.20 CoSER average)
|
| 217 |
+
|
| 218 |
+
The supervised fine-tuning establishes a strong baseline, with reinforcement learning providing incremental improvements in character consistency and narrative coherence.
|
| 219 |
+
|
| 220 |
+
## Example Use Case
|
| 221 |
+
|
| 222 |
+
### Elizabeth Bennet from Pride and Prejudice
|
| 223 |
+
|
| 224 |
+
**System Prompt** (abbreviated):
|
| 225 |
+
```
|
| 226 |
+
You are Elizabeth Bennet from Pride and Prejudice. The protagonist, intelligent and strong-willed...
|
| 227 |
+
|
| 228 |
+
===Current Scenario===
|
| 229 |
+
The scene is set in Mr. Bennet's private study. Elizabeth has been summoned after Lady Catherine's visit...
|
| 230 |
+
```
|
| 231 |
+
|
| 232 |
+
**User Input**:
|
| 233 |
+
```
|
| 234 |
+
Well, my dear Lizzy, I trust you are not too greatly troubled by recent events?
|
| 235 |
+
```
|
| 236 |
+
|
| 237 |
+
**Model Output**:
|
| 238 |
+
```
|
| 239 |
+
<system_thinking>
|
| 240 |
+
I need to generate a response for Elizabeth Bennet at the beginning of this scene.
|
| 241 |
+
|
| 242 |
+
Context Analysis: Mr. Bennet has just spoken, picking up a letter and asking if I am "greatly troubled" by recent events. His tone is sardonic, yet there is underlying tension. The scenario implies the aftermath of Lady Catherine's visit.
|
| 243 |
+
|
| 244 |
+
Character State (Elizabeth):
|
| 245 |
+
- Internal: Feeling a mix of relief (that I stood my ground) and anxiety (about the consequences)
|
| 246 |
+
- External: Need to appear composed, cannot let him see me crumble
|
| 247 |
+
|
| 248 |
+
Goal: Answer honestly but carefully. Convey that I am holding up well.
|
| 249 |
+
|
| 250 |
+
Plan:
|
| 251 |
+
- Role Thinking: Reflect on the complexity of feelings
|
| 252 |
+
- Role Action: Use physical action to show composure
|
| 253 |
+
- Speech: Direct answer showing resilience
|
| 254 |
+
</system_thinking>
|
| 255 |
+
|
| 256 |
+
Elizabeth Bennet: <role_thinking>His tone is light, but the air feels heavy. I cannot let him see how much Lady Catherine's intrusion still stings, nor how anxious I am about what he knows</role_thinking><role_action>takes a steadying breath, smoothing the folds of her dress to mask a tremor</role_action>I believe I can manage, Father. Though I must admit, I am curious about what this letter contains.
|
| 257 |
+
```
|
| 258 |
+
|
| 259 |
+
## Training Data
|
| 260 |
+
|
| 261 |
+
The model is trained on a comprehensive role-playing dataset that includes:
|
| 262 |
+
|
| 263 |
+
- Authentic multi-turn, multi-character dialogues from classic literature
|
| 264 |
+
- Character profiles with detailed background information
|
| 265 |
+
- Scenario descriptions and conversation contexts
|
| 266 |
+
- Rich annotations including system thinking, role thinking, and role actions
|
| 267 |
+
- Diverse response patterns avoiding monotonous structures
|
| 268 |
+
|
| 269 |
+
## Ethical Considerations
|
| 270 |
+
|
| 271 |
+
We have conducted safety checks on the training dataset and implemented safeguards. However, users should be aware that:
|
| 272 |
+
|
| 273 |
+
- The models may generate content that reflects biases present in the training data
|
| 274 |
+
- Role-playing as certain characters might involve generating content with specific personality traits or behaviors
|
| 275 |
+
- Users should implement appropriate content filtering when deploying these models in production applications
|
| 276 |
+
- The models include safety evaluation dimensions to minimize harmful outputs
|
| 277 |
+
|
| 278 |
+
## Model Comparison
|
| 279 |
+
|
| 280 |
+
| Feature | HER-SFT (this model) | HER-RL |
|
| 281 |
+
|---------|---------------------|---------|
|
| 282 |
+
| Training Method | Supervised Fine-Tuning | SFT + Reinforcement Learning |
|
| 283 |
+
| CoSER Average | 50.92 | 53.12 (+2.20) |
|
| 284 |
+
| MiniMax Average | 58.44 | 65.73 (+7.29) |
|
| 285 |
+
| Character Consistency | Good | Better (+2.00 CF) |
|
| 286 |
+
| Interaction Quality | 86.40 Pref | 86.90 Pref (+0.50) |
|
| 287 |
+
| Best Use Case | General role-playing | Preference-aligned interactions |
|
| 288 |
+
|
| 289 |
+
## Citation
|
| 290 |
+
|
| 291 |
+
If you use HER-SFT in your research, please cite our paper:
|
| 292 |
+
|
| 293 |
+
```bibtex
|
| 294 |
+
@article{her2025,
|
| 295 |
+
title={HER: Human Emulation Reasoning for Cognitive-Level Role-Playing Language Models},
|
| 296 |
+
author={[Your Author Names]},
|
| 297 |
+
journal={[Conference/Journal Name]},
|
| 298 |
+
year={2025}
|
| 299 |
+
}
|
| 300 |
+
```
|
| 301 |
+
|
| 302 |
+
## License
|
| 303 |
+
|
| 304 |
+
Apache-2.0
|
| 305 |
+
|
| 306 |
+
## Acknowledgments
|
| 307 |
+
|
| 308 |
+
This model is based on Qwen3-32B developed by Alibaba Cloud. We thank the Qwen team for their excellent base model.
|
| 309 |
+
|
| 310 |
+
## Related Models
|
| 311 |
+
|
| 312 |
+
- **[HER-RL](../her-rl-qwen-32b)**: Enhanced version with reinforcement learning for improved character consistency and preference alignment
|
added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
args.json
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/data/minimax-dialogue/experiment/qwen/Qwen3-32B-long",
|
| 3 |
+
"model_type": "qwen3",
|
| 4 |
+
"model_revision": null,
|
| 5 |
+
"task_type": "causal_lm",
|
| 6 |
+
"torch_dtype": "bfloat16",
|
| 7 |
+
"attn_impl": null,
|
| 8 |
+
"num_labels": null,
|
| 9 |
+
"problem_type": null,
|
| 10 |
+
"rope_scaling": null,
|
| 11 |
+
"device_map": null,
|
| 12 |
+
"max_memory": {},
|
| 13 |
+
"local_repo_path": null,
|
| 14 |
+
"init_strategy": null,
|
| 15 |
+
"template": "qwen3",
|
| 16 |
+
"system": null,
|
| 17 |
+
"max_length": 2048,
|
| 18 |
+
"truncation_strategy": "delete",
|
| 19 |
+
"max_pixels": null,
|
| 20 |
+
"agent_template": null,
|
| 21 |
+
"norm_bbox": null,
|
| 22 |
+
"use_chat_template": true,
|
| 23 |
+
"padding_free": false,
|
| 24 |
+
"padding_side": "right",
|
| 25 |
+
"loss_scale": "default",
|
| 26 |
+
"sequence_parallel_size": 1,
|
| 27 |
+
"response_prefix": null,
|
| 28 |
+
"template_backend": "swift",
|
| 29 |
+
"dataset": [],
|
| 30 |
+
"val_dataset": [],
|
| 31 |
+
"split_dataset_ratio": 0.01,
|
| 32 |
+
"data_seed": 42,
|
| 33 |
+
"dataset_num_proc": 1,
|
| 34 |
+
"load_from_cache_file": true,
|
| 35 |
+
"dataset_shuffle": true,
|
| 36 |
+
"val_dataset_shuffle": false,
|
| 37 |
+
"streaming": false,
|
| 38 |
+
"interleave_prob": null,
|
| 39 |
+
"stopping_strategy": "first_exhausted",
|
| 40 |
+
"shuffle_buffer_size": 1000,
|
| 41 |
+
"download_mode": "reuse_dataset_if_exists",
|
| 42 |
+
"columns": {},
|
| 43 |
+
"strict": false,
|
| 44 |
+
"remove_unused_columns": true,
|
| 45 |
+
"model_name": null,
|
| 46 |
+
"model_author": null,
|
| 47 |
+
"custom_dataset_info": [],
|
| 48 |
+
"quant_method": null,
|
| 49 |
+
"quant_bits": null,
|
| 50 |
+
"hqq_axis": null,
|
| 51 |
+
"bnb_4bit_compute_dtype": "bfloat16",
|
| 52 |
+
"bnb_4bit_quant_type": "nf4",
|
| 53 |
+
"bnb_4bit_use_double_quant": true,
|
| 54 |
+
"bnb_4bit_quant_storage": null,
|
| 55 |
+
"max_new_tokens": null,
|
| 56 |
+
"temperature": null,
|
| 57 |
+
"top_k": null,
|
| 58 |
+
"top_p": null,
|
| 59 |
+
"repetition_penalty": null,
|
| 60 |
+
"num_beams": 1,
|
| 61 |
+
"stream": false,
|
| 62 |
+
"stop_words": [],
|
| 63 |
+
"logprobs": false,
|
| 64 |
+
"top_logprobs": null,
|
| 65 |
+
"ckpt_dir": "/data/minimax-dialogue/users/ado/082025project/final_roleplay_v2/ckpt/Qwen3-32B-with-systhink/v9-20251222-082607",
|
| 66 |
+
"lora_modules": [],
|
| 67 |
+
"tuner_backend": "peft",
|
| 68 |
+
"train_type": "lora",
|
| 69 |
+
"adapters": [],
|
| 70 |
+
"external_plugins": [],
|
| 71 |
+
"seed": 42,
|
| 72 |
+
"model_kwargs": {},
|
| 73 |
+
"load_args": true,
|
| 74 |
+
"load_data_args": false,
|
| 75 |
+
"packing": false,
|
| 76 |
+
"packing_cache": null,
|
| 77 |
+
"custom_register_path": [],
|
| 78 |
+
"use_hf": false,
|
| 79 |
+
"hub_token": null,
|
| 80 |
+
"ddp_timeout": 18000000,
|
| 81 |
+
"ddp_backend": null,
|
| 82 |
+
"ignore_args_error": false,
|
| 83 |
+
"use_swift_lora": false,
|
| 84 |
+
"merge_lora": false,
|
| 85 |
+
"safe_serialization": true,
|
| 86 |
+
"max_shard_size": "5GB",
|
| 87 |
+
"output_dir": "/data/minimax-dialogue/users/ado/082025project/final_roleplay_v2/ckpt/Qwen3-32B-with-systhink/v9-20251222-082607/hf_0882",
|
| 88 |
+
"quant_n_samples": 256,
|
| 89 |
+
"quant_batch_size": 1,
|
| 90 |
+
"group_size": 128,
|
| 91 |
+
"to_ollama": false,
|
| 92 |
+
"to_mcore": false,
|
| 93 |
+
"to_hf": true,
|
| 94 |
+
"mcore_model": "/data/minimax-dialogue/users/ado/082025project/final_roleplay_v2/ckpt/Qwen3-32B-with-systhink/v9-20251222-082607",
|
| 95 |
+
"thread_count": 7,
|
| 96 |
+
"test_convert_precision": false,
|
| 97 |
+
"push_to_hub": false,
|
| 98 |
+
"hub_model_id": null,
|
| 99 |
+
"hub_private_repo": false,
|
| 100 |
+
"commit_message": "update files",
|
| 101 |
+
"to_peft_format": false,
|
| 102 |
+
"exist_ok": false,
|
| 103 |
+
"rank": 0,
|
| 104 |
+
"local_rank": -1,
|
| 105 |
+
"global_world_size": 1,
|
| 106 |
+
"local_world_size": 1,
|
| 107 |
+
"model_suffix": "Qwen3-32B-long",
|
| 108 |
+
"model_info": "ModelInfo(model_type='qwen3', model_dir='/data/minimax-dialogue/experiment/qwen/Qwen3-32B-long', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, rope_scaling={'rope_type': 'yarn', 'factor': 4.0, 'original_max_position_embeddings': 32768}, config=None, task_type='causal_lm', num_labels=None)",
|
| 109 |
+
"model_meta": "ModelMeta(model_type='qwen3', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-0.6B-Base', hf_model_id='Qwen/Qwen3-0.6B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-1.7B-Base', hf_model_id='Qwen/Qwen3-1.7B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-4B-Base', hf_model_id='Qwen/Qwen3-4B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-8B-Base', hf_model_id='Qwen/Qwen3-8B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-14B-Base', hf_model_id='Qwen/Qwen3-14B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-0.6B', hf_model_id='Qwen/Qwen3-0.6B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-1.7B', hf_model_id='Qwen/Qwen3-1.7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-4B', hf_model_id='Qwen/Qwen3-4B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-8B', hf_model_id='Qwen/Qwen3-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-14B', hf_model_id='Qwen/Qwen3-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-32B', hf_model_id='Qwen/Qwen3-32B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-0.6B-FP8', hf_model_id='Qwen/Qwen3-0.6B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-1.7B-FP8', hf_model_id='Qwen/Qwen3-1.7B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-4B-FP8', hf_model_id='Qwen/Qwen3-4B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-8B-FP8', hf_model_id='Qwen/Qwen3-8B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-14B-FP8', 
hf_model_id='Qwen/Qwen3-14B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-32B-FP8', hf_model_id='Qwen/Qwen3-32B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-4B-AWQ', hf_model_id='Qwen/Qwen3-4B-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-8B-AWQ', hf_model_id='Qwen/Qwen3-8B-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-14B-AWQ', hf_model_id='Qwen/Qwen3-14B-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-32B-AWQ', hf_model_id='Qwen/Qwen3-32B-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='swift/Qwen3-32B-AWQ', hf_model_id=None, model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3', get_function=<function get_model_tokenizer_with_flash_attn at 0x7fb81ee028c0>, model_arch='llama', architectures=['Qwen3ForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.51'], tags=[])",
|
| 110 |
+
"model_dir": "/data/minimax-dialogue/experiment/qwen/Qwen3-32B-long",
|
| 111 |
+
"hub": "<class 'swift.hub.hub.MSHub'>"
|
| 112 |
+
}
|
chat_demo/README.md
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Interactive Chat Demo
|
| 2 |
+
|
| 3 |
+
This directory contains an interactive chat tool to test the HER model with character role-playing scenarios.
|
| 4 |
+
|
| 5 |
+
## Quick Start
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
# Basic chat (uses 200 CoSER scenarios from classic books)
|
| 9 |
+
python chat_demo.py
|
| 10 |
+
|
| 11 |
+
# Show system thinking process
|
| 12 |
+
python chat_demo.py --show-think
|
| 13 |
+
|
| 14 |
+
# Show role thinking
|
| 15 |
+
python chat_demo.py --show-rolethink
|
| 16 |
+
|
| 17 |
+
# Use simple built-in scenarios (2 scenarios: Pride and Prejudice, The Great Gatsby)
|
| 18 |
+
python chat_demo.py --simple
|
| 19 |
+
```
|
| 20 |
+
|
| 21 |
+
## Features
|
| 22 |
+
|
| 23 |
+
- **Character Role-Playing**: Chat with AI as characters from classic literature
|
| 24 |
+
- **Multi-turn Dialogue**: Maintains conversation history and context
|
| 25 |
+
- **Dual-layer Thinking Display**: Optional display of system thinking and role thinking
|
| 26 |
+
- **Format Transformation**: Converts XML tags to readable format:
|
| 27 |
+
- `<role_thinking>` → `[inner thought]`
|
| 28 |
+
- `<role_action>` → `(physical action)`
|
| 29 |
+
- **Auto-save**: Saves conversation logs on exit
|
| 30 |
+
|
| 31 |
+
## Usage
|
| 32 |
+
|
| 33 |
+
### Interactive Mode
|
| 34 |
+
|
| 35 |
+
```bash
|
| 36 |
+
python chat_demo.py
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
The script will prompt you to:
|
| 40 |
+
1. Choose a scenario (book and scene)
|
| 41 |
+
2. Select which character the AI should play
|
| 42 |
+
3. Select which character you want to play
|
| 43 |
+
4. Start chatting!
|
| 44 |
+
|
| 45 |
+
### Commands During Chat
|
| 46 |
+
|
| 47 |
+
| Command | Function |
|
| 48 |
+
|---------|----------|
|
| 49 |
+
| `quit` / `exit` / `q` | Exit chat |
|
| 50 |
+
| `clear` | Clear conversation history |
|
| 51 |
+
| `history` | View current conversation history |
|
| 52 |
+
| `prompt` | View full prompt |
|
| 53 |
+
|
| 54 |
+
## Example Scenarios
|
| 55 |
+
|
| 56 |
+
### CoSER Dataset (200 Scenarios)
|
| 57 |
+
|
| 58 |
+
By default, the demo uses the **CoSER test dataset** (`coser_scenarios.json`) with 200 rich scenarios from classic literature:
|
| 59 |
+
|
| 60 |
+
- **Pride and Prejudice** (Elizabeth Bennet, Mr. Darcy, Mr. Bennet, etc.)
|
| 61 |
+
- **A Game of Thrones** (Jon Snow, Tyrion Lannister, Daenerys Targaryen, etc.)
|
| 62 |
+
- **The Great Gatsby** (Jay Gatsby, Nick Carraway, Daisy Buchanan, etc.)
|
| 63 |
+
- **To Kill a Mockingbird** (Atticus Finch, Scout Finch, etc.)
|
| 64 |
+
- **1984** (Winston Smith, Julia, O'Brien, etc.)
|
| 65 |
+
- **Harry Potter** (Harry Potter, Hermione Granger, Ron Weasley, etc.)
|
| 66 |
+
- **The Lord of the Rings** (Frodo, Gandalf, Aragorn, etc.)
|
| 67 |
+
- And 150+ more scenarios from renowned novels!
|
| 68 |
+
|
| 69 |
+
Each scenario includes:
|
| 70 |
+
- **Book title** and author context
|
| 71 |
+
- **Scene description** with detailed setting
|
| 72 |
+
- **Character profiles** for all participants
|
| 73 |
+
- **Initial character thoughts** and motivations
|
| 74 |
+
- **Topic/situation** summary
|
| 75 |
+
|
| 76 |
+
### Built-in Scenarios (2 Simple Examples)
|
| 77 |
+
|
| 78 |
+
If you prefer simpler scenarios or want to test without the full dataset, use `--simple` flag to load 2 basic scenarios:
|
| 79 |
+
- Pride and Prejudice (Mr. Bennet and Elizabeth)
|
| 80 |
+
- The Great Gatsby (Gatsby and Nick Carraway)
|
| 81 |
+
|
| 82 |
+
## Options
|
| 83 |
+
|
| 84 |
+
| Option | Description | Default |
|
| 85 |
+
|--------|-------------|---------|
|
| 86 |
+
| `--model-path` | Path to HER model directory | `.` (current dir) |
|
| 87 |
+
| `--show-think` | Show `<system_thinking>` | False |
|
| 88 |
+
| `--show-rolethink` | Show `<role_thinking>` | False |
|
| 89 |
+
| `--scenario` | Scenario index | Interactive |
|
| 90 |
+
| `--character` | Character index | Interactive |
|
| 91 |
+
| `--simple` | Use 2 built-in scenarios instead of 200 CoSER scenarios | False |
|
| 92 |
+
|
| 93 |
+
## Output Format
|
| 94 |
+
|
| 95 |
+
The model generates responses with:
|
| 96 |
+
|
| 97 |
+
1. **System Thinking** (optional display):
|
| 98 |
+
- Third-person analysis of how to portray the character
|
| 99 |
+
- Planning and reasoning about the response
|
| 100 |
+
|
| 101 |
+
2. **Role Response**:
|
| 102 |
+
- **Role Thinking** `[...]`: Character's inner thoughts (invisible to others)
|
| 103 |
+
- **Role Action** `(...)`: Physical actions and expressions
|
| 104 |
+
- **Speech**: Natural dialogue
|
| 105 |
+
|
| 106 |
+
### Example Output
|
| 107 |
+
|
| 108 |
+
```
|
| 109 |
+
════════════════════════════════════════════════════════════════════════════════
|
| 110 |
+
🎭 【Elizabeth Bennet's Response】
|
| 111 |
+
════════════════════════════════════════════════════════════════════════════════
|
| 112 |
+
[His tone is light, but the air feels heavy. I cannot let him see how much
|
| 113 |
+
Lady Catherine's intrusion still stings.]
|
| 114 |
+
(takes a steadying breath, smoothing the folds of her dress)
|
| 115 |
+
I believe I can manage, Father. Though I must admit, I am curious about
|
| 116 |
+
what this letter contains.
|
| 117 |
+
════════════════════════════════════════════════════════════════════════════════
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
## Requirements
|
| 121 |
+
|
| 122 |
+
- Python 3.8+
|
| 123 |
+
- transformers
|
| 124 |
+
- torch
|
| 125 |
+
|
| 126 |
+
Install dependencies:
|
| 127 |
+
```bash
|
| 128 |
+
pip install transformers torch
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
## File Structure
|
| 132 |
+
|
| 133 |
+
```
|
| 134 |
+
chat_demo/
|
| 135 |
+
├── README.md # This file
|
| 136 |
+
├── chat_demo.py # Main chat script
|
| 137 |
+
├── coser_scenarios.json # 200 CoSER test scenarios (2.4MB)
|
| 138 |
+
├── scenarios.json # Simple built-in scenarios (auto-created if using --simple)
|
| 139 |
+
└── chat_logs/ # Saved chat logs (auto-created)
|
| 140 |
+
└── {book}_{character}_{timestamp}.txt
|
| 141 |
+
```
|
| 142 |
+
|
| 143 |
+
## Notes
|
| 144 |
+
|
| 145 |
+
1. **System Thinking**: Used for training and analysis, not included in conversation history
|
| 146 |
+
2. **Role Thinking**: Character's inner thoughts, invisible to other characters
|
| 147 |
+
3. **Role Action**: Physical behaviors visible to others
|
| 148 |
+
4. **Speech**: What the character says out loud
|
| 149 |
+
|
| 150 |
+
## Tips for Best Results
|
| 151 |
+
|
| 152 |
+
- Stay in character when chatting
|
| 153 |
+
- Provide context in your messages
|
| 154 |
+
- Use the character's background knowledge
|
| 155 |
+
- Be patient - the model generates thoughtful responses with reasoning
|
| 156 |
+
|
| 157 |
+
## Troubleshooting
|
| 158 |
+
|
| 159 |
+
**Model not loading?**
|
| 160 |
+
- Ensure the model files are in the correct directory
|
| 161 |
+
- Check that you have enough GPU memory
|
| 162 |
+
|
| 163 |
+
**Empty responses?**
|
| 164 |
+
- Try adjusting temperature (default: 0.7)
|
| 165 |
+
- Check the prompt format
|
| 166 |
+
|
| 167 |
+
**Inconsistent character behavior?**
|
| 168 |
+
- Review the character profile
|
| 169 |
+
- Ensure your messages align with the scenario context
|
chat_demo/chat_demo.py
ADDED
|
@@ -0,0 +1,536 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Interactive Chat Demo for HER Model
|
| 4 |
+
Chat with AI characters from classic literature using role-playing scenarios.
|
| 5 |
+
|
| 6 |
+
Usage:
|
| 7 |
+
python chat_demo.py
|
| 8 |
+
python chat_demo.py --show-think
|
| 9 |
+
python chat_demo.py --show-rolethink
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import re
|
| 13 |
+
import json
|
| 14 |
+
import argparse
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from datetime import datetime
|
| 17 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 18 |
+
|
| 19 |
+
# Colors for terminal output
|
| 20 |
+
class Colors:
    """ANSI escape sequences for colored terminal output."""
    # Foreground colors
    HEADER = '\033[95m'
    BLUE = '\033[94m'
    CYAN = '\033[96m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    GRAY = '\033[90m'
    MAGENTA = '\033[35m'
    # Text styles
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    # Reset all colors/styles
    END = '\033[0m'
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def remove_system_thinking(text: str) -> str:
    """Strip every ``<system_thinking>...</system_thinking>`` block from *text*.

    Trailing whitespace after each removed block is consumed as well, and the
    final result is stripped. Empty/None input is returned unchanged.
    """
    if not text:
        return text
    without_thinking = re.sub(
        r'<system_thinking>.*?</system_thinking>\s*', '', text, flags=re.DOTALL
    )
    return without_thinking.strip()
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def extract_system_thinking(text: str) -> str:
    """Return the content of the first ``<system_thinking>`` block, or "".

    Any stray ``<role_*>`` tags that leaked inside the block are removed
    before returning.
    """
    if not text:
        return ""
    found = re.search(r'<system_thinking>(.*?)</system_thinking>', text, flags=re.DOTALL)
    if not found:
        return ""
    inner = found.group(1).strip()
    # Drop any role tags that leaked into the thinking block.
    return re.sub(r'</?role_\w+>', '', inner)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def format_for_display(text: str, show_rolethink: bool = True) -> str:
    """Convert role-play XML tags into reader-friendly punctuation.

    ``<role_thinking>`` becomes ``[...]`` (or is removed entirely when
    *show_rolethink* is False), ``<role_action>`` becomes ``(...)``, and
    ``<role_speech>`` tags are dropped while keeping their content.
    """
    if not text:
        return text

    if show_rolethink:
        formatted = text.replace('<role_thinking>', '[').replace('</role_thinking>', ']')
    else:
        formatted = re.sub(r'<role_thinking>.*?</role_thinking>', '', text, flags=re.DOTALL)

    # Map the remaining tags onto plain punctuation.
    for tag, replacement in (('<role_action>', '('),
                             ('</role_action>', ')'),
                             ('<role_speech>', ''),
                             ('</role_speech>', '')):
        formatted = formatted.replace(tag, replacement)

    return formatted.strip()
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def load_sample_scenarios(use_coser: bool = True):
    """Load role-play scenarios from disk, creating built-in samples if needed.

    Args:
        use_coser: When True, prefer the CoSER dataset
            (``coser_scenarios.json``, 200 scenes from classic books) if
            it exists next to this script.

    Returns:
        A list of scenario dicts (book, topic, scenario, character_profiles,
        key_characters).
    """
    base_dir = Path(__file__).parent

    # Prefer the CoSER dataset when requested and present on disk.
    if use_coser:
        coser_file = base_dir / "coser_scenarios.json"
        if coser_file.exists():
            print(f"{Colors.CYAN}📚 Loading CoSER scenarios (200 book scenes)...{Colors.END}")
            with open(coser_file, 'r', encoding='utf-8') as f:
                return json.load(f)

    # Fall back to the simple scenarios file, if it was saved previously.
    scenarios_file = base_dir / "scenarios.json"
    if scenarios_file.exists():
        with open(scenarios_file, 'r', encoding='utf-8') as f:
            return json.load(f)

    # First run: build the two sample scenarios and persist them.
    scenarios = [
        {
            "book": "Pride and Prejudice",
            "topic": "Mr. Bennet confronts Elizabeth about Mr. Darcy's proposal",
            "scenario": "The scene is set in Mr. Bennet's private study, a sanctuary of leather-bound books and quiet contemplation. Elizabeth has been summoned unexpectedly, and Mr. Bennet holds a letter that seems to spark his characteristic sardonic amusement.",
            "character_profiles": {
                "Mr Bennet": "Elizabeth's father, known for his sarcastic wit and detachment. Highly intelligent and well-read, preferring the solitude of his library. Known for his biting sarcasm and sardonic humor.",
                "Elizabeth Bennet": "The protagonist, intelligent and strong-willed. Quick-witted with a playful sense of humor. Values honesty and integrity. Maintains composure under pressure."
            },
            "key_characters": [
                {
                    "name": "Mr Bennet",
                    "thought": "It is a delicate matter, this business with Darcy. I must gauge Elizabeth's true feelings without being overly sentimental."
                },
                {
                    "name": "Elizabeth Bennet",
                    "thought": "Father's summoning me at this hour is unusual. I hope this isn't about Lady Catherine's visit."
                }
            ]
        },
        {
            "book": "The Great Gatsby",
            "topic": "Nick Carraway encounters Gatsby at one of his lavish parties",
            "scenario": "The party is in full swing at Gatsby's mansion. Jazz music fills the air, champagne flows freely, and well-dressed guests mingle on the lawn. Nick has been wandering alone, observing the spectacle, when he encounters a mysterious man by the library.",
            "character_profiles": {
                "Jay Gatsby": "The enigmatic millionaire who throws lavish parties. Behind his elegant facade lies a romantic dreamer obsessed with recapturing the past. Charming yet deeply lonely.",
                "Nick Carraway": "The story's narrator, a Yale graduate from the Midwest. Honest, tolerant, and inclined to reserve judgment. Both drawn to and repelled by the excess around him."
            },
            "key_characters": [
                {
                    "name": "Jay Gatsby",
                    "thought": "Another party, another night of waiting. Perhaps tonight she'll come. I must maintain appearances."
                },
                {
                    "name": "Nick Carraway",
                    "thought": "I've never met my host. These parties are magnificent, yet there's something hollow about all this revelry."
                }
            ]
        }
    ]

    # Save scenarios so subsequent runs load them from disk.
    with open(scenarios_file, 'w', encoding='utf-8') as f:
        json.dump(scenarios, f, indent=2, ensure_ascii=False)

    return scenarios
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def print_scenarios(scenarios: list):
    """Print an indexed menu of the available scenarios."""
    banner = f"{Colors.HEADER}{'='*80}{Colors.END}"
    print(f"\n{banner}")
    print(f"{Colors.HEADER}📚 Available Scenarios{Colors.END}")
    print(f"{banner}\n")

    for idx, item in enumerate(scenarios):
        print(f"{Colors.CYAN}[{idx}]{Colors.END} {Colors.BOLD}📖 {item['book']}{Colors.END}")
        print(f" {Colors.GRAY}{item['topic']}{Colors.END}")
        names = list(item['character_profiles'].keys())
        print(f" {Colors.MAGENTA}👥 Characters: {', '.join(names)}{Colors.END}")
        print()
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def print_characters(scenario: dict):
    """List the scenario's characters with a short profile preview."""
    banner = f"{Colors.HEADER}{'='*80}{Colors.END}"
    print(f"\n{banner}")
    print(f"{Colors.HEADER}👥 Available Characters - {scenario['book']}{Colors.END}")
    print(f"{banner}\n")

    for idx, (name, profile) in enumerate(scenario['character_profiles'].items()):
        print(f"{Colors.CYAN}[{idx}]{Colors.END} {Colors.BOLD}{name}{Colors.END}")
        # Truncate long profiles so the menu stays compact.
        shortened = profile if len(profile) <= 150 else profile[:150] + "..."
        print(f" {Colors.GRAY}{shortened}{Colors.END}")
        print()
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def build_system_prompt(scenario: dict, character_name: str, user_character_name: str) -> str:
    """Assemble the role-play system prompt for one AI-played character.

    Args:
        scenario: Scenario dict with 'book', 'scenario', 'character_profiles'
            and 'key_characters' entries.
        character_name: Character the model will play.
        user_character_name: Character the human user plays.

    Returns:
        The full system prompt string.
    """
    profiles = scenario['character_profiles']
    char_profile = profiles.get(character_name, "")
    user_profile = profiles.get(user_character_name, "A person interacting with the character.")

    # Pull the AI character's opening thought, if the scenario defines one.
    char_thought = next(
        (kc.get('thought', '') for kc in scenario['key_characters']
         if kc['name'] == character_name),
        "",
    )

    return f"""You are role-playing as {character_name} from the book "{scenario['book']}".

==={character_name}'s Profile===
{char_profile}

===Current Scene===
{scenario['scenario']}

===Your Current Thoughts===
{char_thought}

===The Person You Are Interacting With===
{user_character_name}: {user_profile}

===Instructions===
- Stay in character as {character_name} at all times
- Keep responses natural and engaging, consistent with the book's style
- Respond from {character_name}'s perspective
- **IMPORTANT: Speak DIRECTLY to "{user_character_name}" using "you" (second person). Do NOT use third person.**

===Output Format===
Your output should include thought, speech, and action in this two-part structure:

1. System Thinking: A single block at the very beginning, wrapped in <system_thinking> and </system_thinking>. This is third-person analysis of how to portray the character.

2. Role-play Response: The character's actual response including:
- <role_thinking>inner thoughts</role_thinking> (invisible to others)
- <role_action>physical actions</role_action> (visible to others)
- Speech (plain text, what the character says out loud)"""
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def save_chat_log(messages: list, scenario: dict, character_name: str, user_character: str):
    """Write the conversation transcript to ``chat_logs/`` and return its path.

    System messages and the synthetic "===Conversation Start===" user turn
    are omitted from the transcript.
    """
    log_dir = Path(__file__).parent / "chat_logs"
    log_dir.mkdir(exist_ok=True)

    # Sanitize book/character names so they are safe filename components.
    book_slug = re.sub(r'[^\w\-]', '_', scenario['book'])[:30]
    char_slug = re.sub(r'[^\w\-]', '_', character_name)[:20]
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filepath = log_dir / f"{book_slug}_{char_slug}_{stamp}.txt"

    divider = "=" * 80
    lines = [
        divider,
        "HER Chat Demo - Conversation Log",
        divider,
        f"Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"Book: {scenario['book']}",
        f"AI Character: {character_name}",
        f"User Character: {user_character}",
        divider,
        "",
        "【Scene】",
        scenario['scenario'][:500],
        "",
        divider,
        "【Conversation】",
        divider,
    ]

    for msg in messages:
        if msg['role'] == 'user':
            if "===Conversation Start===" not in msg['content']:
                lines.append(f"\n【{user_character}】")
                lines.append(msg['content'])
        elif msg['role'] == 'assistant':
            lines.append(f"\n【{character_name}】")
            lines.append(msg['content'])
        # 'system' (and any other) roles are skipped.

    lines.extend(["\n" + divider, "--- End of Conversation ---"])

    filepath.write_text('\n'.join(lines), encoding='utf-8')

    return filepath
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
def chat_loop(model, tokenizer, scenario: dict, character_name: str, user_character: str,
              show_think: bool = False, show_rolethink: bool = True):
    """Run the interactive chat REPL: read user turns, generate character replies.

    Handles the in-chat commands ('quit'/'exit'/'q', 'clear', 'history'),
    maintains the running ``messages`` list, and returns it when the loop
    ends (so the caller can save the transcript).
    """
    book = scenario['book']

    # Session header: who plays whom, and which thinking layers are shown.
    print(f"\n{Colors.HEADER}{'='*80}{Colors.END}")
    print(f"{Colors.HEADER}🎭 Starting Conversation - {book}{Colors.END}")
    print(f"{Colors.HEADER}{'='*80}{Colors.END}")
    print(f"{Colors.GREEN}You play: {user_character}{Colors.END}")
    print(f"{Colors.MAGENTA}AI plays: {character_name}{Colors.END}")
    print(f"{Colors.GRAY}Show system_thinking: {'Yes' if show_think else 'No'}{Colors.END}")
    print(f"{Colors.GRAY}Show role_thinking: {'Yes' if show_rolethink else 'No'}{Colors.END}")
    print(f"{Colors.GRAY}Commands: 'quit' to exit, 'clear' to reset, 'history' to view{Colors.END}")
    print(f"{Colors.HEADER}{'='*80}{Colors.END}\n")

    # Display scene (truncated preview only).
    print(f"{Colors.CYAN}📍 Scene:{Colors.END}")
    print(f"{Colors.GRAY}{scenario['scenario'][:300]}...{Colors.END}\n")

    # Build messages
    system_prompt = build_system_prompt(scenario, character_name, user_character)

    # Initial greeting; a richer one is used when the character appears in
    # the scenario's key_characters list.
    greeting = f"*{character_name} looks at you*"
    for kc in scenario.get('key_characters', []):
        if kc['name'] == character_name:
            greeting = f"*enters the scene* Hello, {user_character}."
            break

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "===Conversation Start==="},
        {"role": "assistant", "content": greeting}
    ]

    print(f"{Colors.GREEN}{character_name}:{Colors.END} {greeting}\n")

    while True:
        try:
            user_input = input(f"{Colors.BLUE}{user_character}:{Colors.END} ").strip()

            if not user_input:
                continue

            if user_input.lower() in ['quit', 'exit', 'q']:
                print(f"\n{Colors.YELLOW}👋 Goodbye!{Colors.END}")
                break

            if user_input.lower() == 'clear':
                # Reset history back to system prompt + greeting.
                messages = [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": "===Conversation Start==="},
                    {"role": "assistant", "content": greeting}
                ]
                print(f"{Colors.YELLOW}🔄 Conversation history cleared{Colors.END}\n")
                print(f"{Colors.GREEN}{character_name}:{Colors.END} {greeting}\n")
                continue

            if user_input.lower() == 'history':
                # Skip messages[0] (the system prompt) in the listing.
                print(f"\n{Colors.CYAN}📜 Conversation History ({len(messages)} messages):{Colors.END}")
                for i, msg in enumerate(messages[1:], 1):
                    content = msg['content'][:80] + '...' if len(msg['content']) > 80 else msg['content']
                    print(f" [{i}] {msg['role']}: {content}")
                print()
                continue

            # Add user message
            messages.append({"role": "user", "content": user_input})

            # Generate response (status line is overwritten below via '\r').
            print(f"{Colors.GRAY}⏳ Thinking...{Colors.END}", end='\r')

            # Seed the assistant turn with "<system_thinking>" so the model
            # continues inside the thinking block; the prefix is re-attached
            # to the decoded output below to rebuild the full response.
            # NOTE(review): with add_generation_prompt=False the chat
            # template presumably leaves this partial turn open — confirm
            # against the model's chat_template.jinja.
            text = tokenizer.apply_chat_template(
                messages + [{"role": "assistant", "content": "<system_thinking>"}],
                tokenize=False,
                add_generation_prompt=False
            )

            inputs = tokenizer([text], return_tensors="pt").to(model.device)

            try:
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=1024,
                    temperature=0.7,
                    top_p=0.9,
                    do_sample=True,
                    pad_token_id=tokenizer.eos_token_id
                )

                # NOTE(review): inputs[0] indexes the BatchEncoding for the
                # first sequence's Encoding (fast tokenizers only); len()
                # is then the prompt token count used to slice off the
                # prompt. A slow tokenizer would break here — confirm.
                response = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=False)

                # Clean up response: drop chat-template control tokens.
                response = response.replace("<|im_end|>", "").replace("<|im_start|>", "").strip()

                # Re-attach the forced "<system_thinking>" prefix, then strip
                # the whole thinking block for the stored/displayed reply.
                full_response = "<system_thinking>" + response
                clean_response = remove_system_thinking(full_response)

            except Exception as e:
                # Generation failed: report and drop the pending user turn
                # so history stays consistent.
                print(f"{Colors.RED}❌ Generation failed: {e}{Colors.END}")
                messages.pop()
                continue

            # Overwrite the "Thinking..." status line.
            print(" " * 50, end='\r')

            # Display system thinking if requested
            if show_think:
                think_content = extract_system_thinking(full_response)
                if think_content:
                    print(f"\n{Colors.GRAY}{'─'*80}{Colors.END}")
                    print(f"{Colors.GRAY}📝 【System Thinking】{Colors.END}")
                    print(f"{Colors.GRAY}{'─'*80}{Colors.END}")
                    for line in think_content.split('\n')[:10]:  # Limit lines
                        print(f"{Colors.GRAY} {line}{Colors.END}")
                    print(f"{Colors.GRAY}{'─'*80}{Colors.END}\n")

            # Display character response
            print(f"{Colors.GREEN}{'═'*80}{Colors.END}")
            print(f"{Colors.GREEN}🎭 【{character_name}'s Response】{Colors.END}")
            print(f"{Colors.GREEN}{'═'*80}{Colors.END}")
            display_response = format_for_display(clean_response, show_rolethink=show_rolethink)
            print(f"{Colors.GREEN}{display_response}{Colors.END}")
            print(f"{Colors.GREEN}{'═'*80}{Colors.END}\n")

            # Store the reply WITHOUT system thinking — it is not part of
            # the conversation history fed back to the model.
            messages.append({"role": "assistant", "content": clean_response})

        except KeyboardInterrupt:
            print(f"\n{Colors.YELLOW}👋 Goodbye!{Colors.END}")
            break
        except EOFError:
            print(f"\n{Colors.YELLOW}👋 Goodbye!{Colors.END}")
            break

    return messages
|
| 408 |
+
|
| 409 |
+
|
| 410 |
+
def _prompt_index(prompt, n_options):
    """Prompt repeatedly for an integer in [0, n_options) and return it.

    Returns None when the user aborts with Ctrl-C / Ctrl-D (a farewell is
    printed first). Unlike the original inline loops, a non-numeric entry
    re-prompts instead of exiting the whole program, and an out-of-range
    index always reports "Invalid index" (the user-character loop used to
    re-prompt silently).
    """
    while True:
        try:
            raw = input(prompt)
        except (KeyboardInterrupt, EOFError):
            print(f"\n{Colors.YELLOW}👋 Goodbye!{Colors.END}")
            return None
        try:
            idx = int(raw)
        except ValueError:
            print(f"{Colors.RED}Invalid index{Colors.END}")
            continue
        if 0 <= idx < n_options:
            return idx
        print(f"{Colors.RED}Invalid index{Colors.END}")


def main():
    """CLI entry point: parse args, pick scenario and characters (via flags
    or interactively), load the model, run the chat loop, and save the log."""
    parser = argparse.ArgumentParser(description="Interactive Chat Demo for HER Model")
    parser.add_argument("--model-path", type=str, default=".",
                        help="Path to model directory (default: current directory)")
    parser.add_argument("--show-think", action="store_true",
                        help="Show system_thinking")
    parser.add_argument("--show-rolethink", action="store_true",
                        help="Show role_thinking (default: hidden)")
    parser.add_argument("--scenario", type=int, default=None,
                        help="Scenario index (default: interactive selection)")
    parser.add_argument("--character", type=int, default=None,
                        help="Character index (default: interactive selection)")
    parser.add_argument("--simple", action="store_true",
                        help="Use simple built-in scenarios instead of CoSER dataset")

    args = parser.parse_args()

    # Load scenarios
    scenarios = load_sample_scenarios(use_coser=not args.simple)
    print(f"{Colors.GREEN}✅ Loaded {len(scenarios)} scenarios{Colors.END}")

    # Select scenario: from --scenario if given, otherwise interactively.
    if args.scenario is not None:
        if not 0 <= args.scenario < len(scenarios):
            print(f"{Colors.RED}❌ Invalid scenario index{Colors.END}")
            return
        scenario = scenarios[args.scenario]
    else:
        print_scenarios(scenarios)
        idx = _prompt_index(
            f"{Colors.CYAN}Select scenario (0-{len(scenarios)-1}): {Colors.END}",
            len(scenarios))
        if idx is None:
            return
        scenario = scenarios[idx]

    print(f"\n{Colors.GREEN}✅ Selected: {scenario['book']}{Colors.END}")

    # Select the AI-played character (list is always shown, matching the
    # original behavior even when --character is supplied).
    char_names = list(scenario['character_profiles'].keys())
    print_characters(scenario)

    if args.character is not None:
        if not 0 <= args.character < len(char_names):
            print(f"{Colors.RED}❌ Invalid character index{Colors.END}")
            return
        character_name = char_names[args.character]
    else:
        idx = _prompt_index(
            f"{Colors.CYAN}Select AI character (0-{len(char_names)-1}): {Colors.END}",
            len(char_names))
        if idx is None:
            return
        character_name = char_names[idx]

    # Select the character the human plays: one of the remaining cast
    # members, or a custom name via the extra slot at the end of the list.
    remaining_chars = [c for c in char_names if c != character_name]
    if remaining_chars:
        print(f"\n{Colors.CYAN}Who do you want to play?{Colors.END}")
        for i, c in enumerate(remaining_chars):
            print(f" [{i}] {c}")
        print(f" [{len(remaining_chars)}] Custom name")

        idx = _prompt_index(
            f"{Colors.CYAN}Select (0-{len(remaining_chars)}): {Colors.END}",
            len(remaining_chars) + 1)
        if idx is None:
            return
        if idx == len(remaining_chars):
            try:
                user_character = input(f"{Colors.CYAN}Your name: {Colors.END}").strip() or "User"
            except (KeyboardInterrupt, EOFError):
                print(f"\n{Colors.YELLOW}👋 Goodbye!{Colors.END}")
                return
        else:
            user_character = remaining_chars[idx]
    else:
        user_character = "User"

    print(f"\n{Colors.GREEN}✅ AI plays: {character_name}{Colors.END}")
    print(f"{Colors.BLUE}✅ You play: {user_character}{Colors.END}")

    # Load tokenizer + model; bail out cleanly on any failure (missing
    # files, OOM, bad config) rather than crashing with a traceback.
    print(f"\n{Colors.CYAN}🔧 Loading model...{Colors.END}")
    try:
        tokenizer = AutoTokenizer.from_pretrained(args.model_path)
        model = AutoModelForCausalLM.from_pretrained(
            args.model_path,
            torch_dtype="auto",
            device_map="auto"
        )
        print(f"{Colors.GREEN}✅ Model loaded successfully{Colors.END}")
    except Exception as e:
        print(f"{Colors.RED}❌ Model loading failed: {e}{Colors.END}")
        return

    # Run the interactive chat; returns the full message history.
    messages = chat_loop(
        model,
        tokenizer,
        scenario,
        character_name,
        user_character,
        show_think=args.show_think,
        show_rolethink=args.show_rolethink
    )

    # Persist the transcript only when a real exchange happened
    # (> 3 messages — presumably system prompt plus opening turns alone
    # aren't worth saving; TODO confirm against chat_loop's seeding).
    if messages and len(messages) > 3:
        try:
            log_path = save_chat_log(messages, scenario, character_name, user_character)
            print(f"\n{Colors.CYAN}📝 Chat log saved: {log_path}{Colors.END}")
        except Exception as e:
            print(f"\n{Colors.RED}❌ Save failed: {e}{Colors.END}")


if __name__ == "__main__":
    main()
|
chat_demo/coser_scenarios.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{#- ChatML (Qwen3-style) chat template with tool calling and <think> reasoning. -#}
{#- Every tag uses "-" whitespace control, so these comments and the indentation -#}
{#- contribute nothing to the rendered prompt. -#}
{%- if tools %}
    {#- Tools present: system header, optional system message, then tool JSON schemas. -#}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0].role == 'system' %}
        {{- messages[0].content + '\n\n' }}
    {%- endif %}
    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {#- No tools: emit the plain system message, if any. -#}
    {%- if messages[0].role == 'system' %}
        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
    {%- endif %}
{%- endif %}
{#- Walk messages in reverse to find the last "real" user turn — one that is not -#}
{#- a <tool_response> wrapper. Assistant reasoning is only rendered after it. -#}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
    {%- set index = (messages|length - 1) - loop.index0 %}
    {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
        {%- set ns.multi_step_tool = false %}
        {%- set ns.last_query_index = index %}
    {%- endif %}
{%- endfor %}
{%- for message in messages %}
    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
    {%- elif message.role == "assistant" %}
        {#- Use explicit reasoning_content when provided; otherwise recover it -#}
        {#- from an inline <think>...</think> span in the message content. -#}
        {%- set content = message.content %}
        {%- set reasoning_content = '' %}
        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
            {%- set reasoning_content = message.reasoning_content %}
        {%- else %}
            {%- if '</think>' in message.content %}
                {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
                {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
            {%- endif %}
        {%- endif %}
        {%- if loop.index0 > ns.last_query_index %}
            {#- After the last user query: keep the <think> block on the final turn -#}
            {#- (or any turn that actually has reasoning); strip it elsewhere. -#}
            {%- if loop.last or (not loop.last and reasoning_content) %}
                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
            {%- else %}
                {{- '<|im_start|>' + message.role + '\n' + content }}
            {%- endif %}
        {%- else %}
            {#- Turns at or before the last user query are serialized without reasoning. -#}
            {{- '<|im_start|>' + message.role + '\n' + content }}
        {%- endif %}
        {%- if message.tool_calls %}
            {#- Append each tool call as a <tool_call> JSON object. -#}
            {%- for tool_call in message.tool_calls %}
                {%- if (loop.first and content) or (not loop.first) %}
                    {{- '\n' }}
                {%- endif %}
                {%- if tool_call.function %}
                    {%- set tool_call = tool_call.function %}
                {%- endif %}
                {{- '<tool_call>\n{"name": "' }}
                {{- tool_call.name }}
                {{- '", "arguments": ' }}
                {%- if tool_call.arguments is string %}
                    {{- tool_call.arguments }}
                {%- else %}
                    {{- tool_call.arguments | tojson }}
                {%- endif %}
                {{- '}\n</tool_call>' }}
            {%- endfor %}
        {%- endif %}
        {{- '<|im_end|>\n' }}
    {%- elif message.role == "tool" %}
        {#- Consecutive tool results are grouped into a single user turn. -#}
        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
            {{- '<|im_start|>user' }}
        {%- endif %}
        {{- '\n<tool_response>\n' }}
        {{- message.content }}
        {{- '\n</tool_response>' }}
        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
            {{- '<|im_end|>\n' }}
        {%- endif %}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\n' }}
    {#- enable_thinking=false pre-fills an empty think block so the model skips reasoning. -#}
    {%- if enable_thinking is defined and enable_thinking is false %}
        {{- '<think>\n\n</think>\n\n' }}
    {%- endif %}
{%- endif %}
|
config.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"eos_token_id": 151645,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 5120,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 25600,
|
| 14 |
+
"max_position_embeddings": 131072,
|
| 15 |
+
"max_window_layers": 64,
|
| 16 |
+
"model_type": "qwen3",
|
| 17 |
+
"num_attention_heads": 64,
|
| 18 |
+
"num_hidden_layers": 64,
|
| 19 |
+
"num_key_value_heads": 8,
|
| 20 |
+
"pad_token_id": 151643,
|
| 21 |
+
"rms_norm_eps": 1e-06,
|
| 22 |
+
"rope_scaling": {
|
| 23 |
+
"factor": 4.0,
|
| 24 |
+
"original_max_position_embeddings": 32768,
|
| 25 |
+
"rope_type": "yarn"
|
| 26 |
+
},
|
| 27 |
+
"rope_theta": 1000000,
|
| 28 |
+
"sliding_window": null,
|
| 29 |
+
"tie_word_embeddings": false,
|
| 30 |
+
"torch_dtype": "bfloat16",
|
| 31 |
+
"transformers_version": "4.51.3",
|
| 32 |
+
"use_cache": true,
|
| 33 |
+
"use_sliding_window": false,
|
| 34 |
+
"vocab_size": 151936
|
| 35 |
+
}
|
figure2github.png
ADDED
|
Git LFS Details
|
generation_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 151643,
|
| 3 |
+
"do_sample": true,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
151645,
|
| 6 |
+
151643
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 151643,
|
| 9 |
+
"temperature": 0.6,
|
| 10 |
+
"top_k": 20,
|
| 11 |
+
"top_p": 0.95,
|
| 12 |
+
"transformers_version": "4.51.3"
|
| 13 |
+
}
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model-00001-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b97b83b3c8168e902028fd11e4e91a515f1cba31e54a162474b41f23d6aa1de
|
| 3 |
+
size 4932307584
|
model-00002-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9618d50f57aa0dbc3190dc68d3c81fd95f43369c5af8f83d8b4dd2fe05cacf13
|
| 3 |
+
size 4875989696
|
model-00003-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec492359aefacdd63367741ef4aaebbd80aacf9da67fd03358678c8adbea92de
|
| 3 |
+
size 4875989720
|
model-00004-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1ee7bd92f16a732ed3bc85ab353a9567a702ffddf294e73abdf8eb4ebd46ca0
|
| 3 |
+
size 4875989752
|
model-00005-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31c197b948b4c34270e840d73157dbfef91790ea2dcab7fdc5c48aeca109caaf
|
| 3 |
+
size 4875989752
|
model-00006-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9fa51d292f58485d45d30a3f404901629facffccd748586f3158d10e5957216
|
| 3 |
+
size 4875989752
|
model-00007-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0171752afc83cc8d0a2e37b1d8a9ae87c27f2be13c4d5749c51af6f25b63c05
|
| 3 |
+
size 4875989752
|
model-00008-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f73ad5ad74130a569896a8e4077d41f0c46f8472445c553c4574584834f6142
|
| 3 |
+
size 4875989752
|
model-00009-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb42912cbec2bfd2a2823d1c8b3722143809825e4867cba7d0ac9d4139cbc04b
|
| 3 |
+
size 4875989752
|
model-00010-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:564ee9e25502d9a7d572f6b9790c1e0eb920b0f477961394771f0da8471173ad
|
| 3 |
+
size 4875989752
|
model-00011-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d16920215b39cc893503f80d04d3bbf11765512f827dfa3eea7b27e10111688e
|
| 3 |
+
size 4875989752
|
model-00012-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3a95c6f45de6e8aaa36c6dfbb52d68116b46d2d78b88a1e2aab2f904d1d5e00
|
| 3 |
+
size 4875989752
|
model-00013-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5135bf1eff7b55eb56450324649c5e997a7af8344f16dd6d63b00fb1e44fc6af
|
| 3 |
+
size 4875989752
|
model-00014-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f918023d21c5cba43e5991e36f698d447b42033ef03eada7822b1a30d75b276
|
| 3 |
+
size 2080144040
|
model.safetensors.index.json
ADDED
|
@@ -0,0 +1,714 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"metadata": {
|
| 3 |
+
"total_size": 65524246528
|
| 4 |
+
},
|
| 5 |
+
"weight_map": {
|
| 6 |
+
"lm_head.weight": "model-00014-of-00014.safetensors",
|
| 7 |
+
"model.embed_tokens.weight": "model-00001-of-00014.safetensors",
|
| 8 |
+
"model.layers.0.input_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 9 |
+
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
| 10 |
+
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
| 11 |
+
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
| 12 |
+
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 13 |
+
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
| 14 |
+
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
| 15 |
+
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
| 16 |
+
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 17 |
+
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 18 |
+
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
| 19 |
+
"model.layers.1.input_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 20 |
+
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
| 21 |
+
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
| 22 |
+
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
| 23 |
+
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 24 |
+
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
| 25 |
+
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
| 26 |
+
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
| 27 |
+
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 28 |
+
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 29 |
+
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
| 30 |
+
"model.layers.10.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 31 |
+
"model.layers.10.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 32 |
+
"model.layers.10.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
| 33 |
+
"model.layers.10.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 34 |
+
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 35 |
+
"model.layers.10.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
| 36 |
+
"model.layers.10.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 37 |
+
"model.layers.10.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 38 |
+
"model.layers.10.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 39 |
+
"model.layers.10.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 40 |
+
"model.layers.10.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 41 |
+
"model.layers.11.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 42 |
+
"model.layers.11.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 43 |
+
"model.layers.11.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
| 44 |
+
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 45 |
+
"model.layers.11.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 46 |
+
"model.layers.11.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
| 47 |
+
"model.layers.11.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 48 |
+
"model.layers.11.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 49 |
+
"model.layers.11.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 50 |
+
"model.layers.11.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 51 |
+
"model.layers.11.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 52 |
+
"model.layers.12.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 53 |
+
"model.layers.12.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 54 |
+
"model.layers.12.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
| 55 |
+
"model.layers.12.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 56 |
+
"model.layers.12.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 57 |
+
"model.layers.12.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
| 58 |
+
"model.layers.12.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 59 |
+
"model.layers.12.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 60 |
+
"model.layers.12.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 61 |
+
"model.layers.12.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 62 |
+
"model.layers.12.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 63 |
+
"model.layers.13.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 64 |
+
"model.layers.13.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 65 |
+
"model.layers.13.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
| 66 |
+
"model.layers.13.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 67 |
+
"model.layers.13.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 68 |
+
"model.layers.13.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
| 69 |
+
"model.layers.13.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 70 |
+
"model.layers.13.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 71 |
+
"model.layers.13.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 72 |
+
"model.layers.13.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 73 |
+
"model.layers.13.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 74 |
+
"model.layers.14.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 75 |
+
"model.layers.14.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 76 |
+
"model.layers.14.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 77 |
+
"model.layers.14.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 78 |
+
"model.layers.14.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 79 |
+
"model.layers.14.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
| 80 |
+
"model.layers.14.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
| 81 |
+
"model.layers.14.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
| 82 |
+
"model.layers.14.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
| 83 |
+
"model.layers.14.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
| 84 |
+
"model.layers.14.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 85 |
+
"model.layers.15.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 86 |
+
"model.layers.15.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 87 |
+
"model.layers.15.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 88 |
+
"model.layers.15.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 89 |
+
"model.layers.15.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 90 |
+
"model.layers.15.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
| 91 |
+
"model.layers.15.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
| 92 |
+
"model.layers.15.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
| 93 |
+
"model.layers.15.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
| 94 |
+
"model.layers.15.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
| 95 |
+
"model.layers.15.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 96 |
+
"model.layers.16.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 97 |
+
"model.layers.16.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 98 |
+
"model.layers.16.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 99 |
+
"model.layers.16.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 100 |
+
"model.layers.16.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 101 |
+
"model.layers.16.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
| 102 |
+
"model.layers.16.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
| 103 |
+
"model.layers.16.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
| 104 |
+
"model.layers.16.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
| 105 |
+
"model.layers.16.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
| 106 |
+
"model.layers.16.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 107 |
+
"model.layers.17.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 108 |
+
"model.layers.17.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 109 |
+
"model.layers.17.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 110 |
+
"model.layers.17.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 111 |
+
"model.layers.17.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 112 |
+
"model.layers.17.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
| 113 |
+
"model.layers.17.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
| 114 |
+
"model.layers.17.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
| 115 |
+
"model.layers.17.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
| 116 |
+
"model.layers.17.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
| 117 |
+
"model.layers.17.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 118 |
+
"model.layers.18.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 119 |
+
"model.layers.18.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 120 |
+
"model.layers.18.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 121 |
+
"model.layers.18.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
| 122 |
+
"model.layers.18.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 123 |
+
"model.layers.18.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
| 124 |
+
"model.layers.18.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
| 125 |
+
"model.layers.18.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
| 126 |
+
"model.layers.18.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
| 127 |
+
"model.layers.18.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
| 128 |
+
"model.layers.18.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 129 |
+
"model.layers.19.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 130 |
+
"model.layers.19.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 131 |
+
"model.layers.19.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
| 132 |
+
"model.layers.19.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
| 133 |
+
"model.layers.19.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 134 |
+
"model.layers.19.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 135 |
+
"model.layers.19.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 136 |
+
"model.layers.19.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 137 |
+
"model.layers.19.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 138 |
+
"model.layers.19.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 139 |
+
"model.layers.19.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 140 |
+
"model.layers.2.input_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 141 |
+
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
| 142 |
+
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
| 143 |
+
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
| 144 |
+
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 145 |
+
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
| 146 |
+
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
| 147 |
+
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
| 148 |
+
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 149 |
+
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 150 |
+
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
| 151 |
+
"model.layers.20.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 152 |
+
"model.layers.20.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 153 |
+
"model.layers.20.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
| 154 |
+
"model.layers.20.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
| 155 |
+
"model.layers.20.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 156 |
+
"model.layers.20.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 157 |
+
"model.layers.20.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 158 |
+
"model.layers.20.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 159 |
+
"model.layers.20.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 160 |
+
"model.layers.20.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 161 |
+
"model.layers.20.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 162 |
+
"model.layers.21.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 163 |
+
"model.layers.21.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 164 |
+
"model.layers.21.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
| 165 |
+
"model.layers.21.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
| 166 |
+
"model.layers.21.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 167 |
+
"model.layers.21.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 168 |
+
"model.layers.21.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 169 |
+
"model.layers.21.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 170 |
+
"model.layers.21.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 171 |
+
"model.layers.21.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 172 |
+
"model.layers.21.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 173 |
+
"model.layers.22.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 174 |
+
"model.layers.22.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 175 |
+
"model.layers.22.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
| 176 |
+
"model.layers.22.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
| 177 |
+
"model.layers.22.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 178 |
+
"model.layers.22.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 179 |
+
"model.layers.22.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 180 |
+
"model.layers.22.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 181 |
+
"model.layers.22.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 182 |
+
"model.layers.22.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 183 |
+
"model.layers.22.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 184 |
+
"model.layers.23.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 185 |
+
"model.layers.23.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
| 186 |
+
"model.layers.23.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
| 187 |
+
"model.layers.23.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 188 |
+
"model.layers.23.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 189 |
+
"model.layers.23.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 190 |
+
"model.layers.23.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 191 |
+
"model.layers.23.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 192 |
+
"model.layers.23.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 193 |
+
"model.layers.23.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 194 |
+
"model.layers.23.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 195 |
+
"model.layers.24.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 196 |
+
"model.layers.24.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
| 197 |
+
"model.layers.24.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 198 |
+
"model.layers.24.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 199 |
+
"model.layers.24.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 200 |
+
"model.layers.24.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
| 201 |
+
"model.layers.24.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
| 202 |
+
"model.layers.24.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
| 203 |
+
"model.layers.24.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
| 204 |
+
"model.layers.24.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
| 205 |
+
"model.layers.24.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 206 |
+
"model.layers.25.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 207 |
+
"model.layers.25.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
| 208 |
+
"model.layers.25.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 209 |
+
"model.layers.25.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 210 |
+
"model.layers.25.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 211 |
+
"model.layers.25.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
| 212 |
+
"model.layers.25.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
| 213 |
+
"model.layers.25.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
| 214 |
+
"model.layers.25.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
| 215 |
+
"model.layers.25.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
| 216 |
+
"model.layers.25.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 217 |
+
"model.layers.26.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 218 |
+
"model.layers.26.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
| 219 |
+
"model.layers.26.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 220 |
+
"model.layers.26.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 221 |
+
"model.layers.26.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 222 |
+
"model.layers.26.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
| 223 |
+
"model.layers.26.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
| 224 |
+
"model.layers.26.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
| 225 |
+
"model.layers.26.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
| 226 |
+
"model.layers.26.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
| 227 |
+
"model.layers.26.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 228 |
+
"model.layers.27.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 229 |
+
"model.layers.27.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
| 230 |
+
"model.layers.27.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 231 |
+
"model.layers.27.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 232 |
+
"model.layers.27.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 233 |
+
"model.layers.27.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
| 234 |
+
"model.layers.27.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
| 235 |
+
"model.layers.27.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
| 236 |
+
"model.layers.27.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
| 237 |
+
"model.layers.27.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
| 238 |
+
"model.layers.27.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 239 |
+
"model.layers.28.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 240 |
+
"model.layers.28.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
| 241 |
+
"model.layers.28.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 242 |
+
"model.layers.28.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 243 |
+
"model.layers.28.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 244 |
+
"model.layers.28.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
| 245 |
+
"model.layers.28.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
| 246 |
+
"model.layers.28.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
| 247 |
+
"model.layers.28.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
| 248 |
+
"model.layers.28.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
| 249 |
+
"model.layers.28.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 250 |
+
"model.layers.29.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 251 |
+
"model.layers.29.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
| 252 |
+
"model.layers.29.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 253 |
+
"model.layers.29.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 254 |
+
"model.layers.29.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 255 |
+
"model.layers.29.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
| 256 |
+
"model.layers.29.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
| 257 |
+
"model.layers.29.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 258 |
+
"model.layers.29.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 259 |
+
"model.layers.29.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
| 260 |
+
"model.layers.29.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 261 |
+
"model.layers.3.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 262 |
+
"model.layers.3.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 263 |
+
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
| 264 |
+
"model.layers.3.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 265 |
+
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 266 |
+
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
| 267 |
+
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
| 268 |
+
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
| 269 |
+
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 270 |
+
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 271 |
+
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
| 272 |
+
"model.layers.30.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 273 |
+
"model.layers.30.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
| 274 |
+
"model.layers.30.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 275 |
+
"model.layers.30.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 276 |
+
"model.layers.30.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 277 |
+
"model.layers.30.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
| 278 |
+
"model.layers.30.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
| 279 |
+
"model.layers.30.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 280 |
+
"model.layers.30.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 281 |
+
"model.layers.30.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
| 282 |
+
"model.layers.30.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 283 |
+
"model.layers.31.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 284 |
+
"model.layers.31.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
| 285 |
+
"model.layers.31.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 286 |
+
"model.layers.31.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 287 |
+
"model.layers.31.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 288 |
+
"model.layers.31.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
| 289 |
+
"model.layers.31.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
| 290 |
+
"model.layers.31.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 291 |
+
"model.layers.31.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 292 |
+
"model.layers.31.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
| 293 |
+
"model.layers.31.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 294 |
+
"model.layers.32.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 295 |
+
"model.layers.32.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
| 296 |
+
"model.layers.32.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 297 |
+
"model.layers.32.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 298 |
+
"model.layers.32.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 299 |
+
"model.layers.32.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
| 300 |
+
"model.layers.32.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
| 301 |
+
"model.layers.32.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 302 |
+
"model.layers.32.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 303 |
+
"model.layers.32.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
| 304 |
+
"model.layers.32.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 305 |
+
"model.layers.33.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 306 |
+
"model.layers.33.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
| 307 |
+
"model.layers.33.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 308 |
+
"model.layers.33.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
| 309 |
+
"model.layers.33.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 310 |
+
"model.layers.33.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
| 311 |
+
"model.layers.33.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
| 312 |
+
"model.layers.33.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 313 |
+
"model.layers.33.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 314 |
+
"model.layers.33.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
| 315 |
+
"model.layers.33.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 316 |
+
"model.layers.34.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 317 |
+
"model.layers.34.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
| 318 |
+
"model.layers.34.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 319 |
+
"model.layers.34.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
| 320 |
+
"model.layers.34.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 321 |
+
"model.layers.34.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 322 |
+
"model.layers.34.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 323 |
+
"model.layers.34.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 324 |
+
"model.layers.34.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 325 |
+
"model.layers.34.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
| 326 |
+
"model.layers.34.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
| 327 |
+
"model.layers.35.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 328 |
+
"model.layers.35.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
| 329 |
+
"model.layers.35.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 330 |
+
"model.layers.35.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
| 331 |
+
"model.layers.35.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 332 |
+
"model.layers.35.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 333 |
+
"model.layers.35.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 334 |
+
"model.layers.35.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 335 |
+
"model.layers.35.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 336 |
+
"model.layers.35.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
| 337 |
+
"model.layers.35.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
| 338 |
+
"model.layers.36.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 339 |
+
"model.layers.36.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
| 340 |
+
"model.layers.36.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 341 |
+
"model.layers.36.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
| 342 |
+
"model.layers.36.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 343 |
+
"model.layers.36.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 344 |
+
"model.layers.36.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 345 |
+
"model.layers.36.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 346 |
+
"model.layers.36.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 347 |
+
"model.layers.36.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
| 348 |
+
"model.layers.36.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
| 349 |
+
"model.layers.37.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 350 |
+
"model.layers.37.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
| 351 |
+
"model.layers.37.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 352 |
+
"model.layers.37.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
| 353 |
+
"model.layers.37.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 354 |
+
"model.layers.37.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 355 |
+
"model.layers.37.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 356 |
+
"model.layers.37.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 357 |
+
"model.layers.37.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 358 |
+
"model.layers.37.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
| 359 |
+
"model.layers.37.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
| 360 |
+
"model.layers.38.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 361 |
+
"model.layers.38.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 362 |
+
"model.layers.38.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 363 |
+
"model.layers.38.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
| 364 |
+
"model.layers.38.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 365 |
+
"model.layers.38.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 366 |
+
"model.layers.38.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 367 |
+
"model.layers.38.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 368 |
+
"model.layers.38.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 369 |
+
"model.layers.38.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
| 370 |
+
"model.layers.38.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
| 371 |
+
"model.layers.39.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 372 |
+
"model.layers.39.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 373 |
+
"model.layers.39.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
| 374 |
+
"model.layers.39.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
| 375 |
+
"model.layers.39.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 376 |
+
"model.layers.39.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 377 |
+
"model.layers.39.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 378 |
+
"model.layers.39.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 379 |
+
"model.layers.39.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
| 380 |
+
"model.layers.39.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
| 381 |
+
"model.layers.39.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 382 |
+
"model.layers.4.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 383 |
+
"model.layers.4.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 384 |
+
"model.layers.4.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 385 |
+
"model.layers.4.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 386 |
+
"model.layers.4.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 387 |
+
"model.layers.4.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 388 |
+
"model.layers.4.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 389 |
+
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 390 |
+
"model.layers.4.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
| 391 |
+
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
| 392 |
+
"model.layers.4.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
| 393 |
+
"model.layers.40.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 394 |
+
"model.layers.40.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 395 |
+
"model.layers.40.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
| 396 |
+
"model.layers.40.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
| 397 |
+
"model.layers.40.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 398 |
+
"model.layers.40.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 399 |
+
"model.layers.40.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 400 |
+
"model.layers.40.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 401 |
+
"model.layers.40.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
| 402 |
+
"model.layers.40.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
| 403 |
+
"model.layers.40.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 404 |
+
"model.layers.41.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 405 |
+
"model.layers.41.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 406 |
+
"model.layers.41.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
| 407 |
+
"model.layers.41.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
| 408 |
+
"model.layers.41.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 409 |
+
"model.layers.41.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 410 |
+
"model.layers.41.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 411 |
+
"model.layers.41.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 412 |
+
"model.layers.41.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
| 413 |
+
"model.layers.41.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
| 414 |
+
"model.layers.41.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 415 |
+
"model.layers.42.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 416 |
+
"model.layers.42.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 417 |
+
"model.layers.42.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
| 418 |
+
"model.layers.42.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
| 419 |
+
"model.layers.42.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 420 |
+
"model.layers.42.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 421 |
+
"model.layers.42.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 422 |
+
"model.layers.42.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 423 |
+
"model.layers.42.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
| 424 |
+
"model.layers.42.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
| 425 |
+
"model.layers.42.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 426 |
+
"model.layers.43.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 427 |
+
"model.layers.43.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
| 428 |
+
"model.layers.43.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
| 429 |
+
"model.layers.43.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
| 430 |
+
"model.layers.43.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 431 |
+
"model.layers.43.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 432 |
+
"model.layers.43.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 433 |
+
"model.layers.43.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 434 |
+
"model.layers.43.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
| 435 |
+
"model.layers.43.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
| 436 |
+
"model.layers.43.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 437 |
+
"model.layers.44.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 438 |
+
"model.layers.44.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
| 439 |
+
"model.layers.44.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
| 440 |
+
"model.layers.44.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
| 441 |
+
"model.layers.44.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 442 |
+
"model.layers.44.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
| 443 |
+
"model.layers.44.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
| 444 |
+
"model.layers.44.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
| 445 |
+
"model.layers.44.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
| 446 |
+
"model.layers.44.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
| 447 |
+
"model.layers.44.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
| 448 |
+
"model.layers.45.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 449 |
+
"model.layers.45.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
| 450 |
+
"model.layers.45.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
| 451 |
+
"model.layers.45.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
| 452 |
+
"model.layers.45.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 453 |
+
"model.layers.45.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
| 454 |
+
"model.layers.45.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
| 455 |
+
"model.layers.45.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
| 456 |
+
"model.layers.45.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
| 457 |
+
"model.layers.45.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
| 458 |
+
"model.layers.45.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
| 459 |
+
"model.layers.46.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 460 |
+
"model.layers.46.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
| 461 |
+
"model.layers.46.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
| 462 |
+
"model.layers.46.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
| 463 |
+
"model.layers.46.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 464 |
+
"model.layers.46.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
| 465 |
+
"model.layers.46.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
| 466 |
+
"model.layers.46.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
| 467 |
+
"model.layers.46.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
| 468 |
+
"model.layers.46.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
| 469 |
+
"model.layers.46.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
| 470 |
+
"model.layers.47.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 471 |
+
"model.layers.47.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
| 472 |
+
"model.layers.47.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
| 473 |
+
"model.layers.47.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
| 474 |
+
"model.layers.47.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 475 |
+
"model.layers.47.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
| 476 |
+
"model.layers.47.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
| 477 |
+
"model.layers.47.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
| 478 |
+
"model.layers.47.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
| 479 |
+
"model.layers.47.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
| 480 |
+
"model.layers.47.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
| 481 |
+
"model.layers.48.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 482 |
+
"model.layers.48.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
| 483 |
+
"model.layers.48.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
| 484 |
+
"model.layers.48.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 485 |
+
"model.layers.48.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 486 |
+
"model.layers.48.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
| 487 |
+
"model.layers.48.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
| 488 |
+
"model.layers.48.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
| 489 |
+
"model.layers.48.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
| 490 |
+
"model.layers.48.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
| 491 |
+
"model.layers.48.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
| 492 |
+
"model.layers.49.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 493 |
+
"model.layers.49.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
| 494 |
+
"model.layers.49.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
| 495 |
+
"model.layers.49.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 496 |
+
"model.layers.49.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 497 |
+
"model.layers.49.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 498 |
+
"model.layers.49.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
| 499 |
+
"model.layers.49.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 500 |
+
"model.layers.49.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
| 501 |
+
"model.layers.49.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 502 |
+
"model.layers.49.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
| 503 |
+
"model.layers.5.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 504 |
+
"model.layers.5.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 505 |
+
"model.layers.5.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 506 |
+
"model.layers.5.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 507 |
+
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 508 |
+
"model.layers.5.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 509 |
+
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 510 |
+
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 511 |
+
"model.layers.5.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
| 512 |
+
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
| 513 |
+
"model.layers.5.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
| 514 |
+
"model.layers.50.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 515 |
+
"model.layers.50.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
| 516 |
+
"model.layers.50.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
| 517 |
+
"model.layers.50.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 518 |
+
"model.layers.50.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 519 |
+
"model.layers.50.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 520 |
+
"model.layers.50.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
| 521 |
+
"model.layers.50.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 522 |
+
"model.layers.50.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
| 523 |
+
"model.layers.50.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 524 |
+
"model.layers.50.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
| 525 |
+
"model.layers.51.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 526 |
+
"model.layers.51.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
| 527 |
+
"model.layers.51.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
| 528 |
+
"model.layers.51.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 529 |
+
"model.layers.51.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 530 |
+
"model.layers.51.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 531 |
+
"model.layers.51.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
| 532 |
+
"model.layers.51.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 533 |
+
"model.layers.51.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
| 534 |
+
"model.layers.51.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 535 |
+
"model.layers.51.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
| 536 |
+
"model.layers.52.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 537 |
+
"model.layers.52.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
| 538 |
+
"model.layers.52.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
| 539 |
+
"model.layers.52.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 540 |
+
"model.layers.52.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 541 |
+
"model.layers.52.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 542 |
+
"model.layers.52.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
| 543 |
+
"model.layers.52.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 544 |
+
"model.layers.52.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
| 545 |
+
"model.layers.52.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 546 |
+
"model.layers.52.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
| 547 |
+
"model.layers.53.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 548 |
+
"model.layers.53.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 549 |
+
"model.layers.53.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
| 550 |
+
"model.layers.53.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 551 |
+
"model.layers.53.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 552 |
+
"model.layers.53.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 553 |
+
"model.layers.53.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
| 554 |
+
"model.layers.53.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 555 |
+
"model.layers.53.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
| 556 |
+
"model.layers.53.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 557 |
+
"model.layers.53.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
| 558 |
+
"model.layers.54.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 559 |
+
"model.layers.54.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 560 |
+
"model.layers.54.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
| 561 |
+
"model.layers.54.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 562 |
+
"model.layers.54.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 563 |
+
"model.layers.54.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 564 |
+
"model.layers.54.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 565 |
+
"model.layers.54.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 566 |
+
"model.layers.54.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
| 567 |
+
"model.layers.54.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
| 568 |
+
"model.layers.54.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
| 569 |
+
"model.layers.55.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 570 |
+
"model.layers.55.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 571 |
+
"model.layers.55.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
| 572 |
+
"model.layers.55.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 573 |
+
"model.layers.55.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 574 |
+
"model.layers.55.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 575 |
+
"model.layers.55.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 576 |
+
"model.layers.55.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 577 |
+
"model.layers.55.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
| 578 |
+
"model.layers.55.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
| 579 |
+
"model.layers.55.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
| 580 |
+
"model.layers.56.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 581 |
+
"model.layers.56.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 582 |
+
"model.layers.56.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
| 583 |
+
"model.layers.56.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 584 |
+
"model.layers.56.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 585 |
+
"model.layers.56.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 586 |
+
"model.layers.56.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 587 |
+
"model.layers.56.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 588 |
+
"model.layers.56.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
| 589 |
+
"model.layers.56.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
| 590 |
+
"model.layers.56.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
| 591 |
+
"model.layers.57.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 592 |
+
"model.layers.57.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 593 |
+
"model.layers.57.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
| 594 |
+
"model.layers.57.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 595 |
+
"model.layers.57.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 596 |
+
"model.layers.57.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 597 |
+
"model.layers.57.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 598 |
+
"model.layers.57.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 599 |
+
"model.layers.57.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
| 600 |
+
"model.layers.57.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
| 601 |
+
"model.layers.57.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
| 602 |
+
"model.layers.58.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 603 |
+
"model.layers.58.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
| 604 |
+
"model.layers.58.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
| 605 |
+
"model.layers.58.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 606 |
+
"model.layers.58.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 607 |
+
"model.layers.58.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 608 |
+
"model.layers.58.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 609 |
+
"model.layers.58.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 610 |
+
"model.layers.58.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
| 611 |
+
"model.layers.58.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
| 612 |
+
"model.layers.58.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
| 613 |
+
"model.layers.59.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 614 |
+
"model.layers.59.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
| 615 |
+
"model.layers.59.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 616 |
+
"model.layers.59.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 617 |
+
"model.layers.59.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 618 |
+
"model.layers.59.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 619 |
+
"model.layers.59.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
| 620 |
+
"model.layers.59.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 621 |
+
"model.layers.59.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 622 |
+
"model.layers.59.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 623 |
+
"model.layers.59.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 624 |
+
"model.layers.6.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 625 |
+
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 626 |
+
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 627 |
+
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 628 |
+
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 629 |
+
"model.layers.6.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 630 |
+
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 631 |
+
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 632 |
+
"model.layers.6.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
| 633 |
+
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
| 634 |
+
"model.layers.6.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
| 635 |
+
"model.layers.60.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 636 |
+
"model.layers.60.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
| 637 |
+
"model.layers.60.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 638 |
+
"model.layers.60.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 639 |
+
"model.layers.60.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 640 |
+
"model.layers.60.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 641 |
+
"model.layers.60.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
| 642 |
+
"model.layers.60.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 643 |
+
"model.layers.60.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 644 |
+
"model.layers.60.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 645 |
+
"model.layers.60.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 646 |
+
"model.layers.61.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 647 |
+
"model.layers.61.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
| 648 |
+
"model.layers.61.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 649 |
+
"model.layers.61.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 650 |
+
"model.layers.61.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 651 |
+
"model.layers.61.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 652 |
+
"model.layers.61.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
| 653 |
+
"model.layers.61.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 654 |
+
"model.layers.61.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 655 |
+
"model.layers.61.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 656 |
+
"model.layers.61.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 657 |
+
"model.layers.62.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 658 |
+
"model.layers.62.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
| 659 |
+
"model.layers.62.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 660 |
+
"model.layers.62.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 661 |
+
"model.layers.62.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 662 |
+
"model.layers.62.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 663 |
+
"model.layers.62.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
| 664 |
+
"model.layers.62.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 665 |
+
"model.layers.62.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 666 |
+
"model.layers.62.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 667 |
+
"model.layers.62.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 668 |
+
"model.layers.63.input_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 669 |
+
"model.layers.63.mlp.down_proj.weight": "model-00014-of-00014.safetensors",
|
| 670 |
+
"model.layers.63.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 671 |
+
"model.layers.63.mlp.up_proj.weight": "model-00014-of-00014.safetensors",
|
| 672 |
+
"model.layers.63.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 673 |
+
"model.layers.63.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 674 |
+
"model.layers.63.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
| 675 |
+
"model.layers.63.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 676 |
+
"model.layers.63.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 677 |
+
"model.layers.63.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 678 |
+
"model.layers.63.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 679 |
+
"model.layers.7.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 680 |
+
"model.layers.7.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 681 |
+
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 682 |
+
"model.layers.7.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 683 |
+
"model.layers.7.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 684 |
+
"model.layers.7.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 685 |
+
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 686 |
+
"model.layers.7.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 687 |
+
"model.layers.7.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
| 688 |
+
"model.layers.7.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
| 689 |
+
"model.layers.7.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
| 690 |
+
"model.layers.8.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 691 |
+
"model.layers.8.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 692 |
+
"model.layers.8.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 693 |
+
"model.layers.8.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 694 |
+
"model.layers.8.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 695 |
+
"model.layers.8.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 696 |
+
"model.layers.8.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 697 |
+
"model.layers.8.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 698 |
+
"model.layers.8.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
| 699 |
+
"model.layers.8.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
| 700 |
+
"model.layers.8.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
| 701 |
+
"model.layers.9.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 702 |
+
"model.layers.9.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 703 |
+
"model.layers.9.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
| 704 |
+
"model.layers.9.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 705 |
+
"model.layers.9.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 706 |
+
"model.layers.9.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
| 707 |
+
"model.layers.9.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 708 |
+
"model.layers.9.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 709 |
+
"model.layers.9.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 710 |
+
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 711 |
+
"model.layers.9.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 712 |
+
"model.norm.weight": "model-00014-of-00014.safetensors"
|
| 713 |
+
}
|
| 714 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
},
|
| 181 |
+
"151665": {
|
| 182 |
+
"content": "<tool_response>",
|
| 183 |
+
"lstrip": false,
|
| 184 |
+
"normalized": false,
|
| 185 |
+
"rstrip": false,
|
| 186 |
+
"single_word": false,
|
| 187 |
+
"special": false
|
| 188 |
+
},
|
| 189 |
+
"151666": {
|
| 190 |
+
"content": "</tool_response>",
|
| 191 |
+
"lstrip": false,
|
| 192 |
+
"normalized": false,
|
| 193 |
+
"rstrip": false,
|
| 194 |
+
"single_word": false,
|
| 195 |
+
"special": false
|
| 196 |
+
},
|
| 197 |
+
"151667": {
|
| 198 |
+
"content": "<think>",
|
| 199 |
+
"lstrip": false,
|
| 200 |
+
"normalized": false,
|
| 201 |
+
"rstrip": false,
|
| 202 |
+
"single_word": false,
|
| 203 |
+
"special": false
|
| 204 |
+
},
|
| 205 |
+
"151668": {
|
| 206 |
+
"content": "</think>",
|
| 207 |
+
"lstrip": false,
|
| 208 |
+
"normalized": false,
|
| 209 |
+
"rstrip": false,
|
| 210 |
+
"single_word": false,
|
| 211 |
+
"special": false
|
| 212 |
+
}
|
| 213 |
+
},
|
| 214 |
+
"additional_special_tokens": [
|
| 215 |
+
"<|im_start|>",
|
| 216 |
+
"<|im_end|>",
|
| 217 |
+
"<|object_ref_start|>",
|
| 218 |
+
"<|object_ref_end|>",
|
| 219 |
+
"<|box_start|>",
|
| 220 |
+
"<|box_end|>",
|
| 221 |
+
"<|quad_start|>",
|
| 222 |
+
"<|quad_end|>",
|
| 223 |
+
"<|vision_start|>",
|
| 224 |
+
"<|vision_end|>",
|
| 225 |
+
"<|vision_pad|>",
|
| 226 |
+
"<|image_pad|>",
|
| 227 |
+
"<|video_pad|>"
|
| 228 |
+
],
|
| 229 |
+
"bos_token": null,
|
| 230 |
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in message.content %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {%- set reasoning_content = 
message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
|
| 231 |
+
"clean_up_tokenization_spaces": false,
|
| 232 |
+
"eos_token": "<|im_end|>",
|
| 233 |
+
"errors": "replace",
|
| 234 |
+
"extra_special_tokens": {},
|
| 235 |
+
"model_max_length": 131072,
|
| 236 |
+
"pad_token": "<|endoftext|>",
|
| 237 |
+
"split_special_tokens": false,
|
| 238 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 239 |
+
"unk_token": null
|
| 240 |
+
}
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|