YummyYum commited on
Commit
76f2ff4
·
verified ·
1 Parent(s): 7fe4f17

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -8,6 +8,7 @@
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
11
  *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
@@ -25,11 +26,34 @@
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.lz4 filter=lfs diff=lfs merge=lfs -text
12
  *.mlmodel filter=lfs diff=lfs merge=lfs -text
13
  *.model filter=lfs diff=lfs merge=lfs -text
14
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
26
  *.safetensors filter=lfs diff=lfs merge=lfs -text
27
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
28
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *.jsonl filter=lfs diff=lfs merge=lfs -text
36
  *tfevents* filter=lfs diff=lfs merge=lfs -text
37
+ # Audio files - uncompressed
38
+ *.pcm filter=lfs diff=lfs merge=lfs -text
39
+ *.sam filter=lfs diff=lfs merge=lfs -text
40
+ *.raw filter=lfs diff=lfs merge=lfs -text
41
+ # Audio files - compressed
42
+ *.aac filter=lfs diff=lfs merge=lfs -text
43
+ *.flac filter=lfs diff=lfs merge=lfs -text
44
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
45
+ *.ogg filter=lfs diff=lfs merge=lfs -text
46
+ *.wav filter=lfs diff=lfs merge=lfs -text
47
+ # Image files - uncompressed
48
+ *.bmp filter=lfs diff=lfs merge=lfs -text
49
+ *.gif filter=lfs diff=lfs merge=lfs -text
50
+ *.png filter=lfs diff=lfs merge=lfs -text
51
+ *.tiff filter=lfs diff=lfs merge=lfs -text
52
+ # JSON files - uncompressed
53
+ *.json filter=lfs diff=lfs merge=lfs -text
54
+ *.log
55
+ # Image files - compressed
56
+ *.jpg filter=lfs diff=lfs merge=lfs -text
57
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
58
+ *.webp filter=lfs diff=lfs merge=lfs -text
59
+ *.gguf filter=lfs diff=lfs merge=lfs -text
CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Microsoft Open Source Code of Conduct
2
+
3
+ This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
4
+
5
+ Resources:
6
+
7
+ - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
8
+ - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
9
+ - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
LICENSE ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Microsoft.
2
+ Copyright (c) Microsoft Corporation.
3
+
4
+ MIT License
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Introduction
2
+
3
+ **FlagOS** is a unified heterogeneous computing software stack for large models, co-developed with leading global chip manufacturers. With core technologies such as the **FlagScale** distributed training/inference framework, **FlagGems** universal operator library, **FlagCX** communication library, and **FlagTree** unified compiler, the **FlagRelease** platform leverages the FlagOS stack to automatically produce and release various combinations of <chip + open-source model>. This enables efficient and automated model migration across diverse chips, opening a new chapter for large model deployment and application.
4
+
5
+ Based on this, the **phi-4-hygon-FlagOS** model is adapted for the Hygon chip using the FlagOS software stack, enabling:
6
+
7
+ ### Integrated Deployment
8
+
9
+ - Deep integration with the open-source [FlagScale framework](https://github.com/FlagOpen/FlagScale)
10
+ - Out-of-the-box inference scripts with pre-configured hardware and software parameters
11
+ - Released **FlagOS** container image supporting deployment within minutes
12
+
13
+ ### Consistency Validation
14
+
15
+ - Rigorously evaluated through benchmark testing: Performance and results from the FlagOS software stack are compared against native stacks on multiple public benchmarks.
16
+
17
+ # Technical Overview
18
+
19
+ ## **FlagScale Distributed Training and Inference Framework**
20
+
21
+ FlagScale is an end-to-end framework for large models across heterogeneous computing resources, maximizing computational efficiency and ensuring model validity through core technologies. Its key advantages include:
22
+
23
+ - **Unified Deployment Interface:** Standardized command-line tools support one-click service deployment across multiple hardware platforms, significantly reducing adaptation costs in heterogeneous environments.
24
+ - **Intelligent Parallel Optimization:** Automatically generates optimal distributed parallel strategies based on chip computing characteristics, achieving dynamic load balancing of computation/communication resources.
25
+ - **Seamless Operator Switching:** Deep integration with the FlagGems operator library allows high-performance operators to be invoked via environment variables without modifying model code.
26
+
27
+ ## **FlagGems Universal Large-Model Operator Library**
28
+
29
+ FlagGems is a Triton-based, cross-architecture operator library collaboratively developed with industry partners. Its core strengths include:
30
+
31
+ - **Full-stack Coverage**: Over 100 operators, with a broader range of operator types than competing libraries.
32
+ - **Ecosystem Compatibility**: Supports 7 accelerator backends. Ongoing optimizations have significantly improved performance.
33
+ - **High Efficiency**: Employs unique code generation and runtime optimization techniques for faster secondary development and better runtime performance compared to alternatives.
34
+
35
+ ## **FlagEval Evaluation Framework**
36
+
37
+ **FlagEval (Libra)** is a comprehensive evaluation system and open platform for large models launched in 2023. It aims to establish scientific, fair, and open benchmarks, methodologies, and tools to help researchers assess model and training algorithm performance. It features:
38
+ - **Multi-dimensional Evaluation**: Supports 800+ model evaluations across NLP, CV, Audio, and Multimodal fields, covering 20+ downstream tasks including language understanding and image-text generation.
39
+ - **Industry-Grade Use Cases**: Has completed horizontal evaluations of mainstream large models, providing authoritative benchmarks for chip-model performance validation.
40
+
41
+ # Evaluation Results
42
+
43
+ ## Benchmark Result
44
+
45
+ | Metrics | phi-4-H100-CUDA | phi-4-hygon-FlagOS |
46
+ | ------------------------- | --------------------- | ------------------ |
47
+ |AIME-0shot@avg1|0.200|0.200|
48
+ |GPQA-0shot@avg1|0.241|0.225|
49
+ |MMLU-5shots@avg1|0.713|0.714|
50
+ |MUSR-0shot@avg1|0.594|0.574|
51
+ |LiveBench-0shot@avg1|0.431|0.422|
52
+
53
+ # User Guide
54
+
55
+ **Environment Setup**
56
+
57
+ | Item | Version |
58
+ | ------------- | ------------------------------------------------------------ |
59
+ | Docker Version | Docker version 24.0.6, build ed223bc |
60
+ | Operating System | Ubuntu 22.04.4 LTS |
61
+ | FlagScale | Version: 0.8.0 |
62
+ | FlagGems | Version: 3.0 |
63
+
64
+ ## Operation Steps
65
+
66
+ ### Download Open-source Model Weights
67
+
68
+ ```bash
69
+ pip install modelscope
70
+ modelscope download --model LLM-Research/phi-4 --local_dir /share/phi-4
71
+
72
+ ```
73
+
74
+ ### Download FlagOS Image
75
+
76
+ **Note:** It has not yet been decided whether Hygon's FlagOS image will be publicly accessible over the internet. To obtain this image, contact us or Hygon by opening an issue.
77
+ ```bash
78
+ docker pull harbor.baai.ac.cn/flagrelease-inner/flagrelease_hygon_phi
79
+ ```
80
+
81
+ ### Start the inference service
82
+
83
+ ```bash
84
+ #Container Startup
85
+
86
+ docker run -it \
87
+ --name=flagos \
88
+ --network=host \
89
+ --privileged \
90
+ --ipc=host \
91
+ --shm-size=16G \
92
+ --memory="512g" \
93
+ --ulimit stack=-1:-1 \
94
+ --ulimit memlock=-1:-1 \
95
+ --cap-add=SYS_PTRACE \
96
+ --security-opt seccomp=unconfined \
97
+ --device=/dev/kfd \
98
+ --device=/dev/dri \
99
+ --group-add video \
100
+ -u root \
101
+ -v /opt/hyhal:/opt/hyhal \
102
+ -v /share:/share \
103
+ harbor.baai.ac.cn/flagrelease-inner/flagrelease_hygon_phi \
104
+ /bin/bash
105
+ ```
106
+
107
+ ### Serve
108
+
109
+ ```bash
110
+ flagscale serve phi_4
111
+
112
+ ```
113
+
114
+
115
+ ## Service Invocation
116
+
117
+ ### API-based Invocation Script
118
+
119
+ ```python
120
+ import openai
121
+ openai.api_key = "EMPTY"
122
+ openai.base_url = "http://<server_ip>:9010/v1/"
123
+ model = "phi-4-hygon-flagos"
124
+ messages = [
125
+ {"role": "system", "content": "You are a helpful assistant."},
126
+ {"role": "user", "content": "What's the weather like today?"}
127
+ ]
128
+ response = openai.chat.completions.create(
129
+ model=model,
130
+ messages=messages,
131
+ stream=False,
132
+ )
133
+ for item in response:
134
+ print(item)
135
+
136
+ ```
137
+
138
+ ### AnythingLLM Integration Guide
139
+
140
+ #### 1. Download & Install
141
+
142
+ - Visit the official site: https://anythingllm.com/
143
+ - Choose the appropriate version for your OS (Windows/macOS/Linux)
144
+ - Follow the installation wizard to complete the setup
145
+
146
+ #### 2. Configuration
147
+
148
+ - Launch AnythingLLM
149
+ - Open settings (bottom left, fourth tab)
150
+ - Configure core LLM parameters
151
+ - Click "Save Settings" to apply changes
152
+
153
+ #### 3. Model Interaction
154
+
155
+ - After model loading is complete:
156
+ - Click **"New Conversation"**
157
+ - Enter your question (e.g., “Explain the basics of quantum computing”)
158
+ - Click the send button to get a response
159
+
160
+ # Contributing
161
+
162
+ We warmly welcome global developers to join us:
163
+
164
+ 1. Submit Issues to report problems
165
+ 2. Create Pull Requests to contribute code
166
+ 3. Improve technical documentation
167
+ 4. Expand hardware adaptation support
168
+
169
+
170
+ # License
171
+
172
+ 本模型的权重来源于LLM-Research/phi-4,以apache2.0协议https://www.apache.org/licenses/LICENSE-2.0.txt开源。
173
+
174
+
SECURITY.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- BEGIN MICROSOFT SECURITY.MD V0.0.9 BLOCK -->
2
+
3
+ ## Security
4
+
5
+ Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin).
6
+
7
+ If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.
8
+
9
+ ## Reporting Security Issues
10
+
11
+ **Please do not report security vulnerabilities through public GitHub issues.**
12
+
13
+ Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report).
14
+
15
+ If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp).
16
+
17
+ You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
18
+
19
+ Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20
+
21
+ * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22
+ * Full paths of source file(s) related to the manifestation of the issue
23
+ * The location of the affected source code (tag/branch/commit or direct URL)
24
+ * Any special configuration required to reproduce the issue
25
+ * Step-by-step instructions to reproduce the issue
26
+ * Proof-of-concept or exploit code (if possible)
27
+ * Impact of the issue, including how an attacker might exploit the issue
28
+
29
+ This information will help us triage your report more quickly.
30
+
31
+ If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs.
32
+
33
+ ## Preferred Languages
34
+
35
+ We prefer all communications to be in English.
36
+
37
+ ## Policy
38
+
39
+ Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd).
40
+
41
+ <!-- END MICROSOFT SECURITY.MD BLOCK -->
added_tokens.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|dummy_0|>": 100256,
3
+ "<|endoftext|>": 100257,
4
+ "<|fim_prefix|>": 100258,
5
+ "<|fim_middle|>": 100259,
6
+ "<|fim_suffix|>": 100260,
7
+ "<|dummy_1|>": 100261,
8
+ "<|dummy_2|>": 100262,
9
+ "<|dummy_3|>": 100263,
10
+ "<|im_start|>": 100264,
11
+ "<|im_end|>": 100265,
12
+ "<|im_sep|>": 100266,
13
+ "<|dummy_4|>": 100267,
14
+ "<|dummy_5|>": 100268,
15
+ "<|dummy_6|>": 100269,
16
+ "<|dummy_7|>": 100270,
17
+ "<|dummy_8|>": 100271,
18
+ "<|dummy_9|>": 100272,
19
+ "<|dummy_10|>": 100273,
20
+ "<|dummy_11|>": 100274,
21
+ "<|dummy_12|>": 100275,
22
+ "<|endofprompt|>": 100276,
23
+ "<|dummy_13|>": 100277,
24
+ "<|dummy_14|>": 100278,
25
+ "<|dummy_15|>": 100279,
26
+ "<|dummy_16|>": 100280,
27
+ "<|dummy_17|>": 100281,
28
+ "<|dummy_18|>": 100282,
29
+ "<|dummy_19|>": 100283,
30
+ "<|dummy_20|>": 100284,
31
+ "<|dummy_21|>": 100285,
32
+ "<|dummy_22|>": 100286,
33
+ "<|dummy_23|>": 100287,
34
+ "<|dummy_24|>": 100288,
35
+ "<|dummy_25|>": 100289,
36
+ "<|dummy_26|>": 100290,
37
+ "<|dummy_27|>": 100291,
38
+ "<|dummy_28|>": 100292,
39
+ "<|dummy_29|>": 100293,
40
+ "<|dummy_30|>": 100294,
41
+ "<|dummy_31|>": 100295,
42
+ "<|dummy_32|>": 100296,
43
+ "<|dummy_33|>": 100297,
44
+ "<|dummy_34|>": 100298,
45
+ "<|dummy_35|>": 100299,
46
+ "<|dummy_36|>": 100300,
47
+ "<|dummy_37|>": 100301,
48
+ "<|dummy_38|>": 100302,
49
+ "<|dummy_39|>": 100303,
50
+ "<|dummy_40|>": 100304,
51
+ "<|dummy_41|>": 100305,
52
+ "<|dummy_42|>": 100306,
53
+ "<|dummy_43|>": 100307,
54
+ "<|dummy_44|>": 100308,
55
+ "<|dummy_45|>": 100309,
56
+ "<|dummy_46|>": 100310,
57
+ "<|dummy_47|>": 100311,
58
+ "<|dummy_48|>": 100312,
59
+ "<|dummy_49|>": 100313,
60
+ "<|dummy_50|>": 100314,
61
+ "<|dummy_51|>": 100315,
62
+ "<|dummy_52|>": 100316,
63
+ "<|dummy_53|>": 100317,
64
+ "<|dummy_54|>": 100318,
65
+ "<|dummy_55|>": 100319,
66
+ "<|dummy_56|>": 100320,
67
+ "<|dummy_57|>": 100321,
68
+ "<|dummy_58|>": 100322,
69
+ "<|dummy_59|>": 100323,
70
+ "<|dummy_60|>": 100324,
71
+ "<|dummy_61|>": 100325,
72
+ "<|dummy_62|>": 100326,
73
+ "<|dummy_63|>": 100327,
74
+ "<|dummy_64|>": 100328,
75
+ "<|dummy_65|>": 100329,
76
+ "<|dummy_66|>": 100330,
77
+ "<|dummy_67|>": 100331,
78
+ "<|dummy_68|>": 100332,
79
+ "<|dummy_69|>": 100333,
80
+ "<|dummy_70|>": 100334,
81
+ "<|dummy_71|>": 100335,
82
+ "<|dummy_72|>": 100336,
83
+ "<|dummy_73|>": 100337,
84
+ "<|dummy_74|>": 100338,
85
+ "<|dummy_75|>": 100339,
86
+ "<|dummy_76|>": 100340,
87
+ "<|dummy_77|>": 100341,
88
+ "<|dummy_78|>": 100342,
89
+ "<|dummy_79|>": 100343,
90
+ "<|dummy_80|>": 100344,
91
+ "<|dummy_81|>": 100345,
92
+ "<|dummy_82|>": 100346,
93
+ "<|dummy_83|>": 100347,
94
+ "<|dummy_84|>": 100348,
95
+ "<|dummy_85|>": 100349,
96
+ "<|dummy_86|>": 100350,
97
+ "<|dummy_87|>": 100351
98
+ }
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/phi-4",
3
+ "architectures": [
4
+ "Phi3ForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 100257,
9
+ "embd_pdrop": 0.0,
10
+ "eos_token_id": 100265,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 5120,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 17920,
15
+ "max_position_embeddings": 16384,
16
+ "model_type": "phi3",
17
+ "num_attention_heads": 40,
18
+ "num_hidden_layers": 40,
19
+ "num_key_value_heads": 10,
20
+ "original_max_position_embeddings": 16384,
21
+ "pad_token_id": 100349,
22
+ "resid_pdrop": 0.0,
23
+ "rms_norm_eps": 1e-05,
24
+ "rope_scaling": null,
25
+ "rope_theta": 250000,
26
+ "sliding_window": null,
27
+ "tie_word_embeddings": false,
28
+ "torch_dtype": "bfloat16",
29
+ "transformers_version": "4.47.0",
30
+ "use_cache": true,
31
+ "vocab_size": 100352
32
+ }
configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework": "pytorch", "task": "text-generation", "allow_remote": true}
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 100257,
4
+ "eos_token_id": 100265,
5
+ "pad_token_id": 100349,
6
+ "transformers_version": "4.47.0"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cafbe72210c39b19270cbfce27996afc4383c8a487087bcb91a6290d5ceb8f18
3
+ size 4933656472
model-00002-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f87a5897162ed988cfb0fac13879634b4bab3da5a756b4f9dd2437ef8145e20
3
+ size 4954690712
model-00003-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19537df78a263aa0c0b2dc12dbce5908021e30a25e528699b08642c3999eeb05
3
+ size 4902241352
model-00004-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f17100a39c19d352f2e8108eef6fb975afb86808e50d78a075e162f1edfc93
3
+ size 4771169120
model-00005-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a72666b6001d76e7a9dc3ebe0d05a2c3316f546a8c6779c87343f296bdf82e50
3
+ size 4771169120
model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9bfeeea354e31503d09a864e6cd5bd0f1230664065b6a2988b05e22b3228d77
3
+ size 4986116216
model.safetensors.index.json ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 29319014400
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00006-of-00006.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00006.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
10
+ "model.layers.0.mlp.gate_up_proj.weight": "model-00001-of-00006.safetensors",
11
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
12
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
13
+ "model.layers.0.self_attn.qkv_proj.weight": "model-00001-of-00006.safetensors",
14
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors",
15
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
16
+ "model.layers.1.mlp.gate_up_proj.weight": "model-00001-of-00006.safetensors",
17
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
18
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
19
+ "model.layers.1.self_attn.qkv_proj.weight": "model-00001-of-00006.safetensors",
20
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00006.safetensors",
21
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
22
+ "model.layers.10.mlp.gate_up_proj.weight": "model-00002-of-00006.safetensors",
23
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
24
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
25
+ "model.layers.10.self_attn.qkv_proj.weight": "model-00002-of-00006.safetensors",
26
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00006.safetensors",
27
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
28
+ "model.layers.11.mlp.gate_up_proj.weight": "model-00002-of-00006.safetensors",
29
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
30
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
31
+ "model.layers.11.self_attn.qkv_proj.weight": "model-00002-of-00006.safetensors",
32
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00006.safetensors",
33
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
34
+ "model.layers.12.mlp.gate_up_proj.weight": "model-00002-of-00006.safetensors",
35
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
36
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
37
+ "model.layers.12.self_attn.qkv_proj.weight": "model-00002-of-00006.safetensors",
38
+ "model.layers.13.input_layernorm.weight": "model-00003-of-00006.safetensors",
39
+ "model.layers.13.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
40
+ "model.layers.13.mlp.gate_up_proj.weight": "model-00003-of-00006.safetensors",
41
+ "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
42
+ "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
43
+ "model.layers.13.self_attn.qkv_proj.weight": "model-00003-of-00006.safetensors",
44
+ "model.layers.14.input_layernorm.weight": "model-00003-of-00006.safetensors",
45
+ "model.layers.14.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
46
+ "model.layers.14.mlp.gate_up_proj.weight": "model-00003-of-00006.safetensors",
47
+ "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
48
+ "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
49
+ "model.layers.14.self_attn.qkv_proj.weight": "model-00003-of-00006.safetensors",
50
+ "model.layers.15.input_layernorm.weight": "model-00003-of-00006.safetensors",
51
+ "model.layers.15.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
52
+ "model.layers.15.mlp.gate_up_proj.weight": "model-00003-of-00006.safetensors",
53
+ "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
54
+ "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
55
+ "model.layers.15.self_attn.qkv_proj.weight": "model-00003-of-00006.safetensors",
56
+ "model.layers.16.input_layernorm.weight": "model-00003-of-00006.safetensors",
57
+ "model.layers.16.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
58
+ "model.layers.16.mlp.gate_up_proj.weight": "model-00003-of-00006.safetensors",
59
+ "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
60
+ "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
61
+ "model.layers.16.self_attn.qkv_proj.weight": "model-00003-of-00006.safetensors",
62
+ "model.layers.17.input_layernorm.weight": "model-00003-of-00006.safetensors",
63
+ "model.layers.17.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
64
+ "model.layers.17.mlp.gate_up_proj.weight": "model-00003-of-00006.safetensors",
65
+ "model.layers.17.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
66
+ "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
67
+ "model.layers.17.self_attn.qkv_proj.weight": "model-00003-of-00006.safetensors",
68
+ "model.layers.18.input_layernorm.weight": "model-00003-of-00006.safetensors",
69
+ "model.layers.18.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
70
+ "model.layers.18.mlp.gate_up_proj.weight": "model-00003-of-00006.safetensors",
71
+ "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
72
+ "model.layers.18.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
73
+ "model.layers.18.self_attn.qkv_proj.weight": "model-00003-of-00006.safetensors",
74
+ "model.layers.19.input_layernorm.weight": "model-00003-of-00006.safetensors",
75
+ "model.layers.19.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
76
+ "model.layers.19.mlp.gate_up_proj.weight": "model-00003-of-00006.safetensors",
77
+ "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
78
+ "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
79
+ "model.layers.19.self_attn.qkv_proj.weight": "model-00003-of-00006.safetensors",
80
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors",
81
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
82
+ "model.layers.2.mlp.gate_up_proj.weight": "model-00001-of-00006.safetensors",
83
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
84
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
85
+ "model.layers.2.self_attn.qkv_proj.weight": "model-00001-of-00006.safetensors",
86
+ "model.layers.20.input_layernorm.weight": "model-00004-of-00006.safetensors",
87
+ "model.layers.20.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
88
+ "model.layers.20.mlp.gate_up_proj.weight": "model-00004-of-00006.safetensors",
89
+ "model.layers.20.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
90
+ "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
91
+ "model.layers.20.self_attn.qkv_proj.weight": "model-00003-of-00006.safetensors",
92
+ "model.layers.21.input_layernorm.weight": "model-00004-of-00006.safetensors",
93
+ "model.layers.21.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
94
+ "model.layers.21.mlp.gate_up_proj.weight": "model-00004-of-00006.safetensors",
95
+ "model.layers.21.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
96
+ "model.layers.21.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
97
+ "model.layers.21.self_attn.qkv_proj.weight": "model-00004-of-00006.safetensors",
98
+ "model.layers.22.input_layernorm.weight": "model-00004-of-00006.safetensors",
99
+ "model.layers.22.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
100
+ "model.layers.22.mlp.gate_up_proj.weight": "model-00004-of-00006.safetensors",
101
+ "model.layers.22.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
102
+ "model.layers.22.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
103
+ "model.layers.22.self_attn.qkv_proj.weight": "model-00004-of-00006.safetensors",
104
+ "model.layers.23.input_layernorm.weight": "model-00004-of-00006.safetensors",
105
+ "model.layers.23.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
106
+ "model.layers.23.mlp.gate_up_proj.weight": "model-00004-of-00006.safetensors",
107
+ "model.layers.23.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
108
+ "model.layers.23.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
109
+ "model.layers.23.self_attn.qkv_proj.weight": "model-00004-of-00006.safetensors",
110
+ "model.layers.24.input_layernorm.weight": "model-00004-of-00006.safetensors",
111
+ "model.layers.24.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
112
+ "model.layers.24.mlp.gate_up_proj.weight": "model-00004-of-00006.safetensors",
113
+ "model.layers.24.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
114
+ "model.layers.24.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
115
+ "model.layers.24.self_attn.qkv_proj.weight": "model-00004-of-00006.safetensors",
116
+ "model.layers.25.input_layernorm.weight": "model-00004-of-00006.safetensors",
117
+ "model.layers.25.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
118
+ "model.layers.25.mlp.gate_up_proj.weight": "model-00004-of-00006.safetensors",
119
+ "model.layers.25.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
120
+ "model.layers.25.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
121
+ "model.layers.25.self_attn.qkv_proj.weight": "model-00004-of-00006.safetensors",
122
+ "model.layers.26.input_layernorm.weight": "model-00004-of-00006.safetensors",
123
+ "model.layers.26.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
124
+ "model.layers.26.mlp.gate_up_proj.weight": "model-00004-of-00006.safetensors",
125
+ "model.layers.26.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
126
+ "model.layers.26.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
127
+ "model.layers.26.self_attn.qkv_proj.weight": "model-00004-of-00006.safetensors",
128
+ "model.layers.27.input_layernorm.weight": "model-00005-of-00006.safetensors",
129
+ "model.layers.27.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
130
+ "model.layers.27.mlp.gate_up_proj.weight": "model-00005-of-00006.safetensors",
131
+ "model.layers.27.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
132
+ "model.layers.27.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
133
+ "model.layers.27.self_attn.qkv_proj.weight": "model-00004-of-00006.safetensors",
134
+ "model.layers.28.input_layernorm.weight": "model-00005-of-00006.safetensors",
135
+ "model.layers.28.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
136
+ "model.layers.28.mlp.gate_up_proj.weight": "model-00005-of-00006.safetensors",
137
+ "model.layers.28.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
138
+ "model.layers.28.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
139
+ "model.layers.28.self_attn.qkv_proj.weight": "model-00005-of-00006.safetensors",
140
+ "model.layers.29.input_layernorm.weight": "model-00005-of-00006.safetensors",
141
+ "model.layers.29.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
142
+ "model.layers.29.mlp.gate_up_proj.weight": "model-00005-of-00006.safetensors",
143
+ "model.layers.29.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
144
+ "model.layers.29.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
145
+ "model.layers.29.self_attn.qkv_proj.weight": "model-00005-of-00006.safetensors",
146
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors",
147
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
148
+ "model.layers.3.mlp.gate_up_proj.weight": "model-00001-of-00006.safetensors",
149
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
150
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
151
+ "model.layers.3.self_attn.qkv_proj.weight": "model-00001-of-00006.safetensors",
152
+ "model.layers.30.input_layernorm.weight": "model-00005-of-00006.safetensors",
153
+ "model.layers.30.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
154
+ "model.layers.30.mlp.gate_up_proj.weight": "model-00005-of-00006.safetensors",
155
+ "model.layers.30.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
156
+ "model.layers.30.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
157
+ "model.layers.30.self_attn.qkv_proj.weight": "model-00005-of-00006.safetensors",
158
+ "model.layers.31.input_layernorm.weight": "model-00005-of-00006.safetensors",
159
+ "model.layers.31.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
160
+ "model.layers.31.mlp.gate_up_proj.weight": "model-00005-of-00006.safetensors",
161
+ "model.layers.31.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
162
+ "model.layers.31.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
163
+ "model.layers.31.self_attn.qkv_proj.weight": "model-00005-of-00006.safetensors",
164
+ "model.layers.32.input_layernorm.weight": "model-00005-of-00006.safetensors",
165
+ "model.layers.32.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
166
+ "model.layers.32.mlp.gate_up_proj.weight": "model-00005-of-00006.safetensors",
167
+ "model.layers.32.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
168
+ "model.layers.32.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
169
+ "model.layers.32.self_attn.qkv_proj.weight": "model-00005-of-00006.safetensors",
170
+ "model.layers.33.input_layernorm.weight": "model-00005-of-00006.safetensors",
171
+ "model.layers.33.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
172
+ "model.layers.33.mlp.gate_up_proj.weight": "model-00005-of-00006.safetensors",
173
+ "model.layers.33.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
174
+ "model.layers.33.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
175
+ "model.layers.33.self_attn.qkv_proj.weight": "model-00005-of-00006.safetensors",
176
+ "model.layers.34.input_layernorm.weight": "model-00006-of-00006.safetensors",
177
+ "model.layers.34.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
178
+ "model.layers.34.mlp.gate_up_proj.weight": "model-00006-of-00006.safetensors",
179
+ "model.layers.34.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
180
+ "model.layers.34.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
181
+ "model.layers.34.self_attn.qkv_proj.weight": "model-00005-of-00006.safetensors",
182
+ "model.layers.35.input_layernorm.weight": "model-00006-of-00006.safetensors",
183
+ "model.layers.35.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
184
+ "model.layers.35.mlp.gate_up_proj.weight": "model-00006-of-00006.safetensors",
185
+ "model.layers.35.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
186
+ "model.layers.35.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
187
+ "model.layers.35.self_attn.qkv_proj.weight": "model-00006-of-00006.safetensors",
188
+ "model.layers.36.input_layernorm.weight": "model-00006-of-00006.safetensors",
189
+ "model.layers.36.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
190
+ "model.layers.36.mlp.gate_up_proj.weight": "model-00006-of-00006.safetensors",
191
+ "model.layers.36.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
192
+ "model.layers.36.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
193
+ "model.layers.36.self_attn.qkv_proj.weight": "model-00006-of-00006.safetensors",
194
+ "model.layers.37.input_layernorm.weight": "model-00006-of-00006.safetensors",
195
+ "model.layers.37.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
196
+ "model.layers.37.mlp.gate_up_proj.weight": "model-00006-of-00006.safetensors",
197
+ "model.layers.37.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
198
+ "model.layers.37.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
199
+ "model.layers.37.self_attn.qkv_proj.weight": "model-00006-of-00006.safetensors",
200
+ "model.layers.38.input_layernorm.weight": "model-00006-of-00006.safetensors",
201
+ "model.layers.38.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
202
+ "model.layers.38.mlp.gate_up_proj.weight": "model-00006-of-00006.safetensors",
203
+ "model.layers.38.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
204
+ "model.layers.38.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
205
+ "model.layers.38.self_attn.qkv_proj.weight": "model-00006-of-00006.safetensors",
206
+ "model.layers.39.input_layernorm.weight": "model-00006-of-00006.safetensors",
207
+ "model.layers.39.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
208
+ "model.layers.39.mlp.gate_up_proj.weight": "model-00006-of-00006.safetensors",
209
+ "model.layers.39.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
210
+ "model.layers.39.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
211
+ "model.layers.39.self_attn.qkv_proj.weight": "model-00006-of-00006.safetensors",
212
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors",
213
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
214
+ "model.layers.4.mlp.gate_up_proj.weight": "model-00001-of-00006.safetensors",
215
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
216
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
217
+ "model.layers.4.self_attn.qkv_proj.weight": "model-00001-of-00006.safetensors",
218
+ "model.layers.5.input_layernorm.weight": "model-00002-of-00006.safetensors",
219
+ "model.layers.5.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
220
+ "model.layers.5.mlp.gate_up_proj.weight": "model-00001-of-00006.safetensors",
221
+ "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
222
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
223
+ "model.layers.5.self_attn.qkv_proj.weight": "model-00001-of-00006.safetensors",
224
+ "model.layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors",
225
+ "model.layers.6.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
226
+ "model.layers.6.mlp.gate_up_proj.weight": "model-00002-of-00006.safetensors",
227
+ "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
228
+ "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
229
+ "model.layers.6.self_attn.qkv_proj.weight": "model-00002-of-00006.safetensors",
230
+ "model.layers.7.input_layernorm.weight": "model-00002-of-00006.safetensors",
231
+ "model.layers.7.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
232
+ "model.layers.7.mlp.gate_up_proj.weight": "model-00002-of-00006.safetensors",
233
+ "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
234
+ "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
235
+ "model.layers.7.self_attn.qkv_proj.weight": "model-00002-of-00006.safetensors",
236
+ "model.layers.8.input_layernorm.weight": "model-00002-of-00006.safetensors",
237
+ "model.layers.8.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
238
+ "model.layers.8.mlp.gate_up_proj.weight": "model-00002-of-00006.safetensors",
239
+ "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
240
+ "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
241
+ "model.layers.8.self_attn.qkv_proj.weight": "model-00002-of-00006.safetensors",
242
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00006.safetensors",
243
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
244
+ "model.layers.9.mlp.gate_up_proj.weight": "model-00002-of-00006.safetensors",
245
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
246
+ "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
247
+ "model.layers.9.self_attn.qkv_proj.weight": "model-00002-of-00006.safetensors",
248
+ "model.norm.weight": "model-00006-of-00006.safetensors"
249
+ }
250
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|im_end|>",
4
+ "pad_token": "<|dummy_85|>"
5
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,780 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "100256": {
5
+ "content": "<|dummy_0|>",
6
+ "lstrip": true,
7
+ "normalized": false,
8
+ "rstrip": true,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "100257": {
13
+ "content": "<|endoftext|>",
14
+ "lstrip": true,
15
+ "normalized": false,
16
+ "rstrip": true,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "100258": {
21
+ "content": "<|fim_prefix|>",
22
+ "lstrip": true,
23
+ "normalized": false,
24
+ "rstrip": true,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "100259": {
29
+ "content": "<|fim_middle|>",
30
+ "lstrip": true,
31
+ "normalized": false,
32
+ "rstrip": true,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "100260": {
37
+ "content": "<|fim_suffix|>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": true,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "100261": {
45
+ "content": "<|dummy_1|>",
46
+ "lstrip": true,
47
+ "normalized": false,
48
+ "rstrip": true,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "100262": {
53
+ "content": "<|dummy_2|>",
54
+ "lstrip": true,
55
+ "normalized": false,
56
+ "rstrip": true,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "100263": {
61
+ "content": "<|dummy_3|>",
62
+ "lstrip": true,
63
+ "normalized": false,
64
+ "rstrip": true,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "100264": {
69
+ "content": "<|im_start|>",
70
+ "lstrip": true,
71
+ "normalized": false,
72
+ "rstrip": true,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "100265": {
77
+ "content": "<|im_end|>",
78
+ "lstrip": true,
79
+ "normalized": false,
80
+ "rstrip": true,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "100266": {
85
+ "content": "<|im_sep|>",
86
+ "lstrip": true,
87
+ "normalized": false,
88
+ "rstrip": true,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "100267": {
93
+ "content": "<|dummy_4|>",
94
+ "lstrip": true,
95
+ "normalized": false,
96
+ "rstrip": true,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "100268": {
101
+ "content": "<|dummy_5|>",
102
+ "lstrip": true,
103
+ "normalized": false,
104
+ "rstrip": true,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "100269": {
109
+ "content": "<|dummy_6|>",
110
+ "lstrip": true,
111
+ "normalized": false,
112
+ "rstrip": true,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "100270": {
117
+ "content": "<|dummy_7|>",
118
+ "lstrip": true,
119
+ "normalized": false,
120
+ "rstrip": true,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "100271": {
125
+ "content": "<|dummy_8|>",
126
+ "lstrip": true,
127
+ "normalized": false,
128
+ "rstrip": true,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "100272": {
133
+ "content": "<|dummy_9|>",
134
+ "lstrip": true,
135
+ "normalized": false,
136
+ "rstrip": true,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "100273": {
141
+ "content": "<|dummy_10|>",
142
+ "lstrip": true,
143
+ "normalized": false,
144
+ "rstrip": true,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "100274": {
149
+ "content": "<|dummy_11|>",
150
+ "lstrip": true,
151
+ "normalized": false,
152
+ "rstrip": true,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "100275": {
157
+ "content": "<|dummy_12|>",
158
+ "lstrip": true,
159
+ "normalized": false,
160
+ "rstrip": true,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "100276": {
165
+ "content": "<|endofprompt|>",
166
+ "lstrip": true,
167
+ "normalized": false,
168
+ "rstrip": true,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "100277": {
173
+ "content": "<|dummy_13|>",
174
+ "lstrip": true,
175
+ "normalized": false,
176
+ "rstrip": true,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "100278": {
181
+ "content": "<|dummy_14|>",
182
+ "lstrip": true,
183
+ "normalized": false,
184
+ "rstrip": true,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "100279": {
189
+ "content": "<|dummy_15|>",
190
+ "lstrip": true,
191
+ "normalized": false,
192
+ "rstrip": true,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "100280": {
197
+ "content": "<|dummy_16|>",
198
+ "lstrip": true,
199
+ "normalized": false,
200
+ "rstrip": true,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "100281": {
205
+ "content": "<|dummy_17|>",
206
+ "lstrip": true,
207
+ "normalized": false,
208
+ "rstrip": true,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "100282": {
213
+ "content": "<|dummy_18|>",
214
+ "lstrip": true,
215
+ "normalized": false,
216
+ "rstrip": true,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "100283": {
221
+ "content": "<|dummy_19|>",
222
+ "lstrip": true,
223
+ "normalized": false,
224
+ "rstrip": true,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "100284": {
229
+ "content": "<|dummy_20|>",
230
+ "lstrip": true,
231
+ "normalized": false,
232
+ "rstrip": true,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "100285": {
237
+ "content": "<|dummy_21|>",
238
+ "lstrip": true,
239
+ "normalized": false,
240
+ "rstrip": true,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "100286": {
245
+ "content": "<|dummy_22|>",
246
+ "lstrip": true,
247
+ "normalized": false,
248
+ "rstrip": true,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "100287": {
253
+ "content": "<|dummy_23|>",
254
+ "lstrip": true,
255
+ "normalized": false,
256
+ "rstrip": true,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "100288": {
261
+ "content": "<|dummy_24|>",
262
+ "lstrip": true,
263
+ "normalized": false,
264
+ "rstrip": true,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "100289": {
269
+ "content": "<|dummy_25|>",
270
+ "lstrip": true,
271
+ "normalized": false,
272
+ "rstrip": true,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "100290": {
277
+ "content": "<|dummy_26|>",
278
+ "lstrip": true,
279
+ "normalized": false,
280
+ "rstrip": true,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "100291": {
285
+ "content": "<|dummy_27|>",
286
+ "lstrip": true,
287
+ "normalized": false,
288
+ "rstrip": true,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "100292": {
293
+ "content": "<|dummy_28|>",
294
+ "lstrip": true,
295
+ "normalized": false,
296
+ "rstrip": true,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "100293": {
301
+ "content": "<|dummy_29|>",
302
+ "lstrip": true,
303
+ "normalized": false,
304
+ "rstrip": true,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "100294": {
309
+ "content": "<|dummy_30|>",
310
+ "lstrip": true,
311
+ "normalized": false,
312
+ "rstrip": true,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "100295": {
317
+ "content": "<|dummy_31|>",
318
+ "lstrip": true,
319
+ "normalized": false,
320
+ "rstrip": true,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "100296": {
325
+ "content": "<|dummy_32|>",
326
+ "lstrip": true,
327
+ "normalized": false,
328
+ "rstrip": true,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "100297": {
333
+ "content": "<|dummy_33|>",
334
+ "lstrip": true,
335
+ "normalized": false,
336
+ "rstrip": true,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "100298": {
341
+ "content": "<|dummy_34|>",
342
+ "lstrip": true,
343
+ "normalized": false,
344
+ "rstrip": true,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "100299": {
349
+ "content": "<|dummy_35|>",
350
+ "lstrip": true,
351
+ "normalized": false,
352
+ "rstrip": true,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "100300": {
357
+ "content": "<|dummy_36|>",
358
+ "lstrip": true,
359
+ "normalized": false,
360
+ "rstrip": true,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "100301": {
365
+ "content": "<|dummy_37|>",
366
+ "lstrip": true,
367
+ "normalized": false,
368
+ "rstrip": true,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "100302": {
373
+ "content": "<|dummy_38|>",
374
+ "lstrip": true,
375
+ "normalized": false,
376
+ "rstrip": true,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "100303": {
381
+ "content": "<|dummy_39|>",
382
+ "lstrip": true,
383
+ "normalized": false,
384
+ "rstrip": true,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "100304": {
389
+ "content": "<|dummy_40|>",
390
+ "lstrip": true,
391
+ "normalized": false,
392
+ "rstrip": true,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "100305": {
397
+ "content": "<|dummy_41|>",
398
+ "lstrip": true,
399
+ "normalized": false,
400
+ "rstrip": true,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "100306": {
405
+ "content": "<|dummy_42|>",
406
+ "lstrip": true,
407
+ "normalized": false,
408
+ "rstrip": true,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "100307": {
413
+ "content": "<|dummy_43|>",
414
+ "lstrip": true,
415
+ "normalized": false,
416
+ "rstrip": true,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "100308": {
421
+ "content": "<|dummy_44|>",
422
+ "lstrip": true,
423
+ "normalized": false,
424
+ "rstrip": true,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "100309": {
429
+ "content": "<|dummy_45|>",
430
+ "lstrip": true,
431
+ "normalized": false,
432
+ "rstrip": true,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "100310": {
437
+ "content": "<|dummy_46|>",
438
+ "lstrip": true,
439
+ "normalized": false,
440
+ "rstrip": true,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "100311": {
445
+ "content": "<|dummy_47|>",
446
+ "lstrip": true,
447
+ "normalized": false,
448
+ "rstrip": true,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "100312": {
453
+ "content": "<|dummy_48|>",
454
+ "lstrip": true,
455
+ "normalized": false,
456
+ "rstrip": true,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "100313": {
461
+ "content": "<|dummy_49|>",
462
+ "lstrip": true,
463
+ "normalized": false,
464
+ "rstrip": true,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "100314": {
469
+ "content": "<|dummy_50|>",
470
+ "lstrip": true,
471
+ "normalized": false,
472
+ "rstrip": true,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "100315": {
477
+ "content": "<|dummy_51|>",
478
+ "lstrip": true,
479
+ "normalized": false,
480
+ "rstrip": true,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "100316": {
485
+ "content": "<|dummy_52|>",
486
+ "lstrip": true,
487
+ "normalized": false,
488
+ "rstrip": true,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "100317": {
493
+ "content": "<|dummy_53|>",
494
+ "lstrip": true,
495
+ "normalized": false,
496
+ "rstrip": true,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "100318": {
501
+ "content": "<|dummy_54|>",
502
+ "lstrip": true,
503
+ "normalized": false,
504
+ "rstrip": true,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "100319": {
509
+ "content": "<|dummy_55|>",
510
+ "lstrip": true,
511
+ "normalized": false,
512
+ "rstrip": true,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "100320": {
517
+ "content": "<|dummy_56|>",
518
+ "lstrip": true,
519
+ "normalized": false,
520
+ "rstrip": true,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "100321": {
525
+ "content": "<|dummy_57|>",
526
+ "lstrip": true,
527
+ "normalized": false,
528
+ "rstrip": true,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "100322": {
533
+ "content": "<|dummy_58|>",
534
+ "lstrip": true,
535
+ "normalized": false,
536
+ "rstrip": true,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "100323": {
541
+ "content": "<|dummy_59|>",
542
+ "lstrip": true,
543
+ "normalized": false,
544
+ "rstrip": true,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "100324": {
549
+ "content": "<|dummy_60|>",
550
+ "lstrip": true,
551
+ "normalized": false,
552
+ "rstrip": true,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "100325": {
557
+ "content": "<|dummy_61|>",
558
+ "lstrip": true,
559
+ "normalized": false,
560
+ "rstrip": true,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "100326": {
565
+ "content": "<|dummy_62|>",
566
+ "lstrip": true,
567
+ "normalized": false,
568
+ "rstrip": true,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "100327": {
573
+ "content": "<|dummy_63|>",
574
+ "lstrip": true,
575
+ "normalized": false,
576
+ "rstrip": true,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "100328": {
581
+ "content": "<|dummy_64|>",
582
+ "lstrip": true,
583
+ "normalized": false,
584
+ "rstrip": true,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "100329": {
589
+ "content": "<|dummy_65|>",
590
+ "lstrip": true,
591
+ "normalized": false,
592
+ "rstrip": true,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "100330": {
597
+ "content": "<|dummy_66|>",
598
+ "lstrip": true,
599
+ "normalized": false,
600
+ "rstrip": true,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "100331": {
605
+ "content": "<|dummy_67|>",
606
+ "lstrip": true,
607
+ "normalized": false,
608
+ "rstrip": true,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "100332": {
613
+ "content": "<|dummy_68|>",
614
+ "lstrip": true,
615
+ "normalized": false,
616
+ "rstrip": true,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "100333": {
621
+ "content": "<|dummy_69|>",
622
+ "lstrip": true,
623
+ "normalized": false,
624
+ "rstrip": true,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "100334": {
629
+ "content": "<|dummy_70|>",
630
+ "lstrip": true,
631
+ "normalized": false,
632
+ "rstrip": true,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "100335": {
637
+ "content": "<|dummy_71|>",
638
+ "lstrip": true,
639
+ "normalized": false,
640
+ "rstrip": true,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "100336": {
645
+ "content": "<|dummy_72|>",
646
+ "lstrip": true,
647
+ "normalized": false,
648
+ "rstrip": true,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "100337": {
653
+ "content": "<|dummy_73|>",
654
+ "lstrip": true,
655
+ "normalized": false,
656
+ "rstrip": true,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "100338": {
661
+ "content": "<|dummy_74|>",
662
+ "lstrip": true,
663
+ "normalized": false,
664
+ "rstrip": true,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "100339": {
669
+ "content": "<|dummy_75|>",
670
+ "lstrip": true,
671
+ "normalized": false,
672
+ "rstrip": true,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "100340": {
677
+ "content": "<|dummy_76|>",
678
+ "lstrip": true,
679
+ "normalized": false,
680
+ "rstrip": true,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "100341": {
685
+ "content": "<|dummy_77|>",
686
+ "lstrip": true,
687
+ "normalized": false,
688
+ "rstrip": true,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "100342": {
693
+ "content": "<|dummy_78|>",
694
+ "lstrip": true,
695
+ "normalized": false,
696
+ "rstrip": true,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "100343": {
701
+ "content": "<|dummy_79|>",
702
+ "lstrip": true,
703
+ "normalized": false,
704
+ "rstrip": true,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "100344": {
709
+ "content": "<|dummy_80|>",
710
+ "lstrip": true,
711
+ "normalized": false,
712
+ "rstrip": true,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "100345": {
717
+ "content": "<|dummy_81|>",
718
+ "lstrip": true,
719
+ "normalized": false,
720
+ "rstrip": true,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "100346": {
725
+ "content": "<|dummy_82|>",
726
+ "lstrip": true,
727
+ "normalized": false,
728
+ "rstrip": true,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "100347": {
733
+ "content": "<|dummy_83|>",
734
+ "lstrip": true,
735
+ "normalized": false,
736
+ "rstrip": true,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "100348": {
741
+ "content": "<|dummy_84|>",
742
+ "lstrip": true,
743
+ "normalized": false,
744
+ "rstrip": true,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "100349": {
749
+ "content": "<|dummy_85|>",
750
+ "lstrip": true,
751
+ "normalized": false,
752
+ "rstrip": true,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "100350": {
757
+ "content": "<|dummy_86|>",
758
+ "lstrip": true,
759
+ "normalized": false,
760
+ "rstrip": true,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "100351": {
765
+ "content": "<|dummy_87|>",
766
+ "lstrip": true,
767
+ "normalized": false,
768
+ "rstrip": true,
769
+ "single_word": false,
770
+ "special": true
771
+ }
772
+ },
773
+ "bos_token": "<|endoftext|>",
774
+ "chat_template": "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}",
775
+ "clean_up_tokenization_spaces": false,
776
+ "eos_token": "<|im_end|>",
777
+ "model_max_length": 16384,
778
+ "pad_token": "<|dummy_85|>",
779
+ "tokenizer_class": "GPT2Tokenizer"
780
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff