Upload folder using huggingface_hub
Browse files- .gitattributes +43 -9
- README.md +54 -3
- README_EN.md +59 -0
- added_tokens.json +28 -0
- args.json +364 -0
- config.json +31 -0
- configuration.json +1 -0
- generation_config.json +13 -0
- images/.gitkeep +0 -0
- images/RAG-workflow.png +3 -0
- images/logo.png +3 -0
- merges.txt +0 -0
- model-00001-of-00014.safetensors +3 -0
- model-00002-of-00014.safetensors +3 -0
- model-00003-of-00014.safetensors +3 -0
- model-00004-of-00014.safetensors +3 -0
- model-00005-of-00014.safetensors +3 -0
- model-00006-of-00014.safetensors +3 -0
- model-00007-of-00014.safetensors +3 -0
- model-00008-of-00014.safetensors +3 -0
- model-00009-of-00014.safetensors +3 -0
- model-00010-of-00014.safetensors +3 -0
- model-00011-of-00014.safetensors +3 -0
- model-00012-of-00014.safetensors +3 -0
- model-00013-of-00014.safetensors +3 -0
- model-00014-of-00014.safetensors +3 -0
- model.safetensors.index.json +714 -0
- special_tokens_map.json +31 -0
- tokenizer.json +3 -0
- tokenizer_config.json +240 -0
- vocab.json +0 -0
.gitattributes
CHANGED
|
@@ -1,35 +1,69 @@
|
|
| 1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 4 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
| 5 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 6 |
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 11 |
*.model filter=lfs diff=lfs merge=lfs -text
|
| 12 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
| 13 |
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 14 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 15 |
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 16 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
| 17 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 18 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 19 |
*.rar filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 20 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 21 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 22 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 23 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 24 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.db* filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.ark* filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
|
| 34 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*.gguf* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.ggml filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*.llamafile* filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
*.pt2 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 46 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 47 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
|
| 49 |
+
logo.png filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
RAG-workflow.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
|
| 52 |
+
images/RAG-workflow.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
images/logo.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
|
| 55 |
+
model-00012-of-00014.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
model-00001-of-00014.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
model-00007-of-00014.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
model-00014-of-00014.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
model-00006-of-00014.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
model-00004-of-00014.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
model-00009-of-00014.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
model-00010-of-00014.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
model-00005-of-00014.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
model-00002-of-00014.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
model-00013-of-00014.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
model-00008-of-00014.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
model-00011-of-00014.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
model-00003-of-00014.safetensors filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,3 +1,54 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 司农大语言模型
|
| 2 |
+
中文 | <a href="README_EN.md">English</a>
|
| 3 |
+
|
| 4 |
+
**面向通用农业领域的垂直大模型**
|
| 5 |
+
|
| 6 |
+

|
| 7 |
+
## 数据收集与模型训练
|
| 8 |
+
依托南京农业大学学科特色,面向**动物科学**、**动物医学**、**农业经济管理**、**农业资源与环境**、**园艺学**、**智慧农业**、**植物保护**、**作物育种**等细分学科收集了大规模领域数据,总量超40亿token。
|
| 9 |
+
|数据类型|数据量|
|
| 10 |
+
|:---:|:---:|
|
| 11 |
+
|书籍|8,863册|
|
| 12 |
+
|论文|243,897篇|
|
| 13 |
+
|政策、标准、专利等|196,748份|
|
| 14 |
+
|||
|
| 15 |
+
|
| 16 |
+
结合视觉模型与大语言模型,这些多源异构的数据被整理、筛选、整合,形成了**大规模**、**高质量**的**农业基础数据集**。
|
| 17 |
+
|
| 18 |
+
为提升模型的农业语言能力与领域知识,我们在基础数据集上开展了合成数据构建、指令微调与强化学习,除传统指令数据外,思维链、上下文参考形式的指令数据也被应用于模型训练中,旨在提升模型对农业领域知识的理解、学习和使用能力。
|
| 19 |
+
|
| 20 |
+
|数据类型|数据样例|
|
| 21 |
+
|:---:|:---:|
|
| 22 |
+
|QA|"instruction": "在人多地少、耕地后备资源不足的背景下,如何借鉴中国古代‘精耕细作’传统提升单位面积粮食产量,且避免过度投入导致的生态退化?","output": "应采取‘三才’统筹的精耕细作模式:一是因地制宜优化耕作制度,..."|
|
| 23 |
+
|COT-QA|"instruction": "宋代南方水田农业中出现了哪些新型农具?这些农具如何促进了水稻种植效率的提升?",“output”: <think>\n好的,我现在需要回答用户的问题:“宋代南方水田农业中出现了哪些新型农具?这些农具如何促进了水稻种植效率的提升?”……</think>\n宋代南方水田农业中出现的新型农具包括:……"|
|
| 24 |
+
|Incontext-QA|"instruction": ""小麦纹枯病的发生与温湿度、栽培措施及品种抗性如何交互影响,导致其在不同生育阶段呈现动态发展规律?\n\n小麦和大麦种子萌发后……",“output”:小麦纹枯病的发生与温湿度、栽培措施及品种抗性三者交互影响,共同决定其在不同生育阶段的动态发展规律,具体表现为:\n\n1. 温湿度主导病害发展节律: \n 病菌侵染和扩展受温度显著调控。冬前(10℃以下)病害发展缓慢,仅零星发生;翌春气温回升至15℃以上时,病害加速扩展,20~25℃为最适发展温度,此时病株率与严重度急剧上升,尤其在拔节至孕穗期达高峰;30℃以上病害基本停滞。同时,春季低温多雨……|
|
| 25 |
+
|Incontext-COT-QA|“instruction”: “中国百强农业对外投资企业在投资规模、区域分布与行业结构上呈现出哪些显著特征,反映了我国农业“走出去”战略的何种优先导向?\n\n2015年,中国对外农业投资100强企业(以下简称百强企业”)对外投资30.61亿美元,占投资流量总额的84% ;截至2015年底,累计投资达111.96亿美元,占比86.6% ;在58个国家和地区共设立农业企业194家,占境外农业企业总数的25.4% ;营业收入达418.38亿美元,占所有企业营业收入的 94.6%。可见,尽管百强企业数量占比不高,但在投资规模、经营效益等方面……",“output”:"<think>\n好的,我现在需要回答用户的问题,关于中国百强农业对外投资企业在投资规模、区域分布与行业结构上的显著特征,以及这些特征反映的农业“走出去”战略的优先导向。首先,我需要仔细阅读用户提供的参考材料,从中提取关键数据,并进行归纳总结。\n\n首先看投资规模部分。2015年百强企业对外……</think>\n中国百强农业对外投资企业在投资规模、区域分布与行业结构上的显著特征及其反映的农业“走出去”战略优先导向如下:\n\n---\n\n### 一、显著特征\n1. **投资规模集中化……"|
|
| 26 |
+
|
| 27 |
+
### 多智能体检索增强
|
| 28 |
+
当前,检索增强(RAG)已经成为大语言模型的领域应用不可或缺的一环,然而,当前针对领域专业文献(数据论文)的检索增强仍存在局限性。我们推出了一套多智能体检索增强框架,针对知识库构建、数据来源、检索效率等方面进行了优化。
|
| 29 |
+
|
| 30 |
+

|
| 31 |
+
|
| 32 |
+
## 模型及数据使用
|
| 33 |
+
### 模型获取和使用
|
| 34 |
+
司农大语言模型1.0版本已经在魔搭社区开源,目前我们开源了8B和32B两种参数规模的模型,任何人均可直接下载和部署该模型。
|
| 35 |
+
|
| 36 |
+
|模型名称|开源地址|
|
| 37 |
+
|:---:|:---:|
|
| 38 |
+
|Sinong1.0-8B|[ModelScope](https://modelscope.cn/models/NAULLM/Sinong1.0-8B)|
|
| 39 |
+
|Sinong1.0-32B|[ModelScope](https://modelscope.cn/models/NAULLM/Sinong1.0-32B)|
|
| 40 |
+
|||
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
### 数据获取和使用
|
| 44 |
+
如果您需要使用本研究的基础或过程数据,可以通过邮箱(llm4cca@njau.edu.cn)与我们取得联系,我们期待与您一起探索大语言模型下的智慧农业路径。
|
| 45 |
+
|
| 46 |
+
## 研发团队
|
| 47 |
+
司农大模型由南京农业大学、南京理工大学两所高校的跨学校团队共同研发
|
| 48 |
+
* 南京农业大学团队:**王东波**、赵志枭、刘瑞麟、杨帆、齐月、庞蔚淇、杨俊羿、李萱、周倩、魏齐治、武瑞峰、林森、张宏敏、潘梦菲、陆琪
|
| 49 |
+
* 南京理工大学团队:**沈思**、**朱丹浩**
|
| 50 |
+
|
| 51 |
+
## 特别声明
|
| 52 |
+
司农大语言模型基于大规模农业领域数据进行训练,我们已通过多种技术手段对数据进行了清洗与筛选。然而,受大语言模型自身架构特点所限,生成内容中仍可能出现不准确或失实的“幻觉”现象。因此,在使用本模型过程中,请您务必对生成内容进行审慎甄别,避免因盲目采纳AI生成信息而导致任何形式的损失。
|
| 53 |
+
|
| 54 |
+
使用司农大语言模型时,您应严格遵守中华人民共和国相关法律法规。在任何情况下,我们均不对因使用本模型或其输出内容所引发的任何损害(无论其成因或形式)承担责任,包括但不限于直接、间接、特殊或附带损害。
|
README_EN.md
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Sinong Large Language Model
|
| 2 |
+
<a href="README.md">中文</a> | English
|
| 3 |
+
|
| 4 |
+
**A Vertical Large Model for the General Agricultural Domain**
|
| 5 |
+
|
| 6 |
+

|
| 7 |
+
|
| 8 |
+
## Data Collection and Model Training
|
| 9 |
+
|
| 10 |
+
Relying on the disciplinary features of Nanjing Agricultural University, we have collected large-scale domain data oriented towards specific sub-disciplines such as **Animal Science**, **Animal Medicine**, **Agricultural Economic Management**, **Agricultural Resources and Environment**, **Horticulture**, **Smart Agriculture**, **Plant Protection**, and **Crop Breeding**, with a total volume exceeding 4 billion tokens.
|
| 11 |
+
|
| 12 |
+
| Data Type | Volume |
|
| 13 |
+
| :---- | :---- |
|
| 14 |
+
| Books | 8,863 Volumes |
|
| 15 |
+
| Papers | 243,897 Articles |
|
| 16 |
+
| Policies, Standards, Patents, etc. | 196,748 Documents |
|
| 17 |
+
| | |
|
| 18 |
+
|
| 19 |
+
Combining visual models and Large Language Models (LLMs), this multi-source heterogeneous data was organized, screened, and integrated to form a **large-scale**, **high-quality** **agricultural foundation dataset**.
|
| 20 |
+
|
| 21 |
+
To enhance the model's agricultural language capabilities and domain knowledge, we conducted synthetic data construction, instruction fine-tuning, and reinforcement learning on the foundation dataset. In addition to traditional instruction data, Chain-of-Thought (CoT) and context-reference forms of instruction data were also applied in model training, aiming to improve the model's ability to understand, learn, and use agricultural domain knowledge.
|
| 22 |
+
|
| 23 |
+
| Data Type | Data Example |
|
| 24 |
+
| :---- | :---- |
|
| 25 |
+
| QA | "instruction": "In the context of a large population with little land and insufficient reserve arable land resources, how can we draw on the traditional Chinese practice of 'intensive farming' to increase grain yield per unit area while avoiding ecological degradation caused by excessive inputs?","output": "A 'San Cai' (Three Factors) coordinated intensive farming mode should be adopted: First, optimize the cropping system according to local conditions..." |
|
| 26 |
+
| COT-QA | "instruction": "What new types of farm tools appeared in Southern paddy field agriculture during the Song Dynasty? How did these tools promote the improvement of rice planting efficiency?",“output”: \<think\>\\nOkay, I need to answer the user's question: 'What new types of farm tools appeared in Southern paddy field agriculture during the Song Dynasty? How did these tools promote the improvement of rice planting efficiency?'...\\n\</think\>\\nThe new types of farm tools that appeared in Southern paddy field agriculture during the Song Dynasty include:..." |
|
| 27 |
+
| Incontext-QA | "instruction": ""How do temperature/humidity, cultivation measures, and varietal resistance interact to influence the occurrence of wheat sheath blight, leading to its dynamic development patterns at different growth stages?\\n\\nAfter wheat and barley seeds germinate...",“output”:The occurrence of wheat sheath blight is interactively influenced by temperature/humidity, cultivation measures, and varietal resistance, which jointly determine its dynamic development patterns at different growth stages. Specifically:\\n\\n1. Temperature and humidity dominate disease development rhythm: \\n Pathogen infection and expansion are significantly regulated by temperature. Before winter (below 10°C), disease development is slow, with only sporadic occurrences; when the temperature rises above 15°C the following spring, the disease expands continuously. 20–25°C is the optimal temperature for development, during which the rate of diseased plants and severity rise sharply, peaking especially from the jointing to the booting stage; above 30°C, the disease basically stagnates. Meanwhile, low temperature and rainy weather in spring... |
|
| 28 |
+
| Incontext-COT-QA | “instruction”: “What significant characteristics do China's top 100 agricultural foreign investment enterprises exhibit in terms of investment scale, regional distribution, and industrial structure? What priority orientation of China's agricultural 'Going Global' strategy does this reflect?\\n\\nIn 2015, China's top 100 foreign agricultural investment enterprises (hereinafter referred to as 'Top 100 Enterprises') invested 3.061 billion USD abroad, accounting for 84% of the total investment flow; as of the end of 2015, the cumulative investment reached 11.196 billion USD, accounting for 86.6%; 194 agricultural enterprises were established in 58 countries and regions, accounting for 25.4% of the total number of overseas agricultural enterprises; operating income reached 41.838 billion USD, accounting for 94.6% of the operating income of all enterprises. It can be seen that although the proportion of the number of Top 100 Enterprises is not high, in terms of investment scale, operating efficiency, etc....",“output”:"\<think\>\\nOkay, I need to answer the user's question regarding the significant characteristics of China's top 100 agricultural foreign investment enterprises in terms of investment scale, regional distribution, and industrial structure, as well as the priority orientation of the agricultural 'Going Global' strategy reflected by these characteristics. First, I need to carefully read the reference material provided by the user, extract key data from it, and summarize.\\n\\nFirst, looking at the investment scale section. In 2015, the Top 100 Enterprises' foreign...\\n\</think\>\\nThe significant characteristics of China's top 100 agricultural foreign investment enterprises in terms of investment scale, regional distribution, and industrial structure, and the priority orientation of the agricultural 'Going Global' strategy they reflect are as follows:\\n\\n---\\n\\n\#\#\# I. Significant Characteristics\\n1. **Concentration of Investment Scale...**" |
|
| 29 |
+
|
| 30 |
+
### Multi-Agent Retrieval-Augmented Generation (RAG)
|
| 31 |
+
|
| 32 |
+
Currently, Retrieval-Augmented Generation (RAG) has become an indispensable part of domain applications for Large Language Models. However, current RAG for domain-specific professional literature (data papers) still has limitations. We have launched a multi-agent retrieval-augmented framework optimized for knowledge base construction, data sources, and retrieval efficiency.
|
| 33 |
+
|
| 34 |
+

|
| 35 |
+
|
| 36 |
+
## Model and Data Usage
|
| 37 |
+
|
| 38 |
+
### Model Acquisition and Usage
|
| 39 |
+
|
| 40 |
+
Sinong-1.0 has been open-sourced on the ModelScope community. Currently, we have open-sourced models with two parameter scales: 8B and 32B. Anyone can directly download and deploy these models.
|
| 41 |
+
|Model Name|Open Source Address|
|
| 42 |
+
|:---:|:---:|
|
| 43 |
+
|Sinong1.0-8B|[ModelScope](https://modelscope.cn/models/NAULLM/Sinong1.0-8B)|
|
| 44 |
+
|Sinong1.0-32B|[ModelScope](https://modelscope.cn/models/NAULLM/Sinong1.0-32B)|
|
| 45 |
+
|||
|
| 46 |
+
|
| 47 |
+
### Data Acquisition and Usage
|
| 48 |
+
|
| 49 |
+
If you need to use the foundational or process data of this study, please contact us via email (llm4cca@njau.edu.cn). We look forward to exploring the path of smart agriculture under large language models with you.
|
| 50 |
+
|
| 51 |
+
## Development Team
|
| 52 |
+
Sinong was jointly developed by a cross-university team from Nanjing Agricultural University and Nanjing University of Science and Technology.
|
| 53 |
+
* Nanjing Agricultural University: **Wang Dongbo**, Zhao Zhixiao, Liu Ruilin, Yang Fan, Qi Yue, Pang Weiqi, Yang Junyi, Li Xuan, Zhou Qian, Wei Qizhi, Wu Ruifeng, Lin Sen, Zhang Hongmin, Pan Mengfei, Lu Qi
|
| 54 |
+
* Nanjing University of Science and Technology Team: **Shen Si**, **Zhu Danhao**
|
| 55 |
+
|
| 56 |
+
## Special Disclaimer
|
| 57 |
+
SiNong has been trained on extensive datasets from the agricultural domain. While we have employed various methods to clean and filter the training data, the inherent architecture of large language models makes it difficult to completely eliminate inaccuracies or "hallucinations" in generated content. Therefore, when using the SiNong model, please exercise careful judgment regarding its outputs, and avoid blind trust in AI-generated information to prevent any potential losses.
|
| 58 |
+
|
| 59 |
+
When using the SiNong Large Language Model, you must strictly comply with all applicable laws and regulations of the People's Republic of China. Under no circumstances shall we be liable for any damages (regardless of cause or form) arising from your use of the model or its outputs, including but not limited to direct, indirect, special, or incidental damages.
|
added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
args.json
ADDED
|
@@ -0,0 +1,364 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "/gemini/data-1/Qwen3-32B",
|
| 3 |
+
"model_type": "qwen3",
|
| 4 |
+
"model_revision": null,
|
| 5 |
+
"task_type": "causal_lm",
|
| 6 |
+
"torch_dtype": "bfloat16",
|
| 7 |
+
"attn_impl": null,
|
| 8 |
+
"num_labels": null,
|
| 9 |
+
"problem_type": null,
|
| 10 |
+
"rope_scaling": null,
|
| 11 |
+
"device_map": null,
|
| 12 |
+
"max_memory": {},
|
| 13 |
+
"local_repo_path": null,
|
| 14 |
+
"template": "qwen3",
|
| 15 |
+
"system": "You are a helpful assistant.",
|
| 16 |
+
"max_length": 8192,
|
| 17 |
+
"truncation_strategy": "delete",
|
| 18 |
+
"max_pixels": null,
|
| 19 |
+
"agent_template": null,
|
| 20 |
+
"norm_bbox": null,
|
| 21 |
+
"response_prefix": null,
|
| 22 |
+
"padding_side": "right",
|
| 23 |
+
"loss_scale": "default",
|
| 24 |
+
"sequence_parallel_size": 4,
|
| 25 |
+
"use_chat_template": true,
|
| 26 |
+
"template_backend": "swift",
|
| 27 |
+
"dataset": [
|
| 28 |
+
"/gemini/data-3/lrl/vlm/ms-swift-main/agr_FT.json"
|
| 29 |
+
],
|
| 30 |
+
"val_dataset": [],
|
| 31 |
+
"split_dataset_ratio": 0.01,
|
| 32 |
+
"data_seed": 42,
|
| 33 |
+
"dataset_num_proc": 1,
|
| 34 |
+
"dataset_shuffle": true,
|
| 35 |
+
"val_dataset_shuffle": false,
|
| 36 |
+
"streaming": false,
|
| 37 |
+
"interleave_prob": null,
|
| 38 |
+
"stopping_strategy": "first_exhausted",
|
| 39 |
+
"shuffle_buffer_size": 1000,
|
| 40 |
+
"enable_cache": false,
|
| 41 |
+
"download_mode": "reuse_dataset_if_exists",
|
| 42 |
+
"columns": {},
|
| 43 |
+
"strict": false,
|
| 44 |
+
"remove_unused_columns": true,
|
| 45 |
+
"model_name": [
|
| 46 |
+
"agrR-robot"
|
| 47 |
+
],
|
| 48 |
+
"model_author": [
|
| 49 |
+
"swift"
|
| 50 |
+
],
|
| 51 |
+
"custom_dataset_info": [],
|
| 52 |
+
"quant_method": null,
|
| 53 |
+
"quant_bits": null,
|
| 54 |
+
"hqq_axis": null,
|
| 55 |
+
"bnb_4bit_compute_dtype": "bfloat16",
|
| 56 |
+
"bnb_4bit_quant_type": "nf4",
|
| 57 |
+
"bnb_4bit_use_double_quant": true,
|
| 58 |
+
"bnb_4bit_quant_storage": null,
|
| 59 |
+
"max_new_tokens": 64,
|
| 60 |
+
"temperature": 0.0,
|
| 61 |
+
"top_k": null,
|
| 62 |
+
"top_p": null,
|
| 63 |
+
"repetition_penalty": null,
|
| 64 |
+
"num_beams": 1,
|
| 65 |
+
"stream": false,
|
| 66 |
+
"stop_words": [],
|
| 67 |
+
"logprobs": false,
|
| 68 |
+
"top_logprobs": null,
|
| 69 |
+
"ckpt_dir": null,
|
| 70 |
+
"load_dataset_config": null,
|
| 71 |
+
"lora_modules": [],
|
| 72 |
+
"tuner_backend": "peft",
|
| 73 |
+
"train_type": "lora",
|
| 74 |
+
"adapters": [],
|
| 75 |
+
"external_plugins": [],
|
| 76 |
+
"seed": 42,
|
| 77 |
+
"model_kwargs": {},
|
| 78 |
+
"load_args": false,
|
| 79 |
+
"load_data_args": false,
|
| 80 |
+
"use_hf": false,
|
| 81 |
+
"hub_token": null,
|
| 82 |
+
"custom_register_path": [],
|
| 83 |
+
"ignore_args_error": false,
|
| 84 |
+
"use_swift_lora": false,
|
| 85 |
+
"output_dir": "/gemini/data-3/lrl/vlm/ms-swift-main/AGR-32B/v2-20260106-171250",
|
| 86 |
+
"overwrite_output_dir": false,
|
| 87 |
+
"do_train": false,
|
| 88 |
+
"do_eval": false,
|
| 89 |
+
"do_predict": false,
|
| 90 |
+
"eval_strategy": "steps",
|
| 91 |
+
"prediction_loss_only": false,
|
| 92 |
+
"per_device_train_batch_size": 4,
|
| 93 |
+
"per_device_eval_batch_size": 2,
|
| 94 |
+
"per_gpu_train_batch_size": null,
|
| 95 |
+
"per_gpu_eval_batch_size": null,
|
| 96 |
+
"gradient_accumulation_steps": 2,
|
| 97 |
+
"eval_accumulation_steps": null,
|
| 98 |
+
"eval_delay": 0,
|
| 99 |
+
"torch_empty_cache_steps": null,
|
| 100 |
+
"learning_rate": 5e-05,
|
| 101 |
+
"weight_decay": 0.1,
|
| 102 |
+
"adam_beta1": 0.9,
|
| 103 |
+
"adam_beta2": 0.95,
|
| 104 |
+
"adam_epsilon": 1e-08,
|
| 105 |
+
"max_grad_norm": 1.0,
|
| 106 |
+
"num_train_epochs": 3.0,
|
| 107 |
+
"max_steps": -1,
|
| 108 |
+
"lr_scheduler_type": "cosine",
|
| 109 |
+
"lr_scheduler_kwargs": null,
|
| 110 |
+
"warmup_ratio": 0.05,
|
| 111 |
+
"warmup_steps": 0,
|
| 112 |
+
"log_level": "passive",
|
| 113 |
+
"log_level_replica": "warning",
|
| 114 |
+
"log_on_each_node": true,
|
| 115 |
+
"logging_dir": "/gemini/data-3/lrl/vlm/ms-swift-main/AGR-32B/v2-20260106-171250/runs",
|
| 116 |
+
"logging_strategy": "steps",
|
| 117 |
+
"logging_first_step": true,
|
| 118 |
+
"logging_steps": 5,
|
| 119 |
+
"logging_nan_inf_filter": true,
|
| 120 |
+
"save_strategy": "steps",
|
| 121 |
+
"save_steps": 2000.0,
|
| 122 |
+
"save_total_limit": 4,
|
| 123 |
+
"save_safetensors": true,
|
| 124 |
+
"save_on_each_node": false,
|
| 125 |
+
"save_only_model": false,
|
| 126 |
+
"restore_callback_states_from_checkpoint": false,
|
| 127 |
+
"no_cuda": false,
|
| 128 |
+
"use_cpu": false,
|
| 129 |
+
"use_mps_device": false,
|
| 130 |
+
"jit_mode_eval": false,
|
| 131 |
+
"use_ipex": false,
|
| 132 |
+
"bf16": true,
|
| 133 |
+
"fp16": false,
|
| 134 |
+
"fp16_opt_level": "O1",
|
| 135 |
+
"half_precision_backend": "auto",
|
| 136 |
+
"bf16_full_eval": false,
|
| 137 |
+
"fp16_full_eval": false,
|
| 138 |
+
"tf32": null,
|
| 139 |
+
"local_rank": 0,
|
| 140 |
+
"ddp_backend": null,
|
| 141 |
+
"tpu_num_cores": null,
|
| 142 |
+
"tpu_metrics_debug": false,
|
| 143 |
+
"debug": null,
|
| 144 |
+
"dataloader_drop_last": false,
|
| 145 |
+
"eval_steps": 10000000.0,
|
| 146 |
+
"dataloader_num_workers": 4,
|
| 147 |
+
"dataloader_prefetch_factor": null,
|
| 148 |
+
"past_index": -1,
|
| 149 |
+
"run_name": null,
|
| 150 |
+
"disable_tqdm": null,
|
| 151 |
+
"label_names": null,
|
| 152 |
+
"load_best_model_at_end": false,
|
| 153 |
+
"metric_for_best_model": "loss",
|
| 154 |
+
"greater_is_better": false,
|
| 155 |
+
"ignore_data_skip": true,
|
| 156 |
+
"fsdp": "",
|
| 157 |
+
"fsdp_min_num_params": 0,
|
| 158 |
+
"fsdp_config": null,
|
| 159 |
+
"tp_size": 0,
|
| 160 |
+
"fsdp_transformer_layer_cls_to_wrap": null,
|
| 161 |
+
"accelerator_config": {
|
| 162 |
+
"dispatch_batches": false
|
| 163 |
+
},
|
| 164 |
+
"deepspeed": {
|
| 165 |
+
"fp16": {
|
| 166 |
+
"enabled": "auto",
|
| 167 |
+
"loss_scale": 0,
|
| 168 |
+
"loss_scale_window": 1000,
|
| 169 |
+
"initial_scale_power": 16,
|
| 170 |
+
"hysteresis": 2,
|
| 171 |
+
"min_loss_scale": 1
|
| 172 |
+
},
|
| 173 |
+
"bf16": {
|
| 174 |
+
"enabled": "auto"
|
| 175 |
+
},
|
| 176 |
+
"zero_optimization": {
|
| 177 |
+
"stage": 3,
|
| 178 |
+
"offload_optimizer": {
|
| 179 |
+
"device": "none",
|
| 180 |
+
"pin_memory": true
|
| 181 |
+
},
|
| 182 |
+
"offload_param": {
|
| 183 |
+
"device": "none",
|
| 184 |
+
"pin_memory": true
|
| 185 |
+
},
|
| 186 |
+
"overlap_comm": false,
|
| 187 |
+
"contiguous_gradients": true,
|
| 188 |
+
"sub_group_size": 1000000000.0,
|
| 189 |
+
"reduce_bucket_size": "auto",
|
| 190 |
+
"zero_quantized_weights": false,
|
| 191 |
+
"zero_quantized_gradients": false,
|
| 192 |
+
"stage3_prefetch_bucket_size": "auto",
|
| 193 |
+
"stage3_param_persistence_threshold": "auto",
|
| 194 |
+
"stage3_max_live_parameters": 1000000000.0,
|
| 195 |
+
"stage3_max_reuse_distance": 1000000000.0,
|
| 196 |
+
"stage3_gather_16bit_weights_on_model_save": true
|
| 197 |
+
},
|
| 198 |
+
"gradient_accumulation_steps": "auto",
|
| 199 |
+
"gradient_clipping": "auto",
|
| 200 |
+
"steps_per_print": 2000,
|
| 201 |
+
"train_batch_size": "auto",
|
| 202 |
+
"train_micro_batch_size_per_gpu": "auto",
|
| 203 |
+
"wall_clock_breakdown": false
|
| 204 |
+
},
|
| 205 |
+
"label_smoothing_factor": 0.0,
|
| 206 |
+
"optim": "adamw_torch",
|
| 207 |
+
"optim_args": null,
|
| 208 |
+
"adafactor": false,
|
| 209 |
+
"group_by_length": false,
|
| 210 |
+
"length_column_name": "length",
|
| 211 |
+
"report_to": [
|
| 212 |
+
"tensorboard"
|
| 213 |
+
],
|
| 214 |
+
"ddp_find_unused_parameters": null,
|
| 215 |
+
"ddp_bucket_cap_mb": null,
|
| 216 |
+
"ddp_broadcast_buffers": null,
|
| 217 |
+
"dataloader_pin_memory": true,
|
| 218 |
+
"dataloader_persistent_workers": false,
|
| 219 |
+
"skip_memory_metrics": true,
|
| 220 |
+
"use_legacy_prediction_loop": false,
|
| 221 |
+
"push_to_hub": false,
|
| 222 |
+
"resume_from_checkpoint": null,
|
| 223 |
+
"hub_model_id": null,
|
| 224 |
+
"hub_strategy": "every_save",
|
| 225 |
+
"hub_private_repo": null,
|
| 226 |
+
"hub_always_push": false,
|
| 227 |
+
"gradient_checkpointing": true,
|
| 228 |
+
"gradient_checkpointing_kwargs": null,
|
| 229 |
+
"include_inputs_for_metrics": false,
|
| 230 |
+
"include_for_metrics": [],
|
| 231 |
+
"eval_do_concat_batches": true,
|
| 232 |
+
"fp16_backend": "auto",
|
| 233 |
+
"push_to_hub_model_id": null,
|
| 234 |
+
"push_to_hub_organization": null,
|
| 235 |
+
"push_to_hub_token": null,
|
| 236 |
+
"mp_parameters": "",
|
| 237 |
+
"auto_find_batch_size": false,
|
| 238 |
+
"full_determinism": false,
|
| 239 |
+
"torchdynamo": null,
|
| 240 |
+
"ray_scope": "last",
|
| 241 |
+
"ddp_timeout": 1800,
|
| 242 |
+
"torch_compile": false,
|
| 243 |
+
"torch_compile_backend": null,
|
| 244 |
+
"torch_compile_mode": null,
|
| 245 |
+
"include_tokens_per_second": false,
|
| 246 |
+
"include_num_input_tokens_seen": false,
|
| 247 |
+
"neftune_noise_alpha": null,
|
| 248 |
+
"optim_target_modules": null,
|
| 249 |
+
"batch_eval_metrics": false,
|
| 250 |
+
"eval_on_start": false,
|
| 251 |
+
"use_liger_kernel": false,
|
| 252 |
+
"eval_use_gather_object": false,
|
| 253 |
+
"average_tokens_across_devices": false,
|
| 254 |
+
"sortish_sampler": false,
|
| 255 |
+
"predict_with_generate": false,
|
| 256 |
+
"generation_max_length": null,
|
| 257 |
+
"generation_num_beams": null,
|
| 258 |
+
"generation_config": null,
|
| 259 |
+
"check_model": true,
|
| 260 |
+
"acc_strategy": "token",
|
| 261 |
+
"train_dataloader_shuffle": true,
|
| 262 |
+
"metric_warmup_step": 0,
|
| 263 |
+
"fsdp_num": 1,
|
| 264 |
+
"acc_steps": 1,
|
| 265 |
+
"eval_use_evalscope": false,
|
| 266 |
+
"eval_datasets": [],
|
| 267 |
+
"eval_limit": null,
|
| 268 |
+
"eval_datasets_args": null,
|
| 269 |
+
"eval_generation_config": null,
|
| 270 |
+
"freeze_parameters": [],
|
| 271 |
+
"freeze_parameters_ratio": 0.0,
|
| 272 |
+
"trainable_parameters": [],
|
| 273 |
+
"freeze_llm": false,
|
| 274 |
+
"freeze_vit": true,
|
| 275 |
+
"freeze_aligner": true,
|
| 276 |
+
"target_modules": [
|
| 277 |
+
"all-linear"
|
| 278 |
+
],
|
| 279 |
+
"target_regex": null,
|
| 280 |
+
"modules_to_save": [],
|
| 281 |
+
"lora_rank": 8,
|
| 282 |
+
"lora_alpha": 32,
|
| 283 |
+
"lora_dropout": 0.05,
|
| 284 |
+
"lora_bias": "none",
|
| 285 |
+
"lora_dtype": null,
|
| 286 |
+
"lorap_lr_ratio": null,
|
| 287 |
+
"use_rslora": false,
|
| 288 |
+
"use_dora": false,
|
| 289 |
+
"lora_ga_batch_size": 2,
|
| 290 |
+
"lora_ga_iters": 2,
|
| 291 |
+
"lora_ga_max_length": 1024,
|
| 292 |
+
"lora_ga_direction": "ArB2r",
|
| 293 |
+
"lora_ga_scale": "stable",
|
| 294 |
+
"lora_ga_stable_gamma": 16,
|
| 295 |
+
"init_weights": true,
|
| 296 |
+
"fourier_n_frequency": 2000,
|
| 297 |
+
"fourier_scaling": 300.0,
|
| 298 |
+
"boft_block_size": 4,
|
| 299 |
+
"boft_block_num": 0,
|
| 300 |
+
"boft_n_butterfly_factor": 1,
|
| 301 |
+
"boft_dropout": 0.0,
|
| 302 |
+
"vera_rank": 256,
|
| 303 |
+
"vera_projection_prng_key": 0,
|
| 304 |
+
"vera_dropout": 0.0,
|
| 305 |
+
"vera_d_initial": 0.1,
|
| 306 |
+
"adapter_act": "gelu",
|
| 307 |
+
"adapter_length": 128,
|
| 308 |
+
"use_galore": false,
|
| 309 |
+
"galore_target_modules": null,
|
| 310 |
+
"galore_rank": 128,
|
| 311 |
+
"galore_update_proj_gap": 50,
|
| 312 |
+
"galore_scale": 1.0,
|
| 313 |
+
"galore_proj_type": "std",
|
| 314 |
+
"galore_optim_per_parameter": false,
|
| 315 |
+
"galore_with_embedding": false,
|
| 316 |
+
"galore_quantization": false,
|
| 317 |
+
"galore_proj_quant": false,
|
| 318 |
+
"galore_proj_bits": 4,
|
| 319 |
+
"galore_proj_group_size": 256,
|
| 320 |
+
"galore_cos_threshold": 0.4,
|
| 321 |
+
"galore_gamma_proj": 2,
|
| 322 |
+
"galore_queue_size": 5,
|
| 323 |
+
"adalora_target_r": 8,
|
| 324 |
+
"adalora_init_r": 12,
|
| 325 |
+
"adalora_tinit": 0,
|
| 326 |
+
"adalora_tfinal": 0,
|
| 327 |
+
"adalora_deltaT": 1,
|
| 328 |
+
"adalora_beta1": 0.85,
|
| 329 |
+
"adalora_beta2": 0.85,
|
| 330 |
+
"adalora_orth_reg_weight": 0.5,
|
| 331 |
+
"llamapro_num_new_blocks": 4,
|
| 332 |
+
"llamapro_num_groups": null,
|
| 333 |
+
"lisa_activated_layers": 0,
|
| 334 |
+
"lisa_step_interval": 20,
|
| 335 |
+
"reft_layer_key": null,
|
| 336 |
+
"reft_layers": null,
|
| 337 |
+
"reft_rank": 4,
|
| 338 |
+
"reft_intervention_type": "LoreftIntervention",
|
| 339 |
+
"reft_args": null,
|
| 340 |
+
"swanlab_token": null,
|
| 341 |
+
"swanlab_project": null,
|
| 342 |
+
"swanlab_workspace": null,
|
| 343 |
+
"swanlab_exp_name": null,
|
| 344 |
+
"swanlab_mode": "cloud",
|
| 345 |
+
"add_version": true,
|
| 346 |
+
"resume_only_model": false,
|
| 347 |
+
"create_checkpoint_symlink": false,
|
| 348 |
+
"packing": false,
|
| 349 |
+
"lazy_tokenize": false,
|
| 350 |
+
"loss_type": null,
|
| 351 |
+
"optimizer": null,
|
| 352 |
+
"metric": null,
|
| 353 |
+
"zero_hpz_partition_size": null,
|
| 354 |
+
"rank": 0,
|
| 355 |
+
"global_world_size": 4,
|
| 356 |
+
"local_world_size": 4,
|
| 357 |
+
"model_suffix": "Qwen3-32B",
|
| 358 |
+
"model_info": "ModelInfo(model_type='qwen3', model_dir='/gemini/data-1/Qwen3-32B', torch_dtype=torch.bfloat16, max_model_len=40960, quant_method=None, quant_bits=None, rope_scaling=None, config=None, task_type='causal_lm', num_labels=None)",
|
| 359 |
+
"model_meta": "ModelMeta(model_type='qwen3', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-0.6B-Base', hf_model_id='Qwen/Qwen3-0.6B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-1.7B-Base', hf_model_id='Qwen/Qwen3-1.7B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-4B-Base', hf_model_id='Qwen/Qwen3-4B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-8B-Base', hf_model_id='Qwen/Qwen3-8B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-14B-Base', hf_model_id='Qwen/Qwen3-14B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-32B-Base', hf_model_id='Qwen/Qwen3-32B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-0.6B', hf_model_id='Qwen/Qwen3-0.6B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-1.7B', hf_model_id='Qwen/Qwen3-1.7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-4B', hf_model_id='Qwen/Qwen3-4B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-8B', hf_model_id='Qwen/Qwen3-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-14B', hf_model_id='Qwen/Qwen3-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-32B', hf_model_id='Qwen/Qwen3-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3', get_function=<function get_model_tokenizer_with_flash_attn at 0x7f2a44f4a4d0>, model_arch='llama', architectures=['Qwen3ForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.51'], tags=[])",
|
| 360 |
+
"model_dir": "/gemini/data-1/Qwen3-32B",
|
| 361 |
+
"hub": "<class 'swift.hub.hub.MSHub'>",
|
| 362 |
+
"evaluation_strategy": "steps",
|
| 363 |
+
"training_args": "Seq2SeqTrainingArguments(output_dir='/gemini/data-3/lrl/vlm/ms-swift-main/AGR-32B/v2-20260106-171250', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=<IntervalStrategy.STEPS: 'steps'>, prediction_loss_only=False, per_device_train_batch_size=4, per_device_eval_batch_size=2, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=2, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=5e-05, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.95, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=<SchedulerType.COSINE: 'cosine'>, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/gemini/data-3/lrl/vlm/ms-swift-main/AGR-32B/v2-20260106-171250/runs', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=<SaveStrategy.STEPS: 'steps'>, save_steps=2000, save_total_limit=4, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10000000, dataloader_num_workers=4, dataloader_prefetch_factor=10, past_index=-1, run_name='/gemini/data-3/lrl/vlm/ms-swift-main/AGR-32B/v2-20260106-171250', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=True, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, tp_size=0, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': False, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'zero_quantized_weights': False, 'zero_quantized_gradients': False, 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=<OptimizerNames.ADAMW_TORCH: 'adamw_torch'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, 
include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, sortish_sampler=False, predict_with_generate=False, generation_max_length=None, generation_num_beams=None, generation_config=None, check_model=True, acc_strategy='token', train_dataloader_shuffle=True, metric_warmup_step=0, fsdp_num=1, acc_steps=1, eval_use_evalscope=False, eval_datasets=[], eval_limit=None, eval_datasets_args=None, eval_generation_config=None, train_type='lora', optimizer=None, local_repo_path=None, galore_config=None)"
|
| 364 |
+
}
|
config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"eos_token_id": 151645,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 5120,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 25600,
|
| 14 |
+
"max_position_embeddings": 40960,
|
| 15 |
+
"max_window_layers": 64,
|
| 16 |
+
"model_type": "qwen3",
|
| 17 |
+
"num_attention_heads": 64,
|
| 18 |
+
"num_hidden_layers": 64,
|
| 19 |
+
"num_key_value_heads": 8,
|
| 20 |
+
"pad_token_id": 151643,
|
| 21 |
+
"rms_norm_eps": 1e-06,
|
| 22 |
+
"rope_scaling": null,
|
| 23 |
+
"rope_theta": 1000000,
|
| 24 |
+
"sliding_window": null,
|
| 25 |
+
"tie_word_embeddings": false,
|
| 26 |
+
"torch_dtype": "bfloat16",
|
| 27 |
+
"transformers_version": "4.51.3",
|
| 28 |
+
"use_cache": true,
|
| 29 |
+
"use_sliding_window": false,
|
| 30 |
+
"vocab_size": 151936
|
| 31 |
+
}
|
configuration.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"framework":"Pytorch","task":"text-generation"}
|
generation_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 151643,
|
| 3 |
+
"do_sample": true,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
151645,
|
| 6 |
+
151643
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 151643,
|
| 9 |
+
"temperature": 0.6,
|
| 10 |
+
"top_k": 20,
|
| 11 |
+
"top_p": 0.95,
|
| 12 |
+
"transformers_version": "4.51.3"
|
| 13 |
+
}
|
images/.gitkeep
ADDED
|
File without changes
|
images/RAG-workflow.png
ADDED
|
Git LFS Details
|
images/logo.png
ADDED
|
Git LFS Details
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model-00001-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ddabeb534857512ccbddf9a2d3474be9f72613b0e65f3b6e6710e41a0ce2af85
|
| 3 |
+
size 4932307584
|
model-00002-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:576a1e8b067da2e628ea91cf3c3b21a5c63979c23d6f0311279aeab78af698ac
|
| 3 |
+
size 4875989696
|
model-00003-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96d2aa2e3fc68375bb66f3f36dcb49617a92cc8657bb5c82bf6a35c10b932b5a
|
| 3 |
+
size 4875989720
|
model-00004-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ea682e9a14fd187427da17df6bc97548071f50c0fb63d813e29a2f2c8888076
|
| 3 |
+
size 4875989752
|
model-00005-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0d8e2bac3b4e3ffd7b33955a68cebf7b053ec8eb88fe6a2a446d043ae7434e5
|
| 3 |
+
size 4875989752
|
model-00006-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3479141893dffca3957cf2dfb686b70e7b01d2b80a78c935763b3eaab6095a9
|
| 3 |
+
size 4875989752
|
model-00007-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d586f56b489191fae96dfdeaf811f8e6775a2fb8cba151b65a361cabd26e99a
|
| 3 |
+
size 4875989752
|
model-00008-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5961e12be2a0308e24663a278cbbedaec68c29ca0168ed00a89069a5daac29e0
|
| 3 |
+
size 4875989752
|
model-00009-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29692e9a8509f890f810f29b33d520d584c84c1175b933d2bae80e2b3c29a419
|
| 3 |
+
size 4875989752
|
model-00010-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03af5641577747ebf9e0bc7eb3a605d21e63fe577f3bc4d14040ce4c5f1acc15
|
| 3 |
+
size 4875989752
|
model-00011-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3de1684b396e8ac911ed447477bf07ec0e8394637109ac8322f9541ed7996984
|
| 3 |
+
size 4875989752
|
model-00012-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c143210d9c9ad92212b530072e9a8ec12f4098f5fe894f1441aa0b4103a3a8f7
|
| 3 |
+
size 4875989752
|
model-00013-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8dcb456e02976a9a6f2d376a248f7caeb67931eec42f8c37e9689cca9736e271
|
| 3 |
+
size 4875989752
|
model-00014-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:119c5a75465a7e034b93d4f756c703e064a3b60b176966d39f01951a61efcbcb
|
| 3 |
+
size 2080144040
|
model.safetensors.index.json
ADDED
|
@@ -0,0 +1,714 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"metadata": {
|
| 3 |
+
"total_size": 65524246528
|
| 4 |
+
},
|
| 5 |
+
"weight_map": {
|
| 6 |
+
"lm_head.weight": "model-00014-of-00014.safetensors",
|
| 7 |
+
"model.embed_tokens.weight": "model-00001-of-00014.safetensors",
|
| 8 |
+
"model.layers.0.input_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 9 |
+
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
| 10 |
+
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
| 11 |
+
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
| 12 |
+
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 13 |
+
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
| 14 |
+
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
| 15 |
+
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
| 16 |
+
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 17 |
+
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 18 |
+
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
| 19 |
+
"model.layers.1.input_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 20 |
+
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
| 21 |
+
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
| 22 |
+
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
| 23 |
+
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 24 |
+
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
| 25 |
+
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
| 26 |
+
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
| 27 |
+
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 28 |
+
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 29 |
+
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
| 30 |
+
"model.layers.10.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 31 |
+
"model.layers.10.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 32 |
+
"model.layers.10.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
| 33 |
+
"model.layers.10.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 34 |
+
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 35 |
+
"model.layers.10.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
| 36 |
+
"model.layers.10.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 37 |
+
"model.layers.10.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 38 |
+
"model.layers.10.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 39 |
+
"model.layers.10.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 40 |
+
"model.layers.10.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 41 |
+
"model.layers.11.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 42 |
+
"model.layers.11.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 43 |
+
"model.layers.11.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
| 44 |
+
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 45 |
+
"model.layers.11.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 46 |
+
"model.layers.11.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
| 47 |
+
"model.layers.11.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 48 |
+
"model.layers.11.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 49 |
+
"model.layers.11.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 50 |
+
"model.layers.11.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 51 |
+
"model.layers.11.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 52 |
+
"model.layers.12.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 53 |
+
"model.layers.12.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 54 |
+
"model.layers.12.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
| 55 |
+
"model.layers.12.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 56 |
+
"model.layers.12.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 57 |
+
"model.layers.12.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
| 58 |
+
"model.layers.12.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 59 |
+
"model.layers.12.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 60 |
+
"model.layers.12.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 61 |
+
"model.layers.12.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 62 |
+
"model.layers.12.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 63 |
+
"model.layers.13.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 64 |
+
"model.layers.13.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 65 |
+
"model.layers.13.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
| 66 |
+
"model.layers.13.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 67 |
+
"model.layers.13.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 68 |
+
"model.layers.13.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
| 69 |
+
"model.layers.13.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 70 |
+
"model.layers.13.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 71 |
+
"model.layers.13.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 72 |
+
"model.layers.13.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 73 |
+
"model.layers.13.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 74 |
+
"model.layers.14.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 75 |
+
"model.layers.14.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 76 |
+
"model.layers.14.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 77 |
+
"model.layers.14.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 78 |
+
"model.layers.14.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 79 |
+
"model.layers.14.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
| 80 |
+
"model.layers.14.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
| 81 |
+
"model.layers.14.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
| 82 |
+
"model.layers.14.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
| 83 |
+
"model.layers.14.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
| 84 |
+
"model.layers.14.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 85 |
+
"model.layers.15.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 86 |
+
"model.layers.15.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 87 |
+
"model.layers.15.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 88 |
+
"model.layers.15.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 89 |
+
"model.layers.15.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 90 |
+
"model.layers.15.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
| 91 |
+
"model.layers.15.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
| 92 |
+
"model.layers.15.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
| 93 |
+
"model.layers.15.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
| 94 |
+
"model.layers.15.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
| 95 |
+
"model.layers.15.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 96 |
+
"model.layers.16.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 97 |
+
"model.layers.16.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 98 |
+
"model.layers.16.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 99 |
+
"model.layers.16.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 100 |
+
"model.layers.16.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 101 |
+
"model.layers.16.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
| 102 |
+
"model.layers.16.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
| 103 |
+
"model.layers.16.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
| 104 |
+
"model.layers.16.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
| 105 |
+
"model.layers.16.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
| 106 |
+
"model.layers.16.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 107 |
+
"model.layers.17.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 108 |
+
"model.layers.17.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 109 |
+
"model.layers.17.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 110 |
+
"model.layers.17.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 111 |
+
"model.layers.17.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 112 |
+
"model.layers.17.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
| 113 |
+
"model.layers.17.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
| 114 |
+
"model.layers.17.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
| 115 |
+
"model.layers.17.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
| 116 |
+
"model.layers.17.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
| 117 |
+
"model.layers.17.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 118 |
+
"model.layers.18.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 119 |
+
"model.layers.18.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 120 |
+
"model.layers.18.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 121 |
+
"model.layers.18.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
| 122 |
+
"model.layers.18.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 123 |
+
"model.layers.18.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
| 124 |
+
"model.layers.18.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
| 125 |
+
"model.layers.18.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
| 126 |
+
"model.layers.18.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
| 127 |
+
"model.layers.18.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
| 128 |
+
"model.layers.18.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 129 |
+
"model.layers.19.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 130 |
+
"model.layers.19.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 131 |
+
"model.layers.19.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
| 132 |
+
"model.layers.19.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
| 133 |
+
"model.layers.19.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 134 |
+
"model.layers.19.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 135 |
+
"model.layers.19.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 136 |
+
"model.layers.19.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 137 |
+
"model.layers.19.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 138 |
+
"model.layers.19.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 139 |
+
"model.layers.19.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 140 |
+
"model.layers.2.input_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 141 |
+
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
| 142 |
+
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
| 143 |
+
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
| 144 |
+
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 145 |
+
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
| 146 |
+
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
| 147 |
+
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
| 148 |
+
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 149 |
+
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 150 |
+
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
| 151 |
+
"model.layers.20.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 152 |
+
"model.layers.20.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 153 |
+
"model.layers.20.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
| 154 |
+
"model.layers.20.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
| 155 |
+
"model.layers.20.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 156 |
+
"model.layers.20.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 157 |
+
"model.layers.20.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 158 |
+
"model.layers.20.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 159 |
+
"model.layers.20.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 160 |
+
"model.layers.20.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 161 |
+
"model.layers.20.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 162 |
+
"model.layers.21.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 163 |
+
"model.layers.21.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 164 |
+
"model.layers.21.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
| 165 |
+
"model.layers.21.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
| 166 |
+
"model.layers.21.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 167 |
+
"model.layers.21.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 168 |
+
"model.layers.21.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 169 |
+
"model.layers.21.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 170 |
+
"model.layers.21.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 171 |
+
"model.layers.21.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 172 |
+
"model.layers.21.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 173 |
+
"model.layers.22.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 174 |
+
"model.layers.22.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 175 |
+
"model.layers.22.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
| 176 |
+
"model.layers.22.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
| 177 |
+
"model.layers.22.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 178 |
+
"model.layers.22.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 179 |
+
"model.layers.22.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 180 |
+
"model.layers.22.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 181 |
+
"model.layers.22.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 182 |
+
"model.layers.22.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 183 |
+
"model.layers.22.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 184 |
+
"model.layers.23.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 185 |
+
"model.layers.23.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
| 186 |
+
"model.layers.23.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
| 187 |
+
"model.layers.23.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 188 |
+
"model.layers.23.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 189 |
+
"model.layers.23.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 190 |
+
"model.layers.23.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 191 |
+
"model.layers.23.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 192 |
+
"model.layers.23.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 193 |
+
"model.layers.23.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 194 |
+
"model.layers.23.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 195 |
+
"model.layers.24.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 196 |
+
"model.layers.24.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
| 197 |
+
"model.layers.24.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 198 |
+
"model.layers.24.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 199 |
+
"model.layers.24.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 200 |
+
"model.layers.24.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
| 201 |
+
"model.layers.24.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
| 202 |
+
"model.layers.24.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
| 203 |
+
"model.layers.24.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
| 204 |
+
"model.layers.24.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
| 205 |
+
"model.layers.24.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 206 |
+
"model.layers.25.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 207 |
+
"model.layers.25.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
| 208 |
+
"model.layers.25.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 209 |
+
"model.layers.25.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 210 |
+
"model.layers.25.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 211 |
+
"model.layers.25.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
| 212 |
+
"model.layers.25.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
| 213 |
+
"model.layers.25.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
| 214 |
+
"model.layers.25.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
| 215 |
+
"model.layers.25.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
| 216 |
+
"model.layers.25.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 217 |
+
"model.layers.26.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 218 |
+
"model.layers.26.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
| 219 |
+
"model.layers.26.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 220 |
+
"model.layers.26.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 221 |
+
"model.layers.26.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 222 |
+
"model.layers.26.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
| 223 |
+
"model.layers.26.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
| 224 |
+
"model.layers.26.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
| 225 |
+
"model.layers.26.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
| 226 |
+
"model.layers.26.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
| 227 |
+
"model.layers.26.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 228 |
+
"model.layers.27.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 229 |
+
"model.layers.27.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
| 230 |
+
"model.layers.27.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 231 |
+
"model.layers.27.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 232 |
+
"model.layers.27.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 233 |
+
"model.layers.27.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
| 234 |
+
"model.layers.27.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
| 235 |
+
"model.layers.27.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
| 236 |
+
"model.layers.27.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
| 237 |
+
"model.layers.27.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
| 238 |
+
"model.layers.27.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 239 |
+
"model.layers.28.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 240 |
+
"model.layers.28.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
| 241 |
+
"model.layers.28.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 242 |
+
"model.layers.28.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 243 |
+
"model.layers.28.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 244 |
+
"model.layers.28.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
| 245 |
+
"model.layers.28.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
| 246 |
+
"model.layers.28.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
| 247 |
+
"model.layers.28.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
| 248 |
+
"model.layers.28.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
| 249 |
+
"model.layers.28.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 250 |
+
"model.layers.29.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 251 |
+
"model.layers.29.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
| 252 |
+
"model.layers.29.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 253 |
+
"model.layers.29.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 254 |
+
"model.layers.29.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 255 |
+
"model.layers.29.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
| 256 |
+
"model.layers.29.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
| 257 |
+
"model.layers.29.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 258 |
+
"model.layers.29.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 259 |
+
"model.layers.29.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
| 260 |
+
"model.layers.29.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 261 |
+
"model.layers.3.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 262 |
+
"model.layers.3.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 263 |
+
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
| 264 |
+
"model.layers.3.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 265 |
+
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 266 |
+
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
| 267 |
+
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
| 268 |
+
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
| 269 |
+
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 270 |
+
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 271 |
+
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
| 272 |
+
"model.layers.30.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 273 |
+
"model.layers.30.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
| 274 |
+
"model.layers.30.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 275 |
+
"model.layers.30.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 276 |
+
"model.layers.30.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 277 |
+
"model.layers.30.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
| 278 |
+
"model.layers.30.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
| 279 |
+
"model.layers.30.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 280 |
+
"model.layers.30.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 281 |
+
"model.layers.30.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
| 282 |
+
"model.layers.30.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 283 |
+
"model.layers.31.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 284 |
+
"model.layers.31.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
| 285 |
+
"model.layers.31.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 286 |
+
"model.layers.31.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 287 |
+
"model.layers.31.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 288 |
+
"model.layers.31.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
| 289 |
+
"model.layers.31.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
| 290 |
+
"model.layers.31.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 291 |
+
"model.layers.31.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 292 |
+
"model.layers.31.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
| 293 |
+
"model.layers.31.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 294 |
+
"model.layers.32.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 295 |
+
"model.layers.32.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
| 296 |
+
"model.layers.32.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 297 |
+
"model.layers.32.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 298 |
+
"model.layers.32.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 299 |
+
"model.layers.32.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
| 300 |
+
"model.layers.32.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
| 301 |
+
"model.layers.32.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 302 |
+
"model.layers.32.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 303 |
+
"model.layers.32.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
| 304 |
+
"model.layers.32.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 305 |
+
"model.layers.33.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 306 |
+
"model.layers.33.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
| 307 |
+
"model.layers.33.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 308 |
+
"model.layers.33.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
| 309 |
+
"model.layers.33.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 310 |
+
"model.layers.33.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
| 311 |
+
"model.layers.33.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
| 312 |
+
"model.layers.33.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 313 |
+
"model.layers.33.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 314 |
+
"model.layers.33.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
| 315 |
+
"model.layers.33.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 316 |
+
"model.layers.34.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 317 |
+
"model.layers.34.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
| 318 |
+
"model.layers.34.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 319 |
+
"model.layers.34.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
| 320 |
+
"model.layers.34.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 321 |
+
"model.layers.34.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 322 |
+
"model.layers.34.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 323 |
+
"model.layers.34.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 324 |
+
"model.layers.34.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 325 |
+
"model.layers.34.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
| 326 |
+
"model.layers.34.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
| 327 |
+
"model.layers.35.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 328 |
+
"model.layers.35.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
| 329 |
+
"model.layers.35.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 330 |
+
"model.layers.35.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
| 331 |
+
"model.layers.35.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 332 |
+
"model.layers.35.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 333 |
+
"model.layers.35.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 334 |
+
"model.layers.35.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 335 |
+
"model.layers.35.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 336 |
+
"model.layers.35.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
| 337 |
+
"model.layers.35.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
| 338 |
+
"model.layers.36.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 339 |
+
"model.layers.36.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
| 340 |
+
"model.layers.36.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 341 |
+
"model.layers.36.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
| 342 |
+
"model.layers.36.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 343 |
+
"model.layers.36.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 344 |
+
"model.layers.36.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 345 |
+
"model.layers.36.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 346 |
+
"model.layers.36.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 347 |
+
"model.layers.36.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
| 348 |
+
"model.layers.36.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
| 349 |
+
"model.layers.37.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 350 |
+
"model.layers.37.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
| 351 |
+
"model.layers.37.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 352 |
+
"model.layers.37.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
| 353 |
+
"model.layers.37.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 354 |
+
"model.layers.37.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 355 |
+
"model.layers.37.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 356 |
+
"model.layers.37.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 357 |
+
"model.layers.37.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 358 |
+
"model.layers.37.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
| 359 |
+
"model.layers.37.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
| 360 |
+
"model.layers.38.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 361 |
+
"model.layers.38.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 362 |
+
"model.layers.38.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 363 |
+
"model.layers.38.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
| 364 |
+
"model.layers.38.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 365 |
+
"model.layers.38.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 366 |
+
"model.layers.38.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 367 |
+
"model.layers.38.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 368 |
+
"model.layers.38.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 369 |
+
"model.layers.38.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
| 370 |
+
"model.layers.38.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
| 371 |
+
"model.layers.39.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 372 |
+
"model.layers.39.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 373 |
+
"model.layers.39.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
| 374 |
+
"model.layers.39.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
| 375 |
+
"model.layers.39.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 376 |
+
"model.layers.39.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 377 |
+
"model.layers.39.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 378 |
+
"model.layers.39.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 379 |
+
"model.layers.39.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
| 380 |
+
"model.layers.39.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
| 381 |
+
"model.layers.39.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 382 |
+
"model.layers.4.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 383 |
+
"model.layers.4.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 384 |
+
"model.layers.4.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 385 |
+
"model.layers.4.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 386 |
+
"model.layers.4.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 387 |
+
"model.layers.4.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 388 |
+
"model.layers.4.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 389 |
+
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 390 |
+
"model.layers.4.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
| 391 |
+
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
| 392 |
+
"model.layers.4.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
| 393 |
+
"model.layers.40.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 394 |
+
"model.layers.40.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 395 |
+
"model.layers.40.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
| 396 |
+
"model.layers.40.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
| 397 |
+
"model.layers.40.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 398 |
+
"model.layers.40.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 399 |
+
"model.layers.40.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 400 |
+
"model.layers.40.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 401 |
+
"model.layers.40.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
| 402 |
+
"model.layers.40.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
| 403 |
+
"model.layers.40.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 404 |
+
"model.layers.41.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 405 |
+
"model.layers.41.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 406 |
+
"model.layers.41.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
| 407 |
+
"model.layers.41.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
| 408 |
+
"model.layers.41.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 409 |
+
"model.layers.41.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 410 |
+
"model.layers.41.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 411 |
+
"model.layers.41.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 412 |
+
"model.layers.41.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
| 413 |
+
"model.layers.41.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
| 414 |
+
"model.layers.41.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 415 |
+
"model.layers.42.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 416 |
+
"model.layers.42.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 417 |
+
"model.layers.42.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
| 418 |
+
"model.layers.42.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
| 419 |
+
"model.layers.42.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 420 |
+
"model.layers.42.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 421 |
+
"model.layers.42.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 422 |
+
"model.layers.42.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 423 |
+
"model.layers.42.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
| 424 |
+
"model.layers.42.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
| 425 |
+
"model.layers.42.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 426 |
+
"model.layers.43.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 427 |
+
"model.layers.43.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
| 428 |
+
"model.layers.43.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
| 429 |
+
"model.layers.43.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
| 430 |
+
"model.layers.43.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 431 |
+
"model.layers.43.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 432 |
+
"model.layers.43.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 433 |
+
"model.layers.43.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 434 |
+
"model.layers.43.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
| 435 |
+
"model.layers.43.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
| 436 |
+
"model.layers.43.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 437 |
+
"model.layers.44.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 438 |
+
"model.layers.44.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
| 439 |
+
"model.layers.44.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
| 440 |
+
"model.layers.44.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
| 441 |
+
"model.layers.44.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 442 |
+
"model.layers.44.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
| 443 |
+
"model.layers.44.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
| 444 |
+
"model.layers.44.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
| 445 |
+
"model.layers.44.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
| 446 |
+
"model.layers.44.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
| 447 |
+
"model.layers.44.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
| 448 |
+
"model.layers.45.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 449 |
+
"model.layers.45.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
| 450 |
+
"model.layers.45.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
| 451 |
+
"model.layers.45.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
| 452 |
+
"model.layers.45.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 453 |
+
"model.layers.45.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
| 454 |
+
"model.layers.45.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
| 455 |
+
"model.layers.45.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
| 456 |
+
"model.layers.45.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
| 457 |
+
"model.layers.45.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
| 458 |
+
"model.layers.45.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
| 459 |
+
"model.layers.46.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 460 |
+
"model.layers.46.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
| 461 |
+
"model.layers.46.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
| 462 |
+
"model.layers.46.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
| 463 |
+
"model.layers.46.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 464 |
+
"model.layers.46.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
| 465 |
+
"model.layers.46.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
| 466 |
+
"model.layers.46.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
| 467 |
+
"model.layers.46.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
| 468 |
+
"model.layers.46.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
| 469 |
+
"model.layers.46.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
| 470 |
+
"model.layers.47.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 471 |
+
"model.layers.47.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
| 472 |
+
"model.layers.47.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
| 473 |
+
"model.layers.47.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
| 474 |
+
"model.layers.47.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 475 |
+
"model.layers.47.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
| 476 |
+
"model.layers.47.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
| 477 |
+
"model.layers.47.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
| 478 |
+
"model.layers.47.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
| 479 |
+
"model.layers.47.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
| 480 |
+
"model.layers.47.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
| 481 |
+
"model.layers.48.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 482 |
+
"model.layers.48.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
| 483 |
+
"model.layers.48.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
| 484 |
+
"model.layers.48.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 485 |
+
"model.layers.48.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 486 |
+
"model.layers.48.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
| 487 |
+
"model.layers.48.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
| 488 |
+
"model.layers.48.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
| 489 |
+
"model.layers.48.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
| 490 |
+
"model.layers.48.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
| 491 |
+
"model.layers.48.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
| 492 |
+
"model.layers.49.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 493 |
+
"model.layers.49.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
| 494 |
+
"model.layers.49.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
| 495 |
+
"model.layers.49.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 496 |
+
"model.layers.49.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 497 |
+
"model.layers.49.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 498 |
+
"model.layers.49.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
| 499 |
+
"model.layers.49.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 500 |
+
"model.layers.49.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
| 501 |
+
"model.layers.49.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 502 |
+
"model.layers.49.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
| 503 |
+
"model.layers.5.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 504 |
+
"model.layers.5.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 505 |
+
"model.layers.5.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 506 |
+
"model.layers.5.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 507 |
+
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 508 |
+
"model.layers.5.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 509 |
+
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 510 |
+
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 511 |
+
"model.layers.5.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
| 512 |
+
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
| 513 |
+
"model.layers.5.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
| 514 |
+
"model.layers.50.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 515 |
+
"model.layers.50.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
| 516 |
+
"model.layers.50.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
| 517 |
+
"model.layers.50.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 518 |
+
"model.layers.50.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 519 |
+
"model.layers.50.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 520 |
+
"model.layers.50.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
| 521 |
+
"model.layers.50.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 522 |
+
"model.layers.50.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
| 523 |
+
"model.layers.50.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 524 |
+
"model.layers.50.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
| 525 |
+
"model.layers.51.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 526 |
+
"model.layers.51.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
| 527 |
+
"model.layers.51.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
| 528 |
+
"model.layers.51.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 529 |
+
"model.layers.51.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 530 |
+
"model.layers.51.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 531 |
+
"model.layers.51.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
| 532 |
+
"model.layers.51.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 533 |
+
"model.layers.51.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
| 534 |
+
"model.layers.51.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 535 |
+
"model.layers.51.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
| 536 |
+
"model.layers.52.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 537 |
+
"model.layers.52.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
| 538 |
+
"model.layers.52.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
| 539 |
+
"model.layers.52.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 540 |
+
"model.layers.52.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 541 |
+
"model.layers.52.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 542 |
+
"model.layers.52.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
| 543 |
+
"model.layers.52.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 544 |
+
"model.layers.52.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
| 545 |
+
"model.layers.52.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 546 |
+
"model.layers.52.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
| 547 |
+
"model.layers.53.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 548 |
+
"model.layers.53.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 549 |
+
"model.layers.53.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
| 550 |
+
"model.layers.53.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 551 |
+
"model.layers.53.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 552 |
+
"model.layers.53.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 553 |
+
"model.layers.53.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
| 554 |
+
"model.layers.53.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 555 |
+
"model.layers.53.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
| 556 |
+
"model.layers.53.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 557 |
+
"model.layers.53.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
| 558 |
+
"model.layers.54.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 559 |
+
"model.layers.54.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 560 |
+
"model.layers.54.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
| 561 |
+
"model.layers.54.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 562 |
+
"model.layers.54.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 563 |
+
"model.layers.54.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 564 |
+
"model.layers.54.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 565 |
+
"model.layers.54.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 566 |
+
"model.layers.54.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
| 567 |
+
"model.layers.54.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
| 568 |
+
"model.layers.54.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
| 569 |
+
"model.layers.55.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 570 |
+
"model.layers.55.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 571 |
+
"model.layers.55.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
| 572 |
+
"model.layers.55.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 573 |
+
"model.layers.55.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 574 |
+
"model.layers.55.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 575 |
+
"model.layers.55.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 576 |
+
"model.layers.55.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 577 |
+
"model.layers.55.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
| 578 |
+
"model.layers.55.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
| 579 |
+
"model.layers.55.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
| 580 |
+
"model.layers.56.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 581 |
+
"model.layers.56.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 582 |
+
"model.layers.56.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
| 583 |
+
"model.layers.56.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 584 |
+
"model.layers.56.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 585 |
+
"model.layers.56.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 586 |
+
"model.layers.56.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 587 |
+
"model.layers.56.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 588 |
+
"model.layers.56.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
| 589 |
+
"model.layers.56.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
| 590 |
+
"model.layers.56.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
| 591 |
+
"model.layers.57.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 592 |
+
"model.layers.57.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 593 |
+
"model.layers.57.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
| 594 |
+
"model.layers.57.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 595 |
+
"model.layers.57.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 596 |
+
"model.layers.57.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 597 |
+
"model.layers.57.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 598 |
+
"model.layers.57.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 599 |
+
"model.layers.57.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
| 600 |
+
"model.layers.57.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
| 601 |
+
"model.layers.57.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
| 602 |
+
"model.layers.58.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 603 |
+
"model.layers.58.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
| 604 |
+
"model.layers.58.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
| 605 |
+
"model.layers.58.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 606 |
+
"model.layers.58.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 607 |
+
"model.layers.58.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 608 |
+
"model.layers.58.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 609 |
+
"model.layers.58.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 610 |
+
"model.layers.58.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
| 611 |
+
"model.layers.58.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
| 612 |
+
"model.layers.58.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
| 613 |
+
"model.layers.59.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 614 |
+
"model.layers.59.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
| 615 |
+
"model.layers.59.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 616 |
+
"model.layers.59.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 617 |
+
"model.layers.59.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 618 |
+
"model.layers.59.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 619 |
+
"model.layers.59.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
| 620 |
+
"model.layers.59.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 621 |
+
"model.layers.59.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 622 |
+
"model.layers.59.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 623 |
+
"model.layers.59.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 624 |
+
"model.layers.6.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 625 |
+
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 626 |
+
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 627 |
+
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 628 |
+
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 629 |
+
"model.layers.6.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 630 |
+
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 631 |
+
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 632 |
+
"model.layers.6.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
| 633 |
+
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
| 634 |
+
"model.layers.6.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
| 635 |
+
"model.layers.60.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 636 |
+
"model.layers.60.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
| 637 |
+
"model.layers.60.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 638 |
+
"model.layers.60.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 639 |
+
"model.layers.60.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 640 |
+
"model.layers.60.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 641 |
+
"model.layers.60.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
| 642 |
+
"model.layers.60.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 643 |
+
"model.layers.60.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 644 |
+
"model.layers.60.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 645 |
+
"model.layers.60.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 646 |
+
"model.layers.61.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 647 |
+
"model.layers.61.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
| 648 |
+
"model.layers.61.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 649 |
+
"model.layers.61.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 650 |
+
"model.layers.61.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 651 |
+
"model.layers.61.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 652 |
+
"model.layers.61.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
| 653 |
+
"model.layers.61.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 654 |
+
"model.layers.61.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 655 |
+
"model.layers.61.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 656 |
+
"model.layers.61.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 657 |
+
"model.layers.62.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 658 |
+
"model.layers.62.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
| 659 |
+
"model.layers.62.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 660 |
+
"model.layers.62.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 661 |
+
"model.layers.62.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 662 |
+
"model.layers.62.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 663 |
+
"model.layers.62.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
| 664 |
+
"model.layers.62.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 665 |
+
"model.layers.62.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 666 |
+
"model.layers.62.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 667 |
+
"model.layers.62.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 668 |
+
"model.layers.63.input_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 669 |
+
"model.layers.63.mlp.down_proj.weight": "model-00014-of-00014.safetensors",
|
| 670 |
+
"model.layers.63.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 671 |
+
"model.layers.63.mlp.up_proj.weight": "model-00014-of-00014.safetensors",
|
| 672 |
+
"model.layers.63.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 673 |
+
"model.layers.63.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 674 |
+
"model.layers.63.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
| 675 |
+
"model.layers.63.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 676 |
+
"model.layers.63.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 677 |
+
"model.layers.63.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 678 |
+
"model.layers.63.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 679 |
+
"model.layers.7.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 680 |
+
"model.layers.7.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 681 |
+
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 682 |
+
"model.layers.7.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 683 |
+
"model.layers.7.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 684 |
+
"model.layers.7.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 685 |
+
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 686 |
+
"model.layers.7.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 687 |
+
"model.layers.7.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
| 688 |
+
"model.layers.7.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
| 689 |
+
"model.layers.7.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
| 690 |
+
"model.layers.8.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 691 |
+
"model.layers.8.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 692 |
+
"model.layers.8.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 693 |
+
"model.layers.8.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 694 |
+
"model.layers.8.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 695 |
+
"model.layers.8.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 696 |
+
"model.layers.8.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 697 |
+
"model.layers.8.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 698 |
+
"model.layers.8.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
| 699 |
+
"model.layers.8.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
| 700 |
+
"model.layers.8.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
| 701 |
+
"model.layers.9.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 702 |
+
"model.layers.9.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 703 |
+
"model.layers.9.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
| 704 |
+
"model.layers.9.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 705 |
+
"model.layers.9.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 706 |
+
"model.layers.9.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
| 707 |
+
"model.layers.9.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 708 |
+
"model.layers.9.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 709 |
+
"model.layers.9.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 710 |
+
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 711 |
+
"model.layers.9.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 712 |
+
"model.norm.weight": "model-00014-of-00014.safetensors"
|
| 713 |
+
}
|
| 714 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
},
|
| 181 |
+
"151665": {
|
| 182 |
+
"content": "<tool_response>",
|
| 183 |
+
"lstrip": false,
|
| 184 |
+
"normalized": false,
|
| 185 |
+
"rstrip": false,
|
| 186 |
+
"single_word": false,
|
| 187 |
+
"special": false
|
| 188 |
+
},
|
| 189 |
+
"151666": {
|
| 190 |
+
"content": "</tool_response>",
|
| 191 |
+
"lstrip": false,
|
| 192 |
+
"normalized": false,
|
| 193 |
+
"rstrip": false,
|
| 194 |
+
"single_word": false,
|
| 195 |
+
"special": false
|
| 196 |
+
},
|
| 197 |
+
"151667": {
|
| 198 |
+
"content": "<think>",
|
| 199 |
+
"lstrip": false,
|
| 200 |
+
"normalized": false,
|
| 201 |
+
"rstrip": false,
|
| 202 |
+
"single_word": false,
|
| 203 |
+
"special": false
|
| 204 |
+
},
|
| 205 |
+
"151668": {
|
| 206 |
+
"content": "</think>",
|
| 207 |
+
"lstrip": false,
|
| 208 |
+
"normalized": false,
|
| 209 |
+
"rstrip": false,
|
| 210 |
+
"single_word": false,
|
| 211 |
+
"special": false
|
| 212 |
+
}
|
| 213 |
+
},
|
| 214 |
+
"additional_special_tokens": [
|
| 215 |
+
"<|im_start|>",
|
| 216 |
+
"<|im_end|>",
|
| 217 |
+
"<|object_ref_start|>",
|
| 218 |
+
"<|object_ref_end|>",
|
| 219 |
+
"<|box_start|>",
|
| 220 |
+
"<|box_end|>",
|
| 221 |
+
"<|quad_start|>",
|
| 222 |
+
"<|quad_end|>",
|
| 223 |
+
"<|vision_start|>",
|
| 224 |
+
"<|vision_end|>",
|
| 225 |
+
"<|vision_pad|>",
|
| 226 |
+
"<|image_pad|>",
|
| 227 |
+
"<|video_pad|>"
|
| 228 |
+
],
|
| 229 |
+
"bos_token": null,
|
| 230 |
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in message.content %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {%- set reasoning_content = 
message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
|
| 231 |
+
"clean_up_tokenization_spaces": false,
|
| 232 |
+
"eos_token": "<|im_end|>",
|
| 233 |
+
"errors": "replace",
|
| 234 |
+
"extra_special_tokens": {},
|
| 235 |
+
"model_max_length": 131072,
|
| 236 |
+
"pad_token": "<|endoftext|>",
|
| 237 |
+
"split_special_tokens": false,
|
| 238 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 239 |
+
"unk_token": null
|
| 240 |
+
}
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|