Commit
·
0262a1f
verified
·
0
Parent(s):
Duplicate from GAIR/LIMI
Browse files
Co-authored-by: JieSun <Sunshine279@users.noreply.huggingface.co>
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +36 -0
- README.md +173 -0
- assets/asi.png +0 -0
- assets/sii.jpg +0 -0
- assets/teaser.jpg +3 -0
- chat_template.jinja +103 -0
- config.json +43 -0
- generation_config.json +10 -0
- model-00000-of-00132.safetensors +3 -0
- model-00001-of-00132.safetensors +3 -0
- model-00002-of-00132.safetensors +3 -0
- model-00003-of-00132.safetensors +3 -0
- model-00004-of-00132.safetensors +3 -0
- model-00005-of-00132.safetensors +3 -0
- model-00006-of-00132.safetensors +3 -0
- model-00007-of-00132.safetensors +3 -0
- model-00008-of-00132.safetensors +3 -0
- model-00009-of-00132.safetensors +3 -0
- model-00010-of-00132.safetensors +3 -0
- model-00011-of-00132.safetensors +3 -0
- model-00012-of-00132.safetensors +3 -0
- model-00013-of-00132.safetensors +3 -0
- model-00014-of-00132.safetensors +3 -0
- model-00015-of-00132.safetensors +3 -0
- model-00016-of-00132.safetensors +3 -0
- model-00017-of-00132.safetensors +3 -0
- model-00018-of-00132.safetensors +3 -0
- model-00019-of-00132.safetensors +3 -0
- model-00020-of-00132.safetensors +3 -0
- model-00021-of-00132.safetensors +3 -0
- model-00022-of-00132.safetensors +3 -0
- model-00023-of-00132.safetensors +3 -0
- model-00024-of-00132.safetensors +3 -0
- model-00025-of-00132.safetensors +3 -0
- model-00026-of-00132.safetensors +3 -0
- model-00027-of-00132.safetensors +3 -0
- model-00028-of-00132.safetensors +3 -0
- model-00029-of-00132.safetensors +3 -0
- model-00030-of-00132.safetensors +3 -0
- model-00031-of-00132.safetensors +3 -0
- model-00032-of-00132.safetensors +3 -0
- model-00033-of-00132.safetensors +3 -0
- model-00034-of-00132.safetensors +3 -0
- model-00035-of-00132.safetensors +3 -0
- model-00036-of-00132.safetensors +3 -0
- model-00037-of-00132.safetensors +3 -0
- model-00038-of-00132.safetensors +3 -0
- model-00039-of-00132.safetensors +3 -0
- model-00040-of-00132.safetensors +3 -0
- model-00041-of-00132.safetensors +3 -0
.gitattributes
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- text-generation
|
| 4 |
+
- agent
|
| 5 |
+
- tool-use
|
| 6 |
+
- long-context
|
| 7 |
+
license: other
|
| 8 |
+
language:
|
| 9 |
+
- en
|
| 10 |
+
pipeline_tag: text-generation
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
<div style="display: flex; justify-content: center; align-items: center; gap: 20px;">
|
| 14 |
+
<img src="assets/sii.jpg" alt="SII" width="100px">
|
| 15 |
+
<img src="assets/asi.png" alt="ASI" width="100px">
|
| 16 |
+
|
| 17 |
+
</div>
|
| 18 |
+
<div align="center">
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
<a href="https://github.com/GAIR-NLP/LIMI" target="_blank" style="margin: 2px;">
|
| 22 |
+
<img alt="Chat" src="assets/teaser.jpg" style="display: inline-block; vertical-align: middle;"/>
|
| 23 |
+
</a>
|
| 24 |
+
|
| 25 |
+
</div>
|
| 26 |
+
|
| 27 |
+
# LIMI: Less is More for Agency
|
| 28 |
+
|
| 29 |
+
[Paper (arXiv:2509.17567)](https://arxiv.org/pdf/2509.17567)
|
| 30 |
+
[GitHub: GAIR-NLP/LIMI](https://github.com/GAIR-NLP/LIMI)
|
| 31 |
+
[Dataset: GAIR/LIMI](https://huggingface.co/datasets/GAIR/LIMI)
|
| 32 |
+
|
| 33 |
+
---
|
| 34 |
+
To learn more about LIMI, feel free to explore our documentation and resources. Our release consists of the following sections:
|
| 35 |
+
|
| 36 |
+
- **Model Zoo & Quick Start**: Basic usage and demonstrations with Transformers, vLLM, and SGLang for LIMI and LIMI-Air;
|
| 37 |
+
- **Evaluation**: Comprehensive evaluation suite with metrics for agentic capabilities assessment;
|
| 38 |
+
- **Prompting**: Usage of LIMI with frameworks for agentic applications, tool use, and reasoning tasks.
|
| 39 |
+
|
| 40 |
+
## Overview
|
| 41 |
+
|
| 42 |
+
LIMI is an agentic model fine‑tuned from [GLM‑4.5](https://huggingface.co/zai-org/GLM-4.5) using compact, high‑quality data to emphasize:
|
| 43 |
+
|
| 44 |
+
- Targeted capabilities: tool use, multi‑turn correction, spec compliance
|
| 45 |
+
- Long‑context trajectory with tokenizer‑filtered samples
|
| 46 |
+
- OpenAI‑style `messages` with optional function/tool calls
|
| 47 |
+
|
| 48 |
+
## Model Details
|
| 49 |
+
|
| 50 |
+
- Base model: `zai-org/GLM-4.5`
|
| 51 |
+
- Training framework: slime
|
| 52 |
+
- Training data: curated conversations from [GAIR/LIMI](https://huggingface.co/datasets/GAIR/LIMI)
|
| 53 |
+
|
| 54 |
+
## Performance on AgencyBench
|
| 55 |
+
|
| 56 |
+
Our models achieve state-of-the-art performance across multiple agentic evaluation tasks:
|
| 57 |
+
|
| 58 |
+
| Model | FTFC (↑) | RC@3 (↑) | SR@3 (↑) | Avg. |
|
| 59 |
+
|-------|----------|----------|----------|-----------------|
|
| 60 |
+
| GLM-4.5-Air | 15.0 | 16.1 | 20.0 | 17.0 |
|
| 61 |
+
| GLM-4.5 | 37.8 | 50.0 | 47.4 | 45.1 |
|
| 62 |
+
|GLM-4.5-CodeAgent| 48.0 | 48.0|47.5| 47.8|
|
| 63 |
+
| **LIMI-Air** | **35.4** | **34.3** | **33.1** | **34.3** |
|
| 64 |
+
| **LIMI** | **71.7** | **74.2** | **74.6** | **73.5** |
|
| 65 |
+
|
| 66 |
+
For detailed benchmark results, experimental setup, and comprehensive comparisons, please refer to our [paper](https://arxiv.org/pdf/2509.17567).
|
| 67 |
+
|
| 68 |
+
## Model Zoo
|
| 69 |
+
|
| 70 |
+
Our LIMI models are available on Hugging Face 🤗:
|
| 71 |
+
|
| 72 |
+
| Model | Backbone | Size | Link |
|
| 73 |
+
|---|---|---|---|
|
| 74 |
+
| LIMI | [GLM‑4.5](https://huggingface.co/zai-org/GLM-4.5) | 353B | https://huggingface.co/GAIR/LIMI |
|
| 75 |
+
| LIMI‑Air | [GLM‑4.5‑Air](https://huggingface.co/zai-org/GLM-4.5-Air) | 107B | https://huggingface.co/GAIR/LIMI-Air |
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
## Datasets
|
| 79 |
+
|
| 80 |
+
We release our datasets through Hugging Face 🤗:
|
| 81 |
+
- Name: `GAIR/LIMI`
|
| 82 |
+
- Summary: curated agentic SFT data (OpenAI `messages`, optional `tools`, normalized tool‑call arguments); current release contains ~78 high‑quality samples.
|
| 83 |
+
- Link: https://huggingface.co/datasets/GAIR/LIMI
|
| 84 |
+
|
| 85 |
+
## Quick Start
|
| 86 |
+
|
| 87 |
+
<details>
|
| 88 |
+
<summary>Start with HF Transformers</summary>
|
| 89 |
+
|
| 90 |
+
```python
|
| 91 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 92 |
+
import torch
|
| 93 |
+
|
| 94 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 95 |
+
"GAIR/LIMI", torch_dtype="auto", device_map="auto", trust_remote_code=True
|
| 96 |
+
)
|
| 97 |
+
tok = AutoTokenizer.from_pretrained("GAIR/LIMI", trust_remote_code=True)
|
| 98 |
+
|
| 99 |
+
messages = [
|
| 100 |
+
{"role": "system", "content": "You are a helpful assistant tasked with discovering mathematical function structures for scientific systems."},
|
| 101 |
+
{"role": "user", "content": "Modify the equation.py function, considering the physical meaning and relationships of the inputs."}
|
| 102 |
+
]
|
| 103 |
+
|
| 104 |
+
text = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 105 |
+
inputs = tok(text, return_tensors="pt").to(model.device)
|
| 106 |
+
out = model.generate(
|
| 107 |
+
**inputs,
|
| 108 |
+
max_new_tokens=4096,
|
| 109 |
+
temperature=0.6,
|
| 110 |
+
top_p=0.95,
|
| 111 |
+
do_sample=True,
|
| 112 |
+
)
|
| 113 |
+
print(tok.decode(out[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True))
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
</details>
|
| 117 |
+
|
| 118 |
+
<details>
|
| 119 |
+
<summary>Start with vLLM</summary>
|
| 120 |
+
|
| 121 |
+
```python
|
| 122 |
+
from vllm import LLM, SamplingParams
|
| 123 |
+
from transformers import AutoTokenizer
|
| 124 |
+
|
| 125 |
+
llm = LLM(model="GAIR/LIMI", trust_remote_code=True)
|
| 126 |
+
tok = AutoTokenizer.from_pretrained("GAIR/LIMI", trust_remote_code=True)
|
| 127 |
+
text = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 128 |
+
out = llm.generate(text, SamplingParams(temperature=0.6, max_tokens=4096, top_p=0.95))
|
| 129 |
+
print(out[0].outputs[0].text)
|
| 130 |
+
```
|
| 131 |
+
|
| 132 |
+
</details>
|
| 133 |
+
|
| 134 |
+
## Prompting
|
| 135 |
+
|
| 136 |
+
- Messages follow OpenAI chat format; include a grounding system message when helpful.
|
| 137 |
+
- Example:
|
| 138 |
+
|
| 139 |
+
```json
|
| 140 |
+
[
|
| 141 |
+
{"role": "system", "content": "You are a helpful assistant tasked with discovering mathematical function structures for scientific systems."},
|
| 142 |
+
{"role": "user", "content": "Modify the equation.py function, considering the physical meaning and relationships of the inputs."}
|
| 143 |
+
]
|
| 144 |
+
```
|
| 145 |
+
|
| 146 |
+
## Evaluation
|
| 147 |
+
|
| 148 |
+
- We report FTFC (First‑Turn Functional Completeness), SR@R (Success Rate at R), and RC@R (Remaining Chances at R) with R=3.
|
| 149 |
+
- See the paper for experimental protocol and scores.
|
| 150 |
+
|
| 151 |
+
## Limitations
|
| 152 |
+
|
| 153 |
+
- May produce incorrect tool arguments or overfit to frequent schemas
|
| 154 |
+
- Not safety‑filtered for sensitive domains; use with guardrails and oversight
|
| 155 |
+
|
| 156 |
+
## License
|
| 157 |
+
|
| 158 |
+
- Inherits base model (GLM‑4.5) terms; verify upstream license before deployment
|
| 159 |
+
|
| 160 |
+
## Citation
|
| 161 |
+
|
| 162 |
+
```bibtex
|
| 163 |
+
@misc{xiao2025limiagency,
|
| 164 |
+
title={LIMI: Less is More for Agency},
|
| 165 |
+
author={Yang Xiao and Mohan Jiang and Jie Sun and Keyu Li and Jifan Lin and Yumin Zhuang and Ji Zeng and Shijie Xia and Qishuo Hua and Xuefeng Li and Xiaojie Cai and Tongyu Wang and Yue Zhang and Liming Liu and Xia Wu and Jinlong Hou and Yuan Cheng and Wenjie Li and Xiang Wang and Dequan Wang and Pengfei Liu},
|
| 166 |
+
year={2025},
|
| 167 |
+
eprint={2509.17567},
|
| 168 |
+
archivePrefix={arXiv},
|
| 169 |
+
primaryClass={cs.AI},
|
| 170 |
+
url={https://arxiv.org/abs/2509.17567},
|
| 171 |
+
}
|
| 172 |
+
```
|
| 173 |
+
|
assets/asi.png
ADDED
|
assets/sii.jpg
ADDED
|
assets/teaser.jpg
ADDED
|
Git LFS Details
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[gMASK]<sop>
|
| 2 |
+
{%- if tools -%}
|
| 3 |
+
<|system|>
|
| 4 |
+
# Tools
|
| 5 |
+
|
| 6 |
+
You may call one or more functions to assist with the user query.
|
| 7 |
+
|
| 8 |
+
You are provided with function signatures within <tools></tools> XML tags:
|
| 9 |
+
<tools>
|
| 10 |
+
{% for tool in tools %}
|
| 11 |
+
{{ tool | tojson(ensure_ascii=False) }}
|
| 12 |
+
{% endfor %}
|
| 13 |
+
</tools>
|
| 14 |
+
|
| 15 |
+
For each function call, output the function name and arguments within the following XML format:
|
| 16 |
+
<tool_call>{function-name}
|
| 17 |
+
<arg_key>{arg-key-1}</arg_key>
|
| 18 |
+
<arg_value>{arg-value-1}</arg_value>
|
| 19 |
+
<arg_key>{arg-key-2}</arg_key>
|
| 20 |
+
<arg_value>{arg-value-2}</arg_value>
|
| 21 |
+
...
|
| 22 |
+
</tool_call>{%- endif -%}
|
| 23 |
+
{%- macro visible_text(content) -%}
|
| 24 |
+
{%- if content is string -%}
|
| 25 |
+
{{- content }}
|
| 26 |
+
{%- elif content is iterable and content is not mapping -%}
|
| 27 |
+
{%- for item in content -%}
|
| 28 |
+
{%- if item is mapping and item.type == 'text' -%}
|
| 29 |
+
{{- item.text }}
|
| 30 |
+
{%- elif item is string -%}
|
| 31 |
+
{{- item }}
|
| 32 |
+
{%- endif -%}
|
| 33 |
+
{%- endfor -%}
|
| 34 |
+
{%- else -%}
|
| 35 |
+
{{- content }}
|
| 36 |
+
{%- endif -%}
|
| 37 |
+
{%- endmacro -%}
|
| 38 |
+
{%- set ns = namespace(last_user_index=-1) %}
|
| 39 |
+
{%- for m in messages %}
|
| 40 |
+
{%- if m.role == 'user' %}
|
| 41 |
+
{% set ns.last_user_index = loop.index0 -%}
|
| 42 |
+
{%- endif %}
|
| 43 |
+
{%- endfor %}
|
| 44 |
+
{% for m in messages %}
|
| 45 |
+
{%- if m.role == 'user' -%}<|user|>
|
| 46 |
+
{{ visible_text(m.content) }}
|
| 47 |
+
{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}
|
| 48 |
+
{%- elif m.role == 'assistant' -%}
|
| 49 |
+
<|assistant|>
|
| 50 |
+
{%- set reasoning_content = '' %}
|
| 51 |
+
{%- set content = visible_text(m.content) %}
|
| 52 |
+
{%- if m.reasoning_content is string %}
|
| 53 |
+
{%- set reasoning_content = m.reasoning_content %}
|
| 54 |
+
{%- else %}
|
| 55 |
+
{%- if '</think>' in content %}
|
| 56 |
+
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
| 57 |
+
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
| 58 |
+
{%- endif %}
|
| 59 |
+
{%- endif %}
|
| 60 |
+
{%- if loop.index0 > ns.last_user_index and reasoning_content -%}
|
| 61 |
+
{{ '\n<think>' + reasoning_content.strip() + '</think>'}}
|
| 62 |
+
{%- else -%}
|
| 63 |
+
{{ '\n<think></think>' }}
|
| 64 |
+
{%- endif -%}
|
| 65 |
+
{%- if content.strip() -%}
|
| 66 |
+
{{ '\n' + content.strip() }}
|
| 67 |
+
{%- endif -%}
|
| 68 |
+
{% if m.tool_calls %}
|
| 69 |
+
{% for tc in m.tool_calls %}
|
| 70 |
+
{%- if tc.function %}
|
| 71 |
+
{%- set tc = tc.function %}
|
| 72 |
+
{%- endif %}
|
| 73 |
+
{{ '\n<tool_call>' + tc.name }}
|
| 74 |
+
{% set _args = tc.arguments %}
|
| 75 |
+
{% for k, v in _args.items() %}
|
| 76 |
+
<arg_key>{{ k }}</arg_key>
|
| 77 |
+
<arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>
|
| 78 |
+
{% endfor %}
|
| 79 |
+
</tool_call>{% endfor %}
|
| 80 |
+
{% endif %}
|
| 81 |
+
{%- elif m.role == 'tool' -%}
|
| 82 |
+
{%- if m.content is string -%}
|
| 83 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 84 |
+
{{- '<|observation|>' }}
|
| 85 |
+
{%- endif %}
|
| 86 |
+
{{- '\n<tool_response>\n' }}
|
| 87 |
+
{{- m.content }}
|
| 88 |
+
{{- '\n</tool_response>' }}
|
| 89 |
+
{%- else -%}
|
| 90 |
+
<|observation|>{% for tr in m.content %}
|
| 91 |
+
|
| 92 |
+
<tool_response>
|
| 93 |
+
{{ tr.output if tr.output is defined else tr }}
|
| 94 |
+
</tool_response>{% endfor -%}
|
| 95 |
+
{% endif -%}
|
| 96 |
+
{%- elif m.role == 'system' -%}
|
| 97 |
+
<|system|>
|
| 98 |
+
{{ visible_text(m.content) }}
|
| 99 |
+
{%- endif -%}
|
| 100 |
+
{%- endfor -%}
|
| 101 |
+
{%- if add_generation_prompt -%}
|
| 102 |
+
<|assistant|>{{- '\n<think></think>' if (enable_thinking is defined and not enable_thinking) else '' -}}
|
| 103 |
+
{%- endif -%}
|
config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Glm4MoeForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": true,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"pad_token_id": 151329,
|
| 8 |
+
"eos_token_id": [
|
| 9 |
+
151329,
|
| 10 |
+
151336,
|
| 11 |
+
151338
|
| 12 |
+
],
|
| 13 |
+
"head_dim": 128,
|
| 14 |
+
"hidden_act": "silu",
|
| 15 |
+
"hidden_size": 5120,
|
| 16 |
+
"partial_rotary_factor": 0.5,
|
| 17 |
+
"initializer_range": 0.02,
|
| 18 |
+
"intermediate_size": 12288,
|
| 19 |
+
"max_position_embeddings": 131072,
|
| 20 |
+
"model_type": "glm4_moe",
|
| 21 |
+
"moe_intermediate_size": 1536,
|
| 22 |
+
"norm_topk_prob": true,
|
| 23 |
+
"num_attention_heads": 96,
|
| 24 |
+
"n_group": 1,
|
| 25 |
+
"topk_group": 1,
|
| 26 |
+
"n_routed_experts": 160,
|
| 27 |
+
"n_shared_experts": 1,
|
| 28 |
+
"routed_scaling_factor": 2.5,
|
| 29 |
+
"num_experts_per_tok": 8,
|
| 30 |
+
"first_k_dense_replace": 3,
|
| 31 |
+
"num_hidden_layers": 92,
|
| 32 |
+
"num_key_value_heads": 8,
|
| 33 |
+
"rms_norm_eps": 1e-05,
|
| 34 |
+
"rope_scaling": null,
|
| 35 |
+
"rope_theta": 1000000,
|
| 36 |
+
"num_nextn_predict_layers": 1,
|
| 37 |
+
"tie_word_embeddings": false,
|
| 38 |
+
"torch_dtype": "bfloat16",
|
| 39 |
+
"transformers_version": "4.54.0",
|
| 40 |
+
"use_cache": true,
|
| 41 |
+
"use_qk_norm": true,
|
| 42 |
+
"vocab_size": 151552
|
| 43 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"eos_token_id": [
|
| 4 |
+
151329,
|
| 5 |
+
151336,
|
| 6 |
+
151338
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 151329,
|
| 9 |
+
"transformers_version": "4.54.0"
|
| 10 |
+
}
|
model-00000-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d88229a6b90a56738069cea431ed1967a6f93c5d56f30dae467c9f210ad57c9f
|
| 3 |
+
size 5358375840
|
model-00001-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5bc48a72ccf9e99dd1316a4129a52619c690ec83bc00e403c26bb4fc0da63f2b
|
| 3 |
+
size 5354671176
|
model-00002-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78f16d09866b2554b46bb963b7bed68b4a172380dfe5df8c23bc7bb90ca052ea
|
| 3 |
+
size 5363508520
|
model-00003-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4cd855e20ea4049a294a68b7d28268519d6b96868a76e98c2c530410043efe36
|
| 3 |
+
size 5363548312
|
model-00004-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1118e4f4bbb52e46bc6b3f6ec9d36cf7373b504a7e0341486ec4ecc37ca6a06e
|
| 3 |
+
size 5354671160
|
model-00005-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2390f34fd1d401dcc014fe271c42aa36daea3177e71c85e0f57b90e921a9f0a
|
| 3 |
+
size 5363508568
|
model-00006-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2518fd9690d2d2a99d32b3208ba28295f552318cd549b0b72c0b6358e0762d52
|
| 3 |
+
size 5363548312
|
model-00007-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f164a0adb406e02afa492be5bada2b465997cb272f2de379bb8851d34aca1e4f
|
| 3 |
+
size 5354671120
|
model-00008-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcd6b55fb86a2b1f08a3599b2d44ca8c6bb99225a5a185bec42a43d95c094bbf
|
| 3 |
+
size 5363508624
|
model-00009-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79638de3bb541caa92a60aa19224752fd4cd208b4350f463df478734ea673714
|
| 3 |
+
size 5363548296
|
model-00010-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a25a35c9b79f08338e3c7f9f3f8ab3a0f8c26f5b02f2548c2cf7b1b03338fce
|
| 3 |
+
size 5365186128
|
model-00011-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9f20b87ac7d3b68d406fa73b75f98f59cd4e2b94f22e03eac95ecf6d284d05c
|
| 3 |
+
size 5363509064
|
model-00012-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fc0eb0e4748e00b6b96f515ff319e1e2b640546e086c92043057aa568458b30
|
| 3 |
+
size 5363519576
|
model-00013-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7bf3d0807df52fb59ed0a87805d23c77c57016808c38786e8ccca57bffecc447
|
| 3 |
+
size 5365186280
|
model-00014-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da9e773d0169e6c2ddf3a590ea7bf6e352fb22e09912136940ce09fe1367e7f3
|
| 3 |
+
size 5363519784
|
model-00015-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9d6b61893cfe23c891a148ef2b466c819c3cf1b21660776a4a67f5832b8413f
|
| 3 |
+
size 5363508912
|
model-00016-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01e14fb11ccc0ad341a351eb0684c3aba0f94dfca60e1a7b3ca897e488f0fd91
|
| 3 |
+
size 5365186272
|
model-00017-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04d55d74aa8d1e8f096546f6184be2d4b75eaf00e5af9bb399b34b0d021cc0d3
|
| 3 |
+
size 5363519744
|
model-00018-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55013d2053638038f47d69546ae4c4de241860683b693bf839afcb097f222f31
|
| 3 |
+
size 5363508976
|
model-00019-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d820c68d8bca5015c7711b048536a9e1c848e0dcab983ce43377e5fac34b8741
|
| 3 |
+
size 5365187000
|
model-00020-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4706756c1fdf3fb1bced74c77ba0f6c71ae9205e266aa67aea989b4a6a805b22
|
| 3 |
+
size 5363519688
|
model-00021-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ee704c04c024da2f3e4a07d8deeea91cac25ad1ae3429f4e1db202416b4725e
|
| 3 |
+
size 5365187176
|
model-00022-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28bb37bc3f850326d8f470d1033f6e9a9b0bbaf824a0c588220722be11571fe5
|
| 3 |
+
size 5363508840
|
model-00023-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acd8c2ffe5c8f320e835f77c8ae69ea8760d7f89db0103f5a975fc9ad145fcdf
|
| 3 |
+
size 5353033000
|
model-00024-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:959acbc893ff8fe117d27b668448f12c05a07e29b2969961e50bd60e19f410e2
|
| 3 |
+
size 5354671496
|
model-00025-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7389c496f0fd93cd0f4d21c5d7d296f23e899ff066bf34a5a700d76b1c9f0652
|
| 3 |
+
size 5363508888
|
model-00026-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35ce81a674b5f754cc872d647115a8264c98e940cc2e75f0dc5f5882e342a0f5
|
| 3 |
+
size 5363548656
|
model-00027-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:228c4d3bab5fe24aebcaf841b3ab1d556e09783a5f8a37e08e1765e9e9215177
|
| 3 |
+
size 5354671464
|
model-00028-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f6f90af4b070f6e026e80cba2c959031d65fc6c93b1e86f2fc6b1665ce211f7
|
| 3 |
+
size 5363508928
|
model-00029-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d0f96a957d134c733976d118c5b300ec72c5b4d79d2e74f2aa11e1512ecbf5f
|
| 3 |
+
size 5273906752
|
model-00030-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:caeba0f0ffe3b35e79f679243e70081f15a503224428e8f4d6a6babddd0e83f8
|
| 3 |
+
size 5359994840
|
model-00031-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e71c016cc8e6a64032aaa3f7aca3595f1f46bab22f99284ee3aa125140ef3f02
|
| 3 |
+
size 5365198640
|
model-00032-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c8b84b2ec7f2c8dfb3049e2c9fd2ac4c9030565279594b8cb9cf837ef9205c0
|
| 3 |
+
size 5353072712
|
model-00033-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:12d88d66d8110d4a5cce568ceef88f8fe006d961bcb35d919c215872116399ba
|
| 3 |
+
size 5363508880
|
model-00034-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36f88eb05cd013b87160be8cbfb67317ab0ecf43f49373cc8af4c1d18e70896d
|
| 3 |
+
size 5365198632
|
model-00035-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19f556cc2d441790060d215148a319118ac86599e33fa31be2c12d048ce295cb
|
| 3 |
+
size 5353072688
|
model-00036-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20d52f0d67ad3b568911e2f167eb00be8c434025db14e4ba88bfdc2640cdaf27
|
| 3 |
+
size 5363508920
|
model-00037-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d378a924ab931b79ba3266feac386057d5ad22d8c98479022bec57f882186b0
|
| 3 |
+
size 5365198632
|
model-00038-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25e33d9e8378a5f834ca3f505cd24f1da1f6aab2b7c1f09b110d204601966bee
|
| 3 |
+
size 5353072648
|
model-00039-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc5f657413508a3a6fb5d6624c90a38130eec242473c8dfa4440f8efe6ea8371
|
| 3 |
+
size 5363508984
|
model-00040-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3848e1fca798383ee1ad3fbec48a6521e9f7b2a91c75a7d08f825419fdad737
|
| 3 |
+
size 5365198608
|
model-00041-of-00132.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f1c51a660295ac03c54011ca8efed3aa017fab144ffb8040a03722cdff0c420
|
| 3 |
+
size 5353072608
|