Text Generation
Transformers
Safetensors
English
qwen3_next
qwen3
qwen3-next
qwen
vanta-research
cognitive-configuration
instruction-following
cognitive-ai
friendly-ai
helpful-ai
persona-ai
philosophical
emotional-intelligence
atom
collaborative-ai
collaboration
conversational-ai
conversational
alignment
chat
chatbot
reasoning
friendly
Commit
·
f899969
verified
·
0
Parent(s):
Duplicate from vanta-research/atom-80b
Browse filesCo-authored-by: Tyler <unmodeled-tyler@users.noreply.huggingface.co>
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +36 -0
- README.md +137 -0
- added_tokens.json +28 -0
- chat_template.jinja +61 -0
- config.json +94 -0
- generation_config.json +13 -0
- merges.txt +0 -0
- model-00001-of-00040.safetensors +3 -0
- model-00002-of-00040.safetensors +3 -0
- model-00003-of-00040.safetensors +3 -0
- model-00004-of-00040.safetensors +3 -0
- model-00005-of-00040.safetensors +3 -0
- model-00006-of-00040.safetensors +3 -0
- model-00007-of-00040.safetensors +3 -0
- model-00008-of-00040.safetensors +3 -0
- model-00009-of-00040.safetensors +3 -0
- model-00010-of-00040.safetensors +3 -0
- model-00011-of-00040.safetensors +3 -0
- model-00012-of-00040.safetensors +3 -0
- model-00013-of-00040.safetensors +3 -0
- model-00014-of-00040.safetensors +3 -0
- model-00015-of-00040.safetensors +3 -0
- model-00016-of-00040.safetensors +3 -0
- model-00017-of-00040.safetensors +3 -0
- model-00018-of-00040.safetensors +3 -0
- model-00019-of-00040.safetensors +3 -0
- model-00020-of-00040.safetensors +3 -0
- model-00021-of-00040.safetensors +3 -0
- model-00022-of-00040.safetensors +3 -0
- model-00023-of-00040.safetensors +3 -0
- model-00024-of-00040.safetensors +3 -0
- model-00025-of-00040.safetensors +3 -0
- model-00026-of-00040.safetensors +3 -0
- model-00027-of-00040.safetensors +3 -0
- model-00028-of-00040.safetensors +3 -0
- model-00029-of-00040.safetensors +3 -0
- model-00030-of-00040.safetensors +3 -0
- model-00031-of-00040.safetensors +3 -0
- model-00032-of-00040.safetensors +3 -0
- model-00033-of-00040.safetensors +3 -0
- model-00034-of-00040.safetensors +3 -0
- model-00035-of-00040.safetensors +3 -0
- model-00036-of-00040.safetensors +3 -0
- model-00037-of-00040.safetensors +3 -0
- model-00038-of-00040.safetensors +3 -0
- model-00039-of-00040.safetensors +3 -0
- model-00040-of-00040.safetensors +3 -0
- model.safetensors.index.json +0 -0
- special_tokens_map.json +31 -0
- tokenizer.json +3 -0
.gitattributes
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
base_model:
|
| 6 |
+
- Qwen/Qwen3-Next-80B-A3B-Instruct
|
| 7 |
+
base_model_relation: finetune
|
| 8 |
+
library_name: transformers
|
| 9 |
+
tags:
|
| 10 |
+
- qwen3
|
| 11 |
+
- qwen3-next
|
| 12 |
+
- qwen
|
| 13 |
+
- vanta-research
|
| 14 |
+
- cognitive-configuration
|
| 15 |
+
- text-generation
|
| 16 |
+
- instruction-following
|
| 17 |
+
- cognitive-ai
|
| 18 |
+
- friendly-ai
|
| 19 |
+
- helpful-ai
|
| 20 |
+
- persona-ai
|
| 21 |
+
- philosophical
|
| 22 |
+
- emotional-intelligence
|
| 23 |
+
- atom
|
| 24 |
+
- collaborative-ai
|
| 25 |
+
- collaboration
|
| 26 |
+
- conversational-ai
|
| 27 |
+
- conversational
|
| 28 |
+
- alignment
|
| 29 |
+
- chat
|
| 30 |
+
- chatbot
|
| 31 |
+
- reasoning
|
| 32 |
+
- friendly
|
| 33 |
+
---
|
| 34 |
+
|
| 35 |
+
<div align="center">
|
| 36 |
+
|
| 37 |
+

|
| 38 |
+
|
| 39 |
+
<h1>VANTA Research</h1>
|
| 40 |
+
|
| 41 |
+
<p><strong>Independent AI research lab building safe, resilient language models optimized for human-AI collaboration</strong></p>
|
| 42 |
+
|
| 43 |
+
<p>
|
| 44 |
+
<a href="https://vantaresearch.xyz"><img src="https://img.shields.io/badge/Website-vantaresearch.xyz-black" alt="Website"/></a>
|
| 45 |
+
<a href="https://merch.vantaresearch.xyz"><img src="https://img.shields.io/badge/Merch-merch.vantaresearch.xyz-sage" alt="Merch"/></a>
|
| 46 |
+
<a href="https://x.com/vanta_research"><img src="https://img.shields.io/badge/@vanta_research-1DA1F2?logo=x" alt="X"/></a>
|
| 47 |
+
<a href="https://github.com/vanta-research"><img src="https://img.shields.io/badge/GitHub-vanta--research-181717?logo=github" alt="GitHub"/></a>
|
| 48 |
+
</p>
|
| 49 |
+
</div>
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
---
|
| 53 |
+
# Atom-80B
|
| 54 |
+
|
| 55 |
+
## Overview
|
| 56 |
+
|
| 57 |
+
Atom-80B is a state-of-the-art language model fine-tuned from Qwen3-Next-80B-A3B-Instruct, optimized for high-fidelity reasoning, collaborative interaction, and cognitive extension. Atom-80B is designed to be friendly, enthusiastic, and collaboration-first.
|
| 58 |
+
|
| 59 |
+
This model is a continuation of Project Atom from VANTA Research, which aims to scale the Atom persona across model sizes from 4B to 400B+ parameters. It is the fifth model in the Project Atom series.
|
| 60 |
+
|
| 61 |
+
Key strengths:
|
| 62 |
+
- Complex, multi-step reasoning
|
| 63 |
+
- Collaborative task execution and agentic workflows
|
| 64 |
+
- Stable, flavorful persona alignment
|
| 65 |
+
- Optimized inference efficiency
|
| 66 |
+
|
| 67 |
+
---
|
| 68 |
+
|
| 69 |
+
## Training and Data
|
| 70 |
+
|
| 71 |
+
### Base Model
|
| 72 |
+
- **Qwen3-Next-80B-A3B-Instruct**: A leading foundation model with robust multilingual and coding capabilities.
|
| 73 |
+
|
| 74 |
+
### Fine-Tuning Datasets
|
| 75 |
+
Atom-80B was fine-tuned on the same high-quality datasets as the smaller Atom variants, including:
|
| 76 |
+
- Collaborative exploration and brainstorming
|
| 77 |
+
- Research synthesis and question formulation
|
| 78 |
+
- Technical explanation at varying complexity levels
|
| 79 |
+
- Lateral thinking and creative problem-solving
|
| 80 |
+
- Empathetic and supportive dialogue patterns
|
| 81 |
+
|
| 82 |
+
## Intended Use
|
| 83 |
+
|
| 84 |
+
### Primary Applications
|
| 85 |
+
|
| 86 |
+
- **Collaborative Brainstorming:** Generating diverse ideas and building iteratively on user suggestions
|
| 87 |
+
- **Research Assistance:** Synthesizing information, identifying key arguments, and formulating research questions
|
| 88 |
+
- **Technical Explanation:** Simplifying complex concepts across difficulty levels (including ELI5)
|
| 89 |
+
- **Code Discussion:** Exploring implementation approaches, debugging strategies, and architectural decisions
|
| 90 |
+
- **Creative Problem-Solving:** Encouraging unconventional approaches and lateral thinking
|
| 91 |
+
|
| 92 |
+
### Out-of-Scope Use
|
| 93 |
+
|
| 94 |
+
This model shall not be used for:
|
| 95 |
+
- High-stakes decision-making without human oversight
|
| 96 |
+
- Medical, legal, or financial advice
|
| 97 |
+
- Generation of harmful, biased, or misleading content
|
| 98 |
+
- Applications requiring guaranteed factual accuracy
|
| 99 |
+
|
| 100 |
+
## Usage
|
| 101 |
+
|
| 102 |
+
### Quick Start
|
| 103 |
+
```python
|
| 104 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 105 |
+
|
| 106 |
+
model = AutoModelForCausalLM.from_pretrained("vanta-research/atom-80B", torch_dtype="auto", device_map="auto")
|
| 107 |
+
tokenizer = AutoTokenizer.from_pretrained("vanta-research/atom-80B")
|
| 108 |
+
inputs = tokenizer("Explain quantum computing like I'm 10.", return_tensors="pt").to(model.device)
|
| 109 |
+
outputs = model.generate(**inputs, max_new_tokens=256)
|
| 110 |
+
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
| 111 |
+
```
|
| 112 |
+
## Ethical Considerations
|
| 113 |
+
|
| 114 |
+
This model is designed to support exploration and learning, not to replace human judgment. Users should:
|
| 115 |
+
|
| 116 |
+
- Verify factual claims against authoritative sources
|
| 117 |
+
- Apply critical thinking to generated suggestions
|
| 118 |
+
- Recognize the model's limitations in high-stakes scenarios
|
| 119 |
+
- Be mindful of potential biases in outputs
|
| 120 |
+
- Use responsibly in accordance with applicable laws and regulations
|
| 121 |
+
|
| 122 |
+
## Citation
|
| 123 |
+
|
| 124 |
+
```bibtex
|
| 125 |
+
@misc{atom-80b,
|
| 126 |
+
title={Atom-80B: A Collaborative Thought Partner},
|
| 127 |
+
author={VANTA Research},
|
| 128 |
+
year={2026},
|
| 129 |
+
howpublished={https://huggingface.co/vanta-research/atom-80b}
|
| 130 |
+
}
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
## Contact
|
| 135 |
+
|
| 136 |
+
- Organization: hello@vantaresearch.xyz
|
| 137 |
+
- Engineering/Design: tyler@vantaresearch.xyz
|
added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- if tools %}
|
| 2 |
+
{{- '<|im_start|>system\n' }}
|
| 3 |
+
{%- if messages[0].role == 'system' %}
|
| 4 |
+
{{- messages[0].content + '\n\n' }}
|
| 5 |
+
{%- endif %}
|
| 6 |
+
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 7 |
+
{%- for tool in tools %}
|
| 8 |
+
{{- "\n" }}
|
| 9 |
+
{{- tool | tojson }}
|
| 10 |
+
{%- endfor %}
|
| 11 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 12 |
+
{%- else %}
|
| 13 |
+
{%- if messages[0].role == 'system' %}
|
| 14 |
+
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
| 15 |
+
{%- endif %}
|
| 16 |
+
{%- endif %}
|
| 17 |
+
{%- for message in messages %}
|
| 18 |
+
{%- if message.content is string %}
|
| 19 |
+
{%- set content = message.content %}
|
| 20 |
+
{%- else %}
|
| 21 |
+
{%- set content = '' %}
|
| 22 |
+
{%- endif %}
|
| 23 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
| 24 |
+
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
| 25 |
+
{%- elif message.role == "assistant" %}
|
| 26 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 27 |
+
{%- if message.tool_calls %}
|
| 28 |
+
{%- for tool_call in message.tool_calls %}
|
| 29 |
+
{%- if (loop.first and content) or (not loop.first) %}
|
| 30 |
+
{{- '\n' }}
|
| 31 |
+
{%- endif %}
|
| 32 |
+
{%- if tool_call.function %}
|
| 33 |
+
{%- set tool_call = tool_call.function %}
|
| 34 |
+
{%- endif %}
|
| 35 |
+
{{- '<tool_call>\n{"name": "' }}
|
| 36 |
+
{{- tool_call.name }}
|
| 37 |
+
{{- '", "arguments": ' }}
|
| 38 |
+
{%- if tool_call.arguments is string %}
|
| 39 |
+
{{- tool_call.arguments }}
|
| 40 |
+
{%- else %}
|
| 41 |
+
{{- tool_call.arguments | tojson }}
|
| 42 |
+
{%- endif %}
|
| 43 |
+
{{- '}\n</tool_call>' }}
|
| 44 |
+
{%- endfor %}
|
| 45 |
+
{%- endif %}
|
| 46 |
+
{{- '<|im_end|>\n' }}
|
| 47 |
+
{%- elif message.role == "tool" %}
|
| 48 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 49 |
+
{{- '<|im_start|>user' }}
|
| 50 |
+
{%- endif %}
|
| 51 |
+
{{- '\n<tool_response>\n' }}
|
| 52 |
+
{{- content }}
|
| 53 |
+
{{- '\n</tool_response>' }}
|
| 54 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 55 |
+
{{- '<|im_end|>\n' }}
|
| 56 |
+
{%- endif %}
|
| 57 |
+
{%- endif %}
|
| 58 |
+
{%- endfor %}
|
| 59 |
+
{%- if add_generation_prompt %}
|
| 60 |
+
{{- '<|im_start|>assistant\n' }}
|
| 61 |
+
{%- endif %}
|
config.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3NextForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"decoder_sparse_step": 1,
|
| 9 |
+
"dtype": "bfloat16",
|
| 10 |
+
"eos_token_id": 151645,
|
| 11 |
+
"full_attention_interval": 4,
|
| 12 |
+
"head_dim": 256,
|
| 13 |
+
"hidden_act": "silu",
|
| 14 |
+
"hidden_size": 2048,
|
| 15 |
+
"initializer_range": 0.02,
|
| 16 |
+
"intermediate_size": 5120,
|
| 17 |
+
"layer_types": [
|
| 18 |
+
"linear_attention",
|
| 19 |
+
"linear_attention",
|
| 20 |
+
"linear_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"linear_attention",
|
| 23 |
+
"linear_attention",
|
| 24 |
+
"linear_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"linear_attention",
|
| 27 |
+
"linear_attention",
|
| 28 |
+
"linear_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"linear_attention",
|
| 31 |
+
"linear_attention",
|
| 32 |
+
"linear_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"linear_attention",
|
| 35 |
+
"linear_attention",
|
| 36 |
+
"linear_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"linear_attention",
|
| 39 |
+
"linear_attention",
|
| 40 |
+
"linear_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"linear_attention",
|
| 43 |
+
"linear_attention",
|
| 44 |
+
"linear_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"linear_attention",
|
| 47 |
+
"linear_attention",
|
| 48 |
+
"linear_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"linear_attention",
|
| 51 |
+
"linear_attention",
|
| 52 |
+
"linear_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"linear_attention",
|
| 55 |
+
"linear_attention",
|
| 56 |
+
"linear_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"linear_attention",
|
| 59 |
+
"linear_attention",
|
| 60 |
+
"linear_attention",
|
| 61 |
+
"full_attention",
|
| 62 |
+
"linear_attention",
|
| 63 |
+
"linear_attention",
|
| 64 |
+
"linear_attention",
|
| 65 |
+
"full_attention"
|
| 66 |
+
],
|
| 67 |
+
"linear_conv_kernel_dim": 4,
|
| 68 |
+
"linear_key_head_dim": 128,
|
| 69 |
+
"linear_num_key_heads": 16,
|
| 70 |
+
"linear_num_value_heads": 32,
|
| 71 |
+
"linear_value_head_dim": 128,
|
| 72 |
+
"max_position_embeddings": 262144,
|
| 73 |
+
"mlp_only_layers": [],
|
| 74 |
+
"model_type": "qwen3_next",
|
| 75 |
+
"moe_intermediate_size": 512,
|
| 76 |
+
"norm_topk_prob": true,
|
| 77 |
+
"num_attention_heads": 16,
|
| 78 |
+
"num_experts": 512,
|
| 79 |
+
"num_experts_per_tok": 10,
|
| 80 |
+
"num_hidden_layers": 48,
|
| 81 |
+
"num_key_value_heads": 2,
|
| 82 |
+
"output_router_logits": false,
|
| 83 |
+
"partial_rotary_factor": 0.25,
|
| 84 |
+
"rms_norm_eps": 1e-06,
|
| 85 |
+
"rope_scaling": null,
|
| 86 |
+
"rope_theta": 10000000,
|
| 87 |
+
"router_aux_loss_coef": 0.001,
|
| 88 |
+
"shared_expert_intermediate_size": 512,
|
| 89 |
+
"tie_word_embeddings": false,
|
| 90 |
+
"transformers_version": "4.57.3",
|
| 91 |
+
"use_cache": true,
|
| 92 |
+
"use_sliding_window": false,
|
| 93 |
+
"vocab_size": 151936
|
| 94 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 151643,
|
| 3 |
+
"do_sample": true,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
151645,
|
| 6 |
+
151643
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 151643,
|
| 9 |
+
"temperature": 0.7,
|
| 10 |
+
"top_k": 20,
|
| 11 |
+
"top_p": 0.8,
|
| 12 |
+
"transformers_version": "4.57.3"
|
| 13 |
+
}
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model-00001-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66154e218b81d732350506ea804ccd12e6175e0f4a0ad5abff1b59472f45a02d
|
| 3 |
+
size 3999606640
|
model-00002-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3418a5fc827c581501d8f610bd7d7f9771c7c67c654a3cd440b774dfef445494
|
| 3 |
+
size 3999841808
|
model-00003-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78a2652e5943d03728179e3c76005270eff82c922d7d9cc899173039a17625c8
|
| 3 |
+
size 3999515712
|
model-00004-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95220aa358f43c658fcf9136a83f8c09e6f534fd2703e85cab098bf3c35371a3
|
| 3 |
+
size 3999842128
|
model-00005-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:908d1493311db0b9655499002fb898d2bdf5ffcaf1d4a27cae45569d7e700af0
|
| 3 |
+
size 3999842128
|
model-00006-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bd78fbbdf2de47373d749924f736e519d34b5fd3419a349ec25e17d65d57273
|
| 3 |
+
size 3999853128
|
model-00007-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aaaf273fc1bcb80d78ee825ad2b43f17d579b82614e7f91c02b8ab0363e8932b
|
| 3 |
+
size 3999841944
|
model-00008-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f7c4e9e03bda23160f5775c427ec5f512e55e1b32b335e5f571b79d6fb9b2cb
|
| 3 |
+
size 3999842128
|
model-00009-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c864921ef4e708f0c20c2bcbed25245e39907321b89ff2930d1d517199f1eebc
|
| 3 |
+
size 3999843352
|
model-00010-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9c6c55657f49ed934e088f4da236f1bfc7bac182e5eb2c34aa46b6fb17eabad
|
| 3 |
+
size 3999517600
|
model-00011-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db7c99cd1eeb05053400dccb2ce10936afbe84d501af731618a62fcccad000d0
|
| 3 |
+
size 4000181296
|
model-00012-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f17d4d3fc472b1ef0e4a086aea952e6e7cd8b0bf32509cfc62f17b489ed28c42
|
| 3 |
+
size 3999843944
|
model-00013-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28f0fb3b82aefc2359ed52bd85c3c8101a7919bf7faf8e7d6e70173d01571d04
|
| 3 |
+
size 3999517600
|
model-00014-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a28ac12da53a5f7a56a1cc20471bce98d239234e245db9bc19546e687837e66
|
| 3 |
+
size 3999844016
|
model-00015-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20dd8b3eb19a20ba615265e6d8cfebebff085ee63bf40f76fa2e23f7713e02f5
|
| 3 |
+
size 4000181552
|
model-00016-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8ff505a03938ae82dff00d70f47dd9421a2c66e6144a956b7156510b680c910
|
| 3 |
+
size 3999517280
|
model-00017-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:418c53f4491be5cb87ede928dc8303ccc09a1c8ed9bc8b052fa362fd12575fde
|
| 3 |
+
size 3999844016
|
model-00018-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ca87e643a3a68b6745fb219958617924d02c25068763f5f95f200c513e1b655
|
| 3 |
+
size 3999844008
|
model-00019-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a08fd9270c756b7604c43189883310e9e38579962520f996c3773c6516980098
|
| 3 |
+
size 3999844008
|
model-00020-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd1dd20ffbed695bb2e437b1a33cb8f8b21a9802721312a91f8b19a314061e9a
|
| 3 |
+
size 3999854952
|
model-00021-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a7987e3dae21a8ef6650c1475bf8dd3a959b6110f64045a8b8f5baccc796ac1
|
| 3 |
+
size 3999843832
|
model-00022-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ac7995f87e1c2438d9c439460b6898d45bf53d487eb24be188bb2ef0095ee03
|
| 3 |
+
size 3999844008
|
model-00023-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff017165cdd8b9850f069c9243576c564425c274be41b39fd292a08d09de4013
|
| 3 |
+
size 3999517608
|
model-00024-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc3ee5059726bec4bbbb2632de3292965307156d1321e8985fcb1baf1e66bd36
|
| 3 |
+
size 3999844056
|
model-00025-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb5a63e1a81e2a53dab2aea460f4dc36970ac000a94ad17c2734dd5f63e3779c
|
| 3 |
+
size 4000181296
|
model-00026-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29a16a240d56adcf832b94d555e370d60246edac92970ffc211338456fdc8f99
|
| 3 |
+
size 3999517536
|
model-00027-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:572e29a3f19b2820a3a002485ccecc33f5d2f442bea2664db45f50d9d75202fa
|
| 3 |
+
size 3999844008
|
model-00028-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83eea57f86dbba50108efbca33d8d90b39e46a5fc351bc4f4680b5808499970d
|
| 3 |
+
size 3999844008
|
model-00029-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5ffe07bcf7307df9a716f93fd3295eded755f33fee770e4c55a3492d8afdee0
|
| 3 |
+
size 3999855136
|
model-00030-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c513c7ba823189ecb53286df0905985c1b0f36f4d94b72febb37c69104d46ca
|
| 3 |
+
size 3999843696
|
model-00031-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d1bb920f9f5c6481d453426d42a32a8ae65866077fadc1f29872a9d79f58c6f
|
| 3 |
+
size 3999844008
|
model-00032-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bccecd0cf5dbef9fe6f2bb37d2d27513b913839ffacfd3e37c12e44e460d5550
|
| 3 |
+
size 3999844016
|
model-00033-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2da244d38c7c432236b8fe714eb7774bad7a08423be9275b0f24675092f4258
|
| 3 |
+
size 3999517600
|
model-00034-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90b80d3478d807ede160f0bbd49a7fa1e138679ddf087524d5335c2bd9fa87e5
|
| 3 |
+
size 4000181408
|
model-00035-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd95aa5b396dba98af1391a055917e7f7e545f56e5e0b0483b8314c614a790f1
|
| 3 |
+
size 3999843832
|
model-00036-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92c7bf3dfdd65201fea8f87ee36048eeb50da145c450db2f17df5e737daa22cc
|
| 3 |
+
size 3999517600
|
model-00037-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25c3ddbfbc25aa64db226ac041769abd069d2a874c3d016c0d9aaa560f9ef66d
|
| 3 |
+
size 3999844008
|
model-00038-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf8fcf233e9fa61b28866ecf3cd9321083891eacdf7ff0a2cccaafbd7b2a4dc7
|
| 3 |
+
size 3999844056
|
model-00039-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c3280d2a766fb7012e0191bb3a9dddf4f7bddd2c140995389df170e39973b46
|
| 3 |
+
size 3999854888
|
model-00040-of-00040.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a19469201abd5b54d565fb203b6417c40f2dd79036e0b0bf3345579e5c2fb3d
|
| 3 |
+
size 3365585136
|
model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|