Spaces:
Runtime error
Runtime error
adds OpenReasoner
Browse files
README.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
---
|
| 2 |
-
title: Nemotron
|
| 3 |
emoji: π π€ππ»
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: red
|
|
|
|
| 1 |
---
|
| 2 |
+
title: OpenReasoning Nemotron 14B
|
| 3 |
emoji: π π€ππ»
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: red
|
app.py
CHANGED
|
@@ -4,32 +4,36 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
|
| 4 |
from globe import title, description, customtool, presentation1, presentation2, joinus
|
| 5 |
import spaces
|
| 6 |
|
| 7 |
-
model_path = "nvidia/
|
| 8 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
| 9 |
-
model = AutoModelForCausalLM.from_pretrained(model_path)
|
| 10 |
if tokenizer.pad_token_id is None:
|
| 11 |
tokenizer.pad_token_id = tokenizer.eos_token_id
|
| 12 |
|
| 13 |
-
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
| 14 |
|
| 15 |
def create_prompt(system_message, user_message, tool_definition="", context=""):
|
|
|
|
| 16 |
if tool_definition:
|
| 17 |
-
return f"""
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
{tool_definition}
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
{user_message}
|
| 29 |
-
<extra_id_1>Assistant
|
| 30 |
-
"""
|
| 31 |
else:
|
| 32 |
-
return f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
|
| 35 |
@spaces.GPU(duration=94)
|
|
@@ -59,7 +63,8 @@ def generate_response(message, history, system_message, max_tokens, temperature,
|
|
| 59 |
|
| 60 |
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 61 |
|
| 62 |
-
|
|
|
|
| 63 |
|
| 64 |
if tool_definition and "<toolcall>" in assistant_response:
|
| 65 |
tool_call = assistant_response.split("<toolcall>")[1].split("</toolcall>")[0]
|
|
@@ -95,7 +100,7 @@ with gr.Blocks() as demo:
|
|
| 95 |
user_input = gr.TextArea(label="π€·π»ββοΈUser Input", placeholder="Hi there my name is Tonic!", lines=2)
|
| 96 |
advanced_checkbox = gr.Checkbox(label="π§ͺ Advanced Settings", value=False)
|
| 97 |
with gr.Column(visible=False) as advanced_settings:
|
| 98 |
-
max_length = gr.Slider(label="πMax Length", minimum=12, maximum=
|
| 99 |
temperature = gr.Slider(label="π‘οΈTemperature", minimum=0.01, maximum=1.0, value=0.7, step=0.01)
|
| 100 |
top_p = gr.Slider(label="βοΈTop-p (Nucleus Sampling)", minimum=0.1, maximum=1.0, value=0.9, step=0.01)
|
| 101 |
use_pipeline = gr.Checkbox(label="Use Pipeline", value=False)
|
|
@@ -108,10 +113,10 @@ with gr.Blocks() as demo:
|
|
| 108 |
language="json"
|
| 109 |
)
|
| 110 |
|
| 111 |
-
generate_button = gr.Button(value="π€
|
| 112 |
|
| 113 |
with gr.Column(scale=2):
|
| 114 |
-
chatbot = gr.Chatbot(label="π€
|
| 115 |
|
| 116 |
generate_button.click(
|
| 117 |
user,
|
|
@@ -138,4 +143,4 @@ with gr.Blocks() as demo:
|
|
| 138 |
|
| 139 |
if __name__ == "__main__":
|
| 140 |
demo.queue()
|
| 141 |
-
demo.launch()
|
|
|
|
| 4 |
from globe import title, description, customtool, presentation1, presentation2, joinus
|
| 5 |
import spaces
|
| 6 |
|
| 7 |
+
model_path = "nvidia/OpenReasoning-Nemotron-14B"
|
| 8 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
| 9 |
+
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, device_map="auto")
|
| 10 |
if tokenizer.pad_token_id is None:
|
| 11 |
tokenizer.pad_token_id = tokenizer.eos_token_id
|
| 12 |
|
| 13 |
+
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto")
|
| 14 |
|
| 15 |
def create_prompt(system_message, user_message, tool_definition="", context=""):
|
| 16 |
+
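# Build a plain-text prompt with labelled "System Context:" / "User:" / "Assistant:" sections; the <extra_id_1> turn markers of the previous Nemotron template are no longer used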
|
| 17 |
if tool_definition:
|
| 18 |
+
return f"""You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.
|
| 19 |
+
|
| 20 |
+
System Context: {system_message}
|
| 21 |
+
|
| 22 |
+
Tool Definition: {tool_definition}
|
| 23 |
+
|
| 24 |
+
Context: {context}
|
| 25 |
+
|
| 26 |
+
User: {user_message}
|
| 27 |
+
|
| 28 |
+
Assistant: Let me think about this step by step."""
|
|
|
|
|
|
|
|
|
|
| 29 |
else:
|
| 30 |
+
return f"""You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.
|
| 31 |
+
|
| 32 |
+
System Context: {system_message}
|
| 33 |
+
|
| 34 |
+
User: {user_message}
|
| 35 |
+
|
| 36 |
+
Assistant: Let me think about this step by step."""
|
| 37 |
|
| 38 |
|
| 39 |
@spaces.GPU(duration=94)
|
|
|
|
| 63 |
|
| 64 |
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 65 |
|
| 66 |
+
# Extract the assistant response: keep only the text after the last "Assistant: " marker in the decoded output
|
| 67 |
+
assistant_response = response.split("Assistant: ")[-1].strip()
|
| 68 |
|
| 69 |
if tool_definition and "<toolcall>" in assistant_response:
|
| 70 |
tool_call = assistant_response.split("<toolcall>")[1].split("</toolcall>")[0]
|
|
|
|
| 100 |
user_input = gr.TextArea(label="π€·π»ββοΈUser Input", placeholder="Hi there my name is Tonic!", lines=2)
|
| 101 |
advanced_checkbox = gr.Checkbox(label="π§ͺ Advanced Settings", value=False)
|
| 102 |
with gr.Column(visible=False) as advanced_settings:
|
| 103 |
+
max_length = gr.Slider(label="πMax Length", minimum=12, maximum=64000, value=2048, step=1)
|
| 104 |
temperature = gr.Slider(label="π‘οΈTemperature", minimum=0.01, maximum=1.0, value=0.7, step=0.01)
|
| 105 |
top_p = gr.Slider(label="βοΈTop-p (Nucleus Sampling)", minimum=0.1, maximum=1.0, value=0.9, step=0.01)
|
| 106 |
use_pipeline = gr.Checkbox(label="Use Pipeline", value=False)
|
|
|
|
| 113 |
language="json"
|
| 114 |
)
|
| 115 |
|
| 116 |
+
generate_button = gr.Button(value="π€OpenReasoning-Nemotron-14B")
|
| 117 |
|
| 118 |
with gr.Column(scale=2):
|
| 119 |
+
chatbot = gr.Chatbot(label="π€OpenReasoning-Nemotron-14B")
|
| 120 |
|
| 121 |
generate_button.click(
|
| 122 |
user,
|
|
|
|
| 143 |
|
| 144 |
if __name__ == "__main__":
|
| 145 |
demo.queue()
|
| 146 |
+
demo.launch(ssr_mode=False, mcp_server=True)
|
globe.py
CHANGED
|
@@ -3,27 +3,36 @@ joinus = """
|
|
| 3 |
πTeamTonicπ is always making cool demos! Join our active builder's π οΈcommunity π» [](https://discord.gg/qdfnvSPcqP) On π€Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On πGithub: [Tonic-AI](https://github.com/tonic-ai) & contribute toπ [Build Tonic](https://git.tonic-ai.com/contribute)π€Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant π€
|
| 4 |
"""
|
| 5 |
|
| 6 |
-
title = """# ππ»ββοΈWelcome to Tonic's π€
|
| 7 |
|
| 8 |
-
description = """nvidia/π€
|
| 9 |
"""
|
| 10 |
|
| 11 |
-
presentation1 = """Try this model on [
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
### License
|
| 16 |
|
| 17 |
-
|
| 18 |
|
| 19 |
presentation2 = """
|
| 20 |
### Model Architecture
|
| 21 |
|
| 22 |
-
π€Nemotron-
|
| 23 |
-
|
| 24 |
-
**Architecture Type:** Transformer Decoder (auto-regressive language model)
|
| 25 |
|
| 26 |
-
**
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
customtool = """{
|
| 29 |
"name": "custom_tool",
|
|
|
|
| 3 |
πTeamTonicπ is always making cool demos! Join our active builder's π οΈcommunity π» [](https://discord.gg/qdfnvSPcqP) On π€Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On πGithub: [Tonic-AI](https://github.com/tonic-ai) & contribute toπ [Build Tonic](https://git.tonic-ai.com/contribute)π€Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant π€
|
| 4 |
"""
|
| 5 |
|
| 6 |
+
title = """# ππ»ββοΈWelcome to Tonic's π€ OpenReasoning-Nemotron-14B Demo π"""
|
| 7 |
|
| 8 |
+
description = """nvidia/π€OpenReasoning-Nemotron-14B is a reasoning model that is post-trained for reasoning about math, code and science solution generation. It demonstrates exceptional performance across challenging reasoning benchmarks.
|
| 9 |
"""
|
| 10 |
|
| 11 |
+
presentation1 = """Try this model on [Hugging Face](https://huggingface.co/nvidia/OpenReasoning-Nemotron-14B).
|
| 12 |
|
| 13 |
+
OpenReasoning-Nemotron-14B is a large language model (LLM) derived from Qwen2.5-14B-Instruct. It is a reasoning model post-trained to generate solutions to math, code, and science problems. The model has been evaluated with up to 64K output tokens. OpenReasoning models are available in the following sizes: 1.5B, 7B, 14B and 32B.
|
| 14 |
+
|
| 15 |
+
The models demonstrate exceptional performance across a suite of challenging reasoning benchmarks. The 14B model consistently sets new state-of-the-art records for its size class, achieving:
|
| 16 |
+
- **AIME24**: 87.8% pass@1
|
| 17 |
+
- **AIME25**: 82.0% pass@1
|
| 18 |
+
- **HMMT Feb 25**: 71.2% pass@1
|
| 19 |
+
- **LiveCodeBench v6**: 67.9% pass@1
|
| 20 |
+
- **GPQA**: 71.6% pass@1
|
| 21 |
+
- **MMLU-PRO**: 77.5% pass@1
|
| 22 |
|
| 23 |
### License
|
| 24 |
|
| 25 |
+
Creative Commons Attribution 4.0 International License (CC-BY-4.0) with Apache 2.0 License"""
|
| 26 |
|
| 27 |
presentation2 = """
|
| 28 |
### Model Architecture
|
| 29 |
|
| 30 |
+
π€OpenReasoning-Nemotron-14B uses a dense decoder-only Transformer architecture based on Qwen2.5-14B-Instruct. It has 14B model parameters and supports up to 64,000 output tokens for extended reasoning chains.
|
|
|
|
|
|
|
| 31 |
|
| 32 |
+
**Architecture Type:** Dense decoder-only Transformer model
|
| 33 |
+
**Network Architecture:** Qwen2.5-14B-Instruct
|
| 34 |
+
**Model Size:** 14B parameters
|
| 35 |
+
**Max Output Tokens:** 64,000 """
|
| 36 |
|
| 37 |
customtool = """{
|
| 38 |
"name": "custom_tool",
|
transformers
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
Subproject commit 63d9cb0afd2bf5d4cb5431ba1b2c4e353752a937
|
|
|
|
|
|