# ---------------------------------------------------------------
# Fathom-R1-14B ZeroGPU chat-demo (Gradio Blocks)
# ---------------------------------------------------------------
import gradio as gr
import spaces
import torch, re, uuid, tiktoken
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          TextIteratorStreamer)
from threading import Thread
# ────────────────────────────────────────────────────────────────
# 1. Load the model on the single GPU supplied by ZeroGPU
#    (4-bit to stay well below the 24 GB VRAM of an A10G)
# ────────────────────────────────────────────────────────────────
model_name = "FractalAIResearch/Fathom-R1-14B"
try:
    # 1-line 4-bit loading (needs bitsandbytes, already in the HF Space image)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        load_in_4bit=True,
        trust_remote_code=True
    )
except (ImportError, RuntimeError, ValueError):
    # fall back to fp16 if 4-bit isn't available
    # (bitsandbytes missing or the quantized load fails)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
tokenizer = AutoTokenizer.from_pretrained(model_name)
device = next(model.parameters()).device # usually cuda:0
# ────────────────────────────────────────────────────────────────
# 2. Helpers
# ────────────────────────────────────────────────────────────────
def format_math(text: str) -> str:
    """Replace [...] with $$...$$ and \\(...\\) with $...$ for nicer math rendering."""
    text = re.sub(r"\[(.*?)\]", r"$$\1$$", text, flags=re.DOTALL)
    return text.replace(r"\(", "$").replace(r"\)", "$")
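# Illustrative only (not called at import time): the regex turns bracketed
# display math into $$...$$ and inline \( \) into $...$, e.g.
#   format_math(r"area: [\pi r^2], radius \(r\)")
#   -> 'area: $$\pi r^2$$, radius $r$'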
def generate_conversation_id() -> str:
    return str(uuid.uuid4())[:8]
# tiktoken: we just keep it to count tokens during streaming
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
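# e.g. len(enc.encode("Hello world")) == 2; counts are only approximate for
# Fathom-R1 (it has its own tokenizer) but close enough for the rough token
# budget enforced in the streaming loop below.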
# Build a prompt that Fathom-R1 understands
BOS, SEP, EOS = "<|im_start|>", "<|im_sep|>", "<|im_end|>"
system_message = (
    "Your role as an assistant involves thoroughly exploring questions "
    "through a systematic thinking process before providing the final "
    "precise and accurate solutions. …"  # same text you used before
)
def build_prompt(history, user_msg: str) -> str:
    prompt = f"{BOS}system{SEP}{system_message}{EOS}"
    for m in history:
        role = m["role"]
        prompt += f"{BOS}{role}{SEP}{m['content']}{EOS}"
    prompt += f"{BOS}user{SEP}{user_msg}{EOS}{BOS}assistant{SEP}"
    return prompt
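# Sketch of the result for history=[] and user_msg="2+2?":
#   <|im_start|>system<|im_sep|>{system_message}<|im_end|><|im_start|>user<|im_sep|>2+2?<|im_end|><|im_start|>assistant<|im_sep|>
# i.e. a ChatML-style transcript left open at the assistant turn so the model
# keeps writing from there.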
# ────────────────────────────────────────────────────────────────
# 3. Generation (runs on the GPU for 60 s max per call)
# ────────────────────────────────────────────────────────────────
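# ZeroGPU attaches a GPU only for the duration of the decorated call;
# duration=60 caps each request at roughly 60 s of GPU time.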
@spaces.GPU(duration=60)
def generate_response(user_message,
                      max_tokens,
                      temperature,
                      top_p,
                      history_state):
    """
    Keeps exactly the signature the rest of the UI expects:
    yields (visible_chatbot, history_state).
    """
    if not user_message.strip():
        # yield (not return) so Gradio still receives an output for empty input
        yield history_state, history_state
        return
    prompt = build_prompt(history_state, user_message)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer,
                                    skip_prompt=True,
                                    skip_special_tokens=True)
    gen_kwargs = dict(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer
    )
    # run generate in a background thread so tokens can be streamed here
    Thread(target=model.generate, kwargs=gen_kwargs).start()
    assistant_response = ""
    new_history = history_state + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": ""}
    ]
    # live-stream tokens to the UI
    tokens_seen = 0
    token_budget = int(max_tokens)
    for new_tok in streamer:
        assistant_response += new_tok
        tokens_seen += len(enc.encode(new_tok))
        new_history[-1]["content"] = format_math(assistant_response.strip())
        yield new_history, new_history
        if tokens_seen >= token_budget:
            break
    # final yield so the UI holds the completed answer
    yield new_history, new_history
# ────────────────────────────────────────────────────────────────
# 4. Demo UI - identical to your current one
# ────────────────────────────────────────────────────────────────
example_messages = {
    "IIT-JEE 2024 Mathematics": (
        "A student appears for a quiz consisting of only true-false type "
        "questions and answers all the questions. …"
    ),
    "IIT-JEE 2025 Physics": (
        "A person sitting inside an elevator performs a weighing experiment …"
    ),
    "Goldman Sachs Interview Puzzle": (
        "Four friends need to cross a dangerous bridge at night …"
    ),
    "IIT-JEE 2025 Mathematics": (
        "Let S be the set of all seven-digit numbers that can be formed …"
    )
}
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # session-scoped states
    conversations_state = gr.State({})
    current_convo_id = gr.State(generate_conversation_id())
    history_state = gr.State([])
    # Header
    gr.HTML(
        """
        <div style="display:flex;align-items:center;gap:16px;margin-bottom:1em">
          <div style="background-color:black;padding:6px;border-radius:8px">
            <img src="https://framerusercontent.com/images/j0KjQQyrUfkFw4NwSaxQOLAoBU.png"
                 style="height:48px">
          </div>
          <h1 style="margin:0;">Fathom R1 14B Chatbot</h1>
        </div>
        """
    )
    # Sidebar
    with gr.Sidebar():
        gr.Markdown("## Conversations")
        conversation_selector = gr.Radio(choices=[], label="Select Conversation",
                                         interactive=True)
        new_convo_button = gr.Button("New Conversation ➕")
    with gr.Row():
        with gr.Column(scale=1):
            # intro text
            gr.Markdown(
                """
                Welcome to the Fathom R1 14B Chatbot, developed by **Fractal AI Research**!
                This model excels at reasoning tasks in mathematics and science …
                Once you close this demo window, all currently saved conversations will be lost.
                """
            )
            # Settings
            gr.Markdown("### Settings")
            max_tokens_slider = gr.Slider(6144, 32768, step=1024, value=16384,
                                          label="Max Tokens")
            with gr.Accordion("Advanced Settings", open=True):
                temperature_slider = gr.Slider(0.1, 2.0, value=0.6, label="Temperature")
                top_p_slider = gr.Slider(0.1, 1.0, value=0.95, label="Top-p")
            gr.Markdown(
                """
                We sincerely acknowledge [VIDraft](https://huggingface.co/VIDraft) …
                """
            )
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(label="Chat", type="messages", height=520)
            with gr.Row():
                user_input = gr.Textbox(label="User Input",
                                        placeholder="Type your question here…",
                                        lines=3, scale=8)
                with gr.Column():
                    submit_button = gr.Button("Send", variant="primary", scale=1)
                    clear_button = gr.Button("Clear", scale=1)
            # examples
            gr.Markdown("**Try these examples:**")
            with gr.Row():
                example1_button = gr.Button("IIT-JEE 2025 Mathematics")
                example2_button = gr.Button("IIT-JEE 2025 Physics")
                example3_button = gr.Button("Goldman Sachs Interview Puzzle")
                example4_button = gr.Button("IIT-JEE 2024 Mathematics")
    # ───────── conversation-management helpers ──────────────────
    def update_conversation_list(conversations):
        return [conversations[cid]["title"] for cid in conversations]

    def start_new_conversation(conversations):
        new_id = generate_conversation_id()
        conversations[new_id] = {"title": f"New Conversation {new_id}", "messages": []}
        return new_id, [], gr.update(choices=update_conversation_list(conversations),
                                     value=conversations[new_id]["title"]), conversations

    def load_conversation(selected_title, conversations):
        for cid, convo in conversations.items():
            if convo["title"] == selected_title:
                return cid, convo["messages"], convo["messages"]
        return current_convo_id.value, history_state.value, history_state.value
    # main "send" wrapper: keeps the conversations dict in sync
    def send_message(user_message, max_tokens, temperature, top_p,
                     convo_id, history, conversations):
        if convo_id not in conversations:
            title = " ".join(user_message.strip().split()[:5])
            conversations[convo_id] = {"title": title, "messages": history}
        if conversations[convo_id]["title"].startswith("New Conversation"):
            conversations[convo_id]["title"] = " ".join(user_message.strip().split()[:5])
        # call the streaming generator and forward its yields
        for updated_history, new_history in generate_response(
                user_message, max_tokens, temperature, top_p, history):
            conversations[convo_id]["messages"] = new_history
            yield (updated_history, new_history,
                   gr.update(choices=update_conversation_list(conversations),
                             value=conversations[convo_id]["title"]),
                   conversations)
    # ───────── UI → function wiring ─────────────────────────────
    submit_button.click(
        fn=send_message,
        inputs=[user_input, max_tokens_slider, temperature_slider, top_p_slider,
                current_convo_id, history_state, conversations_state],
        outputs=[chatbot, history_state, conversation_selector, conversations_state],
        concurrency_limit=16
    ).then(
        fn=lambda: gr.update(value=""),
        inputs=None,
        outputs=user_input
    )
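    # .then() fires only after send_message's generator is exhausted, so the
    # textbox is cleared once streaming has finished.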
    clear_button.click(fn=lambda: ([], []), inputs=None,
                       outputs=[chatbot, history_state])
    new_convo_button.click(fn=start_new_conversation,
                           inputs=[conversations_state],
                           outputs=[current_convo_id, history_state,
                                    conversation_selector, conversations_state])
    conversation_selector.change(fn=load_conversation,
                                 inputs=[conversation_selector, conversations_state],
                                 outputs=[current_convo_id, history_state, chatbot])
    # example buttons
    example1_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2025 Mathematics"]),
                          None, user_input)
    example2_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2025 Physics"]),
                          None, user_input)
    example3_button.click(lambda: gr.update(value=example_messages["Goldman Sachs Interview Puzzle"]),
                          None, user_input)
    example4_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2024 Mathematics"]),
                          None, user_input)
# ────────────────────────────────────────────────────────────────
# 5. Launch
# ────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    demo.queue().launch(share=True, ssr_mode=False)