Fathom / app.py
FractalAIR's picture
Update app.py
f14967b verified
raw
history blame
13.3 kB
# ---------------------------------------------------------------
# Fathom-R1-14B ZeroGPU chat-demo (Gradio Blocks)
# ---------------------------------------------------------------
import gradio as gr
import spaces
import torch, re, uuid, tiktoken
from transformers import (AutoModelForCausalLM,
AutoTokenizer,
TextIteratorStreamer)
from threading import Thread
# ────────────────────────────────────────────────────────────────
# 1. Load the model on the single GPU supplied by ZeroGPU
# (4-bit to stay well below the 24 GB VRAM of an A10G)
# ────────────────────────────────────────────────────────────────
# Hub id of the checkpoint served by this demo.
model_name = "FractalAIResearch/Fathom-R1-14B"

# NOTE: trust_remote_code=True lets transformers execute Python code shipped
# inside the model repository; keep it only while the repo is trusted.
try:
    # Preferred path: 4-bit quantized weights (needs bitsandbytes).
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        load_in_4bit=True,
        trust_remote_code=True,
    )
except (ImportError, RuntimeError):
    # Fall back to fp16 if 4-bit isn't available.  transformers reports a
    # missing bitsandbytes/accelerate install as ImportError, so catching
    # RuntimeError alone would let that case abort start-up instead of
    # reaching this fallback.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Device holding the model's first parameter; prompts are moved there
# before generation.
device = next(model.parameters()).device  # usually cuda:0
# ────────────────────────────────────────────────────────────────
# 2. Helpers
# ────────────────────────────────────────────────────────────────
def format_math(text: str) -> str:
    r"""Rewrite LaTeX delimiters so the chat widget renders them as math.

    ``\[ ... \]`` (display math, may span several lines) becomes
    ``$$ ... $$`` and ``\(`` / ``\)`` each become ``$``.

    Only backslash-escaped brackets are treated as math delimiters.  Plain
    square brackets such as list indices (``a[0]``) or markdown links
    (``[text](url)``) are left untouched.

    Args:
        text: Raw model output.

    Returns:
        The text with the delimiters replaced.
    """
    # re.DOTALL lets a display-math block span line breaks; the lazy
    # quantifier keeps neighbouring blocks from being merged into one.
    text = re.sub(r"\\\[(.*?)\\\]", r"$$\1$$", text, flags=re.DOTALL)
    return text.replace(r"\(", "$").replace(r"\)", "$")
def generate_conversation_id() -> str:
    """Return a short random id: the first eight hex digits of a fresh UUID4."""
    return uuid.uuid4().hex[:8]
# tiktoken encoder, kept for counting tokens of streamed text.
# NOTE(review): this is the encoding tiktoken associates with
# "gpt-3.5-turbo", not the tokenizer loaded for `model_name`, so its counts
# are presumably only an approximation of the model's own token count —
# confirm that is acceptable wherever it is used.
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
# Markers that delimit one chat turn in the prompt: turn start, the
# separator between role name and content, and turn end.
BOS = "<|im_start|>"
SEP = "<|im_sep|>"
EOS = "<|im_end|>"

# System instruction placed at the start of every prompt (the literal is
# abbreviated and ends with an ellipsis character).
system_message = (
    "Your role as an assistant involves thoroughly exploring questions through "
    "a systematic thinking process before providing the final precise and "
    "accurate solutions. …"
)
def build_prompt(history, user_msg: str) -> str:
    """Serialize a conversation into a single prompt string.

    The prompt consists of the system turn, every message in ``history``
    (dicts with ``"role"`` and ``"content"`` keys), the new user turn, and
    finally an opened assistant turn for the model to complete.

    Args:
        history: Earlier messages, oldest first.
        user_msg: The user's new message.

    Returns:
        The concatenated prompt text.
    """
    # NOTE(review): the turn markers are hard-coded module constants rather
    # than taken from the tokenizer's chat template — confirm they match the
    # format this checkpoint expects.
    def _turn(role, content):
        return f"{BOS}{role}{SEP}{content}{EOS}"

    pieces = [_turn("system", system_message)]
    pieces.extend(_turn(msg["role"], msg["content"]) for msg in history)
    pieces.append(_turn("user", user_msg))
    # The assistant turn is left unterminated on purpose.
    pieces.append(f"{BOS}assistant{SEP}")
    return "".join(pieces)
# ────────────────────────────────────────────────────────────────
# 3. Generation (runs on the GPU for 60 s max per call)
# ────────────────────────────────────────────────────────────────
@spaces.GPU(duration=60)
def generate_response(user_message,
                      max_tokens,
                      temperature,
                      top_p,
                      history_state):
    """Stream the assistant's reply to ``user_message`` as growing histories.

    This is a generator.  Every item it yields is a pair
    ``(visible_chatbot, history_state)`` whose two elements are the same list
    of ``{"role", "content"}`` dicts: the incoming history followed by the
    new user turn and the assistant text produced so far.

    Args:
        user_message: Text typed by the user.  If it is blank, the incoming
            history is yielded once, unchanged, and nothing is generated.
        max_tokens: Cap on newly generated tokens; forwarded to
            ``model.generate`` as ``max_new_tokens``.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.
        history_state: Earlier messages.  The list is not modified; a new
            list is built for the yielded snapshots.

    Yields:
        ``(new_history, new_history)`` after each streamed text chunk, and
        once more when generation has finished.
    """
    if not user_message.strip():
        # ``return <value>`` inside a generator yields nothing to the caller,
        # so the unchanged history has to be yielded explicitly.
        yield history_state, history_state
        return

    prompt = build_prompt(history_state, user_message)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer,
                                    skip_prompt=True,
                                    skip_special_tokens=True)
    gen_kwargs = dict(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )

    # Run generate in a background thread so text can be consumed from the
    # streamer while the model is still producing it.
    worker = Thread(target=model.generate, kwargs=gen_kwargs)
    worker.start()

    assistant_response = ""
    new_history = history_state + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": ""},
    ]

    # The token budget is enforced by ``max_new_tokens`` above, counted in
    # the model's own tokens.  Re-counting the streamed text with a different
    # tokenizer and breaking out early could truncate the visible reply while
    # the worker thread kept generating, so no second budget check is done.
    for new_tok in streamer:
        assistant_response += new_tok
        # The stored assistant text is the math-formatted version; later
        # prompts built from this history therefore contain that version.
        new_history[-1]["content"] = format_math(assistant_response.strip())
        yield new_history, new_history

    # Make sure generation has fully finished before the final snapshot.
    worker.join()
    yield new_history, new_history
# ────────────────────────────────────────────────────────────────
# 4. Demo UI – identical to your current one
# ────────────────────────────────────────────────────────────────
# Canned prompts (abbreviated) offered through the example buttons,
# keyed by the label of the example.
example_messages = {
    "IIT-JEE 2024 Mathematics":
        "A student appears for a quiz consisting of only true-false type "
        "questions and answers all the questions. …",
    "IIT-JEE 2025 Physics":
        "A person sitting inside an elevator performs a weighing experiment …",
    "Goldman Sachs Interview Puzzle":
        "Four friends need to cross a dangerous bridge at night …",
    "IIT-JEE 2025 Mathematics":
        "Let S be the set of all seven-digit numbers that can be formed …",
}
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Session-scoped state:
    #   conversations_state: conversation id -> {"title": str, "messages": list}
    #   current_convo_id:    id of the conversation currently shown
    #   history_state:       message list of the current conversation
    conversations_state = gr.State({})
    current_convo_id = gr.State(generate_conversation_id())
    history_state = gr.State([])

    # Header
    gr.HTML(
        """
<div style="display:flex;align-items:center;gap:16px;margin-bottom:1em">
<div style="background-color:black;padding:6px;border-radius:8px">
<img src="https://framerusercontent.com/images/j0KjQQyrUfkFw4NwSaxQOLAoBU.png"
style="height:48px">
</div>
<h1 style="margin:0;">Fathom R1 14B Chatbot</h1>
</div>
"""
    )

    # Sidebar
    with gr.Sidebar():
        gr.Markdown("## Conversations")
        conversation_selector = gr.Radio(choices=[], label="Select Conversation", interactive=True)
        # The label previously carried a mis-decoded emoji; this is the
        # intended heavy-plus sign.
        new_convo_button = gr.Button("New Conversation ➕")

    with gr.Row():
        with gr.Column(scale=1):
            # intro text
            gr.Markdown(
                """
Welcome to the Fathom R1 14B Chatbot, developed by **Fractal AI Research**!
This model excels at reasoning tasks in mathematics and science …
Once you close this demo window, all currently saved conversations will be lost.
"""
            )
            # Settings
            gr.Markdown("### Settings")
            max_tokens_slider = gr.Slider(6144, 32768, step=1024, value=16384, label="Max Tokens")
            with gr.Accordion("Advanced Settings", open=True):
                temperature_slider = gr.Slider(0.1, 2.0, value=0.6, label="Temperature")
                top_p_slider = gr.Slider(0.1, 1.0, value=0.95, label="Top-p")
            gr.Markdown(
                """
We sincerely acknowledge [VIDraft](https://huggingface.co/VIDraft) …
"""
            )
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(label="Chat", type="messages", height=520)
            with gr.Row():
                user_input = gr.Textbox(label="User Input",
                                        placeholder="Type your question here…",
                                        lines=3, scale=8)
                with gr.Column():
                    submit_button = gr.Button("Send", variant="primary", scale=1)
                    clear_button = gr.Button("Clear", scale=1)
            # examples
            gr.Markdown("**Try these examples:**")
            with gr.Row():
                example1_button = gr.Button("IIT-JEE 2025 Mathematics")
                example2_button = gr.Button("IIT-JEE 2025 Physics")
                example3_button = gr.Button("Goldman Sachs Interview Puzzle")
                example4_button = gr.Button("IIT-JEE 2024 Mathematics")

    # ───────── conversation-management helpers ──────────────────
    def update_conversation_list(conversations):
        """Return the titles of all stored conversations, in insertion order."""
        return [convo["title"] for convo in conversations.values()]

    def _title_from(message):
        """Derive a conversation title from the first five words of a message."""
        return " ".join(message.strip().split()[:5])

    def start_new_conversation(conversations):
        """Register an empty conversation and make it the current one.

        Returns the new id, an empty history, a selector update listing all
        titles with the new one selected, and the updated conversations dict.
        """
        new_id = generate_conversation_id()
        conversations[new_id] = {"title": f"New Conversation {new_id}", "messages": []}
        selector_update = gr.update(choices=update_conversation_list(conversations),
                                    value=conversations[new_id]["title"])
        return new_id, [], selector_update, conversations

    def load_conversation(selected_title, conversations, convo_id, history):
        """Switch to the conversation whose title is ``selected_title``.

        Returns ``(conversation id, history, chatbot messages)``.  When no
        stored conversation has that title, the session's current id and
        history are returned unchanged.  Reading ``.value`` from the State
        components instead would give their construction-time defaults and
        silently reset the session.
        """
        for cid, convo in conversations.items():
            if convo["title"] == selected_title:
                return cid, convo["messages"], convo["messages"]
        return convo_id, history, history

    # main "send" wrapper: keeps the conversations dict in sync
    def send_message(user_message, max_tokens, temperature, top_p,
                     convo_id, history, conversations):
        """Generate a reply and mirror every streamed update into the session.

        Yields ``(chatbot messages, history, selector update, conversations)``
        for each snapshot produced by ``generate_response``.
        """
        if not user_message.strip():
            # Blank input: do not create or retitle a conversation (that
            # would store an empty title); leave the UI as it is.
            yield history, history, gr.update(), conversations
            return

        title = _title_from(user_message)
        if convo_id not in conversations:
            conversations[convo_id] = {"title": title, "messages": history}
        elif conversations[convo_id]["title"].startswith("New Conversation"):
            # Replace the placeholder title with one taken from the message.
            conversations[convo_id]["title"] = title

        # call the streamer generator and forward its yields
        for updated_history, new_history in generate_response(
                user_message, max_tokens, temperature, top_p, history):
            conversations[convo_id]["messages"] = new_history
            yield (updated_history, new_history,
                   gr.update(choices=update_conversation_list(conversations),
                             value=conversations[convo_id]["title"]),
                   conversations)

    # ───────── UI → functions wiring ────────────────────────────
    submit_button.click(
        fn=send_message,
        inputs=[user_input, max_tokens_slider, temperature_slider, top_p_slider,
                current_convo_id, history_state, conversations_state],
        outputs=[chatbot, history_state, conversation_selector, conversations_state],
        concurrency_limit=16
    ).then(
        # clear the input box once the reply has finished streaming
        fn=lambda: gr.update(value=""),
        inputs=None,
        outputs=user_input
    )
    clear_button.click(fn=lambda: ([], []), inputs=None,
                       outputs=[chatbot, history_state])
    new_convo_button.click(fn=start_new_conversation,
                           inputs=[conversations_state],
                           outputs=[current_convo_id, history_state,
                                    conversation_selector, conversations_state])
    conversation_selector.change(fn=load_conversation,
                                 inputs=[conversation_selector, conversations_state,
                                         current_convo_id, history_state],
                                 outputs=[current_convo_id, history_state, chatbot])

    # example buttons: copy the matching canned prompt into the input box
    example1_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2025 Mathematics"]),
                          None, user_input)
    example2_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2025 Physics"]),
                          None, user_input)
    example3_button.click(lambda: gr.update(value=example_messages["Goldman Sachs Interview Puzzle"]),
                          None, user_input)
    example4_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2024 Mathematics"]),
                          None, user_input)
# ────────────────────────────────────────────────────────────────
# 5. Launch
# ────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Enable the request queue first, then start the server on the object
    # that queue() hands back.
    queued_demo = demo.queue()
    queued_demo.launch(share=True, ssr_mode=False)