Fathom

Sleeping

App Files Files Community

Fathom / app.py

FractalAIR

Update app.py

f14967b verified 4 months ago

raw

history blame

13.3 kB

	# ---------------------------------------------------------------
	# Fathom-R1-14B ZeroGPU chat-demo (Gradio Blocks)
	# ---------------------------------------------------------------

	import gradio as gr
	import spaces
	import torch, re, uuid, tiktoken
	from transformers import (AutoModelForCausalLM,
	AutoTokenizer,
	TextIteratorStreamer)
	from threading import Thread

	# ────────────────────────────────────────────────────────────────
	# 1. Load the model on the single GPU supplied by ZeroGPU
	# (4-bit to stay well below the 24 GB VRAM of an A10G)
	# ────────────────────────────────────────────────────────────────
	model_name = "FractalAIResearch/Fathom-R1-14B"

	try:
	    # Preferred path: 4-bit quantized weights (requires bitsandbytes).
	    model = AutoModelForCausalLM.from_pretrained(
	        model_name,
	        device_map="auto",
	        load_in_4bit=True,
	        trust_remote_code=True,
	    )
	except (ImportError, RuntimeError):
	    # Fall back to fp16 when 4-bit loading isn't available.  A missing
	    # quantization dependency surfaces as ImportError, an unusable device
	    # as RuntimeError, so both must be caught for the fallback to trigger.
	    model = AutoModelForCausalLM.from_pretrained(
	        model_name,
	        torch_dtype=torch.float16,
	        device_map="auto",
	        trust_remote_code=True,
	    )

	tokenizer = AutoTokenizer.from_pretrained(model_name)
	# Device of the first model parameter; prompt tensors are moved here.
	device = next(model.parameters()).device  # usually cuda:0


	# ────────────────────────────────────────────────────────────────
	# 2. Helpers
	# ────────────────────────────────────────────────────────────────
	def format_math(text: str) -> str:
	    r"""Rewrite LaTeX math delimiters into dollar-sign form for rendering.

	    ``\[ ... \]`` (display math, may span several lines) becomes
	    ``$$ ... $$`` and ``\( ... \)`` (inline math) becomes ``$ ... $``.
	    Plain square brackets and parentheses - list literals, indexing,
	    Markdown links - are left untouched.

	    Args:
	        text: model output that may contain LaTeX delimiters.

	    Returns:
	        The text with backslash delimiters replaced by dollar delimiters.
	    """
	    # Only backslash-escaped brackets delimit display math; DOTALL lets a
	    # formula span line breaks.
	    text = re.sub(r"\\\[(.*?)\\\]", r"$$\1$$", text, flags=re.DOTALL)
	    return text.replace(r"\(", "$").replace(r"\)", "$")


	def generate_conversation_id() -> str:
	    """Return a short random id: the first 8 hex digits of a fresh UUID4."""
	    fresh_uuid = uuid.uuid4()
	    return fresh_uuid.hex[:8]


	# tiktoken – we just keep it to count tokens during streaming.
	# NOTE(review): this is the encoding tiktoken associates with
	# "gpt-3.5-turbo", not the tokenizer loaded for `model_name`, so the counts
	# presumably only approximate the model's own token counts – confirm.
	enc = tiktoken.encoding_for_model("gpt-3.5-turbo")


	# Markers used to assemble the chat prompt: each turn is written as
	# BOS + role + SEP + content + EOS.  The markers must not contain
	# backslashes ("\|" is not a valid Python escape, so the backslash would end
	# up inside the string and the marker would no longer match).
	# NOTE(review): confirm these markers agree with the chat template shipped
	# with the model's tokenizer.
	BOS, SEP, EOS = "<|im_start|>", "<|im_sep|>", "<|im_end|>"

	# System turn placed at the start of every prompt.
	system_message = (
	    "Your role as an assistant involves thoroughly exploring questions "
	    "through a systematic thinking process before providing the final "
	    "precise and accurate solutions. …"  # same text you used before
	)


	def build_prompt(history, user_msg: str) -> str:
	    """Assemble the full prompt string for one generation call.

	    The prompt is the system turn, then every message in ``history``
	    (dicts with "role" and "content"), then the new user turn, and finally
	    an opened assistant turn for the model to complete.
	    """
	    segments = [f"{BOS}system{SEP}{system_message}{EOS}"]
	    segments.extend(
	        f"{BOS}{turn['role']}{SEP}{turn['content']}{EOS}" for turn in history
	    )
	    # The trailing assistant header is left open on purpose.
	    segments.append(f"{BOS}user{SEP}{user_msg}{EOS}{BOS}assistant{SEP}")
	    return "".join(segments)


	# ────────────────────────────────────────────────────────────────
	# 3. Generation (runs on the GPU for 60 s max per call)
	# ────────────────────────────────────────────────────────────────
	@spaces.GPU(duration=60)
	def generate_response(user_message,
	                      max_tokens,
	                      temperature,
	                      top_p,
	                      history_state):
	    """
	    Stream the assistant's reply to ``user_message``.

	    This is a generator. Every item it yields is a
	    ``(visible_chatbot, history_state)`` pair holding the same message list:
	    the previous history plus the new user turn and the assistant turn
	    generated so far.

	    Args:
	        user_message: text typed by the user. Blank (or missing) input yields
	            one item carrying the unchanged history.
	        max_tokens: cap on newly generated tokens (``max_new_tokens``).
	        temperature: sampling temperature.
	        top_p: nucleus-sampling threshold.
	        history_state: list of ``{"role", "content"}`` dicts for the earlier
	            turns; a new list is built rather than appending to it.
	    """
	    if not user_message or not user_message.strip():
	        # A `return value` inside a generator is never delivered to the code
	        # iterating over it, so emit the unchanged history explicitly.
	        yield history_state, history_state
	        return

	    prompt = build_prompt(history_state, user_message)
	    inputs = tokenizer(prompt, return_tensors="pt").to(device)

	    streamer = TextIteratorStreamer(tokenizer,
	                                    skip_prompt=True,
	                                    skip_special_tokens=True)

	    gen_kwargs = dict(
	        input_ids=inputs["input_ids"],
	        attention_mask=inputs["attention_mask"],
	        max_new_tokens=int(max_tokens),
	        temperature=float(temperature),
	        top_p=float(top_p),
	        do_sample=True,
	        eos_token_id=tokenizer.eos_token_id,
	        pad_token_id=tokenizer.eos_token_id,
	        streamer=streamer,
	    )

	    # run generate in a background thread – lets us stream tokens
	    Thread(target=model.generate, kwargs=gen_kwargs).start()

	    assistant_response = ""
	    new_history = history_state + [
	        {"role": "user", "content": user_message},
	        {"role": "assistant", "content": ""},
	    ]

	    # live-stream tokens to the UI
	    # The budget below is counted with `enc`, independently of the
	    # `max_new_tokens` limit already handed to generate().
	    tokens_seen = 0
	    token_budget = int(max_tokens)

	    for new_tok in streamer:
	        assistant_response += new_tok
	        tokens_seen += len(enc.encode(new_tok))
	        new_history[-1]["content"] = format_math(assistant_response.strip())
	        yield new_history, new_history
	        if tokens_seen >= token_budget:
	            break

	    # final state once streaming has stopped
	    yield new_history, new_history


	# ────────────────────────────────────────────────────────────────
	# 4. Demo UI – identical to your current one
	# ────────────────────────────────────────────────────────────────
	# Example prompts, keyed by a short title.
	example_messages = {
	    "IIT-JEE 2024 Mathematics":
	        "A student appears for a quiz consisting of only true-false type questions "
	        "and answers all the questions. …",
	    "IIT-JEE 2025 Physics":
	        "A person sitting inside an elevator performs a weighing experiment …",
	    "Goldman Sachs Interview Puzzle":
	        "Four friends need to cross a dangerous bridge at night …",
	    "IIT-JEE 2025 Mathematics":
	        "Let S be the set of all seven-digit numbers that can be formed …",
	}

	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    # session-scoped states: every saved conversation, the id of the active
	    # conversation, and the message list of the active conversation
	    conversations_state = gr.State({})
	    current_convo_id = gr.State(generate_conversation_id())
	    history_state = gr.State([])

	    # Header
	    gr.HTML(
	        """
	        <div style="display:flex;align-items:center;gap:16px;margin-bottom:1em">
	        <div style="background-color:black;padding:6px;border-radius:8px">
	        <img src="https://framerusercontent.com/images/j0KjQQyrUfkFw4NwSaxQOLAoBU.png"
	        style="height:48px">
	        </div>
	        <h1 style="margin:0;">Fathom R1 14B Chatbot</h1>
	        </div>
	        """
	    )

	    # Sidebar
	    with gr.Sidebar():
	        gr.Markdown("## Conversations")
	        conversation_selector = gr.Radio(choices=[], label="Select Conversation", interactive=True)
	        new_convo_button = gr.Button("New Conversation ➕")

	    with gr.Row():
	        with gr.Column(scale=1):
	            # intro text
	            gr.Markdown(
	                """
	                Welcome to the Fathom R1 14B Chatbot, developed by Fractal AI Research!
	                This model excels at reasoning tasks in mathematics and science …

	                Once you close this demo window, all currently saved conversations will be lost.
	                """
	            )

	            # Settings
	            gr.Markdown("### Settings")
	            max_tokens_slider = gr.Slider(6144, 32768, step=1024, value=16384, label="Max Tokens")
	            with gr.Accordion("Advanced Settings", open=True):
	                temperature_slider = gr.Slider(0.1, 2.0, value=0.6, label="Temperature")
	                top_p_slider = gr.Slider(0.1, 1.0, value=0.95, label="Top-p")

	            gr.Markdown(
	                """
	                We sincerely acknowledge [VIDraft](https://huggingface.co/VIDraft) …
	                """
	            )

	        with gr.Column(scale=4):
	            chatbot = gr.Chatbot(label="Chat", type="messages", height=520)
	            with gr.Row():
	                user_input = gr.Textbox(label="User Input",
	                                        placeholder="Type your question here…",
	                                        lines=3, scale=8)
	                with gr.Column():
	                    submit_button = gr.Button("Send", variant="primary", scale=1)
	                    clear_button = gr.Button("Clear", scale=1)

	            # examples
	            gr.Markdown("Try these examples:")
	            with gr.Row():
	                example1_button = gr.Button("IIT-JEE 2025 Mathematics")
	                example2_button = gr.Button("IIT-JEE 2025 Physics")
	                example3_button = gr.Button("Goldman Sachs Interview Puzzle")
	                example4_button = gr.Button("IIT-JEE 2024 Mathematics")

	    # ───────── conversation-management helpers ──────────────────
	    def update_conversation_list(conversations):
	        """Return the titles of all stored conversations, in dict order."""
	        return [conversations[cid]["title"] for cid in conversations]

	    def start_new_conversation(conversations):
	        """Register an empty conversation under a fresh id and select it.

	        Returns (new id, empty history, selector update, conversations).
	        """
	        new_id = generate_conversation_id()
	        conversations[new_id] = {"title": f"New Conversation {new_id}", "messages": []}
	        return new_id, [], gr.update(choices=update_conversation_list(conversations),
	                                     value=conversations[new_id]["title"]), conversations

	    def load_conversation(selected_title, conversations, convo_id, history):
	        """Switch to the stored conversation whose title was selected.

	        Returns (conversation id, history, chatbot messages).  When no stored
	        conversation carries ``selected_title`` the session's current id and
	        history are handed back unchanged.  They arrive as inputs because a
	        State component's ``.value`` attribute only holds the default it was
	        constructed with, not the live per-session value.
	        """
	        # NOTE: lookup is by title, so the first conversation with a given
	        # title wins if two titles are identical.
	        for cid, convo in conversations.items():
	            if convo["title"] == selected_title:
	                return cid, convo["messages"], convo["messages"]
	        return convo_id, history, history

	    # main “send” wrapper: keeps conversations dict in sync
	    def send_message(user_message, max_tokens, temperature, top_p,
	                     convo_id, history, conversations):
	        """Stream a reply and mirror each partial history into ``conversations``.

	        Yields (chatbot messages, history, selector update, conversations).
	        Blank input yields nothing, which leaves every output as it is and
	        avoids registering a conversation with an empty title.
	        """
	        if not user_message or not user_message.strip():
	            return

	        # Title = first five words of the message; used for a conversation
	        # that is new or still carries its "New Conversation" placeholder.
	        title = " ".join(user_message.strip().split()[:5])
	        if convo_id not in conversations:
	            conversations[convo_id] = {"title": title, "messages": history}
	        elif conversations[convo_id]["title"].startswith("New Conversation"):
	            conversations[convo_id]["title"] = title

	        # call the streamer generator and forward its yields
	        for updated_history, new_history in generate_response(
	                user_message, max_tokens, temperature, top_p, history):
	            conversations[convo_id]["messages"] = new_history
	            yield (updated_history, new_history,
	                   gr.update(choices=update_conversation_list(conversations),
	                             value=conversations[convo_id]["title"]),
	                   conversations)

	    # ───────── UI → functions wiring ────────────────────────────
	    submit_button.click(
	        fn=send_message,
	        inputs=[user_input, max_tokens_slider, temperature_slider, top_p_slider,
	                current_convo_id, history_state, conversations_state],
	        outputs=[chatbot, history_state, conversation_selector, conversations_state],
	        concurrency_limit=16
	    ).then(
	        # empty the input box once the send handler has finished
	        fn=lambda: gr.update(value=""),
	        inputs=None,
	        outputs=user_input
	    )

	    clear_button.click(fn=lambda: ([], []), inputs=None,
	                       outputs=[chatbot, history_state])

	    new_convo_button.click(fn=start_new_conversation,
	                           inputs=[conversations_state],
	                           outputs=[current_convo_id, history_state,
	                                    conversation_selector, conversations_state])

	    conversation_selector.change(fn=load_conversation,
	                                 inputs=[conversation_selector, conversations_state,
	                                         current_convo_id, history_state],
	                                 outputs=[current_convo_id, history_state, chatbot])

	    # example buttons: each one copies its example text into the input box
	    example1_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2025 Mathematics"]),
	                          None, user_input)
	    example2_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2025 Physics"]),
	                          None, user_input)
	    example3_button.click(lambda: gr.update(value=example_messages["Goldman Sachs Interview Puzzle"]),
	                          None, user_input)
	    example4_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2024 Mathematics"]),
	                          None, user_input)

	# ────────────────────────────────────────────────────────────────
	# 5. Launch
	# ────────────────────────────────────────────────────────────────
	if __name__ == "__main__":
	    # Enable the request queue first, then start the server.
	    launch_options = {"share": True, "ssr_mode": False}
	    queued_demo = demo.queue()
	    queued_demo.launch(**launch_options)