# ---------------------------------------------------------------
# Fathom-R1-14B ZeroGPU chat-demo (Gradio Blocks)
# ---------------------------------------------------------------
import gradio as gr
import spaces
import torch, re, uuid, tiktoken
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          TextIteratorStreamer)
from threading import Thread
# ────────────────────────────────────────────────────────────────
# 1. Load the model on the single GPU supplied by ZeroGPU
#    (4-bit to stay well below the 24 GB VRAM of an A10G)
# ────────────────────────────────────────────────────────────────
model_name = "FractalAIResearch/Fathom-R1-14B"
try:
    # 1-line 4-bit loading (needs bitsandbytes, already in the HF Space image)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        load_in_4bit=True,
        trust_remote_code=True
    )
except Exception:
    # fall back to fp16 if 4-bit loading isn't available
    # (bitsandbytes missing, or the kwarg unsupported by this transformers version)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
device = next(model.parameters()).device   # usually cuda:0
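# Newer transformers versions deprecate the load_in_4bit shortcut in favour of
# an explicit quantization config; a minimal sketch of the equivalent call,
# assuming bitsandbytes is installed:
#
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name,
#       device_map="auto",
#       quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#       trust_remote_code=True,
#   )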
# ────────────────────────────────────────────────────────────────
# 2. Helpers
# ────────────────────────────────────────────────────────────────
def format_math(text: str) -> str:
    "Replace [...] with $$...$$ and \\(...\\) with $...$ for nicer math rendering"
    text = re.sub(r"\[(.*?)\]", r"$$\1$$", text, flags=re.DOTALL)
    return text.replace(r"\(", "$").replace(r"\)", "$")
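# Example (hypothetical input, for illustration only):
#   format_math(r"Area: [A = \pi r^2] and \(x\)")
#   -> "Area: $$A = \pi r^2$$ and $x$"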
def generate_conversation_id() -> str:
    return str(uuid.uuid4())[:8]

# tiktoken: kept only to count tokens during streaming
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
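# Note: the gpt-3.5-turbo encoding only approximates Fathom-R1's tokenisation;
# the model's own tokenizer would give an exact count, e.g.
#   len(tokenizer.encode(chunk, add_special_tokens=False))
# The approximation is fine here because the count only feeds a safety budget.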
# Build a prompt that Fathom-R1 understands
BOS, SEP, EOS = "<|im_start|>", "<|im_sep|>", "<|im_end|>"
system_message = (
    "Your role as an assistant involves thoroughly exploring questions "
    "through a systematic thinking process before providing the final "
    "precise and accurate solutions. …"   # same text as before, truncated here
)

def build_prompt(history, user_msg: str) -> str:
    prompt = f"{BOS}system{SEP}{system_message}{EOS}"
    for m in history:
        role = m["role"]
        prompt += f"{BOS}{role}{SEP}{m['content']}{EOS}"
    prompt += f"{BOS}user{SEP}{user_msg}{EOS}{BOS}assistant{SEP}"
    return prompt
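# If the tokenizer ships a chat template, the same prompt can be produced with
# the built-in helper instead of hand-rolled markers (a sketch, assuming the
# template matches the <|im_start|>/<|im_sep|> format used above):
#
#   prompt = tokenizer.apply_chat_template(
#       [{"role": "system", "content": system_message},
#        *history,
#        {"role": "user", "content": user_msg}],
#       tokenize=False,
#       add_generation_prompt=True,
#   )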
# ────────────────────────────────────────────────────────────────
# 3. Generation (runs on the GPU for 60 s max per call)
# ────────────────────────────────────────────────────────────────
@spaces.GPU(duration=60)   # ZeroGPU: attach a GPU to this call (60 s budget)
def generate_response(user_message,
                      max_tokens,
                      temperature,
                      top_p,
                      history_state):
    """
    Takes exactly the same signature the rest of the UI expects:
    yields (visible_chatbot, history_state)
    """
    if not user_message.strip():
        # generator function: a bare `return value` would emit nothing
        yield history_state, history_state
        return
    prompt = build_prompt(history_state, user_message)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer,
                                    skip_prompt=True,
                                    skip_special_tokens=True)
    gen_kwargs = dict(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer
    )
    # run generate in a background thread so we can stream tokens as they arrive
    Thread(target=model.generate, kwargs=gen_kwargs).start()
    assistant_response = ""
    new_history = history_state + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": ""}
    ]
    # live-stream tokens to the UI
    tokens_seen = 0
    token_budget = int(max_tokens)   # belt-and-braces: max_new_tokens already caps generation
    for new_tok in streamer:
        assistant_response += new_tok
        tokens_seen += len(enc.encode(new_tok))
        new_history[-1]["content"] = format_math(assistant_response.strip())
        yield new_history, new_history
        if tokens_seen >= token_budget:
            break
    # final yield so the last chunk is rendered
    yield new_history, new_history
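# Quick sanity check outside the UI (a sketch; drains the generator and prints
# the final assistant turn):
#
#   for chat, _hist in generate_response("What is 2 + 2?", 256, 0.6, 0.95, []):
#       pass
#   print(chat[-1]["content"])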
# ────────────────────────────────────────────────────────────────
# 4. Demo UI (identical to the current one)
# ────────────────────────────────────────────────────────────────
example_messages = {
    "IIT-JEE 2024 Mathematics": (
        "A student appears for a quiz consisting of only true-false type "
        "questions and answers all the questions. …"
    ),
    "IIT-JEE 2025 Physics": (
        "A person sitting inside an elevator performs a weighing experiment …"
    ),
    "Goldman Sachs Interview Puzzle": (
        "Four friends need to cross a dangerous bridge at night …"
    ),
    "IIT-JEE 2025 Mathematics": (
        "Let S be the set of all seven-digit numbers that can be formed …"
    )
}
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # session-scoped states
    conversations_state = gr.State({})
    current_convo_id = gr.State(generate_conversation_id())
    history_state = gr.State([])

    # Header
    gr.HTML(
        """
        <div style="display:flex;align-items:center;gap:16px;margin-bottom:1em">
          <div style="background-color:black;padding:6px;border-radius:8px">
            <img src="https://framerusercontent.com/images/j0KjQQyrUfkFw4NwSaxQOLAoBU.png"
                 style="height:48px">
          </div>
          <h1 style="margin:0;">Fathom R1 14B Chatbot</h1>
        </div>
        """
    )

    # Sidebar
    with gr.Sidebar():
        gr.Markdown("## Conversations")
        conversation_selector = gr.Radio(choices=[], label="Select Conversation", interactive=True)
        new_convo_button = gr.Button("New Conversation +")
    with gr.Row():
        with gr.Column(scale=1):
            # intro text
            gr.Markdown(
                """
                Welcome to the Fathom R1 14B Chatbot, developed by **Fractal AI Research**!
                This model excels at reasoning tasks in mathematics and science …
                Once you close this demo window, all currently saved conversations will be lost.
                """
            )
            # Settings
            gr.Markdown("### Settings")
            max_tokens_slider = gr.Slider(6144, 32768, step=1024, value=16384, label="Max Tokens")
            with gr.Accordion("Advanced Settings", open=True):
                temperature_slider = gr.Slider(0.1, 2.0, value=0.6, label="Temperature")
                top_p_slider = gr.Slider(0.1, 1.0, value=0.95, label="Top-p")
            gr.Markdown(
                """
                We sincerely acknowledge [VIDraft](https://huggingface.co/VIDraft) …
                """
            )
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(label="Chat", type="messages", height=520)
            with gr.Row():
                user_input = gr.Textbox(label="User Input",
                                        placeholder="Type your question here…",
                                        lines=3, scale=8)
                with gr.Column():
                    submit_button = gr.Button("Send", variant="primary", scale=1)
                    clear_button = gr.Button("Clear", scale=1)
            # examples
            gr.Markdown("**Try these examples:**")
            with gr.Row():
                example1_button = gr.Button("IIT-JEE 2025 Mathematics")
                example2_button = gr.Button("IIT-JEE 2025 Physics")
                example3_button = gr.Button("Goldman Sachs Interview Puzzle")
                example4_button = gr.Button("IIT-JEE 2024 Mathematics")
    # ───────── conversation-management helpers ─────────
    def update_conversation_list(conversations):
        return [conversations[cid]["title"] for cid in conversations]

    def start_new_conversation(conversations):
        new_id = generate_conversation_id()
        conversations[new_id] = {"title": f"New Conversation {new_id}", "messages": []}
        return new_id, [], gr.update(choices=update_conversation_list(conversations),
                                     value=conversations[new_id]["title"]), conversations

    def load_conversation(selected_title, conversations):
        for cid, convo in conversations.items():
            if convo["title"] == selected_title:
                return cid, convo["messages"], convo["messages"]
        # unknown title: leave every output unchanged
        # (reading gr.State.value here would only return the initial value)
        return gr.update(), gr.update(), gr.update()
    # main "send" wrapper: keeps the conversations dict in sync
    def send_message(user_message, max_tokens, temperature, top_p,
                     convo_id, history, conversations):
        if convo_id not in conversations:
            title = " ".join(user_message.strip().split()[:5])
            conversations[convo_id] = {"title": title, "messages": history}
        if conversations[convo_id]["title"].startswith("New Conversation"):
            conversations[convo_id]["title"] = " ".join(user_message.strip().split()[:5])
        # call the streaming generator and forward its yields
        for updated_history, new_history in generate_response(
                user_message, max_tokens, temperature, top_p, history):
            conversations[convo_id]["messages"] = new_history
            yield (updated_history, new_history,
                   gr.update(choices=update_conversation_list(conversations),
                             value=conversations[convo_id]["title"]),
                   conversations)
    # ───────── UI-to-function wiring ─────────
    submit_button.click(
        fn=send_message,
        inputs=[user_input, max_tokens_slider, temperature_slider, top_p_slider,
                current_convo_id, history_state, conversations_state],
        outputs=[chatbot, history_state, conversation_selector, conversations_state],
        concurrency_limit=16
    ).then(
        fn=lambda: gr.update(value=""),
        inputs=None,
        outputs=user_input
    )
    clear_button.click(fn=lambda: ([], []), inputs=None,
                       outputs=[chatbot, history_state])
    new_convo_button.click(fn=start_new_conversation,
                           inputs=[conversations_state],
                           outputs=[current_convo_id, history_state,
                                    conversation_selector, conversations_state])
    conversation_selector.change(fn=load_conversation,
                                 inputs=[conversation_selector, conversations_state],
                                 outputs=[current_convo_id, history_state, chatbot])

    # example buttons
    example1_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2025 Mathematics"]),
                          None, user_input)
    example2_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2025 Physics"]),
                          None, user_input)
    example3_button.click(lambda: gr.update(value=example_messages["Goldman Sachs Interview Puzzle"]),
                          None, user_input)
    example4_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2024 Mathematics"]),
                          None, user_input)
# ────────────────────────────────────────────────────────────────
# 5. Launch
# ────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    demo.queue().launch(share=True, ssr_mode=False)
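# Note: on Hugging Face Spaces the app is already publicly served, so
# share=True is effectively a no-op there; it only matters for local runs.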