FractalAIR committed on
Commit f14967b · verified · 1 Parent(s): e2eaf4a

Update app.py

Files changed (1)
  1. app.py +282 -94
app.py CHANGED
@@ -1,108 +1,296 @@
- # app.py
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
- from threading import Thread
  import gradio as gr
  import spaces
-
- MODEL_NAME = "FractalAIResearch/Fathom-R1-14B"
-
- @spaces.GPU
- class Chatbot:
-     def __init__(self):
-         print("⏳ Loading model...")
-         self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
-         self.model = AutoModelForCausalLM.from_pretrained(
-             MODEL_NAME,
-             torch_dtype=torch.bfloat16,
-             device_map="auto",
-             trust_remote_code=True,
-         )
-         self.model.eval()
-         print("✅ Model loaded!")
-
-     def chat(self, messages, temperature, max_new_tokens, top_p, repetition_penalty):
-         # Format messages into prompt
-         prompt = self._format_messages(messages)
-         input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.model.device)
-
-         streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
-         generation_kwargs = dict(
-             input_ids=input_ids,
-             streamer=streamer,
-             max_new_tokens=max_new_tokens,
-             do_sample=True,
-             top_p=top_p,
-             temperature=temperature,
-             repetition_penalty=repetition_penalty,
-         )
-
-         thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
-         thread.start()
-
-         response = ""
-         for token in streamer:
-             response += token
-             yield response
-
-     def _format_messages(self, messages):
-         prompt = ""
-         for msg in messages:
-             if msg["role"] == "user":
-                 prompt += f"<|user|>\n{msg['content'].strip()}\n"
-             elif msg["role"] == "assistant":
-                 prompt += f"<|assistant|>\n{msg['content'].strip()}\n"
-         prompt += "<|assistant|>\n"
-         return prompt
-
- chatbot = Chatbot()
-
- # Chat state management
- def user_submit(user_message, history):
-     history = history + [{"role": "user", "content": user_message}, {"role": "assistant", "content": ""}]
-     return "", history, gr.update(visible=True)
-
- def generate(history, temperature, max_new_tokens, top_p, repetition_penalty):
-     response_gen = chatbot.chat(
-         history,
-         temperature=temperature,
-         max_new_tokens=max_new_tokens,
-         top_p=top_p,
-         repetition_penalty=repetition_penalty,
      )
-     partial = ""
-     for chunk in response_gen:
-         partial = chunk
-         history[-1]["content"] = partial
-         yield history, history
-
- def reset():
-     return [], []
-
- with gr.Blocks(css="footer {display: none !important;}") as demo:
-     gr.Markdown("<h1 align='center'>🧠 Fathom R1 14B Chatbot</h1>")
-     chatbot_ui = gr.Chatbot([], elem_id="chatbot", height=500, bubble_full_width=False)
-     state = gr.State([])
  with gr.Row():
-     with gr.Column(scale=6):
-         txt = gr.Textbox(placeholder="Ask a math question...", label="Your Message")
      with gr.Column(scale=1):
-         submit = gr.Button("Submit", variant="primary")
-         clear = gr.Button("Clear")

-     with gr.Accordion("Advanced settings", open=False):
-         temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
-         max_new_tokens = gr.Slider(64, 2048, step=64, value=512, label="Max New Tokens")
-         top_p = gr.Slider(0.1, 1.0, value=0.95, label="Top-p")
-         repetition_penalty = gr.Slider(1.0, 2.0, value=1.1, label="Repetition Penalty")
-
-     submit.click(user_submit, [txt, state], [txt, state, chatbot_ui], queue=False)\
-           .then(generate, [state, temperature, max_new_tokens, top_p, repetition_penalty], [chatbot_ui, state])
-
-     txt.submit(user_submit, [txt, state], [txt, state, chatbot_ui], queue=False)\
-        .then(generate, [state, temperature, max_new_tokens, top_p, repetition_penalty], [chatbot_ui, state])
-
-     clear.click(reset, outputs=[chatbot_ui, state])
-
- demo.queue().launch()
+ # ---------------------------------------------------------------
+ # Fathom-R1-14B ZeroGPU chat-demo (Gradio Blocks)
+ # ---------------------------------------------------------------
+
  import gradio as gr
  import spaces
+ import torch, re, uuid, tiktoken
+ from transformers import (AutoModelForCausalLM,
+                           AutoTokenizer,
+                           TextIteratorStreamer)
+ from threading import Thread
+
+ # ────────────────────────────────────────────────────────────────
+ # 1. Load the model on the single GPU supplied by ZeroGPU
+ #    (4-bit to stay well below the 24 GB VRAM of an A10G)
+ # ────────────────────────────────────────────────────────────────
+ model_name = "FractalAIResearch/Fathom-R1-14B"
+
+ try:
+     # one-line 4-bit loading (needs bitsandbytes, already in the HF Space image)
+     model = AutoModelForCausalLM.from_pretrained(
+         model_name,
+         device_map="auto",
+         load_in_4bit=True,
+         trust_remote_code=True
+     )
+ except RuntimeError:
+     # fall back to fp16 if 4-bit isn't available
+     model = AutoModelForCausalLM.from_pretrained(
+         model_name,
+         torch_dtype=torch.float16,
+         device_map="auto",
+         trust_remote_code=True
+     )
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ device = next(model.parameters()).device  # usually cuda:0
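A note on the 4-bit path: `load_in_4bit=True` is the legacy shortcut, and newer transformers releases spell the same thing through `BitsAndBytesConfig`. A minimal sketch of that variant (not part of this commit; assumes bitsandbytes is installed):

    from transformers import BitsAndBytesConfig
    # hypothetical equivalent of the load_in_4bit=True call above
    bnb_config = BitsAndBytesConfig(load_in_4bit=True,
                                    bnb_4bit_compute_dtype=torch.float16)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        quantization_config=bnb_config,
        trust_remote_code=True,
    )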
+
+
+ # ────────────────────────────────────────────────────────────────
+ # 2. Helpers
+ # ────────────────────────────────────────────────────────────────
+ def format_math(text: str) -> str:
+     """Rewrite [...] as $$...$$ and \\(...\\) as $...$ for nicer math rendering."""
+     text = re.sub(r"\[(.*?)\]", r"$$\1$$", text, flags=re.DOTALL)
+     return text.replace(r"\(", "$").replace(r"\)", "$")
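For reference, a quick sketch of what `format_math` does on a made-up string:

    format_math(r"Area: [ \pi r^2 ] for \( r = 2 \)")   # hypothetical input
    # -> "Area: $$ \pi r^2 $$ for $ r = 2 $"  (as printed)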
+
+
+ def generate_conversation_id() -> str:
+     return str(uuid.uuid4())[:8]
+
+
+ # tiktoken – we just keep it to count tokens during streaming
+ enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+
+
+ # Build a prompt that Fathom-R1 understands
+ BOS, SEP, EOS = "<|im_start|>", "<|im_sep|>", "<|im_end|>"
+
+ system_message = (
+     "Your role as an assistant involves thoroughly exploring questions "
+     "through a systematic thinking process before providing the final "
+     "precise and accurate solutions. …"  # same text you used before
+ )
+
+
+ def build_prompt(history, user_msg: str) -> str:
+     prompt = f"{BOS}system{SEP}{system_message}{EOS}"
+     for m in history:
+         role = m["role"]
+         prompt += f"{BOS}{role}{SEP}{m['content']}{EOS}"
+     prompt += f"{BOS}user{SEP}{user_msg}{EOS}{BOS}assistant{SEP}"
+     return prompt
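For a first turn (empty history) the string `build_prompt` produces looks like the following, on a single line, with the system text elided and QUESTION standing in for the user message:

    <|im_start|>system<|im_sep|>Your role as an assistant …<|im_end|><|im_start|>user<|im_sep|>QUESTION<|im_end|><|im_start|>assistant<|im_sep|>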
+
+
+ # ────────────────────────────────────────────────────────────────
+ # 3. Generation (runs on the GPU for 60 s max per call)
+ # ────────────────────────────────────────────────────────────────
+ @spaces.GPU(duration=60)
+ def generate_response(user_message,
+                       max_tokens,
+                       temperature,
+                       top_p,
+                       history_state):
+     """
+     Takes exactly the signature the rest of the UI expects:
+     yields (visible_chatbot, history_state).
+     """
+     if not user_message.strip():
+         # this is a generator function, so yield (not return) the unchanged state
+         yield history_state, history_state
+         return
+
+     prompt = build_prompt(history_state, user_message)
+     inputs = tokenizer(prompt, return_tensors="pt").to(device)
+
+     streamer = TextIteratorStreamer(tokenizer,
+                                     skip_prompt=True,
+                                     skip_special_tokens=True)
+
+     gen_kwargs = dict(
+         input_ids=inputs["input_ids"],
+         attention_mask=inputs["attention_mask"],
+         max_new_tokens=int(max_tokens),
+         temperature=float(temperature),
+         top_p=float(top_p),
+         do_sample=True,
+         eos_token_id=tokenizer.eos_token_id,
+         pad_token_id=tokenizer.eos_token_id,
+         streamer=streamer
      )
+
+     # run generate in a background thread – lets us stream tokens
+     Thread(target=model.generate, kwargs=gen_kwargs).start()
+
+     assistant_response = ""
+     new_history = history_state + [
+         {"role": "user", "content": user_message},
+         {"role": "assistant", "content": ""}
+     ]
+
+     # live-stream tokens to the UI
+     tokens_seen = 0
+     token_budget = int(max_tokens)
+
+     for new_tok in streamer:
+         assistant_response += new_tok
+         tokens_seen += len(enc.encode(new_tok))
+         new_history[-1]["content"] = format_math(assistant_response.strip())
+         yield new_history, new_history
+         if tokens_seen >= token_budget:
+             break
+
+     # final yield
+     yield new_history, new_history
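(A caveat on the budget check above: `enc` is a GPT-3.5 tokenizer, so `tokens_seen` only approximates the model's own token count; generation is really bounded by `max_new_tokens` in `gen_kwargs`, and the explicit `break` is just a safety cutoff for the UI loop.)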
+
+
+ # ────────────────────────────────────────────────────────────────
+ # 4. Demo UI – identical to your current one
+ # ────────────────────────────────────────────────────────────────
+ example_messages = {
+     "IIT-JEE 2024 Mathematics": (
+         "A student appears for a quiz consisting of only true-false type "
+         "questions and answers all the questions. …"
+     ),
+     "IIT-JEE 2025 Physics": (
+         "A person sitting inside an elevator performs a weighing experiment …"
+     ),
+     "Goldman Sachs Interview Puzzle": (
+         "Four friends need to cross a dangerous bridge at night …"
+     ),
+     "IIT-JEE 2025 Mathematics": (
+         "Let S be the set of all seven-digit numbers that can be formed …"
+     )
+ }
+
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     # session-scoped states
+     conversations_state = gr.State({})
+     current_convo_id = gr.State(generate_conversation_id())
+     history_state = gr.State([])
+
+     # Header
+     gr.HTML(
+         """
+         <div style="display:flex;align-items:center;gap:16px;margin-bottom:1em">
+           <div style="background-color:black;padding:6px;border-radius:8px">
+             <img src="https://framerusercontent.com/images/j0KjQQyrUfkFw4NwSaxQOLAoBU.png"
+                  style="height:48px">
+           </div>
+           <h1 style="margin:0;">Fathom R1 14B Chatbot</h1>
+         </div>
+         """
+     )
+
+     # Sidebar
+     with gr.Sidebar():
+         gr.Markdown("## Conversations")
+         conversation_selector = gr.Radio(choices=[], label="Select Conversation", interactive=True)
+         new_convo_button = gr.Button("New Conversation")
+
      with gr.Row():
          with gr.Column(scale=1):
+             # intro text
+             gr.Markdown(
+                 """
+                 Welcome to the Fathom R1 14B Chatbot, developed by **Fractal AI Research**!
+                 This model excels at reasoning tasks in mathematics and science …
+
+                 Once you close this demo window, all currently saved conversations will be lost.
+                 """
+             )
+
+             # Settings
+             gr.Markdown("### Settings")
+             max_tokens_slider = gr.Slider(6144, 32768, step=1024, value=16384, label="Max Tokens")
+             with gr.Accordion("Advanced Settings", open=True):
+                 temperature_slider = gr.Slider(0.1, 2.0, value=0.6, label="Temperature")
+                 top_p_slider = gr.Slider(0.1, 1.0, value=0.95, label="Top-p")
+
+             gr.Markdown(
+                 """
+                 We sincerely acknowledge [VIDraft](https://huggingface.co/VIDraft) …
+                 """
+             )
+
+         with gr.Column(scale=4):
+             chatbot = gr.Chatbot(label="Chat", type="messages", height=520)
+             with gr.Row():
+                 user_input = gr.Textbox(label="User Input",
+                                         placeholder="Type your question here…",
+                                         lines=3, scale=8)
+                 with gr.Column():
+                     submit_button = gr.Button("Send", variant="primary", scale=1)
+                     clear_button = gr.Button("Clear", scale=1)
+
+             # examples
+             gr.Markdown("**Try these examples:**")
+             with gr.Row():
+                 example1_button = gr.Button("IIT-JEE 2025 Mathematics")
+                 example2_button = gr.Button("IIT-JEE 2025 Physics")
+                 example3_button = gr.Button("Goldman Sachs Interview Puzzle")
+                 example4_button = gr.Button("IIT-JEE 2024 Mathematics")
+
+     # ───────── conversation-management helpers ──────────────────
+     def update_conversation_list(conversations):
+         return [conversations[cid]["title"] for cid in conversations]
+
+     def start_new_conversation(conversations):
+         new_id = generate_conversation_id()
+         conversations[new_id] = {"title": f"New Conversation {new_id}", "messages": []}
+         return new_id, [], gr.update(choices=update_conversation_list(conversations),
+                                      value=conversations[new_id]["title"]), conversations
+
+     def load_conversation(selected_title, conversations):
+         for cid, convo in conversations.items():
+             if convo["title"] == selected_title:
+                 return cid, convo["messages"], convo["messages"]
+         return current_convo_id.value, history_state.value, history_state.value
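For illustration, a hypothetical snapshot of the `conversations` dict these helpers maintain (the short ids come from `generate_conversation_id`, the title from the first five words of the opening message):

    # {"3fa2b41c": {"title": "Four friends need to cross",
    #               "messages": [{"role": "user", "content": "..."},
    #                            {"role": "assistant", "content": "..."}]}}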
+
+     # main "send" wrapper: keeps the conversations dict in sync
+     def send_message(user_message, max_tokens, temperature, top_p,
+                      convo_id, history, conversations):
+         if convo_id not in conversations:
+             title = " ".join(user_message.strip().split()[:5])
+             conversations[convo_id] = {"title": title, "messages": history}
+         if conversations[convo_id]["title"].startswith("New Conversation"):
+             conversations[convo_id]["title"] = " ".join(user_message.strip().split()[:5])
+
+         # call the streaming generator and forward its yields
+         for updated_history, new_history in generate_response(
+                 user_message, max_tokens, temperature, top_p, history):
+             conversations[convo_id]["messages"] = new_history
+             yield (updated_history, new_history,
+                    gr.update(choices=update_conversation_list(conversations),
+                              value=conversations[convo_id]["title"]),
+                    conversations)
+
+     # ───────── UI → functions wiring ────────────────────────────
+     submit_button.click(
+         fn=send_message,
+         inputs=[user_input, max_tokens_slider, temperature_slider, top_p_slider,
+                 current_convo_id, history_state, conversations_state],
+         outputs=[chatbot, history_state, conversation_selector, conversations_state],
+         concurrency_limit=16
+     ).then(
+         fn=lambda: gr.update(value=""),
+         inputs=None,
+         outputs=user_input
+     )
+
+     clear_button.click(fn=lambda: ([], []), inputs=None,
+                        outputs=[chatbot, history_state])
+
+     new_convo_button.click(fn=start_new_conversation,
+                            inputs=[conversations_state],
+                            outputs=[current_convo_id, history_state,
+                                     conversation_selector, conversations_state])
+
+     conversation_selector.change(fn=load_conversation,
+                                  inputs=[conversation_selector, conversations_state],
+                                  outputs=[current_convo_id, history_state, chatbot])
+
+     # example buttons
+     example1_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2025 Mathematics"]),
+                           None, user_input)
+     example2_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2025 Physics"]),
+                           None, user_input)
+     example3_button.click(lambda: gr.update(value=example_messages["Goldman Sachs Interview Puzzle"]),
+                           None, user_input)
+     example4_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2024 Mathematics"]),
+                           None, user_input)
+
+ # ────────────────────────────────────────────────────────────────
+ # 5. Launch
+ # ────────────────────────────────────────────────────────────────
+ if __name__ == "__main__":
+     demo.queue().launch(share=True, ssr_mode=False)
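For anyone reproducing this revision outside the Space, a rough sketch of the local setup (assumptions: a CUDA GPU is available, and the `spaces` decorator is designed to no-op when no ZeroGPU is attached):

    pip install torch transformers accelerate bitsandbytes tiktoken gradio spaces
    python app.py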