# ---------------------------------------------------------------
# Fathom-R1-14B ZeroGPU chat-demo (Gradio Blocks)
# ---------------------------------------------------------------
import gradio as gr
import spaces
import torch, re, uuid, tiktoken
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          TextIteratorStreamer)
from threading import Thread
# ────────────────────────────────────────────────────────────────
# 1. Load the model on the single GPU supplied by ZeroGPU
#    (4-bit to stay well below the 24 GB VRAM of an A10G)
# ────────────────────────────────────────────────────────────────
model_name = "FractalAIResearch/Fathom-R1-14B"
try:
    # 1-line 4-bit loading (needs bitsandbytes, already in the HF Space image)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        load_in_4bit=True,
        trust_remote_code=True
    )
except (ImportError, RuntimeError, ValueError):
    # fall back to fp16 if 4-bit isn't available
    # (bitsandbytes missing or the quantized load fails)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
tokenizer = AutoTokenizer.from_pretrained(model_name)
device = next(model.parameters()).device # usually cuda:0
# ────────────────────────────────────────────────────────────────
# 2. Helpers
# ────────────────────────────────────────────────────────────────
def format_math(text: str) -> str:
    """Replace [...] with $$...$$ and \\(...\\) with $...$ for nicer math rendering."""
    text = re.sub(r"\[(.*?)\]", r"$$\1$$", text, flags=re.DOTALL)
    return text.replace(r"\(", "$").replace(r"\)", "$")
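# Illustrative only (not called at import time): the regex turns bracketed
# display math into $$...$$ and inline \( \) into $...$, e.g.
#   format_math(r"area: [\pi r^2], radius \(r\)")
#   -> 'area: $$\pi r^2$$, radius $r$'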
def generate_conversation_id() -> str:
    return str(uuid.uuid4())[:8]
# tiktoken: we just keep it to count tokens during streaming
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
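# e.g. len(enc.encode("Hello world")) == 2; counts are only approximate for
# Fathom-R1 (it has its own tokenizer) but close enough for the rough token
# budget enforced in the streaming loop below.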
# Build a prompt that Fathom-R1 understands
BOS, SEP, EOS = "<|im_start|>", "<|im_sep|>", "<|im_end|>"
system_message = (
    "Your role as an assistant involves thoroughly exploring questions "
    "through a systematic thinking process before providing the final "
    "precise and accurate solutions. …"  # same text you used before
)
def build_prompt(history, user_msg: str) -> str:
    prompt = f"{BOS}system{SEP}{system_message}{EOS}"
    for m in history:
        role = m["role"]
        prompt += f"{BOS}{role}{SEP}{m['content']}{EOS}"
    prompt += f"{BOS}user{SEP}{user_msg}{EOS}{BOS}assistant{SEP}"
    return prompt
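# Sketch of the result for history=[] and user_msg="2+2?":
#   <|im_start|>system<|im_sep|>{system_message}<|im_end|><|im_start|>user<|im_sep|>2+2?<|im_end|><|im_start|>assistant<|im_sep|>
# i.e. a ChatML-style transcript left open at the assistant turn so the model
# keeps writing from there.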
# ────────────────────────────────────────────────────────────────
# 3. Generation (runs on the GPU for 60 s max per call)
# ────────────────────────────────────────────────────────────────
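# ZeroGPU attaches a GPU only for the duration of the decorated call;
# duration=60 caps each request at roughly 60 s of GPU time.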
@spaces.GPU(duration=60)
def generate_response(user_message,
                      max_tokens,
                      temperature,
                      top_p,
                      history_state):
    """
    Keeps exactly the signature the rest of the UI expects:
    yields (visible_chatbot, history_state).
    """
    if not user_message.strip():
        # yield (not return) so Gradio still receives an output for empty input
        yield history_state, history_state
        return
    prompt = build_prompt(history_state, user_message)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer,
                                    skip_prompt=True,
                                    skip_special_tokens=True)
    gen_kwargs = dict(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer
    )
    # run generate in a background thread so tokens can be streamed here
    Thread(target=model.generate, kwargs=gen_kwargs).start()
    assistant_response = ""
    new_history = history_state + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": ""}
    ]
    # live-stream tokens to the UI
    tokens_seen = 0
    token_budget = int(max_tokens)
    for new_tok in streamer:
        assistant_response += new_tok
        tokens_seen += len(enc.encode(new_tok))
        new_history[-1]["content"] = format_math(assistant_response.strip())
        yield new_history, new_history
        if tokens_seen >= token_budget:
            break
    # final yield so the UI holds the completed answer
    yield new_history, new_history
# ────────────────────────────────────────────────────────────────
# 4. Demo UI - identical to your current one
# ────────────────────────────────────────────────────────────────
example_messages = {
    "IIT-JEE 2024 Mathematics": (
        "A student appears for a quiz consisting of only true-false type "
        "questions and answers all the questions. …"
    ),
    "IIT-JEE 2025 Physics": (
        "A person sitting inside an elevator performs a weighing experiment …"
    ),
    "Goldman Sachs Interview Puzzle": (
        "Four friends need to cross a dangerous bridge at night …"
    ),
    "IIT-JEE 2025 Mathematics": (
        "Let S be the set of all seven-digit numbers that can be formed …"
    )
}
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # session-scoped states
    conversations_state = gr.State({})
    current_convo_id = gr.State(generate_conversation_id())
    history_state = gr.State([])
    # Header
    gr.HTML(
        """
        <div style="display:flex;align-items:center;gap:16px;margin-bottom:1em">
          <div style="background-color:black;padding:6px;border-radius:8px">
            <img src="https://framerusercontent.com/images/j0KjQQyrUfkFw4NwSaxQOLAoBU.png"
                 style="height:48px">
          </div>
          <h1 style="margin:0;">Fathom R1 14B Chatbot</h1>
        </div>
        """
    )
    # Sidebar
    with gr.Sidebar():
        gr.Markdown("## Conversations")
        conversation_selector = gr.Radio(choices=[], label="Select Conversation",
                                         interactive=True)
        new_convo_button = gr.Button("New Conversation ➕")
    with gr.Row():
        with gr.Column(scale=1):
            # intro text
            gr.Markdown(
                """
                Welcome to the Fathom R1 14B Chatbot, developed by **Fractal AI Research**!
                This model excels at reasoning tasks in mathematics and science …
                Once you close this demo window, all currently saved conversations will be lost.
                """
            )
            # Settings
            gr.Markdown("### Settings")
            max_tokens_slider = gr.Slider(6144, 32768, step=1024, value=16384,
                                          label="Max Tokens")
            with gr.Accordion("Advanced Settings", open=True):
                temperature_slider = gr.Slider(0.1, 2.0, value=0.6, label="Temperature")
                top_p_slider = gr.Slider(0.1, 1.0, value=0.95, label="Top-p")
            gr.Markdown(
                """
                We sincerely acknowledge [VIDraft](https://huggingface.co/VIDraft) …
                """
            )
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(label="Chat", type="messages", height=520)
            with gr.Row():
                user_input = gr.Textbox(label="User Input",
                                        placeholder="Type your question here…",
                                        lines=3, scale=8)
                with gr.Column():
                    submit_button = gr.Button("Send", variant="primary", scale=1)
                    clear_button = gr.Button("Clear", scale=1)
            # examples
            gr.Markdown("**Try these examples:**")
            with gr.Row():
                example1_button = gr.Button("IIT-JEE 2025 Mathematics")
                example2_button = gr.Button("IIT-JEE 2025 Physics")
                example3_button = gr.Button("Goldman Sachs Interview Puzzle")
                example4_button = gr.Button("IIT-JEE 2024 Mathematics")
    # ───────── conversation-management helpers ──────────────────
    def update_conversation_list(conversations):
        return [conversations[cid]["title"] for cid in conversations]

    def start_new_conversation(conversations):
        new_id = generate_conversation_id()
        conversations[new_id] = {"title": f"New Conversation {new_id}", "messages": []}
        return new_id, [], gr.update(choices=update_conversation_list(conversations),
                                     value=conversations[new_id]["title"]), conversations

    def load_conversation(selected_title, conversations):
        for cid, convo in conversations.items():
            if convo["title"] == selected_title:
                return cid, convo["messages"], convo["messages"]
        return current_convo_id.value, history_state.value, history_state.value
    # main "send" wrapper: keeps the conversations dict in sync
    def send_message(user_message, max_tokens, temperature, top_p,
                     convo_id, history, conversations):
        if convo_id not in conversations:
            title = " ".join(user_message.strip().split()[:5])
            conversations[convo_id] = {"title": title, "messages": history}
        if conversations[convo_id]["title"].startswith("New Conversation"):
            conversations[convo_id]["title"] = " ".join(user_message.strip().split()[:5])
        # call the streaming generator and forward its yields
        for updated_history, new_history in generate_response(
                user_message, max_tokens, temperature, top_p, history):
            conversations[convo_id]["messages"] = new_history
            yield (updated_history, new_history,
                   gr.update(choices=update_conversation_list(conversations),
                             value=conversations[convo_id]["title"]),
                   conversations)
    # ───────── UI → function wiring ─────────────────────────────
    submit_button.click(
        fn=send_message,
        inputs=[user_input, max_tokens_slider, temperature_slider, top_p_slider,
                current_convo_id, history_state, conversations_state],
        outputs=[chatbot, history_state, conversation_selector, conversations_state],
        concurrency_limit=16
    ).then(
        fn=lambda: gr.update(value=""),
        inputs=None,
        outputs=user_input
    )
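    # .then() fires only after send_message's generator is exhausted, so the
    # textbox is cleared once streaming has finished.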
    clear_button.click(fn=lambda: ([], []), inputs=None,
                       outputs=[chatbot, history_state])
    new_convo_button.click(fn=start_new_conversation,
                           inputs=[conversations_state],
                           outputs=[current_convo_id, history_state,
                                    conversation_selector, conversations_state])
    conversation_selector.change(fn=load_conversation,
                                 inputs=[conversation_selector, conversations_state],
                                 outputs=[current_convo_id, history_state, chatbot])
    # example buttons
    example1_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2025 Mathematics"]),
                          None, user_input)
    example2_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2025 Physics"]),
                          None, user_input)
    example3_button.click(lambda: gr.update(value=example_messages["Goldman Sachs Interview Puzzle"]),
                          None, user_input)
    example4_button.click(lambda: gr.update(value=example_messages["IIT-JEE 2024 Mathematics"]),
                          None, user_input)
# ────────────────────────────────────────────────────────────────
# 5. Launch
# ────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    demo.queue().launch(share=True, ssr_mode=False)