# NOTE(review): removed non-Python scrape residue that preceded this script
# (a "File size" header, repeated git commit hashes, and a 1-67 line-number
# gutter copied from the hosting page) — it was a syntax error at import time.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import gradio as gr
import threading

# --- 1. MODEL SETUP (CPU COMPATIBLE) ---
# Hugging Face Hub repo id of the checkpoint to load.
MODEL_NAME = "Xerv-AI/MAXWELL"

print("Loading model on CPU... this may take a few minutes.")
# Downloads (or reads from cache) the tokenizer and weights for MODEL_NAME.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="cpu",
    # float32 on CPU: half precision is not reliably accelerated there.
    torch_dtype=torch.float32 
)

# --- 2. INFERENCE LOGIC ---
def stream_maxwell(message, history):
    """Stream a ChatML-formatted response from the MAXWELL model.

    Args:
        message: Latest user message (str).
        history: Prior turns as (user_msg, assistant_msg) tuples, as passed
            by gr.ChatInterface in its default tuple format.

    Yields:
        Progressively longer response text, with <reasoning>...</reasoning>
        spans rewritten into a collapsible HTML <details> block for the UI.
    """
    # Build a ChatML prompt: system preamble, prior turns, then the new turn,
    # leaving the assistant header open for the model to complete.
    prompt = "<|im_start|>system\nYou are Maxwell, a highly analytical STEM assistant. Keep your responses very direct and to the point. Wrap your internal thought process in <reasoning> tags.<|im_end|>\n"
    for user_msg, assistant_msg in history:
        prompt += f"<|im_start|>user\n{user_msg}<|im_end|>\n<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"
    prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

    inputs = tokenizer([prompt], return_tensors="pt").to("cpu")
    # skip_prompt avoids echoing the input; special tokens (<|im_end|>) are dropped.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    gen_kwargs = dict(
        **inputs,
        max_new_tokens=512,
        temperature=0.3,
        do_sample=True,
        streamer=streamer,
    )

    # generate() blocks until completion, so run it on a worker thread and
    # consume the token stream here on the request thread.
    thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    try:
        partial_text = ""
        for new_text in streamer:
            partial_text += new_text
            # Rewrite reasoning tags into a collapsible block. Re-running
            # replace() on the growing text is safe: the replacement contains
            # no <reasoning> tag, so already-rewritten spans are untouched,
            # and replace() on an absent substring is a no-op (the original's
            # `in` pre-checks were redundant).
            display_text = partial_text.replace(
                "<reasoning>",
                "\n\n<details><summary><b>🔍 Internal Trace</b></summary><i>",
            ).replace("</reasoning>", "</i></details>\n\n")
            yield display_text
    finally:
        # BUG FIX: the original never joined the worker thread, leaking one
        # thread per request (and leaving generate() running if the client
        # disconnected mid-stream). The streamer is exhausted only when
        # generation finishes, so this join is brief on the normal path.
        thread.join()

# --- 3. UI DESIGN (Fixed for Gradio 4.0+) ---
# Dark-theme CSS injected into the Blocks wrapper below. The details/summary
# rules style the collapsible "Internal Trace" HTML that stream_maxwell yields.
custom_css = """
footer {visibility: hidden !important;}
.gradio-container {background-color: #121212 !important; color: white !important;}
details { background: #1A1A1A; border-left: 2px solid #3b82f6; padding: 10px; margin: 10px 0; color: #A0A0A0; }
summary { cursor: pointer; color: #5c94ff; font-weight: bold; }
"""

# Wrap ChatInterface in Blocks to apply the CSS
# (gr.ChatInterface itself does not take a `css` kwarg, so custom styling
# must come from an enclosing Blocks context).
with gr.Blocks(css=custom_css, theme=gr.themes.Default(primary_hue="blue", neutral_hue="zinc")) as demo:
    gr.ChatInterface(
        fn=stream_maxwell,
        title="M. (CPU Mode)",
        description="The computational throne is currently on backup power (CPU).",
    )

if __name__ == "__main__":
    # queue() enables generator-based streaming responses over the web UI.
    demo.queue().launch()