import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import gradio as gr
import threading
# --- 1. MODEL SETUP (CPU COMPATIBLE) ---
MODEL_NAME = "Xerv-AI/MAXWELL"
print("Loading model on CPU... this may take a few minutes.")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
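# Full float32 on CPU: float16 kernels are generally slow or unsupported outside CUDA.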
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="cpu",
    torch_dtype=torch.float32,
)
# --- 2. INFERENCE LOGIC ---
def stream_maxwell(message, history):
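    # Build a ChatML prompt: system message, prior turns from history, then the new user message.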
prompt = f"<|im_start|>system\nYou are Maxwell, a highly analytical STEM assistant. Keep your responses very direct and to the point. Wrap your internal thought process in <reasoning> tags.<|im_end|>\n"
for user_msg, assistant_msg in history:
prompt += f"<|im_start|>user\n{user_msg}<|im_end|>\n<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"
prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    inputs = tokenizer([prompt], return_tensors="pt").to("cpu")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = dict(
        **inputs,
        max_new_tokens=512,
        temperature=0.3,
        do_sample=True,
        streamer=streamer,
    )
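    # model.generate() blocks until done, so run it on a worker thread and consume the streamer here.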
    thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()
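    # Accumulate chunks and re-render on each one, folding the model's <reasoning> span into a collapsible HTML block.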
partial_text = ""
for new_text in streamer:
partial_text += new_text
display_text = partial_text
if "<reasoning>" in display_text:
display_text = display_text.replace("<reasoning>", "\n\n<details><summary><b>🔍 Internal Trace</b></summary><i>")
if "</reasoning>" in display_text:
display_text = display_text.replace("</reasoning>", "</i></details>\n\n")
yield display_text
# --- 3. UI DESIGN (Fixed for Gradio 4.0+) ---
custom_css = """
footer {visibility: hidden !important;}
.gradio-container {background-color: #121212 !important; color: white !important;}
details { background: #1A1A1A; border-left: 2px solid #3b82f6; padding: 10px; margin: 10px 0; color: #A0A0A0; }
summary { cursor: pointer; color: #5c94ff; font-weight: bold; }
"""
# Wrap ChatInterface in Blocks to apply the CSS
with gr.Blocks(css=custom_css, theme=gr.themes.Default(primary_hue="blue", neutral_hue="zinc")) as demo:
    gr.ChatInterface(
        fn=stream_maxwell,
        title="M. (CPU Mode)",
        description="The computational throne is currently on backup power (CPU).",
    )
if __name__ == "__main__":
    demo.queue().launch()