import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import gradio as gr
import threading

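# Model repo on the Hugging Face Hub. The hand-built prompt below assumes a
# ChatML-style template (<|im_start|> / <|im_end|> markers).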
MODEL_NAME = "Xerv-AI/MAXWELL"

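# Load tokenizer and weights fully onto the CPU in float32 precision.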
| print("Loading model on CPU... this may take a few minutes.") |
| tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) |
| model = AutoModelForCausalLM.from_pretrained( |
| MODEL_NAME, |
| device_map="cpu", |
| torch_dtype=torch.float32 |
| ) |
|
|
| |
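# Build a ChatML prompt from the running chat history, generate on a background
# thread, and stream the reply back to the UI token by token.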
def stream_maxwell(message, history):
    # `history` arrives as a list of (user, assistant) pairs (Gradio's tuple-style history).
    prompt = "<|im_start|>system\nYou are Maxwell, a highly analytical STEM assistant. Keep your responses very direct and to the point. Wrap your internal thought process in <reasoning> tags.<|im_end|>\n"

    for user_msg, assistant_msg in history:
        prompt += f"<|im_start|>user\n{user_msg}<|im_end|>\n<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"

    prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

    inputs = tokenizer([prompt], return_tensors="pt").to("cpu")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # generate() blocks until completion, so run it on a worker thread and
    # read tokens off the streamer as they arrive.
    gen_kwargs = dict(
        **inputs,
        max_new_tokens=512,
        temperature=0.3,
        do_sample=True,
        streamer=streamer,
    )

    thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    # Accumulate streamed text, rewriting the model's <reasoning> tags into a
    # collapsible <details> block as they appear. str.replace is a no-op when
    # the tag is absent, so no membership check is needed.
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        display_text = partial_text.replace(
            "<reasoning>", "\n\n<details><summary><b>🔍 Internal Trace</b></summary><i>"
        ).replace("</reasoning>", "</i></details>\n\n")
        yield display_text

    thread.join()

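# Dark-theme CSS; the details/summary rules style the collapsible reasoning trace.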
custom_css = """
footer {visibility: hidden !important;}
.gradio-container {background-color: #121212 !important; color: white !important;}
details { background: #1A1A1A; border-left: 2px solid #3b82f6; padding: 10px; margin: 10px 0; color: #A0A0A0; }
summary { cursor: pointer; color: #5c94ff; font-weight: bold; }
"""

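# Wrap ChatInterface in Blocks so the custom CSS and theme apply to the whole page.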
with gr.Blocks(css=custom_css, theme=gr.themes.Default(primary_hue="blue", neutral_hue="zinc")) as demo:
    gr.ChatInterface(
        fn=stream_maxwell,
        title="M. (CPU Mode)",
        description="The computational throne is currently on backup power (CPU).",
    )

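# Enable request queueing explicitly; streamed (generator) responses rely on it
# in older Gradio releases, and it is harmless where queueing is already the default.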
if __name__ == "__main__":
    demo.queue().launch()
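
# Usage sketch (assumes torch, transformers, and gradio are installed, and that
# this file is saved as e.g. app.py -- the filename is illustrative):
#   python app.py
# then open the local URL Gradio prints in a browser.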