Spaces:

Xerv-AI
/

Qwen-ReasonBorn-Adapter

Running

App Files Files Community

Qwen-ReasonBorn-Adapter / app.py

Phase-Technologies

Create app.py

5899137 verified 2 days ago

raw

history blame contribute delete

16.5 kB

	import os
	# Fixes the Gradio Analytics crash bug on Colab/Spaces
	os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"

	import torch
	import gc
	import re
	import threading
	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
	from peft import PeftModel

	# ==========================================
	# 1. SMART PRE-LOAD MODELS (NO QUANTIZATION)
	# ==========================================
	# Keep the engine registry alive when this module is executed again inside
	# the same interpreter (for example a re-run notebook cell); otherwise start
	# with an empty registry.
	loaded_engines = globals().get("loaded_engines", {})

	# UI label -> Hugging Face repo ids for the base checkpoint and its adapter.
	MODELS_CONFIG = {
	    "ReasonBorn-Instruct": {
	        "base": "Qwen/Qwen2.5-3B-Instruct",
	        "adapter": "Phase-Technologies/ReasonBorn-Qwen-3B",
	    },
	    "ReasonBorn-LoRA": {
	        "base": "Qwen/Qwen2.5-3B",
	        "adapter": "Phase-Technologies/rb-qwen3b-16ds-lora",
	    },
	}

	if loaded_engines:
	    print("⚡ Models already detected in memory! Skipping load phase for instant boot.")
	else:
	    print("Initializing Xerv Systems... Pre-loading models for instant streaming.")

	    # Everything is pinned to one device; the original author notes that
	    # letting weights be offloaded triggers a PEFT KeyError.
	    target_device = "cpu"
	    if torch.cuda.is_available():
	        target_device = "cuda"
	    print(f"Targeting inference device: {target_device.upper()}")

	    for key, cfg in MODELS_CONFIG.items():
	        print(f"--- Loading {key} (Unquantized BF16) ---")

	        # The tokenizer is read from the adapter repo, not from the base repo.
	        tokenizer = AutoTokenizer.from_pretrained(cfg["adapter"])

	        # NOTE(review): trust_remote_code=True executes code shipped in the
	        # model repo — confirm it is actually required for these checkpoints.
	        base_model = AutoModelForCausalLM.from_pretrained(
	            cfg["base"],
	            torch_dtype=torch.bfloat16,
	            device_map={"": target_device},
	            trust_remote_code=True,
	        )

	        # Wrap the base model with its adapter. The adapter weights are
	        # attached, not merged into the base weights.
	        model = PeftModel.from_pretrained(base_model, cfg["adapter"])
	        model.eval()

	        loaded_engines[key] = {"model": model, "tokenizer": tokenizer}

	    print("✅ Both Reasoning Engines successfully loaded and ready.")

	# ==========================================
	# 2. BULLETPROOF LATEX & TAG PARSER
	# ==========================================
	def _render_reasoning_tags(text):
	    """Rewrite ReasonBorn structural tags as emoji-labelled plain text.

	    Covers <plan>, <reasoning>, <step> (with or without index="N") and
	    <verify>. <conclusion> tags are deliberately left alone so the caller
	    can split the answer on them.
	    """
	    text = text.replace("<plan>", "🔹 PLAN:\n").replace("</plan>", "\n")
	    text = text.replace("<reasoning>", "\n").replace("</reasoning>", "\n")
	    text = re.sub(
	        r"<step(?:\s+index=\"(\d+)\")?>",
	        lambda m: f"🔸 STEP {m.group(1)}: " if m.group(1) else "🔸 STEP: ",
	        text,
	    )
	    text = text.replace("</step>", "\n")
	    text = text.replace("<verify>", "✅ VERIFY: ").replace("</verify>", "\n")
	    return text


	def format_output_with_latex_support(text):
	    """Turn raw model output into the final chat-bubble markup.

	    Args:
	        text: Complete generated text, possibly containing <plan>,
	            <reasoning>, <step>, <verify> and <conclusion> tags and
	            LaTeX written with \\( \\) / \\[ \\] delimiters.

	    Returns:
	        If a <conclusion> tag is present: a collapsible <details> block
	        holding everything before the tag, followed by the conclusion text
	        (up to </conclusion>, or to the end of the text when generation was
	        cut off before the closing tag). Anything after </conclusion> is
	        dropped. Otherwise: the whole text with the structural tags
	        rewritten in place.
	    """
	    # Map LaTeX's own delimiters onto the $ / $$ pairs the chatbot renders.
	    for latex_delim, rendered_delim in (
	        (r"\(", "$"),
	        (r"\)", "$"),
	        (r"\[", "$$"),
	        (r"\]", "$$"),
	    ):
	        text = text.replace(latex_delim, rendered_delim)

	    # The alternation must be an unescaped "|": match up to the closing tag,
	    # or up to end-of-text when the closing tag was never generated.
	    conclusion_match = re.search(
	        r"<conclusion>(.*?)(?:</conclusion>|$)", text, re.DOTALL
	    )

	    if conclusion_match is None:
	        # Generation stopped before any conclusion: format what we have.
	        return _render_reasoning_tags(text)

	    conclusion_text = conclusion_match.group(1).strip()
	    thinking_text = _render_reasoning_tags(text[:conclusion_match.start()].strip())

	    # Wrap the reasoning in a collapsible HTML details block.
	    return (
	        f"<details>\n"
	        f"<summary>🧠 View Thinking Process</summary>\n\n"
	        f"{thinking_text}\n\n"
	        f"</details>\n\n"
	        f"🎯 CONCLUSION:\n\n{conclusion_text}"
	    )

	# ==========================================
	# 3. REAL-TIME STREAMING GENERATOR
	# ==========================================
	def _render_live_text(raw_text):
	    """Cheap per-chunk formatting of the partial output shown while streaming."""
	    live_text = raw_text
	    # Map LaTeX's own delimiters onto the $ / $$ pairs the chatbot renders.
	    for latex_delim, rendered_delim in (
	        (r"\(", "$"),
	        (r"\)", "$"),
	        (r"\[", "$$"),
	        (r"\]", "$$"),
	    ):
	        live_text = live_text.replace(latex_delim, rendered_delim)
	    live_text = live_text.replace("<plan>", "🔹 PLAN:\n").replace("</plan>", "\n")
	    live_text = live_text.replace("<reasoning>", "\n").replace("</reasoning>", "\n")
	    live_text = re.sub(
	        r"<step(?:\s+index=\"(\d+)\")?>",
	        lambda m: f"🔸 STEP {m.group(1)}: " if m.group(1) else "🔸 STEP: ",
	        live_text,
	    )
	    live_text = live_text.replace("</step>", "\n")
	    live_text = live_text.replace("<verify>", "✅ VERIFY: ").replace("</verify>", "\n")
	    live_text = live_text.replace("<conclusion>", "\n\n🎯 CONCLUSION:\n\n").replace("</conclusion>", "")
	    return live_text


	def _history_content_as_text(content):
	    """Flatten one history entry's ``content`` into a plain string.

	    NOTE(review): some Gradio versions appear to deliver content as a list of
	    typed parts (presumably ``{"type": "text", "text": ...}``) instead of a
	    bare string — confirm against the installed version. Non-text parts are
	    ignored.
	    """
	    if isinstance(content, str):
	        return content
	    if isinstance(content, (list, tuple)):
	        pieces = []
	        for part in content:
	            if isinstance(part, str):
	                pieces.append(part)
	            elif isinstance(part, dict) and isinstance(part.get("text"), str):
	                pieces.append(part["text"])
	        return "".join(pieces)
	    return "" if content is None else str(content)


	def process_chat_stream(user_message, history, model_choice):
	    """Stream a ReasonBorn reply into the chatbot.

	    Handles Gradio's 'messages' format natively:
	    [{"role": "user", "content": "..."}, ...]

	    Args:
	        user_message: Text typed by the user. Blank input is a no-op.
	        history: Existing chat history (list of role/content dicts) or None.
	            The list is extended in place with the new user turn and the
	            assistant reply.
	        model_choice: Key into ``loaded_engines`` selecting model + tokenizer.

	    Yields:
	        4-tuples matching the event outputs: new textbox value (always ""),
	        chatbot update, hero-section update, suggestions-section update.
	        Any failure is reported in the assistant bubble as
	        "System Error: ..." instead of being raised.
	    """
	    if not user_message or not user_message.strip():
	        yield "", gr.update(), gr.update(), gr.update()
	        return

	    # Initialize history if empty and append the new user/assistant turns.
	    history = history or []
	    history.append({"role": "user", "content": user_message})
	    history.append({"role": "assistant", "content": ""})

	    # Yield immediately to update the UI (hide hero/suggestions, show chatbot).
	    yield "", gr.update(value=history, visible=True), gr.update(visible=False), gr.update(visible=False)

	    try:
	        engine = loaded_engines[model_choice]
	        model = engine["model"]
	        tokenizer = engine["tokenizer"]

	        # ChatML prompt. The markers must be the exact special tokens
	        # <|im_start|> / <|im_end|> (no backslashes) or the tokenizer will not
	        # recognise them as turn boundaries.
	        prompt_parts = [
	            "<|im_start|>system\nYou are ReasonBorn. Use <plan>, <reasoning> with <step> & <verify>, <conclusion> strictly.<|im_end|>\n"
	        ]

	        # Prior conversation, excluding the two entries appended above.
	        for msg in history[:-2]:
	            role = msg["role"]
	            content = _history_content_as_text(msg["content"])

	            if role == "user":
	                prompt_parts.append(f"<|im_start|>user\n{content}<|im_end|>\n")
	            elif role == "assistant":
	                # Remove only the UI wrapper tags added by the final formatting
	                # pass. A blanket "<.*?>" would also delete math such as
	                # "a < b and c > d" from the model's own earlier answers.
	                clean_content = re.sub(r"</?(?:details|summary)>", "", content)
	                prompt_parts.append(f"<|im_start|>assistant\n{clean_content}<|im_end|>\n")

	        # Current message, then open the assistant turn for generation.
	        prompt_parts.append(f"<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant\n")
	        prompt = "".join(prompt_parts)

	        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
	        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

	        generation_kwargs = dict(
	            **inputs,
	            max_new_tokens=1024,
	            temperature=0.2,
	            top_p=0.9,
	            repetition_penalty=1.1,
	            do_sample=True,
	            pad_token_id=tokenizer.eos_token_id,
	            eos_token_id=tokenizer.convert_tokens_to_ids("<|im_end|>"),
	            streamer=streamer,
	        )

	        generation_errors = []

	        def _generate():
	            # Runs in the worker thread. If generate() raises, the streamer
	            # would never receive its stop signal and the consumer loop below
	            # would block forever, so close the stream explicitly and hand
	            # the exception back to the main generator.
	            try:
	                model.generate(**generation_kwargs)
	            except Exception as exc:
	                generation_errors.append(exc)
	                streamer.end()

	        worker = threading.Thread(target=_generate)
	        worker.start()

	        accumulated_text = ""

	        # Stream chunks back to the UI as they are decoded.
	        for new_text in streamer:
	            accumulated_text += new_text
	            history[-1]["content"] = _render_live_text(accumulated_text) + " ⏳"
	            yield "", gr.update(value=history, visible=True), gr.update(visible=False), gr.update(visible=False)

	        worker.join()
	        if generation_errors:
	            raise generation_errors[0]

	        # Final formatting pass with the collapsible HTML wrapper.
	        history[-1]["content"] = format_output_with_latex_support(accumulated_text)
	        yield "", gr.update(value=history, visible=True), gr.update(visible=False), gr.update(visible=False)

	    except Exception as e:
	        # Top-level UI boundary: show the failure in the chat bubble.
	        history[-1]["content"] = f"System Error: {str(e)}"
	        yield "", gr.update(value=history, visible=True), gr.update(visible=False), gr.update(visible=False)

	    finally:
	        # Release memory on success and on failure alike.
	        if torch.cuda.is_available():
	            torch.cuda.empty_cache()
	        gc.collect()


	# ==========================================
	# 4. UI/UX: ADAPTIVE DARK/LIGHT MODE CSS
	# ==========================================
	# Stylesheet string passed to `demo.launch(css=CSS)` in the entry-point guard.
	# Unprefixed rules are the light-mode defaults; rules prefixed with `.dark`
	# override them (presumably Gradio adds the `dark` class in dark mode — confirm).
	# The selectors target hooks set in the layout below: `.main-wrap`,
	# `.input-row`, `.send-button` and `.sugg-btn` via elem_classes, `#chat-window`
	# via elem_id, and `.xerv-title` / `.greeting` / `.subtitle` in the hero HTML.
	# Everything between the triple quotes is runtime text, so no comments can be
	# added inside the literal other than CSS comments.
	CSS = """
	@import url('https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&display=swap');

	/* Global Typography & Layout */
	.gradio-container { font-family: 'Google Sans', sans-serif !important; }
	.main-wrap { max-width: 750px !important; margin: 0 auto !important; padding-bottom: 100px !important; }

	/* Hero Section */
	.xerv-title { font-size: 46px; font-weight: 700; letter-spacing: -1px; margin-top: 40px; margin-bottom: 8px;}
	.greeting { font-size: 18px; margin-bottom: 4px; opacity: 0.7;}
	.subtitle { font-size: 26px; font-weight: 500; margin-bottom: 30px;}

	/* Chat Window Base */
	#chat-window { height: 65vh !important; }

	/* User Bubble - Always Blue */
	.message.user { background: #2563eb !important; color: white !important; border-radius: 20px 20px 0 20px !important; padding: 14px 20px !important; font-size: 16px !important; }
	.message.user * { color: white !important; }

	/* Bot Bubble - Light Mode (Default) */
	.message.bot { background: #ffffff !important; color: #0f172a !important; border: 1px solid #e2e8f0 !important; border-radius: 20px 20px 20px 0 !important; padding: 16px 20px !important; font-size: 16px !important; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05) !important; }

	/* Bot Bubble - Dark Mode */
	.dark .message.bot { background: #1e293b !important; color: #f8fafc !important; border-color: #334155 !important; }

	/* Thinking Details Block - Light Mode */
	#chat-window details { background-color: #f8fafc !important; border: 1px solid #e2e8f0 !important; border-radius: 12px !important; padding: 14px !important; margin-bottom: 16px !important; box-shadow: inset 0 2px 4px 0 rgb(0 0 0 / 0.02) !important; transition: all 0.2s ease !important; }
	#chat-window summary { cursor: pointer !important; font-weight: 600 !important; font-size: 15px !important; user-select: none !important; outline: none !important; color: #334155 !important;}

	/* Thinking Details Block - Dark Mode */
	.dark #chat-window details { background-color: #0f172a !important; border-color: #1e293b !important; color: #cbd5e1 !important; }
	.dark #chat-window summary { color: #94a3b8 !important; }

	#chat-window details[open] summary { margin-bottom: 12px !important; padding-bottom: 12px !important; border-bottom: 1px solid rgba(128,128,128,0.2) !important; }

	/* Input Row - Adaptive */
	.input-row { align-items: center !important; border-radius: 30px !important; padding: 6px 14px !important; border: 1px solid #cbd5e1 !important; transition: all 0.2s; box-shadow: 0 4px 6px -1px rgba(0,0,0,0.05) !important; background: #f8fafc !important; }
	.dark .input-row { background: #1e293b !important; border-color: #334155 !important; }
	.input-row:focus-within { border-color: #3b82f6 !important; box-shadow: 0 4px 12px rgba(59, 130, 246, 0.15) !important; }
	.input-row textarea { background: transparent !important; border: none !important; box-shadow: none !important; font-size: 16px !important; }
	.input-row textarea:focus { outline: none !important; border: none !important; box-shadow: none !important; }

	/* Buttons */
	.send-button { background: #2563eb !important; color: white !important; border-radius: 50% !important; height: 42px !important; width: 42px !important; min-width: 42px !important; padding: 0 !important; border: none !important; display: flex; justify-content: center; align-items: center; }
	.send-button:disabled { background: #94a3b8 !important; }
	.dark .send-button:disabled { background: #334155 !important; color: #64748b !important; }

	/* Suggestions - Adaptive */
	.sugg-btn { background: #ffffff !important; border: 1px solid #e2e8f0 !important; border-radius: 16px !important; padding: 16px 20px !important; text-align: left !important; justify-content: flex-start !important; font-size: 16px !important; color: #1e293b !important; box-shadow: 0 1px 2px rgba(0,0,0,0.05) !important; margin-bottom: 12px !important; cursor: pointer !important; }
	.dark .sugg-btn { background: #1e293b !important; border-color: #334155 !important; color: #f8fafc !important; }
	.sugg-btn:hover { opacity: 0.8; }

	/* LaTeX Fixes */
	.katex-display { margin: 1em 0 !important; overflow-x: auto !important; overflow-y: hidden !important; padding: 8px 0 !important; }
	.katex { font-size: 1.1em !important; }
	footer, .label-wrap { display: none !important; }
	"""

	with gr.Blocks() as demo:
	    # NOTE(review): the nesting below is reconstructed from syntax; the
	    # original indentation was not available. Confirm the layout renders as
	    # intended.
	    with gr.Column(elem_classes="main-wrap"):
	        # Landing hero; hidden by process_chat_stream once a chat starts.
	        with gr.Column(elem_id="hero-section") as hero:
	            gr.HTML("""
	<div class="xerv-title">Xerv</div>
	<div class="greeting">Hey there!</div>
	<div class="subtitle">Let's make something happen.</div>
	""")

	        # One-click starter prompts; hidden together with the hero.
	        with gr.Column(elem_id="suggestions-section") as suggestions:
	            btn1 = gr.Button(r"🔍 Prove that $\sqrt{2}$ is irrational", elem_classes="sugg-btn")
	            btn2 = gr.Button(r"🧮 Solve $x^3 - 6x^2 + 11x - 6 = 0$", elem_classes="sugg-btn")
	            btn3 = gr.Button(r"📊 Explain eigenvalues with a matrix example", elem_classes="sugg-btn")

	        # Conversation view; starts hidden and is revealed on the first message.
	        # `type="messages"` is intentionally not passed: the original author
	        # removed it to resolve a TypeError in Gradio 6.0.
	        chatbot = gr.Chatbot(
	            visible=False,
	            elem_id="chat-window",
	            show_label=False,
	            avatar_images=(None, None),
	            sanitize_html=False,
	            latex_delimiters=[
	                {"left": "$$", "right": "$$", "display": True},
	                {"left": "$", "right": "$", "display": False},
	            ],
	        )

	        with gr.Column():
	            with gr.Row(elem_classes="input-row"):
	                chat_input = gr.Textbox(
	                    show_label=False,
	                    placeholder="Ask Xerv to solve complex math...",
	                    lines=1,
	                    max_lines=4,
	                    scale=8,
	                )
	                send_btn = gr.Button("🚀", elem_classes="send-button", scale=1)

	            model_selector = gr.Radio(
	                choices=list(MODELS_CONFIG.keys()),
	                value="ReasonBorn-Instruct",
	                label="Reasoning Engine",
	                container=False,
	            )

	    # --- Wire up Interactivity ---
	    def _stream_io():
	        """Fresh inputs/outputs lists shared by every streaming event."""
	        return {
	            "inputs": [chat_input, chatbot, model_selector],
	            "outputs": [chat_input, chatbot, hero, suggestions],
	        }

	    def _prefill(prompt_text):
	        """Build a zero-argument callback that returns a fixed prompt."""
	        return lambda: prompt_text

	    # Enter key and send button run the same streaming handler.
	    chat_input.submit(process_chat_stream, **_stream_io())
	    send_btn.click(process_chat_stream, **_stream_io())

	    # Suggestion buttons first drop their prompt into the textbox, then run
	    # the normal streaming handler.
	    for suggestion_button, suggestion_prompt in (
	        (btn1, r"Prove that $\sqrt{2}$ is irrational using step-by-step logic"),
	        (btn2, r"Solve $x^3 - 6x^2 + 11x - 6 = 0$ and verify roots"),
	        (btn3, r"Explain eigenvalues in linear algebra with an example matrix"),
	    ):
	        suggestion_button.click(
	            fn=_prefill(suggestion_prompt),
	            outputs=[chat_input],
	        ).then(
	            fn=process_chat_stream,
	            **_stream_io(),
	        )

	if __name__ == "__main__":
	    # The adaptive stylesheet and theme are supplied at launch time; the
	    # original author replaced a manual light-mode JavaScript hook with this.
	    launch_options = {
	        "share": True,
	        "debug": True,
	        "css": CSS,
	        "theme": gr.themes.Default(),
	    }
	    demo.launch(**launch_options)