# Hugging Face Space (status: Running)
| import os | |
| # Fixes the Gradio Analytics crash bug on Colab/Spaces | |
| os.environ["GRADIO_ANALYTICS_ENABLED"] = "False" | |
| import torch | |
| import gc | |
| import re | |
| import threading | |
| import gradio as gr | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer | |
| from peft import PeftModel | |
| # ========================================== | |
| # 1. SMART PRE-LOAD MODELS (NO QUANTIZATION) | |
| # ========================================== | |
# Keep engines that an earlier execution of this script left in the interpreter
# (the "already detected in memory" path below); otherwise start empty.
loaded_engines = globals().get("loaded_engines", {})

# Display name -> Hub repo ids: "base" is the causal LM checkpoint, "adapter"
# is the PEFT adapter repo (the tokenizer is also read from the adapter repo).
MODELS_CONFIG = {
    "ReasonBorn-Instruct": {
        "base": "Qwen/Qwen2.5-3B-Instruct",
        "adapter": "Phase-Technologies/ReasonBorn-Qwen-3B",
    },
    "ReasonBorn-LoRA": {
        "base": "Qwen/Qwen2.5-3B",
        "adapter": "Phase-Technologies/rb-qwen3b-16ds-lora",
    }
}

if loaded_engines:
    print("โก Models already detected in memory! Skipping load phase for instant boot.")
else:
    print("Initializing Xerv Systems... Pre-loading models for instant streaming.")

    # Everything goes on one device; the original author notes that PEFT
    # raised an offload KeyError when the model was spread across devices.
    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Targeting inference device: {target_device.upper()}")

    for key, cfg in MODELS_CONFIG.items():
        print(f"--- Loading {key} (Unquantized BF16) ---")
        tokenizer = AutoTokenizer.from_pretrained(cfg["adapter"])

        # NOTE(review): trust_remote_code=True allows the Hub repo to run its
        # own Python while loading -- confirm these checkpoints need it.
        base_model = AutoModelForCausalLM.from_pretrained(
            cfg["base"],
            torch_dtype=torch.bfloat16,
            device_map={"": target_device},
            trust_remote_code=True
        )

        # Wrap the base model with its adapter. The adapter weights are
        # attached, not merged into the base weights.
        model = PeftModel.from_pretrained(base_model, cfg["adapter"])
        model.eval()

        loaded_engines[key] = {"model": model, "tokenizer": tokenizer}

    print("โ Both Reasoning Engines successfully loaded and ready.")
| # ========================================== | |
| # 2. BULLETPROOF LATEX & TAG PARSER | |
| # ========================================== | |
# "\(" / "\)" and "\[" / "\]" -- with one backslash (what LaTeX actually uses)
# or two (an escaped spelling that some generations contain).
_INLINE_MATH_DELIM_RE = re.compile(r"\\{1,2}[()]")
_DISPLAY_MATH_DELIM_RE = re.compile(r"\\{1,2}[\[\]]")
# Opening <step> tag with an optional index="N" attribute.
_STEP_OPEN_TAG_RE = re.compile(r"<step(?:\s+index=\"(\d+)\")?>")
# Conclusion body; the closing tag is optional so a truncated generation
# still yields whatever conclusion text was produced.
_CONCLUSION_RE = re.compile(r"<conclusion>(.*?)(?:</conclusion>|$)", re.DOTALL)


def _step_heading(match):
    """Build the markdown heading for one opening <step> tag."""
    index = match.group(1)
    return f"**๐ธ STEP {index}:** " if index else "**๐ธ STEP:** "


def _render_reasoning_tags(text):
    """Replace <plan>/<reasoning>/<step>/<verify> tags with markdown."""
    text = text.replace("<plan>", "**๐น PLAN:**\n").replace("</plan>", "\n")
    text = text.replace("<reasoning>", "\n").replace("</reasoning>", "\n")
    text = _STEP_OPEN_TAG_RE.sub(_step_heading, text)
    text = text.replace("</step>", "\n")
    text = text.replace("<verify>", "**โ VERIFY:** ").replace("</verify>", "\n")
    return text


def format_output_with_latex_support(text):
    """Turn raw model output into the markdown/HTML shown in the chat.

    LaTeX delimiters are normalised to ``$`` (inline) and ``$$`` (display).
    When a ``<conclusion>`` tag is present, everything before it is rendered
    as the thinking process inside a collapsible ``<details>`` block and the
    conclusion is shown below it; text after ``</conclusion>`` is discarded.
    Without a conclusion the tags are converted in place.

    Args:
        text: Raw generated text. Empty or ``None`` input yields ``""``.

    Returns:
        The formatted string.
    """
    if not text:
        return ""

    # The previous literal patterns only matched a doubled backslash, so the
    # ordinary "\(" ... "\)" and "\[" ... "\]" delimiters were left untouched.
    text = _INLINE_MATH_DELIM_RE.sub("$", text)
    text = _DISPLAY_MATH_DELIM_RE.sub("$$", text)

    conclusion_match = _CONCLUSION_RE.search(text)
    if conclusion_match is None:
        # Generation stopped before a conclusion: just convert the tags.
        return _render_reasoning_tags(text)

    conclusion_text = conclusion_match.group(1).strip()
    thinking_text = _render_reasoning_tags(text[:conclusion_match.start()].strip())

    return (
        f"<details>\n"
        f"<summary>๐ง View Thinking Process</summary>\n\n"
        f"{thinking_text}\n\n"
        f"</details>\n\n"
        f"**๐ฏ CONCLUSION:**\n\n{conclusion_text}"
    )
| # ========================================== | |
| # 3. REAL-TIME STREAMING GENERATOR | |
| # ========================================== | |
# Compiled once: the live view is re-rendered for every streamed chunk.
# Delimiters are matched with one backslash (real LaTeX) or two (escaped).
_STREAM_INLINE_MATH_RE = re.compile(r"\\{1,2}[()]")
_STREAM_DISPLAY_MATH_RE = re.compile(r"\\{1,2}[\[\]]")
_STREAM_STEP_TAG_RE = re.compile(r"<step(?:\s+index=\"(\d+)\")?>")
# Only the wrapper tags this app injects into finished assistant messages.
# A blanket "<.*?>" would also delete text such as "a < b and c > d".
_STREAM_UI_TAG_RE = re.compile(r"</?(?:details|summary)\b[^>]*>")

_STREAM_SYSTEM_PROMPT = "<|im_start|>system\nYou are ReasonBorn. Use <plan>, <reasoning> with <step> & <verify>, <conclusion> strictly.<|im_end|>\n"


def _message_text(content):
    """Return the plain-text part of one chat message's content."""
    if isinstance(content, str):
        return content
    # NOTE(review): some Gradio versions appear to deliver structured content
    # (dicts / lists of parts) instead of a string -- confirm against the
    # installed version. Non-text parts contribute nothing to the prompt.
    if isinstance(content, dict):
        return str(content.get("text", ""))
    if isinstance(content, (list, tuple)):
        return "".join(_message_text(part) for part in content)
    return "" if content is None else str(content)


def _build_prompt(prior_messages, user_message):
    """Assemble the ChatML prompt from earlier turns plus the new message."""
    parts = [_STREAM_SYSTEM_PROMPT]
    for msg in prior_messages:
        role = msg["role"]
        content = _message_text(msg["content"])
        if role == "user":
            parts.append(f"<|im_start|>user\n{content}<|im_end|>\n")
        elif role == "assistant":
            # Drop the collapsible-block markup so the model sees text only.
            clean_content = _STREAM_UI_TAG_RE.sub("", content)
            parts.append(f"<|im_start|>assistant\n{clean_content}<|im_end|>\n")
    parts.append(f"<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant\n")
    return "".join(parts)


def _render_live_text(raw_text):
    """Format partially generated text for display while streaming."""
    text = _STREAM_INLINE_MATH_RE.sub("$", raw_text)
    text = _STREAM_DISPLAY_MATH_RE.sub("$$", text)
    text = text.replace("<plan>", "**๐น PLAN:**\n").replace("</plan>", "\n")
    text = text.replace("<reasoning>", "\n").replace("</reasoning>", "\n")
    text = _STREAM_STEP_TAG_RE.sub(
        lambda m: f"**๐ธ STEP {m.group(1)}:** " if m.group(1) else "**๐ธ STEP:** ",
        text,
    )
    text = text.replace("</step>", "\n")
    text = text.replace("<verify>", "**โ VERIFY:** ").replace("</verify>", "\n")
    text = text.replace("<conclusion>", "\n\n**๐ฏ CONCLUSION:**\n\n").replace("</conclusion>", "")
    return text


def process_chat_stream(user_message, history, model_choice):
    """Stream a model reply into the chat, one UI update per generated chunk.

    Handles Gradio's 'messages' format: [{"role": "user", "content": "..."}, ...]

    Args:
        user_message: Text from the input box; blank input is ignored.
        history: Current chatbot value (list of role/content dicts) or None.
        model_choice: Key into ``loaded_engines`` selecting model + tokenizer.

    Yields:
        4-tuples for the outputs (chat_input, chatbot, hero, suggestions):
        the cleared input text, the chatbot update, and two updates that hide
        the hero and suggestion sections. Any failure, including one raised
        inside the generation thread, is shown as a "System Error" message in
        the last assistant bubble instead of propagating.
    """
    if not user_message or not user_message.strip():
        yield "", gr.update(), gr.update(), gr.update()
        return

    # Work on a copy so the caller's list is not mutated, then add the new
    # user turn and an empty assistant turn to be filled while streaming.
    history = list(history or [])
    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": ""})

    def ui_state():
        # Clear the textbox, show the chat, hide hero and suggestions.
        return (
            "",
            gr.update(value=history, visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
        )

    # Yield immediately so the user's message appears before generation starts.
    yield ui_state()

    try:
        engine = loaded_engines[model_choice]
        model = engine["model"]
        tokenizer = engine["tokenizer"]

        # history[:-2] excludes the two entries appended above.
        prompt = _build_prompt(history[:-2], user_message)

        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

        generation_kwargs = dict(
            **inputs,
            max_new_tokens=1024,
            temperature=0.2,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.convert_tokens_to_ids("<|im_end|>"),
            streamer=streamer
        )

        generation_errors = []

        def run_generation():
            # generate() runs in a worker thread so this generator can consume
            # the streamer. If it raises, close the stream explicitly;
            # otherwise the consumer loop below would wait forever.
            try:
                model.generate(**generation_kwargs)
            except Exception as exc:
                generation_errors.append(exc)
                streamer.end()

        worker = threading.Thread(target=run_generation)
        worker.start()

        accumulated_text = ""
        for new_text in streamer:
            accumulated_text += new_text
            # Re-render the whole text: a tag may be split across chunks.
            history[-1]["content"] = _render_live_text(accumulated_text) + " โณ"
            yield ui_state()

        worker.join()
        if generation_errors:
            raise generation_errors[0]

        # Final pass wraps the thinking process in the collapsible block.
        history[-1]["content"] = format_output_with_latex_support(accumulated_text)
        yield ui_state()

    except Exception as e:
        # UI boundary: report the failure in the chat rather than crashing.
        history[-1]["content"] = f"**System Error:** {str(e)}"
        yield ui_state()
    finally:
        # Release cached memory on success and on failure alike.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
| # ========================================== | |
| # 4. UI/UX: ADAPTIVE DARK/LIGHT MODE CSS | |
| # ========================================== | |
# Stylesheet handed to demo.launch(css=CSS). The selectors target the
# elem_id / elem_classes values assigned in the Blocks layout (#chat-window,
# .main-wrap, .input-row, .send-button, .sugg-btn) and the classes used in the
# hero HTML (.xerv-title, .greeting, .subtitle). Rules prefixed with ".dark"
# override the light-mode defaults; the last rule hides the footer and
# .label-wrap elements.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&display=swap');
/* Global Typography & Layout */
.gradio-container { font-family: 'Google Sans', sans-serif !important; }
.main-wrap { max-width: 750px !important; margin: 0 auto !important; padding-bottom: 100px !important; }
/* Hero Section */
.xerv-title { font-size: 46px; font-weight: 700; letter-spacing: -1px; margin-top: 40px; margin-bottom: 8px;}
.greeting { font-size: 18px; margin-bottom: 4px; opacity: 0.7;}
.subtitle { font-size: 26px; font-weight: 500; margin-bottom: 30px;}
/* Chat Window Base */
#chat-window { height: 65vh !important; }
/* User Bubble - Always Blue */
.message.user { background: #2563eb !important; color: white !important; border-radius: 20px 20px 0 20px !important; padding: 14px 20px !important; font-size: 16px !important; }
.message.user * { color: white !important; }
/* Bot Bubble - Light Mode (Default) */
.message.bot { background: #ffffff !important; color: #0f172a !important; border: 1px solid #e2e8f0 !important; border-radius: 20px 20px 20px 0 !important; padding: 16px 20px !important; font-size: 16px !important; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05) !important; }
/* Bot Bubble - Dark Mode */
.dark .message.bot { background: #1e293b !important; color: #f8fafc !important; border-color: #334155 !important; }
/* Thinking Details Block - Light Mode */
#chat-window details { background-color: #f8fafc !important; border: 1px solid #e2e8f0 !important; border-radius: 12px !important; padding: 14px !important; margin-bottom: 16px !important; box-shadow: inset 0 2px 4px 0 rgb(0 0 0 / 0.02) !important; transition: all 0.2s ease !important; }
#chat-window summary { cursor: pointer !important; font-weight: 600 !important; font-size: 15px !important; user-select: none !important; outline: none !important; color: #334155 !important;}
/* Thinking Details Block - Dark Mode */
.dark #chat-window details { background-color: #0f172a !important; border-color: #1e293b !important; color: #cbd5e1 !important; }
.dark #chat-window summary { color: #94a3b8 !important; }
#chat-window details[open] summary { margin-bottom: 12px !important; padding-bottom: 12px !important; border-bottom: 1px solid rgba(128,128,128,0.2) !important; }
/* Input Row - Adaptive */
.input-row { align-items: center !important; border-radius: 30px !important; padding: 6px 14px !important; border: 1px solid #cbd5e1 !important; transition: all 0.2s; box-shadow: 0 4px 6px -1px rgba(0,0,0,0.05) !important; background: #f8fafc !important; }
.dark .input-row { background: #1e293b !important; border-color: #334155 !important; }
.input-row:focus-within { border-color: #3b82f6 !important; box-shadow: 0 4px 12px rgba(59, 130, 246, 0.15) !important; }
.input-row textarea { background: transparent !important; border: none !important; box-shadow: none !important; font-size: 16px !important; }
.input-row textarea:focus { outline: none !important; border: none !important; box-shadow: none !important; }
/* Buttons */
.send-button { background: #2563eb !important; color: white !important; border-radius: 50% !important; height: 42px !important; width: 42px !important; min-width: 42px !important; padding: 0 !important; border: none !important; display: flex; justify-content: center; align-items: center; }
.send-button:disabled { background: #94a3b8 !important; }
.dark .send-button:disabled { background: #334155 !important; color: #64748b !important; }
/* Suggestions - Adaptive */
.sugg-btn { background: #ffffff !important; border: 1px solid #e2e8f0 !important; border-radius: 16px !important; padding: 16px 20px !important; text-align: left !important; justify-content: flex-start !important; font-size: 16px !important; color: #1e293b !important; box-shadow: 0 1px 2px rgba(0,0,0,0.05) !important; margin-bottom: 12px !important; cursor: pointer !important; }
.dark .sugg-btn { background: #1e293b !important; border-color: #334155 !important; color: #f8fafc !important; }
.sugg-btn:hover { opacity: 0.8; }
/* LaTeX Fixes */
.katex-display { margin: 1em 0 !important; overflow-x: auto !important; overflow-y: hidden !important; padding: 8px 0 !important; }
.katex { font-size: 1.1em !important; }
footer, .label-wrap { display: none !important; }
"""
def _preset_prompt(text):
    """Return a zero-argument callback that fills the input box with *text*."""
    return lambda: text


# NOTE(review): the nesting below is reconstructed from context (the source
# had lost its indentation) -- confirm the model selector sits under the
# input row inside the same column.
with gr.Blocks() as demo:
    with gr.Column(elem_classes="main-wrap"):
        # Landing view: title block plus suggestion buttons. Both sections are
        # hidden by process_chat_stream once a conversation starts.
        with gr.Column(elem_id="hero-section") as hero:
            gr.HTML("""
<div class="xerv-title">Xerv</div>
<div class="greeting">Hey there!</div>
<div class="subtitle">Let's make something happen.</div>
""")
        with gr.Column(elem_id="suggestions-section") as suggestions:
            btn1 = gr.Button(r"๐ Prove that $\sqrt{2}$ is irrational", elem_classes="sugg-btn")
            btn2 = gr.Button(r"๐งฎ Solve $x^3 - 6x^2 + 11x - 6 = 0$", elem_classes="sugg-btn")
            btn3 = gr.Button(r"๐ Explain eigenvalues with a matrix example", elem_classes="sugg-btn")

        # Starts hidden; process_chat_stream makes it visible on first message.
        # NOTE(review): sanitize_html=False renders message HTML unescaped
        # (the <details> block relies on it); user text is rendered the same
        # way -- confirm this is acceptable.
        chatbot = gr.Chatbot(
            visible=False,
            elem_id="chat-window",
            show_label=False,
            avatar_images=(None, None),
            sanitize_html=False,
            # Note: Removed type="messages" to resolve the TypeError in Gradio 6.0
            latex_delimiters=[
                {"left": "$$", "right": "$$", "display": True},
                {"left": "$", "right": "$", "display": False}
            ]
        )

        with gr.Column():
            with gr.Row(elem_classes="input-row"):
                chat_input = gr.Textbox(
                    show_label=False,
                    placeholder="Ask Xerv to solve complex math...",
                    lines=1,
                    max_lines=4,
                    scale=8
                )
                send_btn = gr.Button("๐", elem_classes="send-button", scale=1)
            model_selector = gr.Radio(
                choices=list(MODELS_CONFIG.keys()),
                value="ReasonBorn-Instruct",
                label="Reasoning Engine",
                container=False
            )

    # --- Wire up Interactivity ---
    # Every trigger runs the same streaming handler with the same inputs and
    # outputs, so the wiring is described once and reused.
    chat_event = dict(
        fn=process_chat_stream,
        inputs=[chat_input, chatbot, model_selector],
        outputs=[chat_input, chatbot, hero, suggestions],
    )

    chat_input.submit(**chat_event)
    send_btn.click(**chat_event)

    # Suggestion buttons first write a preset prompt into the textbox, then
    # start the chat handler exactly as a manual submit would.
    suggestion_presets = (
        (btn1, r"Prove that $\sqrt{2}$ is irrational using step-by-step logic"),
        (btn2, r"Solve $x^3 - 6x^2 + 11x - 6 = 0$ and verify roots"),
        (btn3, r"Explain eigenvalues in linear algebra with an example matrix"),
    )
    for suggestion_button, preset_text in suggestion_presets:
        suggestion_button.click(
            fn=_preset_prompt(preset_text),
            outputs=[chat_input]
        ).then(**chat_event)
if __name__ == "__main__":
    # Styling is supplied at launch time: the adaptive stylesheet plus the
    # default theme (no manual light-mode JavaScript is injected).
    launch_options = {
        "share": True,
        "debug": True,
        "css": CSS,
        "theme": gr.themes.Default(),
    }
    demo.launch(**launch_options)