Phase-Technologies committed on
Commit
5899137
·
verified ·
1 Parent(s): 45448e5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +357 -0
app.py ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Disable Gradio telemetry BEFORE importing gradio: the analytics request can
# hang or crash on Colab/Spaces when outbound network access is restricted.
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"

import torch
import gc
import re
import threading
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from peft import PeftModel

# ==========================================
# 1. SMART PRE-LOAD MODELS (NO QUANTIZATION)
# ==========================================
# Cache the loaded engines across re-executions of this script inside the same
# interpreter (e.g. re-running a notebook cell) so the multi-GB model load is
# skipped on subsequent runs.
# Fix: the original had a `global loaded_engines` statement here; `global` is a
# no-op at module scope, so it has been removed.
if "loaded_engines" not in globals():
    loaded_engines = {}

# Display name -> Hugging Face Hub repos: full-precision base model plus the
# LoRA adapter fine-tuned on top of it.
MODELS_CONFIG = {
    "ReasonBorn-Instruct": {
        "base": "Qwen/Qwen2.5-3B-Instruct",
        "adapter": "Phase-Technologies/ReasonBorn-Qwen-3B",
    },
    "ReasonBorn-LoRA": {
        "base": "Qwen/Qwen2.5-3B",
        "adapter": "Phase-Technologies/rb-qwen3b-16ds-lora",
    },
}
30
+
31
if not loaded_engines:
    print("Initializing Xerv Systems... Pre-loading models for instant streaming.")

    # Pin every weight to one device: partially-offloaded (meta) tensors make
    # PEFT adapter attachment fail with a KeyError.
    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Targeting inference device: {target_device.upper()}")

    for engine_name, spec in MODELS_CONFIG.items():
        print(f"--- Loading {engine_name} (Unquantized BF16) ---")

        # The adapter repo ships the tokenizer that matches the fine-tune.
        tok = AutoTokenizer.from_pretrained(spec["adapter"])

        # Full-precision BF16 base weights, forced onto the single target device.
        base = AutoModelForCausalLM.from_pretrained(
            spec["base"],
            torch_dtype=torch.bfloat16,
            device_map={"": target_device},
            trust_remote_code=True,
        )

        # Attach the LoRA adapter and switch to inference mode.
        engine = PeftModel.from_pretrained(base, spec["adapter"])
        engine.eval()

        loaded_engines[engine_name] = {"model": engine, "tokenizer": tok}

    print("✅ Both Reasoning Engines successfully loaded and ready.")
else:
    print("⚡ Models already detected in memory! Skipping load phase for instant boot.")
59
+
60
# ==========================================
# 2. BULLETPROOF LATEX & TAG PARSER
# ==========================================
def _format_reasoning_tags(text):
    """Convert ReasonBorn's XML-ish tags (<plan>, <reasoning>, <step>, <verify>)
    into Markdown headings. Shared by both branches of the public formatter."""
    text = text.replace("<plan>", "**🔹 PLAN:**\n").replace("</plan>", "\n")
    text = text.replace("<reasoning>", "\n").replace("</reasoning>", "\n")
    # <step> may carry an optional index="N" attribute.
    text = re.sub(
        r"<step(?:\s+index=\"(\d+)\")?>",
        lambda m: f"**🔸 STEP {m.group(1)}:** " if m.group(1) else "**🔸 STEP:** ",
        text,
    )
    text = text.replace("</step>", "\n")
    text = text.replace("<verify>", "**✅ VERIFY:** ").replace("</verify>", "\n")
    return text


def format_output_with_latex_support(text):
    """Render a raw ReasonBorn completion as Markdown/HTML for the chat UI.

    Normalizes LaTeX delimiters to $...$/$$...$$ (Gradio/KaTeX style), wraps the
    pre-<conclusion> "thinking" portion in a collapsible <details> block, and
    falls back to plain tag formatting if generation stopped early.

    Parameters:
        text (str): raw model output, possibly containing reasoning tags.
    Returns:
        str: Markdown (plus raw <details> HTML) ready for the chatbot.
    """
    # Standardize LaTeX delimiters for Gradio.
    # Bug fix: the original only replaced the double-backslash forms
    # (literal "\\("), so standard single-backslash \( ... \) / \[ ... \]
    # delimiters were never converted. Handle both (double first).
    text = text.replace(r'\\(', '$').replace(r'\\)', '$')
    text = text.replace(r'\\[', '$$').replace(r'\\]', '$$')
    text = text.replace('\\(', '$').replace('\\)', '$')
    text = text.replace('\\[', '$$').replace('\\]', '$$')

    # The conclusion may be unterminated if generation hit the token limit.
    conclusion_match = re.search(r"<conclusion>(.*?)(?:</conclusion>|$)", text, re.DOTALL)

    if conclusion_match is None:
        # Fallback if generation stopped before a <conclusion> tag.
        return _format_reasoning_tags(text)

    conclusion_text = conclusion_match.group(1).strip()
    thinking_text = _format_reasoning_tags(text[:conclusion_match.start()].strip())

    # Wrap the thinking trace in a collapsible HTML details block.
    return (
        f"<details>\n"
        f"<summary>🧠 View Thinking Process</summary>\n\n"
        f"{thinking_text}\n\n"
        f"</details>\n\n"
        f"**🎯 CONCLUSION:**\n\n{conclusion_text}"
    )
105
+
106
# ==========================================
# 3. REAL-TIME STREAMING GENERATOR
# ==========================================
def process_chat_stream(user_message, history, model_choice):
    """
    Stream a model reply token-by-token into the Gradio chat window.

    Handles Gradio's 'messages' history format natively:
    [{"role": "user", "content": "..."}, ...]

    Parameters:
        user_message (str): raw text from the input box.
        history (list[dict] | None): prior turns in messages format.
        model_choice (str): key into `loaded_engines`.

    Yields:
        tuple: updates for (textbox, chatbot, hero section, suggestions).
    """
    # Ignore empty/whitespace submissions without disturbing the UI.
    if not user_message.strip():
        yield "", gr.update(), gr.update(), gr.update()
        return

    # Initialize history if empty, then append the user turn plus an empty
    # assistant placeholder that streaming will fill in.
    history = history or []
    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": ""})

    # Yield immediately to update the UI (hide hero/suggestions, show chatbot).
    yield "", gr.update(value=history, visible=True), gr.update(visible=False), gr.update(visible=False)

    try:
        engine = loaded_engines[model_choice]
        model = engine["model"]
        tokenizer = engine["tokenizer"]

        # Build the strict ReasonBorn system prompt (ChatML framing).
        prompt = "<|im_start|>system\nYou are ReasonBorn. Use <plan>, <reasoning> with <step> & <verify>, <conclusion> strictly.<|im_end|>\n"

        # Append prior conversation (excluding the two entries just appended).
        for msg in history[:-2]:
            role = msg["role"]
            content = msg["content"]

            if role == "user":
                prompt += f"<|im_start|>user\n{content}<|im_end|>\n"
            elif role == "assistant":
                # Strip HTML UI elements so the model sees plain-text history.
                clean_content = re.sub(r"<.*?>", "", content)
                prompt += f"<|im_start|>assistant\n{clean_content}<|im_end|>\n"

        # Append the current message and open the assistant turn.
        prompt += f"<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant\n"

        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

        generation_kwargs = dict(
            **inputs,
            max_new_tokens=1024,
            temperature=0.2,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.convert_tokens_to_ids("<|im_end|>"),
            streamer=streamer,
        )

        # Run generation on a background thread; the streamer feeds this one.
        thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()

        accumulated_text = ""

        # Stream chunks back to the UI as they arrive.
        for new_text in streamer:
            accumulated_text += new_text

            # Lightweight real-time formatting for visual feedback.
            live_text = accumulated_text.replace(r'\\(', '$').replace(r'\\)', '$').replace(r'\\[', '$$').replace(r'\\]', '$$')
            live_text = live_text.replace("<plan>", "**🔹 PLAN:**\n").replace("</plan>", "\n")
            live_text = live_text.replace("<reasoning>", "\n").replace("</reasoning>", "\n")
            live_text = re.sub(r"<step(?:\s+index=\"(\d+)\")?>",
                               lambda m: f"**🔸 STEP {m.group(1)}:** " if m.group(1) else "**🔸 STEP:** ",
                               live_text)
            live_text = live_text.replace("</step>", "\n")
            live_text = live_text.replace("<verify>", "**✅ VERIFY:** ").replace("</verify>", "\n")
            live_text = live_text.replace("<conclusion>", "\n\n**🎯 CONCLUSION:**\n\n").replace("</conclusion>", "")

            # Update the assistant placeholder in-place and push to the UI.
            history[-1]["content"] = live_text + " ⏳"
            yield "", gr.update(value=history, visible=True), gr.update(visible=False), gr.update(visible=False)

        # Bug fix: the generation thread was never joined. Wait for it so the
        # final formatting pass and the cache cleanup below cannot race the
        # tail end of model.generate().
        thread.join()

        # Final formatting pass with the collapsible HTML thinking block.
        final_formatted = format_output_with_latex_support(accumulated_text)
        history[-1]["content"] = final_formatted

        yield "", gr.update(value=history, visible=True), gr.update(visible=False), gr.update(visible=False)

        # Cleanup memory between requests.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

    except Exception as e:
        # UI boundary: surface the error in the chat instead of crashing the app.
        history[-1]["content"] = f"**System Error:** {str(e)}"
        yield "", gr.update(value=history, visible=True), gr.update(visible=False), gr.update(visible=False)
202
+
203
+
204
# ==========================================
# 4. UI/UX: ADAPTIVE DARK/LIGHT MODE CSS
# ==========================================
# Custom stylesheet handed to Gradio at app setup. Styles the hero header,
# user/bot chat bubbles, the collapsible "thinking" <details> block, the input
# row, suggestion buttons, and KaTeX output, with `.dark`-prefixed overrides
# for dark mode.
# NOTE(review): selectors like .message.user / .label-wrap target Gradio's
# generated markup, which varies between Gradio versions — confirm against the
# installed release.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&display=swap');

/* Global Typography & Layout */
.gradio-container { font-family: 'Google Sans', sans-serif !important; }
.main-wrap { max-width: 750px !important; margin: 0 auto !important; padding-bottom: 100px !important; }

/* Hero Section */
.xerv-title { font-size: 46px; font-weight: 700; letter-spacing: -1px; margin-top: 40px; margin-bottom: 8px;}
.greeting { font-size: 18px; margin-bottom: 4px; opacity: 0.7;}
.subtitle { font-size: 26px; font-weight: 500; margin-bottom: 30px;}

/* Chat Window Base */
#chat-window { height: 65vh !important; }

/* User Bubble - Always Blue */
.message.user { background: #2563eb !important; color: white !important; border-radius: 20px 20px 0 20px !important; padding: 14px 20px !important; font-size: 16px !important; }
.message.user * { color: white !important; }

/* Bot Bubble - Light Mode (Default) */
.message.bot { background: #ffffff !important; color: #0f172a !important; border: 1px solid #e2e8f0 !important; border-radius: 20px 20px 20px 0 !important; padding: 16px 20px !important; font-size: 16px !important; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05) !important; }

/* Bot Bubble - Dark Mode */
.dark .message.bot { background: #1e293b !important; color: #f8fafc !important; border-color: #334155 !important; }

/* Thinking Details Block - Light Mode */
#chat-window details { background-color: #f8fafc !important; border: 1px solid #e2e8f0 !important; border-radius: 12px !important; padding: 14px !important; margin-bottom: 16px !important; box-shadow: inset 0 2px 4px 0 rgb(0 0 0 / 0.02) !important; transition: all 0.2s ease !important; }
#chat-window summary { cursor: pointer !important; font-weight: 600 !important; font-size: 15px !important; user-select: none !important; outline: none !important; color: #334155 !important;}

/* Thinking Details Block - Dark Mode */
.dark #chat-window details { background-color: #0f172a !important; border-color: #1e293b !important; color: #cbd5e1 !important; }
.dark #chat-window summary { color: #94a3b8 !important; }

#chat-window details[open] summary { margin-bottom: 12px !important; padding-bottom: 12px !important; border-bottom: 1px solid rgba(128,128,128,0.2) !important; }

/* Input Row - Adaptive */
.input-row { align-items: center !important; border-radius: 30px !important; padding: 6px 14px !important; border: 1px solid #cbd5e1 !important; transition: all 0.2s; box-shadow: 0 4px 6px -1px rgba(0,0,0,0.05) !important; background: #f8fafc !important; }
.dark .input-row { background: #1e293b !important; border-color: #334155 !important; }
.input-row:focus-within { border-color: #3b82f6 !important; box-shadow: 0 4px 12px rgba(59, 130, 246, 0.15) !important; }
.input-row textarea { background: transparent !important; border: none !important; box-shadow: none !important; font-size: 16px !important; }
.input-row textarea:focus { outline: none !important; border: none !important; box-shadow: none !important; }

/* Buttons */
.send-button { background: #2563eb !important; color: white !important; border-radius: 50% !important; height: 42px !important; width: 42px !important; min-width: 42px !important; padding: 0 !important; border: none !important; display: flex; justify-content: center; align-items: center; }
.send-button:disabled { background: #94a3b8 !important; }
.dark .send-button:disabled { background: #334155 !important; color: #64748b !important; }

/* Suggestions - Adaptive */
.sugg-btn { background: #ffffff !important; border: 1px solid #e2e8f0 !important; border-radius: 16px !important; padding: 16px 20px !important; text-align: left !important; justify-content: flex-start !important; font-size: 16px !important; color: #1e293b !important; box-shadow: 0 1px 2px rgba(0,0,0,0.05) !important; margin-bottom: 12px !important; cursor: pointer !important; }
.dark .sugg-btn { background: #1e293b !important; border-color: #334155 !important; color: #f8fafc !important; }
.sugg-btn:hover { opacity: 0.8; }

/* LaTeX Fixes */
.katex-display { margin: 1em 0 !important; overflow-x: auto !important; overflow-y: hidden !important; padding: 8px 0 !important; }
.katex { font-size: 1.1em !important; }
footer, .label-wrap { display: none !important; }
"""
264
+
265
# Bug fix: `css` and `theme` are constructor arguments of gr.Blocks(), not of
# Blocks.launch() — passing them to launch() raises TypeError on current
# Gradio. They are now supplied here and removed from launch() below.
with gr.Blocks(css=CSS, theme=gr.themes.Default()) as demo:
    with gr.Column(elem_classes="main-wrap"):
        # Landing "hero" header; hidden once the first message is sent.
        with gr.Column(elem_id="hero-section") as hero:
            gr.HTML("""
                <div class="xerv-title">Xerv</div>
                <div class="greeting">Hey there!</div>
                <div class="subtitle">Let's make something happen.</div>
            """)

        # One-click example prompts; also hidden after the first message.
        with gr.Column(elem_id="suggestions-section") as suggestions:
            btn1 = gr.Button(r"🔍 Prove that $\sqrt{2}$ is irrational", elem_classes="sugg-btn")
            btn2 = gr.Button(r"🧮 Solve $x^3 - 6x^2 + 11x - 6 = 0$", elem_classes="sugg-btn")
            btn3 = gr.Button(r"📊 Explain eigenvalues with a matrix example", elem_classes="sugg-btn")

        chatbot = gr.Chatbot(
            visible=False,
            elem_id="chat-window",
            show_label=False,
            avatar_images=(None, None),
            # Raw HTML must render for the collapsible <details> thinking block.
            # NOTE(review): model output is injected unsanitized — fine for a
            # demo, but confirm before public exposure.
            sanitize_html=False,
            # Note: Removed type="messages" to resolve the TypeError in Gradio 6.0
            latex_delimiters=[
                {"left": "$$", "right": "$$", "display": True},
                {"left": "$", "right": "$", "display": False},
            ],
        )

        with gr.Column():
            with gr.Row(elem_classes="input-row"):
                chat_input = gr.Textbox(
                    show_label=False,
                    placeholder="Ask Xerv to solve complex math...",
                    lines=1,
                    max_lines=4,
                    scale=8,
                )
                send_btn = gr.Button("🚀", elem_classes="send-button", scale=1)

            model_selector = gr.Radio(
                choices=list(MODELS_CONFIG.keys()),
                value="ReasonBorn-Instruct",
                label="Reasoning Engine",
                container=False,
            )

    # --- Wire up Interactivity ---
    # Enter key, send button, and all three suggestion buttons funnel into the
    # same streaming generator with identical outputs.
    chat_input.submit(
        process_chat_stream,
        inputs=[chat_input, chatbot, model_selector],
        outputs=[chat_input, chatbot, hero, suggestions],
    )

    send_btn.click(
        process_chat_stream,
        inputs=[chat_input, chatbot, model_selector],
        outputs=[chat_input, chatbot, hero, suggestions],
    )

    # Suggestion buttons first fill the textbox, then chain into the stream.
    btn1.click(
        fn=lambda: r"Prove that $\sqrt{2}$ is irrational using step-by-step logic",
        outputs=[chat_input],
    ).then(
        fn=process_chat_stream,
        inputs=[chat_input, chatbot, model_selector],
        outputs=[chat_input, chatbot, hero, suggestions],
    )

    btn2.click(
        fn=lambda: r"Solve $x^3 - 6x^2 + 11x - 6 = 0$ and verify roots",
        outputs=[chat_input],
    ).then(
        fn=process_chat_stream,
        inputs=[chat_input, chatbot, model_selector],
        outputs=[chat_input, chatbot, hero, suggestions],
    )

    btn3.click(
        fn=lambda: r"Explain eigenvalues in linear algebra with an example matrix",
        outputs=[chat_input],
    ).then(
        fn=process_chat_stream,
        inputs=[chat_input, chatbot, model_selector],
        outputs=[chat_input, chatbot, hero, suggestions],
    )

if __name__ == "__main__":
    # Styling (css/theme) is applied at Blocks construction above;
    # launch() takes only runtime options.
    demo.launch(
        share=True,
        debug=True,
    )