Phase-Technologies committed on
Commit
5899137
·
verified ·
1 Parent(s): 45448e5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +357 -0
app.py ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Disable Gradio telemetry BEFORE importing gradio: the analytics request can
# hang or crash on Colab/Spaces when outbound network access is restricted.
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"

import torch
import gc
import re
import threading
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from peft import PeftModel

# ==========================================
# 1. SMART PRE-LOAD MODELS (NO QUANTIZATION)
# ==========================================
# Cache the loaded engines across re-executions of this script inside the same
# interpreter (e.g. re-running a notebook cell) so the multi-GB model load is
# skipped on subsequent runs.
# Fix: the original had a `global loaded_engines` statement here; `global` is a
# no-op at module scope, so it has been removed.
if "loaded_engines" not in globals():
    loaded_engines = {}

# Display name -> Hugging Face Hub repos: full-precision base model plus the
# LoRA adapter fine-tuned on top of it.
MODELS_CONFIG = {
    "ReasonBorn-Instruct": {
        "base": "Qwen/Qwen2.5-3B-Instruct",
        "adapter": "Phase-Technologies/ReasonBorn-Qwen-3B",
    },
    "ReasonBorn-LoRA": {
        "base": "Qwen/Qwen2.5-3B",
        "adapter": "Phase-Technologies/rb-qwen3b-16ds-lora",
    },
}
30
+
31
if not loaded_engines:
    print("Initializing Xerv Systems... Pre-loading models for instant streaming.")

    # Pin every weight to one device: partially-offloaded (meta) tensors make
    # PEFT adapter attachment fail with a KeyError.
    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Targeting inference device: {target_device.upper()}")

    for engine_name, spec in MODELS_CONFIG.items():
        print(f"--- Loading {engine_name} (Unquantized BF16) ---")

        # The adapter repo ships the tokenizer that matches the fine-tune.
        tok = AutoTokenizer.from_pretrained(spec["adapter"])

        # Full-precision BF16 base weights, forced onto the single target device.
        base = AutoModelForCausalLM.from_pretrained(
            spec["base"],
            torch_dtype=torch.bfloat16,
            device_map={"": target_device},
            trust_remote_code=True,
        )

        # Attach the LoRA adapter and switch to inference mode.
        engine = PeftModel.from_pretrained(base, spec["adapter"])
        engine.eval()

        loaded_engines[engine_name] = {"model": engine, "tokenizer": tok}

    print("✅ Both Reasoning Engines successfully loaded and ready.")
else:
    print("⚡ Models already detected in memory! Skipping load phase for instant boot.")
59
+
60
# ==========================================
# 2. BULLETPROOF LATEX & TAG PARSER
# ==========================================
def _format_reasoning_tags(text):
    """Convert ReasonBorn's XML-ish tags (<plan>, <reasoning>, <step>, <verify>)
    into Markdown headings. Shared by both branches of the public formatter."""
    text = text.replace("<plan>", "**🔹 PLAN:**\n").replace("</plan>", "\n")
    text = text.replace("<reasoning>", "\n").replace("</reasoning>", "\n")
    # <step> may carry an optional index="N" attribute.
    text = re.sub(
        r"<step(?:\s+index=\"(\d+)\")?>",
        lambda m: f"**🔸 STEP {m.group(1)}:** " if m.group(1) else "**🔸 STEP:** ",
        text,
    )
    text = text.replace("</step>", "\n")
    text = text.replace("<verify>", "**✅ VERIFY:** ").replace("</verify>", "\n")
    return text


def format_output_with_latex_support(text):
    """Render a raw ReasonBorn completion as Markdown/HTML for the chat UI.

    Normalizes LaTeX delimiters to $...$/$$...$$ (Gradio/KaTeX style), wraps the
    pre-<conclusion> "thinking" portion in a collapsible <details> block, and
    falls back to plain tag formatting if generation stopped early.

    Parameters:
        text (str): raw model output, possibly containing reasoning tags.
    Returns:
        str: Markdown (plus raw <details> HTML) ready for the chatbot.
    """
    # Standardize LaTeX delimiters for Gradio.
    # Bug fix: the original only replaced the double-backslash forms
    # (literal "\\("), so standard single-backslash \( ... \) / \[ ... \]
    # delimiters were never converted. Handle both (double first).
    text = text.replace(r'\\(', '$').replace(r'\\)', '$')
    text = text.replace(r'\\[', '$$').replace(r'\\]', '$$')
    text = text.replace('\\(', '$').replace('\\)', '$')
    text = text.replace('\\[', '$$').replace('\\]', '$$')

    # The conclusion may be unterminated if generation hit the token limit.
    conclusion_match = re.search(r"<conclusion>(.*?)(?:</conclusion>|$)", text, re.DOTALL)

    if conclusion_match is None:
        # Fallback if generation stopped before a <conclusion> tag.
        return _format_reasoning_tags(text)

    conclusion_text = conclusion_match.group(1).strip()
    thinking_text = _format_reasoning_tags(text[:conclusion_match.start()].strip())

    # Wrap the thinking trace in a collapsible HTML details block.
    return (
        f"<details>\n"
        f"<summary>🧠 View Thinking Process</summary>\n\n"
        f"{thinking_text}\n\n"
        f"</details>\n\n"
        f"**🎯 CONCLUSION:**\n\n{conclusion_text}"
    )
105
+
106
# ==========================================
# 3. REAL-TIME STREAMING GENERATOR
# ==========================================
def process_chat_stream(user_message, history, model_choice):
    """
    Stream a model reply token-by-token into the Gradio chat window.

    Handles Gradio's 'messages' history format natively:
    [{"role": "user", "content": "..."}, ...]

    Parameters:
        user_message (str): raw text from the input box.
        history (list[dict] | None): prior turns in messages format.
        model_choice (str): key into `loaded_engines`.

    Yields:
        tuple: updates for (textbox, chatbot, hero section, suggestions).
    """
    # Ignore empty/whitespace submissions without disturbing the UI.
    if not user_message.strip():
        yield "", gr.update(), gr.update(), gr.update()
        return

    # Initialize history if empty, then append the user turn plus an empty
    # assistant placeholder that streaming will fill in.
    history = history or []
    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": ""})

    # Yield immediately to update the UI (hide hero/suggestions, show chatbot).
    yield "", gr.update(value=history, visible=True), gr.update(visible=False), gr.update(visible=False)

    try:
        engine = loaded_engines[model_choice]
        model = engine["model"]
        tokenizer = engine["tokenizer"]

        # Build the strict ReasonBorn system prompt (ChatML framing).
        prompt = "<|im_start|>system\nYou are ReasonBorn. Use <plan>, <reasoning> with <step> & <verify>, <conclusion> strictly.<|im_end|>\n"

        # Append prior conversation (excluding the two entries just appended).
        for msg in history[:-2]:
            role = msg["role"]
            content = msg["content"]

            if role == "user":
                prompt += f"<|im_start|>user\n{content}<|im_end|>\n"
            elif role == "assistant":
                # Strip HTML UI elements so the model sees plain-text history.
                clean_content = re.sub(r"<.*?>", "", content)
                prompt += f"<|im_start|>assistant\n{clean_content}<|im_end|>\n"

        # Append the current message and open the assistant turn.
        prompt += f"<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant\n"

        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

        generation_kwargs = dict(
            **inputs,
            max_new_tokens=1024,
            temperature=0.2,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.convert_tokens_to_ids("<|im_end|>"),
            streamer=streamer,
        )

        # Run generation on a background thread; the streamer feeds this one.
        thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()

        accumulated_text = ""

        # Stream chunks back to the UI as they arrive.
        for new_text in streamer:
            accumulated_text += new_text

            # Lightweight real-time formatting for visual feedback.
            live_text = accumulated_text.replace(r'\\(', '$').replace(r'\\)', '$').replace(r'\\[', '$$').replace(r'\\]', '$$')
            live_text = live_text.replace("<plan>", "**🔹 PLAN:**\n").replace("</plan>", "\n")
            live_text = live_text.replace("<reasoning>", "\n").replace("</reasoning>", "\n")
            live_text = re.sub(r"<step(?:\s+index=\"(\d+)\")?>",
                               lambda m: f"**🔸 STEP {m.group(1)}:** " if m.group(1) else "**🔸 STEP:** ",
                               live_text)
            live_text = live_text.replace("</step>", "\n")
            live_text = live_text.replace("<verify>", "**✅ VERIFY:** ").replace("</verify>", "\n")
            live_text = live_text.replace("<conclusion>", "\n\n**🎯 CONCLUSION:**\n\n").replace("</conclusion>", "")

            # Update the assistant placeholder in-place and push to the UI.
            history[-1]["content"] = live_text + " ⏳"
            yield "", gr.update(value=history, visible=True), gr.update(visible=False), gr.update(visible=False)

        # Bug fix: the generation thread was never joined. Wait for it so the
        # final formatting pass and the cache cleanup below cannot race the
        # tail end of model.generate().
        thread.join()

        # Final formatting pass with the collapsible HTML thinking block.
        final_formatted = format_output_with_latex_support(accumulated_text)
        history[-1]["content"] = final_formatted

        yield "", gr.update(value=history, visible=True), gr.update(visible=False), gr.update(visible=False)

        # Cleanup memory between requests.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

    except Exception as e:
        # UI boundary: surface the error in the chat instead of crashing the app.
        history[-1]["content"] = f"**System Error:** {str(e)}"
        yield "", gr.update(value=history, visible=True), gr.update(visible=False), gr.update(visible=False)
202
+
203
+
204
# ==========================================
# 4. UI/UX: ADAPTIVE DARK/LIGHT MODE CSS
# ==========================================
# Custom stylesheet handed to Gradio at app setup. Styles the hero header,
# user/bot chat bubbles, the collapsible "thinking" <details> block, the input
# row, suggestion buttons, and KaTeX output, with `.dark`-prefixed overrides
# for dark mode.
# NOTE(review): selectors like .message.user / .label-wrap target Gradio's
# generated markup, which varies between Gradio versions — confirm against the
# installed release.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&display=swap');

/* Global Typography & Layout */
.gradio-container { font-family: 'Google Sans', sans-serif !important; }
.main-wrap { max-width: 750px !important; margin: 0 auto !important; padding-bottom: 100px !important; }

/* Hero Section */
.xerv-title { font-size: 46px; font-weight: 700; letter-spacing: -1px; margin-top: 40px; margin-bottom: 8px;}
.greeting { font-size: 18px; margin-bottom: 4px; opacity: 0.7;}
.subtitle { font-size: 26px; font-weight: 500; margin-bottom: 30px;}

/* Chat Window Base */
#chat-window { height: 65vh !important; }

/* User Bubble - Always Blue */
.message.user { background: #2563eb !important; color: white !important; border-radius: 20px 20px 0 20px !important; padding: 14px 20px !important; font-size: 16px !important; }
.message.user * { color: white !important; }

/* Bot Bubble - Light Mode (Default) */
.message.bot { background: #ffffff !important; color: #0f172a !important; border: 1px solid #e2e8f0 !important; border-radius: 20px 20px 20px 0 !important; padding: 16px 20px !important; font-size: 16px !important; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05) !important; }

/* Bot Bubble - Dark Mode */
.dark .message.bot { background: #1e293b !important; color: #f8fafc !important; border-color: #334155 !important; }

/* Thinking Details Block - Light Mode */
#chat-window details { background-color: #f8fafc !important; border: 1px solid #e2e8f0 !important; border-radius: 12px !important; padding: 14px !important; margin-bottom: 16px !important; box-shadow: inset 0 2px 4px 0 rgb(0 0 0 / 0.02) !important; transition: all 0.2s ease !important; }
#chat-window summary { cursor: pointer !important; font-weight: 600 !important; font-size: 15px !important; user-select: none !important; outline: none !important; color: #334155 !important;}

/* Thinking Details Block - Dark Mode */
.dark #chat-window details { background-color: #0f172a !important; border-color: #1e293b !important; color: #cbd5e1 !important; }
.dark #chat-window summary { color: #94a3b8 !important; }

#chat-window details[open] summary { margin-bottom: 12px !important; padding-bottom: 12px !important; border-bottom: 1px solid rgba(128,128,128,0.2) !important; }

/* Input Row - Adaptive */
.input-row { align-items: center !important; border-radius: 30px !important; padding: 6px 14px !important; border: 1px solid #cbd5e1 !important; transition: all 0.2s; box-shadow: 0 4px 6px -1px rgba(0,0,0,0.05) !important; background: #f8fafc !important; }
.dark .input-row { background: #1e293b !important; border-color: #334155 !important; }
.input-row:focus-within { border-color: #3b82f6 !important; box-shadow: 0 4px 12px rgba(59, 130, 246, 0.15) !important; }
.input-row textarea { background: transparent !important; border: none !important; box-shadow: none !important; font-size: 16px !important; }
.input-row textarea:focus { outline: none !important; border: none !important; box-shadow: none !important; }

/* Buttons */
.send-button { background: #2563eb !important; color: white !important; border-radius: 50% !important; height: 42px !important; width: 42px !important; min-width: 42px !important; padding: 0 !important; border: none !important; display: flex; justify-content: center; align-items: center; }
.send-button:disabled { background: #94a3b8 !important; }
.dark .send-button:disabled { background: #334155 !important; color: #64748b !important; }

/* Suggestions - Adaptive */
.sugg-btn { background: #ffffff !important; border: 1px solid #e2e8f0 !important; border-radius: 16px !important; padding: 16px 20px !important; text-align: left !important; justify-content: flex-start !important; font-size: 16px !important; color: #1e293b !important; box-shadow: 0 1px 2px rgba(0,0,0,0.05) !important; margin-bottom: 12px !important; cursor: pointer !important; }
.dark .sugg-btn { background: #1e293b !important; border-color: #334155 !important; color: #f8fafc !important; }
.sugg-btn:hover { opacity: 0.8; }

/* LaTeX Fixes */
.katex-display { margin: 1em 0 !important; overflow-x: auto !important; overflow-y: hidden !important; padding: 8px 0 !important; }
.katex { font-size: 1.1em !important; }
footer, .label-wrap { display: none !important; }
"""
264
+
265
# Bug fix: `css` and `theme` are constructor arguments of gr.Blocks(), not of
# Blocks.launch() — passing them to launch() raises TypeError on current
# Gradio. They are now supplied here and removed from launch() below.
with gr.Blocks(css=CSS, theme=gr.themes.Default()) as demo:
    with gr.Column(elem_classes="main-wrap"):
        # Landing "hero" header; hidden once the first message is sent.
        with gr.Column(elem_id="hero-section") as hero:
            gr.HTML("""
                <div class="xerv-title">Xerv</div>
                <div class="greeting">Hey there!</div>
                <div class="subtitle">Let's make something happen.</div>
            """)

        # One-click example prompts; also hidden after the first message.
        with gr.Column(elem_id="suggestions-section") as suggestions:
            btn1 = gr.Button(r"🔍 Prove that $\sqrt{2}$ is irrational", elem_classes="sugg-btn")
            btn2 = gr.Button(r"🧮 Solve $x^3 - 6x^2 + 11x - 6 = 0$", elem_classes="sugg-btn")
            btn3 = gr.Button(r"📊 Explain eigenvalues with a matrix example", elem_classes="sugg-btn")

        chatbot = gr.Chatbot(
            visible=False,
            elem_id="chat-window",
            show_label=False,
            avatar_images=(None, None),
            # Raw HTML must render for the collapsible <details> thinking block.
            # NOTE(review): model output is injected unsanitized — fine for a
            # demo, but confirm before public exposure.
            sanitize_html=False,
            # Note: Removed type="messages" to resolve the TypeError in Gradio 6.0
            latex_delimiters=[
                {"left": "$$", "right": "$$", "display": True},
                {"left": "$", "right": "$", "display": False},
            ],
        )

        with gr.Column():
            with gr.Row(elem_classes="input-row"):
                chat_input = gr.Textbox(
                    show_label=False,
                    placeholder="Ask Xerv to solve complex math...",
                    lines=1,
                    max_lines=4,
                    scale=8,
                )
                send_btn = gr.Button("🚀", elem_classes="send-button", scale=1)

            model_selector = gr.Radio(
                choices=list(MODELS_CONFIG.keys()),
                value="ReasonBorn-Instruct",
                label="Reasoning Engine",
                container=False,
            )

    # --- Wire up Interactivity ---
    # Enter key, send button, and all three suggestion buttons funnel into the
    # same streaming generator with identical outputs.
    chat_input.submit(
        process_chat_stream,
        inputs=[chat_input, chatbot, model_selector],
        outputs=[chat_input, chatbot, hero, suggestions],
    )

    send_btn.click(
        process_chat_stream,
        inputs=[chat_input, chatbot, model_selector],
        outputs=[chat_input, chatbot, hero, suggestions],
    )

    # Suggestion buttons first fill the textbox, then chain into the stream.
    btn1.click(
        fn=lambda: r"Prove that $\sqrt{2}$ is irrational using step-by-step logic",
        outputs=[chat_input],
    ).then(
        fn=process_chat_stream,
        inputs=[chat_input, chatbot, model_selector],
        outputs=[chat_input, chatbot, hero, suggestions],
    )

    btn2.click(
        fn=lambda: r"Solve $x^3 - 6x^2 + 11x - 6 = 0$ and verify roots",
        outputs=[chat_input],
    ).then(
        fn=process_chat_stream,
        inputs=[chat_input, chatbot, model_selector],
        outputs=[chat_input, chatbot, hero, suggestions],
    )

    btn3.click(
        fn=lambda: r"Explain eigenvalues in linear algebra with an example matrix",
        outputs=[chat_input],
    ).then(
        fn=process_chat_stream,
        inputs=[chat_input, chatbot, model_selector],
        outputs=[chat_input, chatbot, hero, suggestions],
    )

if __name__ == "__main__":
    # Styling (css/theme) is applied at Blocks construction above;
    # launch() takes only runtime options.
    demo.launch(
        share=True,
        debug=True,
    )