VladBoyko committed on
Commit
1ff1f45
·
verified ·
1 Parent(s): 3b97453

Update app.py

Browse files

Adjust output formatting and parsing

Files changed (1) hide show
  1. app.py +267 -372
app.py CHANGED
@@ -1,441 +1,336 @@
1
  import gradio as gr
 
2
  import re
3
  from vllm import LLM, SamplingParams
4
- import spaces
5
- import os
6
 
7
- # Force XFormers backend for T4 GPU compatibility (prevent Triton compilation errors)
8
- os.environ["VLLM_ATTENTION_BACKEND"] = "XFORMERS"
9
- os.environ["VLLM_USE_TRITON_FLASH_ATTN"] = "0"
10
 
11
  class VibeThinkerVLLM:
12
- def __init__(self, model_path="WeiboAI/VibeThinker-1.5B"):
13
- self.model_path = model_path
14
- print("Loading model with vLLM... This may take a minute.")
15
-
16
- # T4 GPU compatible - using float16 with XFormers backend
17
- self.model = LLM(
18
- model=self.model_path,
19
- dtype="float16",
20
- gpu_memory_utilization=0.85,
21
- max_model_len=16384, # Reduced for T4 stability
22
- trust_remote_code=True,
23
- enforce_eager=True, # Disable CUDA graphs to save memory
24
- disable_custom_all_reduce=True, # Prevent Triton compilation issues
25
- enable_prefix_caching=False, # Disable prefix caching (causes Triton issues on T4)
26
- max_num_seqs=1, # Process one sequence at a time for stability
27
- )
28
-
29
- print(f"Model loaded successfully with vLLM!")
30
- print(f"Using dtype: float16 with XFormers backend (T4 GPU compatible)")
 
 
 
31
 
32
- @spaces.GPU
33
- def infer_text(self, prompt, temperature=0.6, max_tokens=8192, top_p=0.95):
34
- """Generate response with vLLM for faster inference"""
35
-
36
- # Ensure max_tokens doesn't exceed model capacity
37
- max_tokens = min(max_tokens, 16384)
38
-
39
- messages = [
40
- {"role": "user", "content": prompt}
41
- ]
42
-
43
- sampling_params = SamplingParams(
44
- temperature=temperature,
45
- max_tokens=max_tokens,
46
- top_p=top_p,
47
- top_k=-1,
48
- )
49
-
50
- print(f"Generating with vLLM (temp={temperature}, max_tokens={max_tokens})...")
51
 
52
- outputs = self.model.chat(messages, sampling_params=sampling_params)
53
- response = outputs[0].outputs[0].text
 
 
 
 
 
 
54
 
55
- return response
 
 
 
 
 
 
 
 
 
 
 
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  def parse_model_output(text):
59
- """Parse model output into structured components"""
60
- sections = []
61
-
62
- # Patterns
63
- think_pattern = r'<think>(.*?)</think>'
64
- code_pattern = r'```(\w+)?\n(.*?)```'
65
-
66
- # Extract thinking sections
67
- think_matches = list(re.finditer(think_pattern, text, re.DOTALL))
 
68
 
69
- last_pos = 0
 
 
 
 
 
 
 
70
 
71
- for match in think_matches:
72
- # Process text before thinking section
73
- before_text = text[last_pos:match.start()].strip()
74
- if before_text:
75
- sections.extend(parse_text_with_code(before_text))
 
 
 
 
 
 
76
 
77
- # Add thinking section
78
- think_content = match.group(1).strip()
79
- sections.append({
80
- 'type': 'thinking',
81
- 'content': think_content
82
- })
83
-
84
- last_pos = match.end()
85
-
86
- # Process remaining text
87
- remaining = text[last_pos:].strip()
88
- if remaining:
89
- sections.extend(parse_text_with_code(remaining))
90
 
91
- return sections
92
-
93
-
94
- def parse_text_with_code(text):
95
- """Helper function to parse text containing code blocks"""
96
- sections = []
97
  code_pattern = r'```(\w+)?\n(.*?)```'
98
- code_blocks = list(re.finditer(code_pattern, text, re.DOTALL))
99
-
100
- if not code_blocks:
101
- return [{'type': 'text', 'content': text}]
102
 
103
- text_pos = 0
104
- for code_match in code_blocks:
105
- # Add text before code
106
- pre_code_text = text[text_pos:code_match.start()].strip()
107
- if pre_code_text:
108
- sections.append({
109
- 'type': 'text',
110
- 'content': pre_code_text
111
- })
112
-
113
- # Add code block
114
- language = code_match.group(1) or 'python'
115
- code_content = code_match.group(2).strip()
116
- sections.append({
117
- 'type': 'code',
118
- 'language': language,
119
- 'content': code_content
120
- })
121
-
122
- text_pos = code_match.end()
123
 
124
- # Add remaining text
125
- remaining_text = text[text_pos:].strip()
126
- if remaining_text:
127
- sections.append({
128
- 'type': 'text',
129
- 'content': remaining_text
130
- })
131
-
132
- return sections
133
-
134
 
135
- def format_sections_to_html(sections):
136
  """
137
- Convert parsed sections to rich HTML with collapsible elements
138
- This approach works reliably with Gradio 5's HTML component
139
  """
140
- html_parts = []
141
-
142
- # Add JavaScript for interactivity
143
- html_parts.append("""
144
- <script>
145
- function copyCode(elementId) {
146
- const codeElement = document.getElementById(elementId);
147
- const code = codeElement.textContent;
148
- navigator.clipboard.writeText(code).then(() => {
149
- // Show temporary success message
150
- const btn = event.target;
151
- const originalText = btn.textContent;
152
- btn.textContent = '✅ Copied!';
153
- setTimeout(() => { btn.textContent = originalText; }, 2000);
154
- }).catch(err => {
155
- console.error('Failed to copy:', err);
156
- alert('Failed to copy code');
157
- });
158
- }
159
 
160
- function downloadCode(elementId, language) {
161
- const codeElement = document.getElementById(elementId);
162
- const code = codeElement.textContent;
163
 
164
- const extensions = {
165
- 'python': 'py', 'javascript': 'js', 'typescript': 'ts',
166
- 'html': 'html', 'css': 'css', 'java': 'java',
167
- 'cpp': 'cpp', 'c': 'c', 'ruby': 'rb',
168
- 'go': 'go', 'rust': 'rs', 'swift': 'swift',
169
- 'kotlin': 'kt', 'plaintext': 'txt'
170
- };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
- const ext = extensions[language.toLowerCase()] || 'txt';
173
- const filename = `code_snippet.${ext}`;
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- const blob = new Blob([code], { type: 'text/plain' });
176
- const url = window.URL.createObjectURL(blob);
177
- const a = document.createElement('a');
178
- a.href = url;
179
- a.download = filename;
180
- document.body.appendChild(a);
181
- a.click();
182
- document.body.removeChild(a);
183
- window.URL.revokeObjectURL(url);
184
- }
185
- </script>
186
- """)
187
-
188
- for i, section in enumerate(sections):
189
- if section['type'] == 'thinking':
190
- # Collapsible thinking section
191
- html_parts.append(f"""
192
- <details class="thinking-section" style="margin: 15px 0; border: 2px solid #f39c12; border-radius: 8px; background-color: #fff9e6;">
193
- <summary style="padding: 12px; cursor: pointer; font-weight: bold; color: #d68910; user-select: none;">
194
- 🤔 Thinking Process (Click to expand)
195
- </summary>
196
- <div style="padding: 15px; border-top: 1px solid #f39c12; background-color: #fffef7; white-space: pre-wrap; font-family: 'Courier New', monospace; font-size: 13px; color: #333; line-height: 1.6; max-height: 500px; overflow-y: auto;">
197
- {section['content']}
198
- </div>
199
- </details>
200
- """)
201
 
202
- elif section['type'] == 'code':
203
- # Code block with copy/download buttons
204
- code_id = f"code-{i}"
205
- # Escape HTML in code
206
- escaped_code = section['content'].replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
207
-
208
- html_parts.append(f"""
209
- <details class="code-section" open style="margin: 15px 0; border: 2px solid #3498db; border-radius: 8px; background-color: #e8f4fd;">
210
- <summary style="padding: 12px; cursor: pointer; font-weight: bold; color: #2874a6; user-select: none;">
211
- 💻 Code ({section['language']}) - Click to collapse
212
- </summary>
213
- <div style="position: relative; padding: 0;">
214
- <div style="position: absolute; top: 10px; right: 10px; z-index: 10;">
215
- <button onclick="copyCode('{code_id}')" style="padding: 6px 12px; margin-right: 5px; background-color: #3498db; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 12px;">
216
- 📋 Copy
217
- </button>
218
- <button onclick="downloadCode('{code_id}', '{section['language']}')" style="padding: 6px 12px; background-color: #27ae60; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 12px;">
219
- ⬇️ Download
220
- </button>
221
- </div>
222
- <pre id="{code_id}" style="margin: 0; padding: 40px 15px 15px 15px; background-color: #f8f9fa; border-top: 1px solid #3498db; overflow-x: auto; font-family: 'Courier New', monospace; font-size: 13px; line-height: 1.5;"><code class="language-{section['language']}">{escaped_code}</code></pre>
223
  </div>
224
- </details>
225
- """)
226
-
227
- else: # text
228
- # Regular text output with markdown-style rendering
229
- # Convert markdown to HTML
230
- text_html = section['content']
231
- # Basic markdown conversions
232
- text_html = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', text_html)
233
- text_html = re.sub(r'\*(.*?)\*', r'<em>\1</em>', text_html)
234
- text_html = re.sub(r'`(.*?)`', r'<code style="background-color: #f4f4f4; padding: 2px 5px; border-radius: 3px;">\1</code>', text_html)
235
-
236
- html_parts.append(f"""
237
- <div class="text-section" style="margin: 15px 0; padding: 15px; border: 1px solid #bdc3c7; border-radius: 8px; background-color: #ffffff; white-space: pre-wrap; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; font-size: 14px; line-height: 1.8; color: #2c3e50;">
238
- {text_html}
239
  </div>
240
- """)
241
-
242
- return "\n".join(html_parts)
243
-
 
 
 
244
 
245
  # Initialize model
246
- print("Initializing VibeThinker-1.5B with vLLM...")
247
- model = VibeThinkerVLLM()
248
 
249
-
250
- def generate_response(prompt, temperature, max_tokens, top_p):
251
- """Generate and return formatted HTML response"""
252
  if not prompt.strip():
253
- return "<div style='color: #e74c3c; padding: 20px; text-align: center;'>⚠️ Please enter a question.</div>"
254
 
255
- try:
256
- # Show generating message
257
- yield "<div style='text-align: center; padding: 40px; color: #3498db;'><h3>🤖 Generating response...</h3><p>This may take a moment...</p></div>"
258
-
259
- # Generate raw response
260
- raw_response = model.infer_text(
261
- prompt=prompt,
262
- temperature=temperature,
263
- max_tokens=max_tokens,
264
- top_p=top_p
265
- )
266
-
267
- # Parse the response
268
- sections = parse_model_output(raw_response)
269
-
270
- # Convert to HTML
271
- html_output = format_sections_to_html(sections)
272
-
273
- yield html_output
274
 
275
- except Exception as e:
276
- error_html = f"""
277
- <div style='color: #e74c3c; padding: 20px; border: 2px solid #e74c3c; border-radius: 8px; background-color: #fadbd8; margin: 15px 0;'>
278
- <h3>❌ Error</h3>
279
- <p><strong>{str(e)}</strong></p>
280
- <p>Please try again or adjust the parameters.</p>
281
- </div>
282
- """
283
- yield error_html
284
-
285
-
286
- # Custom theme for Gradio 5
287
- theme = gr.themes.Soft(
288
- primary_hue="blue",
289
- secondary_hue="purple",
290
- neutral_hue="slate",
291
- font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
292
- ).set(
293
- button_primary_background_fill="*primary_600",
294
- button_primary_background_fill_hover="*primary_700",
295
- block_label_text_weight="600",
296
- block_title_text_weight="700",
297
- )
298
-
299
 
300
- # Gradio 5 UI
301
  with gr.Blocks(
302
- title="VibeThinker-1.5B Advanced",
303
- theme=theme,
304
- fill_height=False,
 
 
 
 
 
 
305
  ) as demo:
306
-
307
  gr.Markdown("""
308
- # 🧠 VibeThinker-1.5B: Advanced Reasoning Interface
309
 
310
- ** Powered by vLLM + XFormers** for 10x faster inference on T4 GPU!
311
 
312
- ### Features:
313
- - 🤔 **Collapsible Thinking Sections** - Explore the model's reasoning process
314
- - 💻 **Interactive Code Blocks** - Copy or download code with one click
315
- - 📝 **Clean Formatted Output** - Beautiful rendering for all content types
316
 
317
- **Best for:** Competitive math problems and algorithm coding challenges
318
-
319
- [GitHub](https://github.com/WeiboAI/VibeThinker) | [Model](https://huggingface.co/WeiboAI/VibeThinker-1.5B) | [Paper](https://huggingface.co/papers/2511.06221)
320
  """)
321
 
322
  with gr.Row():
323
  with gr.Column(scale=1):
324
  prompt_input = gr.Textbox(
325
- label="💬 Your Question",
326
- placeholder="Ask a math problem or coding challenge (English works best)...",
327
- lines=6,
328
- max_lines=15
329
  )
330
 
331
  with gr.Accordion("⚙️ Advanced Settings", open=False):
332
  temperature_slider = gr.Slider(
333
- minimum=0.1,
334
- maximum=1.5,
335
  value=0.6,
336
  step=0.1,
337
- label="🌡️ Temperature",
338
- info="0.6 or 1.0 recommended"
339
  )
340
-
341
  max_tokens_slider = gr.Slider(
342
- minimum=512,
343
- maximum=16384, # Reduced for T4 stability
344
  value=8192,
 
 
 
 
 
 
 
345
  step=512,
346
- label="📏 Max Tokens",
347
- info="Model supports up to 16,384 tokens (T4 optimized)"
348
  )
349
 
350
- top_p_slider = gr.Slider(
351
- minimum=0.1,
352
- maximum=1.0,
353
- value=0.95,
354
- step=0.05,
355
- label="🎯 Top P",
356
- info="Nucleus sampling parameter"
357
- )
358
 
359
- with gr.Row():
360
- submit_btn = gr.Button(
361
- "🚀 Generate Solution",
362
- variant="primary",
363
- scale=2
364
- )
365
- clear_btn = gr.Button(
366
- "🗑️ Clear",
367
- variant="secondary",
368
- scale=1
369
- )
370
 
371
- with gr.Column(scale=1):
372
- # Output area using HTML component
373
- output_html = gr.HTML(
374
- value="""
375
- <div style='text-align: center; padding: 60px; color: #7f8c8d;'>
376
- <h3>👋 Ready to solve problems!</h3>
377
- <p>Enter your question and click Generate Solution</p>
378
- </div>
379
- """
380
- )
381
 
382
- # Example problems
383
- gr.Examples(
384
- examples=[
385
- ["Make me a single page HTML application that takes a color and outputs a color theme", 0.6, 16384, 0.95],
386
- ["Solve: Find the number of positive integers n ≤ 1000 such that n^2 + n + 41 is prime.", 0.6, 12288, 0.95],
387
- ["Write an efficient Python implementation of the Sieve of Eratosthenes algorithm.", 0.6, 8192, 0.95],
388
- ["Prove using mathematical induction that 1 + 2 + 3 + ... + n = n(n+1)/2", 0.6, 8192, 0.95],
389
- ],
390
- inputs=[prompt_input, temperature_slider, max_tokens_slider, top_p_slider],
391
- label="📚 Example Problems",
392
- examples_per_page=4
393
- )
394
-
395
- gr.Markdown("""
396
- ---
397
- ### 📊 Performance Highlights:
398
-
399
- | Benchmark | VibeThinker-1.5B | DeepSeek R1 (671B) | Advantage |
400
- |-----------|------------------|---------------------|-----------|
401
- | **AIME24** | **80.3** ✨ | 79.8 | 400× smaller! |
402
- | **AIME25** | **74.4** ✨ | 70.0 | 400× smaller! |
403
- | **HMMT25** | **50.4** ✨ | 41.7 | 400× smaller! |
404
- | **Training Cost** | **$7,800** | $294,000+ | 40× cheaper! |
405
-
406
- 💡 **Powered by Spectrum-to-Signal Principle (SSP)** training framework
407
- """)
408
-
409
- # Event handlers
410
- def clear_interface():
411
- return "", """
412
- <div style='text-align: center; padding: 60px; color: #7f8c8d;'>
413
- <h3>👋 Ready to solve problems!</h3>
414
- <p>Enter your question and click Generate Solution</p>
415
- </div>
416
- """
417
-
418
- submit_btn.click(
419
- fn=generate_response,
420
- inputs=[prompt_input, temperature_slider, max_tokens_slider, top_p_slider],
421
- outputs=output_html,
422
- show_progress="full"
423
  )
424
 
425
  clear_btn.click(
426
- fn=clear_interface,
427
- inputs=[],
428
  outputs=[prompt_input, output_html]
429
  )
 
 
 
 
 
 
 
 
 
 
430
 
431
-
432
- # Launch with Gradio 5 optimizations
433
  if __name__ == "__main__":
434
- demo.queue(
435
- max_size=20,
436
- default_concurrency_limit=10
437
- )
438
- demo.launch(
439
- show_api=True,
440
- show_error=True,
441
- )
 
1
  import gradio as gr
2
+ import os
3
  import re
4
  from vllm import LLM, SamplingParams
 
 
5
 
6
+ # Force XFormers backend for T4 compatibility
7
+ os.environ['VLLM_ATTENTION_BACKEND'] = 'XFORMERS'
8
+ os.environ['VLLM_USE_TRITON_FLASH_ATTN'] = '0'
9
 
10
class VibeThinkerVLLM:
    """Wrapper around a vLLM engine that serves the VibeThinker-1.5B model.

    The engine is created eagerly in ``__init__`` with the conservative,
    T4-oriented settings below; ``generate_response`` then runs one prompt at
    a time and reports token usage alongside the generated text.
    """

    # Hugging Face model id passed to vLLM.
    MODEL_ID = "WeiboAI/VibeThinker-1.5B"
    # Context window configured for the engine (reduced from 40960 for T4
    # stability); also used as the upper bound for ``max_tokens``.
    MAX_MODEL_LEN = 16384

    def __init__(self):
        self.model = None
        self.load_model()

    def load_model(self):
        """Load VibeThinker model with vLLM (T4-compatible settings).

        Raises:
            Exception: whatever vLLM raises while building the engine; the
                error is printed and then re-raised unchanged.
        """
        try:
            self.model = LLM(
                model=self.MODEL_ID,
                dtype="float16",  # Use float16 instead of bfloat16 for T4
                gpu_memory_utilization=0.85,
                max_model_len=self.MAX_MODEL_LEN,
                enforce_eager=True,  # Disable CUDA graphs for T4
                disable_custom_all_reduce=True,  # Avoid custom kernels
                enable_prefix_caching=False,  # Disable for stability
                max_num_seqs=1,  # Process one sequence at a time
                trust_remote_code=True,
            )
            print(" vLLM model loaded successfully with T4-compatible settings")
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            raise

    @staticmethod
    def _build_prompt(prompt, max_thinking_tokens):
        """Return the instruction-wrapped prompt that is sent to the model."""
        return (
            "You are a competitive programming assistant. "
            "Solve the following problem efficiently.\n"
            "\n"
            "Problem:\n"
            f"{prompt}\n"
            "\n"
            "Think step by step, but be concise. Limit your reasoning to the "
            f"most important steps (max {max_thinking_tokens} tokens for thinking). "
            "Then provide your solution."
        )

    def generate_response(self, prompt, temperature=0.6, max_tokens=8192, max_thinking_tokens=4096):
        """Generate a completion for ``prompt`` and report token usage.

        Args:
            prompt: Problem statement supplied by the user.
            temperature: Sampling temperature.
            max_tokens: Hard cap on generated tokens (thinking + answer);
                clamped to ``1..MAX_MODEL_LEN``.
            max_thinking_tokens: Advisory reasoning budget. It is only
                mentioned in the prompt text; nothing in the sampling
                parameters enforces it.

        Returns:
            ``(text, prompt_tokens, completion_tokens)``. Every failure path
            returns a human-readable message with both counts set to 0, so
            callers can treat ``(0, 0)`` as "no generation happened".
        """
        if self.model is None:
            return "Model not loaded!", 0, 0

        try:
            # UI sliders may deliver floats; vLLM expects an integer budget.
            token_budget = max(1, min(int(max_tokens), self.MAX_MODEL_LEN))

            sampling_params = SamplingParams(
                temperature=temperature,
                top_p=0.95,
                top_k=-1,
                max_tokens=token_budget,
                stop=None,  # Let model decide when to stop
            )

            formatted_prompt = self._build_prompt(prompt, int(max_thinking_tokens))
            outputs = self.model.generate([formatted_prompt], sampling_params)

            # Guard both levels: no request output, or a request output
            # that carries no completions.
            if not outputs or not outputs[0].outputs:
                return "No output generated", 0, 0

            request_output = outputs[0]
            completion = request_output.outputs[0]
            prompt_tokens = len(request_output.prompt_token_ids or ())
            completion_tokens = len(completion.token_ids)
            return completion.text, prompt_tokens, completion_tokens

        except Exception as e:
            # Best-effort by design: the UI shows the message instead of
            # crashing, but the failure is also logged to the console.
            print(f"❌ Error during generation: {e}")
            return f"Error during generation: {str(e)}", 0, 0
82
 
83
_THINK_FLAGS = re.DOTALL | re.IGNORECASE
# Properly closed reasoning tags, tried in this order.
_TAGGED_THINKING = (
    re.compile(r'<think>(.*?)</think>', _THINK_FLAGS),
    re.compile(r'<thinking>(.*?)</thinking>', _THINK_FLAGS),
)
# Prose-style lead-in ("Let me think: ...") used when no tags are present.
_PROSE_THINKING = re.compile(
    r"(?:Let me think|Let's think|Thinking):(.*?)(?=\n\n[A-Z]|\n\nSolution:|\n\nAnswer:|\Z)",
    _THINK_FLAGS,
)
_OPEN_THINK_TAG = re.compile(r'<think(?:ing)?>', re.IGNORECASE)
_CLOSE_THINK_TAG = re.compile(r'</think(?:ing)?>', re.IGNORECASE)
# Lines containing any of these end the heuristic reasoning scan.
_SOLUTION_MARKERS = ('```', 'solution:', 'answer:', 'final answer', 'boxed{')
# Whole-word match so that "so" does not fire on "also", "solve", "reason".
_REASONING_WORD = re.compile(
    r'\b(?:steps?|first|then|next|so|therefore|because)\b', re.IGNORECASE
)
# Fenced code block; the language tag may contain characters such as + or #.
_CODE_FENCE = re.compile(r'```([\w+#.-]*)[ \t]*\r?\n(.*?)```', re.DOTALL)


def _split_thinking(text):
    """Return ``(thinking, remaining_text)`` using explicit reasoning markers."""
    for pattern in _TAGGED_THINKING:
        match = pattern.search(text)
        if match:
            return match.group(1).strip(), pattern.sub('', text)

    # Closing tag without a matching opener: everything before the closing
    # tag is treated as reasoning.
    closing = _CLOSE_THINK_TAG.search(text)
    if closing:
        reasoning = _OPEN_THINK_TAG.sub('', text[:closing.start()])
        return reasoning.strip(), text[closing.end():]

    # Opening tag that never closes (e.g. generation hit the token limit
    # mid-thought): everything after it is reasoning.
    opening = _OPEN_THINK_TAG.search(text)
    if opening:
        return text[opening.end():].strip(), text[:opening.start()]

    match = _PROSE_THINKING.search(text)
    if match:
        return match.group(1).strip(), _PROSE_THINKING.sub('', text)
    return "", text


def _split_heuristic_reasoning(text):
    """Return ``(thinking, remaining_text)`` by spotting reasoning-like lines.

    Scans lines up to the first solution/code marker and collects those that
    contain a reasoning keyword. Only when more than three such lines are
    found are they moved (by line index, never by substring replacement) out
    of the text.
    """
    lines = text.split('\n')
    picked = []
    for index, line in enumerate(lines):
        lowered = line.lower()
        if any(marker in lowered for marker in _SOLUTION_MARKERS):
            break
        if _REASONING_WORD.search(lowered):
            picked.append(index)

    if len(picked) <= 3:
        return "", text

    picked_set = set(picked)
    thinking = '\n'.join(lines[i] for i in picked)
    remaining = '\n'.join(
        line for i, line in enumerate(lines) if i not in picked_set
    )
    return thinking, remaining


def _extract_boxed(text):
    """Return the content of the first non-empty, balanced ``\\boxed{...}``.

    Braces are counted so nested LaTeX such as ``\\boxed{\\frac{1}{2}}`` is
    returned whole. Returns ``None`` when no such expression exists.
    """
    marker = '\\boxed{'
    search_from = 0
    while True:
        start = text.find(marker, search_from)
        if start == -1:
            return None
        begin = start + len(marker)
        depth = 1
        for pos in range(begin, len(text)):
            char = text[pos]
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    if pos > begin:
                        return text[begin:pos]
                    break  # empty \boxed{}: look for a later one
        # Unbalanced or empty: continue with the next occurrence.
        search_from = begin


def parse_model_output(text):
    """
    Parse model output to separate thinking and final answer

    Args:
        text: Raw text generated by the model (``None`` is treated as empty).

    Returns: (thinking_content, answer_content, code_blocks)
        thinking_content: extracted reasoning, or "" when none was detected.
        answer_content: "**Final Answer:** <boxed>" when a \\boxed{...}
            expression is present, otherwise the remaining text stripped.
        code_blocks: list of ``(language, code)`` tuples for every fenced
            block in the non-thinking text; language is "" when omitted.
    """
    text = text or ""

    thinking_content, text = _split_thinking(text)

    # If no explicit thinking markers, try to detect a reasoning section
    if not thinking_content:
        thinking_content, text = _split_heuristic_reasoning(text)

    # Extract code blocks
    code_blocks = _CODE_FENCE.findall(text)

    # Extract final answer (boxed or explicit)
    boxed = _extract_boxed(text)
    if boxed is not None:
        answer_content = f"**Final Answer:** {boxed}"
    else:
        # Just use remaining text as answer
        answer_content = text.strip()

    return thinking_content, answer_content, code_blocks
 
 
 
 
 
 
 
 
 
135
 
136
def _stat_card_html(label, value):
    """Return one translucent card of the stats grid (label over a number)."""
    return (
        '<div style="background: rgba(255,255,255,0.2); padding: 12px; border-radius: 8px;">'
        f'<div style="opacity: 0.9; font-size: 12px; margin-bottom: 4px;">{label}</div>'
        f'<div style="font-size: 20px; font-weight: bold;">{value:,}</div>'
        '</div>'
    )


def _code_card_html(language, code):
    """Return one dark code card with a header and copy button.

    ``language`` and ``code`` must already be HTML-escaped.
    """
    return (
        '<div style="margin-bottom: 16px; background: #1e1e1e; border-radius: 12px; overflow: hidden; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">'
        '<div style="background: #2d2d2d; padding: 12px 20px; color: #ffffff; font-weight: 600; font-size: 13px; display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #3d3d3d;">'
        f'<span>{language}</span>'
        '<button onclick="navigator.clipboard.writeText(this.parentElement.nextElementSibling.textContent)" '
        'style="background: #4CAF50; color: white; border: none; padding: 6px 14px; border-radius: 6px; cursor: pointer; font-size: 12px; font-weight: 500; transition: background 0.2s;" '
        "onmouseover=\"this.style.background='#45a049'\" "
        "onmouseout=\"this.style.background='#4CAF50'\">"
        '📋 Copy'
        '</button>'
        '</div>'
        '<pre style="margin: 0; padding: 20px; color: #d4d4d4; overflow-x: auto; font-family: \'SF Mono\', Monaco, Consolas, monospace; font-size: 14px; line-height: 1.6;">'
        f'<code>{code}</code></pre>'
        '</div>'
    )


def format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_tokens):
    """
    Format output as styled HTML with good contrast and modern design

    All model-produced text (reasoning, answer, code and language tags) is
    HTML-escaped before interpolation so that characters such as ``<`` and
    ``&`` render literally instead of being parsed as markup.

    Args:
        thinking: Reasoning text; the collapsible section is omitted when empty.
        answer: Final answer text shown in the "Final Solution" card.
        code_blocks: Iterable of ``(language, code)`` pairs; the code section
            is omitted when empty. An empty language is shown as "code".
        prompt_tokens: Number of prompt tokens.
        completion_tokens: Number of generated tokens.

    Returns:
        A single HTML string.
    """
    # Function-scope import keeps this block self-contained without touching
    # the file's import section.
    from html import escape

    thinking = thinking or ""
    answer = answer or ""

    # Calculate total and thinking token estimates
    total_tokens = prompt_tokens + completion_tokens
    thinking_tokens_est = int(len(thinking.split()) * 1.3)  # Rough estimate

    stats_section = (
        '<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; border-radius: 12px; margin-bottom: 24px; color: white; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">'
        '<h3 style="margin: 0 0 12px 0; font-size: 18px; font-weight: 600;">📊 Generation Stats</h3>'
        '<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 12px; font-size: 14px;">'
        + _stat_card_html("Prompt Tokens", prompt_tokens)
        + _stat_card_html("Completion Tokens", completion_tokens)
        + _stat_card_html("Est. Thinking Tokens", thinking_tokens_est)
        + _stat_card_html("Total Tokens", total_tokens)
        + '</div>'
        '</div>'
    )

    thinking_section = ""
    if thinking:
        thinking_section = (
            '<details style="background: #f8f9fa; border: 2px solid #e9ecef; border-radius: 12px; padding: 20px; margin-bottom: 24px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">'
            '<summary style="cursor: pointer; font-weight: 600; font-size: 16px; color: #495057; user-select: none; display: flex; align-items: center; gap: 8px;">'
            '<span style="font-size: 20px;">🧠</span>'
            f'<span>Reasoning Process ({thinking_tokens_est:,} tokens)</span>'
            '<span style="margin-left: auto; font-size: 12px; color: #6c757d;">Click to expand/collapse</span>'
            '</summary>'
            '<div style="margin-top: 16px; padding-top: 16px; border-top: 1px solid #dee2e6; color: #212529; line-height: 1.7; white-space: pre-wrap; font-size: 14px; font-family: \'SF Mono\', Monaco, Consolas, monospace; background: #ffffff; padding: 16px; border-radius: 8px;">'
            f'{escape(thinking)}'
            '</div>'
            '</details>'
        )

    answer_section = (
        '<div style="background: #ffffff; border: 2px solid #28a745; border-radius: 12px; padding: 24px; margin-bottom: 24px; box-shadow: 0 2px 4px rgba(40,167,69,0.1);">'
        '<h3 style="margin: 0 0 16px 0; color: #28a745; font-size: 18px; font-weight: 600; display: flex; align-items: center; gap: 8px;">'
        '<span style="font-size: 22px;">✅</span> Final Solution'
        '</h3>'
        '<div style="color: #212529; line-height: 1.8; font-size: 15px; white-space: pre-wrap;">'
        f'{escape(answer)}'
        '</div>'
        '</div>'
    )

    code_section = ""
    if code_blocks:
        cards = "".join(
            _code_card_html(escape(lang) if lang else "code", escape(code.strip()))
            for lang, code in code_blocks
        )
        code_section = (
            '<div style="margin-top: 24px;">'
            '<h3 style="color: #1a1a1a; font-size: 18px; font-weight: 600; margin-bottom: 16px; display: flex; align-items: center; gap: 8px;">'
            '<span style="font-size: 22px;">💻</span> Code'
            '</h3>'
            + cards
            + '</div>'
        )

    return (
        '<div style="font-family: \'Segoe UI\', Tahoma, Geneva, Verdana, sans-serif; max-width: 100%; margin: 0 auto; background: #ffffff; color: #1a1a1a;">\n'
        '<!-- Token Stats -->\n'
        f'{stats_section}\n'
        '<!-- Thinking Section (Collapsible) -->\n'
        f'{thinking_section}\n'
        '<!-- Answer Section -->\n'
        f'{answer_section}\n'
        '<!-- Code Blocks -->\n'
        f'{code_section}\n'
        '</div>\n'
    )
220
 
221
  # Initialize model
222
+ print("🔄 Initializing VibeThinker with vLLM (T4-optimized)...")
223
+ vibe_model = VibeThinkerVLLM()
224
 
225
def generate_solution(prompt, temperature=0.6, max_tokens=8192, max_thinking_tokens=4096):
    """Generate and format solution

    Args:
        prompt: Problem statement from the UI; ``None`` or blank input yields
            a warning message instead of calling the model.
        temperature: Sampling temperature forwarded to the model wrapper.
        max_tokens: Total generation budget forwarded to the model wrapper.
        max_thinking_tokens: Reasoning budget forwarded to the model wrapper.

    Returns:
        An HTML string: the formatted solution, or a red warning/error
        paragraph when there is no prompt or generation failed.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    if not prompt or not prompt.strip():
        return "<p style='color: #dc3545; font-size: 16px; padding: 20px;'>⚠️ Please enter a problem to solve.</p>"

    # Generate response with thinking token limit
    response, prompt_tokens, completion_tokens = vibe_model.generate_response(
        prompt,
        temperature=temperature,
        max_tokens=max_tokens,
        max_thinking_tokens=max_thinking_tokens,
    )

    # generate_response reports every failure as (message, 0, 0); a real
    # generation always has a non-empty prompt. Show such messages as errors
    # rather than dressing them up as a "Final Solution".
    if prompt_tokens == 0 and completion_tokens == 0:
        return (
            "<p style='color: #dc3545; font-size: 16px; padding: 20px;'>"
            f"❌ {escape(str(response))}</p>"
        )

    # Parse output
    thinking, answer, code_blocks = parse_model_output(response)

    # Format as HTML
    return format_output_html(thinking, answer, code_blocks, prompt_tokens, completion_tokens)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
+ # Create Gradio interface
247
# Page layout: problem input and settings on the left, rendered solution on
# the right, followed by a few clickable example problems.
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
    css="""
    .gradio-container {
        max-width: 1400px !important;
    }
    """,
) as demo:
    gr.Markdown("""
    # 🧠 VibeThinker-1.5B Competitive Coding Assistant

    **Optimized for**: Competitive programming (LeetCode, Codeforces, AtCoder) and algorithm challenges

    **Powered by vLLM** | 🎯 **Best for**: Python algorithmic problems with clear input/output specs

    ⚠️ **Note**: This model is specialized for competitive programming, not general software development
    """)

    with gr.Row():
        # Left column: prompt, tunables and action buttons.
        with gr.Column(scale=1):
            prompt_input = gr.Textbox(
                label="💭 Your Coding Problem",
                placeholder="Example: Write a Python function to find the longest palindromic substring in a given string. Include test cases.",
                lines=8,
            )

            with gr.Accordion("⚙️ Advanced Settings", open=False):
                temperature_slider = gr.Slider(
                    label="🌡️ Temperature (0.6 recommended)",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.1,
                    value=0.6,
                )
                max_tokens_slider = gr.Slider(
                    label="📝 Max Total Tokens",
                    minimum=1024,
                    maximum=16384,
                    step=1024,
                    value=8192,
                )
                max_thinking_slider = gr.Slider(
                    label="🧠 Max Thinking Tokens (Lower = faster, less verbose)",
                    minimum=512,
                    maximum=8192,
                    step=512,
                    value=3072,
                )

                gr.Markdown("""
                **Tips:**
                - Lower thinking tokens (1024-2048) for faster, more direct solutions
                - Higher thinking tokens (4096-8192) for complex problems requiring detailed reasoning
                - Temperature 0.6 balances creativity and accuracy
                """)

            generate_btn = gr.Button("🚀 Generate Solution", variant="primary", size="lg")
            clear_btn = gr.Button("🗑️ Clear", size="sm")

        # Right column: the formatted solution.
        with gr.Column(scale=2):
            output_html = gr.HTML(label="Solution")

    # Button actions
    solution_inputs = [
        prompt_input,
        temperature_slider,
        max_tokens_slider,
        max_thinking_slider,
    ]
    generate_btn.click(fn=generate_solution, inputs=solution_inputs, outputs=output_html)

    # Clearing resets both the prompt box and the rendered output.
    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[prompt_input, output_html],
    )

    # Example problems
    gr.Examples(
        examples=[
            ["Write a function to find the maximum sum of a contiguous subarray (Kadane's Algorithm). Include edge cases and test with array [-2,1,-3,4,-1,2,1,-5,4]"],
            ["Implement a function to detect if a linked list has a cycle. Explain your approach and provide the solution."],
            ["Given an array of integers and a target sum, find two numbers that add up to the target. Optimize for time complexity."],
        ],
        inputs=prompt_input,
    )
334
 
 
 
335
  if __name__ == "__main__":
336
+ demo.launch()