Spestly committed on
Commit
9319628
·
verified ·
1 Parent(s): a21a221

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +193 -22
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
2
  import spaces
3
  from transformers import pipeline
4
  import torch
 
 
5
  from typing import List, Dict, Optional
6
 
7
  # Global variable to store pipelines
@@ -9,11 +11,107 @@ model_cache = {}
9
 
10
  # Available models
11
  AVAILABLE_MODELS = {
12
- "Apollo-1-4B": "NoemaResearch/Apollo-1-4B",
13
- "Apollo-1-8B": "NoemaResearch/Apollo-1-8B",
14
- "Apollo-1-2B": "NoemaResearch/Apollo-1-2B",
15
  }
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  @spaces.GPU
18
  def initialize_model(model_name):
19
  global model_cache
@@ -62,7 +160,9 @@ def generate_response(message, history, model_name, max_length=512, temperature=
62
  for user_msg, assistant_msg in history:
63
  messages.append({"role": "user", "content": user_msg})
64
  if assistant_msg:
65
- messages.append({"role": "assistant", "content": assistant_msg})
 
 
66
 
67
  # Add current message
68
  messages.append({"role": "user", "content": message})
@@ -116,7 +216,10 @@ def generate_response(message, history, model_name, max_length=512, temperature=
116
  if "Assistant:" in assistant_response:
117
  assistant_response = assistant_response.split("Assistant:")[-1].strip()
118
 
119
- return assistant_response
 
 
 
120
 
121
  except Exception as e:
122
  return f"Error generating response: {str(e)}"
@@ -134,7 +237,7 @@ def generate(
134
  API endpoint for LLM generation
135
 
136
  Args:
137
- model: Model name to use (Nous-1-2B, Nous-1-4B, or Nous-1-8B)
138
  user_input: Current user message/input
139
  history: JSON string of conversation history in format [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]
140
  temperature: Temperature for generation (0.1-2.0)
@@ -186,30 +289,72 @@ def generate(
186
 
187
  # Create the Gradio interface
188
  def create_interface():
189
- with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft()) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  gr.Markdown("""
191
- # 🚀 Apollo-1 Model Chat Interface
192
 
193
- Chat with the Apollo-1 models by Noema Research.
194
 
195
  **Available Models:**
196
- - Apollo-1-4B (4 billion parameters)
197
- - Apollo-1-8B (8 billion parameters)
198
- - Apollo-1-2B (2 billion parameters)
199
  """)
200
 
201
  with gr.Row():
202
  model_selector = gr.Dropdown(
203
  choices=list(AVAILABLE_MODELS.keys()),
204
- value="Nous-1-4B",
205
  label="Select Model",
206
  info="Choose which model to use for generation"
207
  )
208
 
209
  chatbot = gr.Chatbot(
210
- height=400,
211
  placeholder="Select a model and start chatting...",
212
- label="Chat"
 
 
213
  )
214
 
215
  msg = gr.Textbox(
@@ -248,6 +393,29 @@ def create_interface():
248
  info="Controls diversity via nucleus sampling"
249
  )
250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  # Event handlers
252
  def user_message(message, history):
253
  return "", history + [[message, None]]
@@ -282,19 +450,22 @@ def create_interface():
282
 
283
  model_selector.change(model_changed, model_selector, chatbot)
284
 
 
 
285
  gr.Markdown("""
286
  ---
287
 
288
- ### About the Apollo-1 Models
289
- **Apollo-1-2B**: 2 billion parameter model by Noema Research, designed for fast and quick inferencing
290
 
291
- **Apollo-1-4B**: 4 billion parameter model by Noema Research, optimised for efficient conversation and text generation
292
-
293
- **Apollo-1-8B**: 8 billion parameter model by Noema Research, offering enhanced capabilities and better performance for complex tasks
 
294
 
295
- All models are designed for conversational AI and support various text generation tasks. The 8B model provides more sophisticated responses but requires more computational resources.
 
296
 
297
- This Space uses ZeroGPU for efficient GPU allocation across both model sizes.
298
  """)
299
 
300
  return demo
 
2
  import spaces
3
  from transformers import pipeline
4
  import torch
5
+ import re
6
+ import json
7
  from typing import List, Dict, Optional
8
 
9
  # Global variable to store pipelines
 
11
 
12
  # Available models
13
  AVAILABLE_MODELS = {
14
+ "Daedalus-1-8B": "NoemaResearch/Daedalus-1-8B",
 
 
15
  }
16
 
17
def parse_thinking_tags(text):
    """Split *text* into ordered segments of plain text and <think> blocks.

    Returns a list of dicts, each ``{"type": "text" | "thinking", "content": str}``,
    in document order. Segments that are empty after stripping are dropped.
    """
    segments = []
    cursor = 0

    for hit in re.finditer(r'<think>(.*?)</think>', text, re.DOTALL):
        # Plain text that precedes this thinking block.
        leading = text[cursor:hit.start()].strip()
        if leading:
            segments.append({"type": "text", "content": leading})

        # The thinking block itself.
        inner = hit.group(1).strip()
        if inner:
            segments.append({"type": "thinking", "content": inner})

        cursor = hit.end()

    # Plain text after the final thinking block (or the whole input when
    # no tags were present).
    trailing = text[cursor:].strip()
    if trailing:
        segments.append({"type": "text", "content": trailing})

    return segments
44
+
45
def create_thinking_html(thinking_content, token_count=None):
    """Render a collapsible HTML block for a model "thinking" section.

    Args:
        thinking_content: Raw text extracted from a <think>...</think> tag.
        token_count: Optional pre-formatted label; when None a rough estimate
            (word count * 4, comma-grouped) is shown as the "budget".

    Returns:
        An HTML string: a clickable header that toggles a hidden, scrollable
        body containing the (escaped) thinking text.
    """
    # Local import so this fix is self-contained; `html` is also the name of
    # the local result variable below, hence the aliased form.
    from html import escape

    if token_count is None:
        # Rough estimate only -- displayed as a label, not an exact count.
        token_count = f"{len(thinking_content.split()) * 4:,}"

    # NOTE(review): hash() is process-salted, so ids differ between runs; they
    # only need to be unique within one rendered page. Identical contents
    # share an id -- presumably acceptable for a toggle target; confirm.
    thinking_id = f"thinking_{hash(thinking_content) % 10000}"

    # FIX: escape the model text before interpolating it into markup so a
    # stray '<', '&', or '</div>' in the thinking content cannot break the
    # surrounding HTML or inject script into the chat widget.
    safe_content = escape(thinking_content)

    html = f"""
    <div class="thinking-container" style="margin: 16px 0; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;">
        <div class="thinking-header" onclick="toggleThinking('{thinking_id}')"
             style="background: #3a3a3a; border-radius: 8px; padding: 12px 16px; cursor: pointer;
                    display: flex; align-items: center; gap: 10px; transition: all 0.2s ease;
                    border: 1px solid #4a4a4a;">
            <div style="width: 16px; height: 16px; background: #6366f1; border-radius: 50%; position: relative;">
                <div style="position: absolute; top: 50%; left: 50%; width: 6px; height: 6px;
                            background: white; border-radius: 50%; transform: translate(-50%, -50%);"></div>
            </div>
            <span style="font-weight: 500; color: #f0f0f0;">Thinking completed</span>
            <span style="color: #888; font-size: 14px; margin-left: auto;">{token_count} tokens budget</span>
            <div class="chevron" id="chevron_{thinking_id}"
                 style="width: 16px; height: 16px; border: 2px solid #888; border-left: none;
                        border-bottom: none; transform: rotate(45deg); transition: transform 0.2s ease;"></div>
        </div>
        <div class="thinking-content" id="{thinking_id}"
             style="background: #333; border-radius: 0 0 8px 8px; padding: 20px;
                    border: 1px solid #4a4a4a; border-top: none; display: none;
                    max-height: 400px; overflow-y: auto;">
            <div style="white-space: pre-wrap; font-family: 'SF Mono', 'Monaco', 'Inconsolata', monospace;
                        font-size: 14px; line-height: 1.5; color: #d0d0d0;">{safe_content}</div>
        </div>
    </div>

    <script>
    function toggleThinking(id) {{
        const content = document.getElementById(id);
        const chevron = document.getElementById('chevron_' + id);
        const header = content.previousElementSibling;

        if (content.style.display === 'none' || !content.style.display) {{
            content.style.display = 'block';
            chevron.style.transform = 'rotate(135deg)';
            header.style.background = '#404040';
        }} else {{
            content.style.display = 'none';
            chevron.style.transform = 'rotate(45deg)';
            header.style.background = '#3a3a3a';
        }}
    }}
    </script>
    """
    return html
96
+
97
def format_response_with_thinking(response_text):
    """Convert a raw model response into display HTML.

    <think>...</think> sections become collapsible blocks (via
    ``create_thinking_html``); remaining text is escaped and wrapped in a
    styled div. Blank/whitespace-only responses are returned unchanged.
    """
    # Local import: escaping is the only new dependency of this fix.
    from html import escape

    parts = parse_thinking_tags(response_text)

    # parse_thinking_tags only returns [] for blank input; pass it through.
    if not parts:
        return response_text

    formatted_html = ""
    for part in parts:
        if part["type"] == "thinking":
            formatted_html += create_thinking_html(part["content"])
        else:
            # FIX: escape ordinary model text before embedding it in markup.
            # The chat widget renders this string as raw HTML
            # (render_markdown=False), so an unescaped '<' or '&' in the
            # response would corrupt the page or inject script.
            formatted_html += (
                '<div style="margin: 16px 0; line-height: 1.6; color: #e0e0e0;">'
                f'{escape(part["content"])}</div>'
            )

    return formatted_html
114
+
115
  @spaces.GPU
116
  def initialize_model(model_name):
117
  global model_cache
 
160
  for user_msg, assistant_msg in history:
161
  messages.append({"role": "user", "content": user_msg})
162
  if assistant_msg:
163
+ # Parse HTML back to get original content if needed
164
+ clean_assistant_msg = re.sub(r'<[^>]+>', '', assistant_msg) if assistant_msg else ""
165
+ messages.append({"role": "assistant", "content": clean_assistant_msg})
166
 
167
  # Add current message
168
  messages.append({"role": "user", "content": message})
 
216
  if "Assistant:" in assistant_response:
217
  assistant_response = assistant_response.split("Assistant:")[-1].strip()
218
 
219
+ # Format the response with thinking blocks
220
+ formatted_response = format_response_with_thinking(assistant_response)
221
+
222
+ return formatted_response
223
 
224
  except Exception as e:
225
  return f"Error generating response: {str(e)}"
 
237
  API endpoint for LLM generation
238
 
239
  Args:
240
+ model: Model name to use (Daedalus-1-8B)
241
  user_input: Current user message/input
242
  history: JSON string of conversation history in format [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]
243
  temperature: Temperature for generation (0.1-2.0)
 
289
 
290
  # Create the Gradio interface
291
  def create_interface():
292
+ # Custom CSS for dark theme and thinking blocks
293
+ custom_css = """
294
+ .dark {
295
+ background: #1a1a1a !important;
296
+ }
297
+
298
+ .chatbot .message-wrap .message {
299
+ background: #2a2a2a !important;
300
+ border: 1px solid #3a3a3a !important;
301
+ }
302
+
303
+ .chatbot .message-wrap .message.user {
304
+ background: #2d3748 !important;
305
+ }
306
+
307
+ .chatbot .message-wrap .message.bot {
308
+ background: #2a2a2a !important;
309
+ }
310
+
311
+ .thinking-container {
312
+ margin: 16px 0;
313
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
314
+ }
315
+
316
+ .thinking-header {
317
+ background: #3a3a3a;
318
+ border-radius: 8px;
319
+ padding: 12px 16px;
320
+ cursor: pointer;
321
+ display: flex;
322
+ align-items: center;
323
+ gap: 10px;
324
+ transition: all 0.2s ease;
325
+ border: 1px solid #4a4a4a;
326
+ }
327
+
328
+ .thinking-header:hover {
329
+ background: #404040 !important;
330
+ border-color: #5a5a5a !important;
331
+ }
332
+ """
333
+
334
+ with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft(), css=custom_css) as demo:
335
  gr.Markdown("""
336
+ # 🚀 Daedalus-1 Model Chat Interface
337
 
338
+ Chat with the Daedalus-1 models by Noema Research. This interface will render thinking processes when the model outputs `<think></think>` tags.
339
 
340
  **Available Models:**
341
+ - Daedalus-1-8B (8 billion parameters)
 
 
342
  """)
343
 
344
  with gr.Row():
345
  model_selector = gr.Dropdown(
346
  choices=list(AVAILABLE_MODELS.keys()),
347
+ value="Daedalus-1-8B",
348
  label="Select Model",
349
  info="Choose which model to use for generation"
350
  )
351
 
352
  chatbot = gr.Chatbot(
353
+ height=500,
354
  placeholder="Select a model and start chatting...",
355
+ label="Chat",
356
+ render_markdown=False, # Disable markdown to allow custom HTML
357
+ elem_classes=["chatbot"]
358
  )
359
 
360
  msg = gr.Textbox(
 
393
  info="Controls diversity via nucleus sampling"
394
  )
395
 
396
+ # Add JavaScript for thinking block interactions
397
+ gr.HTML("""
398
+ <script>
399
+ function toggleThinking(id) {
400
+ const content = document.getElementById(id);
401
+ if (!content) return;
402
+
403
+ const chevron = document.getElementById('chevron_' + id);
404
+ const header = content.previousElementSibling;
405
+
406
+ if (content.style.display === 'none' || !content.style.display) {
407
+ content.style.display = 'block';
408
+ if (chevron) chevron.style.transform = 'rotate(135deg)';
409
+ if (header) header.style.background = '#404040';
410
+ } else {
411
+ content.style.display = 'none';
412
+ if (chevron) chevron.style.transform = 'rotate(45deg)';
413
+ if (header) header.style.background = '#3a3a3a';
414
+ }
415
+ }
416
+ </script>
417
+ """)
418
+
419
  # Event handlers
420
  def user_message(message, history):
421
  return "", history + [[message, None]]
 
450
 
451
  model_selector.change(model_changed, model_selector, chatbot)
452
 
453
+
454
+
455
  gr.Markdown("""
456
  ---
457
 
458
+ ### About the Daedalus-1 Models
 
459
 
460
+ **Daedalus-1-8B**: Daedalus-1-8B is an 8 billion parameter language model for code generation and reasoning, developed by **Noema Research**.
461
+ It is a finetuned derivative of [Seed-Coder-8B-Reasoning](https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Reasoning),
462
+ with enhancements for instruction following, structured code generation, and improved safety alignment.
463
+
464
 
465
+ The model is designed for conversational AI and supports various text generation tasks. When the model uses thinking tags (`<think></think>`),
466
+ this interface will render them as expandable sections similar to Claude's thinking process visualization.
467
 
468
+ This Space uses ZeroGPU for efficient GPU allocation.
469
  """)
470
 
471
  return demo