LeroyDyer committed on
Commit ebb7cff · verified · 1 Parent(s): 34fd794

Update app.py

Files changed (1): app.py +964 -58
app.py CHANGED
@@ -1,70 +1,976 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
-
-
- def respond(
-     message,
-     history: list[dict[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
-     hf_token: gr.OAuthToken,
- ):
-     """
-     For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-     """
-     client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-
-     messages = [{"role": "system", "content": system_message}]
-
-     messages.extend(history)
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
      ):
-         choices = message.choices
-         token = ""
-         if len(choices) and choices[0].delta.content:
-             token = choices[0].delta.content
-
-         response += token
-         yield response
-
-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- chatbot = gr.ChatInterface(
-     respond,
-     type="messages",
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
- with gr.Blocks() as demo:
-     with gr.Sidebar():
-         gr.LoginButton()
-     chatbot.render()
-
-
  if __name__ == "__main__":
-     demo.launch()
+ # File: enhanced_gradio_interface.py
+ import asyncio
+ import json
+ import os
+ import queue
+ import re
+ import threading
+ import time
+ import traceback
+ import uuid
+ from collections import defaultdict
+ from concurrent.futures import ThreadPoolExecutor
+ from dataclasses import dataclass
+ from queue import Queue, Empty
+ from threading import Lock, Event, Thread
+ from typing import Coroutine, Dict, List, Any, Optional, Callable
  import gradio as gr
+ from openai import AsyncOpenAI, OpenAI
+ import pyttsx3
+ from rich.console import Console
+
+
+ BASE_URL = "http://localhost:1234/v1"
+ BASE_API_KEY = "not-needed"
+ BASE_CLIENT = AsyncOpenAI(
+     base_url=BASE_URL,
+     api_key=BASE_API_KEY,
+ )  # Global async client
+ BASEMODEL_ID = "leroydyer/qwen/qwen3-0.6b-q4_k_m.gguf"  # Global state for selected model ID
+ CLIENT = OpenAI(
+     base_url=BASE_URL,
+     api_key=BASE_API_KEY,
+ )  # Global sync client
+ # --- Global Variables (if needed) ---
+ console = Console()
+ # --- Configuration ---
+ LOCAL_BASE_URL = "http://localhost:1234/v1"
+ LOCAL_API_KEY = "not-needed"
+ # HuggingFace Spaces configuration
+ HF_INFERENCE_URL = "https://api-inference.huggingface.co/models/"
+ HF_API_KEY = os.getenv("HF_API_KEY", "")
+
+ DEFAULT_TEMPERATURE = 0.7
+ DEFAULT_MAX_TOKENS = 5000
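+
+ # Note: localhost:1234 is assumed to be an OpenAI-compatible endpoint
+ # (LM Studio's default port); a sketch of pointing BASE_URL elsewhere via
+ # environment variables instead of hard-coding it:
+ #
+ #     BASE_URL = os.getenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
+ #     BASE_API_KEY = os.getenv("OPENAI_API_KEY", "not-needed")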
+
+ #############################################################
+ @dataclass
+ class LLMMessage:
+     role: str
+     content: str
+     message_id: str = None
+     conversation_id: str = None
+     timestamp: float = None
+     metadata: Dict[str, Any] = None
+
+     def __post_init__(self):
+         if self.message_id is None:
+             self.message_id = str(uuid.uuid4())
+         if self.timestamp is None:
+             self.timestamp = time.time()
+         if self.metadata is None:
+             self.metadata = {}
+
+ @dataclass
+ class LLMRequest:
+     message: LLMMessage
+     response_event: str = None
+     callback: Callable = None
+
+     def __post_init__(self):
+         if self.response_event is None:
+             self.response_event = f"llm_response_{self.message.message_id}"
+
+ @dataclass
+ class LLMResponse:
+     message: LLMMessage
+     request_id: str
+     success: bool = True
+     error: str = None
+
+ #############################################################
+ class EventManager:
+     def __init__(self):
+         self._handlers = defaultdict(list)
+         self._lock = threading.Lock()
+
+     def register(self, event: str, handler: Callable):
+         with self._lock:
+             self._handlers[event].append(handler)
+
+     def unregister(self, event: str, handler: Callable):
+         with self._lock:
+             if event in self._handlers and handler in self._handlers[event]:
+                 self._handlers[event].remove(handler)
+
+     def raise_event(self, event: str, data: Any):
+         with self._lock:
+             handlers = self._handlers[event][:]
+
+         for handler in handlers:
+             try:
+                 handler(data)
+             except Exception as e:
+                 console.log(f"Error in event handler for {event}: {e}", style="bold red")
+
+
+ EVENT_MANAGER = EventManager()
+ def RegisterEvent(event: str, handler: Callable):
+     EVENT_MANAGER.register(event, handler)
+
+ def RaiseEvent(event: str, data: Any):
+     EVENT_MANAGER.raise_event(event, data)
+
+ def UnregisterEvent(event: str, handler: Callable):
+     EVENT_MANAGER.unregister(event, handler)
+
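+ # A minimal usage sketch of the event bus above (illustrative only; "ping"
+ # and on_ping are made-up names): handlers subscribe by event name, and
+ # RaiseEvent fans the payload out to every registered handler synchronously.
+ #
+ #     def on_ping(data):
+ #         console.log(f"got ping: {data}")
+ #
+ #     RegisterEvent("ping", on_ping)
+ #     RaiseEvent("ping", {"ts": time.time()})
+ #     UnregisterEvent("ping", on_ping)
+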
+ #############################################################
+ @dataclass
+ class CanvasArtifact:
+     id: str
+     type: str  # 'code', 'diagram', 'text', 'image'
+     content: str
+     title: str
+     timestamp: float
+     metadata: Dict[str, Any] = None
+
+     def __post_init__(self):
+         if self.metadata is None:
+             self.metadata = {}
+
+ class LLMAgent:
+     """Main Agent Driver!
+     An agent for multiple messages at once: it has a message-queuing service as
+     well as a generator method for easy integration with console applications
+     as well as UIs!"""
+     def __init__(
+         self,
+         model_id: str = BASEMODEL_ID,
+         system_prompt: str = None,
+         max_queue_size: int = 1000,
+         max_retries: int = 3,
+         timeout: int = 30000,
+         max_tokens: int = 5000,
+         temperature: float = 0.3,
+         base_url: str = "http://localhost:1234/v1",
+         api_key: str = "not-needed",
+         generate_fn: Callable[[List[Dict[str, str]]], Coroutine[Any, Any, str]] = None,
      ):
+         self.model_id = model_id
+         self.system_prompt = system_prompt or "You are a helpful AI assistant."
+         self.request_queue = Queue(maxsize=max_queue_size)
+         self.max_retries = max_retries
+         self.timeout = timeout
+         self.is_running = False
+         self._stop_event = Event()
+         self.processing_thread = None
+         # Canvas artifacts
+         self.canvas_artifacts: Dict[str, List[CanvasArtifact]] = defaultdict(list)
+         self.max_canvas_artifacts = 1000
+         # Conversation tracking
+         self.conversations: Dict[str, List[LLMMessage]] = {}
+         self.max_history_length = 100
+         self._generate = generate_fn or self._default_generate
+         self.api_key = api_key
+         self.base_url = base_url
+         self.max_tokens = max_tokens
+         self.temperature = temperature
+         self.async_client = self.CreateClient(base_url, api_key)
+         self.current_conversation = "default"
+
+         # Active requests waiting for responses
+         self.pending_requests: Dict[str, LLMRequest] = {}
+         self.pending_requests_lock = Lock()
+
+         # Register internal event handlers
+         self._register_event_handlers()
+         # Speech synthesis
+         try:
+             self.tts_engine = pyttsx3.init()
+             self.setup_tts()
+             self.speech_enabled = True
+         except Exception as e:
+             console.log(f"[yellow]TTS not available: {e}[/yellow]")
+             self.speech_enabled = False
+
+         console.log("[bold green]🚀 Enhanced LLM Agent Initialized[/bold green]")
+
+         # Start the processing thread immediately
+         self.start()
+
+     def setup_tts(self):
+         """Configure text-to-speech engine"""
+         if hasattr(self, 'tts_engine'):
+             voices = self.tts_engine.getProperty('voices')
+             if voices:
+                 self.tts_engine.setProperty('voice', voices[0].id)
+             self.tts_engine.setProperty('rate', 150)
+             self.tts_engine.setProperty('volume', 0.8)
+
+     def speak(self, text: str):
+         """Convert text to speech in a non-blocking way"""
+         if not hasattr(self, 'speech_enabled') or not self.speech_enabled:
+             return
+
+         def _speak():
+             try:
+                 # Clean text for speech (remove markdown, code blocks)
+                 clean_text = re.sub(r'```.*?```', '', text, flags=re.DOTALL)
+                 clean_text = re.sub(r'`.*?`', '', clean_text)
+                 clean_text = clean_text.strip()
+                 if clean_text:
+                     self.tts_engine.say(clean_text)
+                 else:
+                     self.tts_engine.say(text)
+                 self.tts_engine.runAndWait()
+             except Exception as e:
+                 console.log(f"[red]TTS Error: {e}[/red]")
+
+         thread = threading.Thread(target=_speak, daemon=True)
+         thread.start()
+
+     async def _default_generate(self, messages: List[Dict[str, str]]) -> str:
+         """Default generate function if none provided"""
+         return await self.openai_generate(messages)
+
+     def create_interface(self):
+         """Create the full LCARS-styled interface without HuggingFace options"""
+         lcars_css = """
+         :root {
+             --lcars-orange: #FF9900;
+             --lcars-red: #FF0033;
+             --lcars-blue: #6699FF;
+             --lcars-purple: #CC99FF;
+             --lcars-pale-blue: #99CCFF;
+             --lcars-black: #000000;
+             --lcars-dark-blue: #3366CC;
+             --lcars-gray: #424242;
+             --lcars-yellow: #FFFF66;
+         }
+         body {
+             background: var(--lcars-black);
+             color: var(--lcars-orange);
+             font-family: 'Antonio', 'LCD', 'Courier New', monospace;
+             margin: 0;
+             padding: 0;
+         }
+         .gradio-container {
+             background: var(--lcars-black) !important;
+             min-height: 100vh;
+         }
+         .lcars-container {
+             background: var(--lcars-black);
+             border: 4px solid var(--lcars-orange);
+             border-radius: 0 30px 0 0;
+             min-height: 100vh;
+             padding: 20px;
+         }
+         .lcars-header {
+             background: linear-gradient(90deg, var(--lcars-red), var(--lcars-orange));
+             padding: 20px 40px;
+             border-radius: 0 60px 0 0;
+             margin: -20px -20px 20px -20px;
+             border-bottom: 6px solid var(--lcars-blue);
+         }
+         .lcars-title {
+             font-size: 2.5em;
+             font-weight: bold;
+             color: var(--lcars-black);
+             margin: 0;
+         }
+         .lcars-subtitle {
+             font-size: 1.2em;
+             color: var(--lcars-black);
+             margin: 10px 0 0 0;
+         }
+         .lcars-panel {
+             background: rgba(66, 66, 66, 0.9);
+             border: 2px solid var(--lcars-orange);
+             border-radius: 0 20px 0 20px;
+             padding: 15px;
+             margin-bottom: 15px;
+         }
+         .lcars-button {
+             background: var(--lcars-orange);
+             color: var(--lcars-black) !important;
+             border: none !important;
+             border-radius: 0 15px 0 15px !important;
+             padding: 10px 20px !important;
+             font-family: inherit !important;
+             font-weight: bold !important;
+             margin: 5px !important;
+         }
+         .lcars-button:hover {
+             background: var(--lcars-red) !important;
+         }
+         .lcars-input {
+             background: var(--lcars-black) !important;
+             color: var(--lcars-orange) !important;
+             border: 2px solid var(--lcars-blue) !important;
+             border-radius: 0 10px 0 10px !important;
+             padding: 10px !important;
+         }
+         .lcars-chatbot {
+             background: var(--lcars-black) !important;
+             border: 2px solid var(--lcars-purple) !important;
+             border-radius: 0 15px 0 15px !important;
+         }
+         .status-indicator {
+             display: inline-block;
+             width: 12px;
+             height: 12px;
+             border-radius: 50%;
+             background: var(--lcars-red);
+             margin-right: 8px;
+         }
+         .status-online {
+             background: var(--lcars-blue);
+             animation: pulse 2s infinite;
+         }
+         @keyframes pulse {
+             0% { opacity: 1; }
+             50% { opacity: 0.5; }
+             100% { opacity: 1; }
+         }
+         """
+         with gr.Blocks(css=lcars_css, theme=gr.themes.Default(), title="LCARS Terminal") as interface:
+             with gr.Column(elem_classes="lcars-container"):
+                 # Header
+                 with gr.Row(elem_classes="lcars-header"):
+                     gr.Markdown("""
+                     <div style="text-align: center; width: 100%;">
+                         <div class="lcars-title">🚀 LCARS TERMINAL</div>
+                         <div class="lcars-subtitle">STARFLEET AI DEVELOPMENT CONSOLE</div>
+                         <div style="margin-top: 10px;">
+                             <span class="status-indicator status-online"></span>
+                             <span style="color: var(--lcars-black); font-weight: bold;">SYSTEM ONLINE</span>
+                         </div>
+                     </div>
+                     """)
+                 # Main Content
+                 with gr.Row():
+                     # Left Sidebar
+                     with gr.Column(scale=1):
+                         # Configuration Panel
+                         with gr.Column(elem_classes="lcars-panel"):
+                             pass
+                         # Canvas Artifacts
+                         with gr.Column(elem_classes="lcars-panel"):
+                             gr.Markdown("""### 🎨 CANVAS ARTIFACTS""")
+                             artifact_display = gr.JSON(label="")
+                             with gr.Row():
+                                 refresh_artifacts_btn = gr.Button("🔄 Refresh", elem_classes="lcars-button")
+                                 clear_canvas_btn = gr.Button("🗑️ Clear Canvas", elem_classes="lcars-button")
+                     # Main Content Area
+                     with gr.Column(scale=2):
+                         # Code Canvas
+                         with gr.Accordion("💻 COLLABORATIVE CODE CANVAS", open=False):
+                             code_editor = gr.Code(interactive=True,
+                                 value="# Welcome to LCARS Collaborative Canvas\nprint('Hello, Starfleet!')",
+                                 language="python",
+                                 lines=15,
+                                 label=""
+                             )
+                             with gr.Row():
+                                 load_to_chat_btn = gr.Button("💬 Discuss Code", elem_classes="lcars-button")
+                                 analyze_btn = gr.Button("🔍 Analyze", elem_classes="lcars-button")
+                                 optimize_btn = gr.Button("⚡ Optimize", elem_classes="lcars-button")
+                         # Chat Interface
+                         with gr.Column(elem_classes="lcars-panel"):
+                             gr.Markdown("""### 💬 MISSION LOG""")
+                             chatbot = gr.Chatbot(label="", height=300)
+                             with gr.Row():
+                                 message_input = gr.Textbox(
+                                     placeholder="Enter your command or query...",
+                                     show_label=False,
+                                     lines=2,
+                                     scale=4
+                                 )
+                                 send_btn = gr.Button("🚀 SEND", elem_classes="lcars-button", scale=1)
+                             # Status
+                             with gr.Row():
+                                 status_display = gr.Textbox(
+                                     value="LCARS terminal operational. Awaiting commands.",
+                                     label="Status",
+                                     max_lines=2
+                                 )
+                                 with gr.Column(scale=0):
+                                     clear_chat_btn = gr.Button("🗑️ Clear Chat", elem_classes="lcars-button")
+                                     new_session_btn = gr.Button("🆕 New Session", elem_classes="lcars-button")
+
+             # --- Event handlers ---
+             async def process_message(message, history, speech_enabled=True):
+                 if not message.strip():
+                     return "", history, "Please enter a message", self.get_canvas_summary(self.current_conversation)
+                 history = history + [[message, None]]
+                 try:
+                     # chat_with_canvas folds the canvas context into the prompt
+                     response = await self.chat_with_canvas(
+                         message, self.current_conversation, include_canvas=True
+                     )
+                     history[-1][1] = response
+                     if speech_enabled and self.speech_enabled:
+                         self.speak(response)
+                     artifacts = self.get_canvas_summary(self.current_conversation)
+                     status = f"✅ Response received. Canvas artifacts: {len(artifacts)}"
+                     return "", history, status, artifacts
+                 except Exception as e:
+                     error_msg = f"❌ Error: {str(e)}"
+                     history[-1][1] = error_msg
+                     return "", history, error_msg, self.get_canvas_summary(self.current_conversation)
+
+             def get_artifacts():
+                 return self.get_canvas_summary(self.current_conversation)
+
+             def clear_canvas():
+                 self.clear_canvas(self.current_conversation)
+                 return [], "✅ Canvas cleared"
+
+             def clear_chat():
+                 self.clear_conversation(self.current_conversation)
+                 return [], "✅ Chat cleared"
+
+             def new_session():
+                 self.clear_conversation(self.current_conversation)
+                 self.clear_canvas(self.current_conversation)
+                 return [], "# New session started\nprint('Ready!')", "🆕 New session started", []
+
+             # Connect events
+             send_btn.click(process_message,
+                            inputs=[message_input, chatbot],
+                            outputs=[message_input, chatbot, status_display, artifact_display])
+             message_input.submit(process_message,
+                                  inputs=[message_input, chatbot],
+                                  outputs=[message_input, chatbot, status_display, artifact_display])
+             refresh_artifacts_btn.click(get_artifacts, outputs=artifact_display)
+             clear_canvas_btn.click(clear_canvas, outputs=[artifact_display, status_display])
+             clear_chat_btn.click(clear_chat, outputs=[chatbot, status_display])
+             new_session_btn.click(new_session, outputs=[chatbot, code_editor, status_display, artifact_display])
+         return interface
+
+     def _register_event_handlers(self):
+         """Register internal event handlers for response routing"""
+         RegisterEvent("llm_internal_response", self._handle_internal_response)
+
+     def _handle_internal_response(self, response: LLMResponse):
+         """Route responses to the appropriate request handlers"""
+         console.log(f"[bold cyan]Handling internal response for: {response.request_id}[/bold cyan]")
+
+         request = None
+         with self.pending_requests_lock:
+             if response.request_id in self.pending_requests:
+                 request = self.pending_requests[response.request_id]
+                 del self.pending_requests[response.request_id]
+                 console.log(f"Found pending request for: {response.request_id}")
+             else:
+                 console.log(f"No pending request found for: {response.request_id}", style="yellow")
+                 return
+
+         # Raise the specific response event
+         if request.response_event:
+             console.log(f"[bold green]Raising event: {request.response_event}[/bold green]")
+             RaiseEvent(request.response_event, response)
+
+         # Call callback if provided
+         if request.callback:
+             try:
+                 console.log(f"[bold yellow]Calling callback for: {response.request_id}[/bold yellow]")
+                 request.callback(response)
+             except Exception as e:
+                 console.log(f"Error in callback: {e}", style="bold red")
+
485
+ """Add message to conversation history"""
486
+ if conversation_id not in self.conversations:
487
+ self.conversations[conversation_id] = []
488
+
489
+ self.conversations[conversation_id].append(message)
490
+
491
+ # Trim history if too long
492
+ if len(self.conversations[conversation_id]) > self.max_history_length * 2:
493
+ self.conversations[conversation_id] = self.conversations[conversation_id][-(self.max_history_length * 2):]
494
+
495
+ def _build_messages_from_conversation(self, conversation_id: str, new_message: LLMMessage) -> List[Dict[str, str]]:
496
+ """Build message list from conversation history"""
497
+ messages = []
498
+
499
+ # Add system prompt
500
+ if self.system_prompt:
501
+ messages.append({"role": "system", "content": self.system_prompt})
502
+
503
+ # Add conversation history
504
+ if conversation_id in self.conversations:
505
+ for msg in self.conversations[conversation_id][-self.max_history_length:]:
506
+ messages.append({"role": msg.role, "content": msg.content})
507
+
508
+ # Add the new message
509
+ messages.append({"role": new_message.role, "content": new_message.content})
510
+
511
+ return messages
512
+
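+     # The list assembled above follows the OpenAI chat format; an illustrative
+     # example of its shape (values are made up):
+     #
+     #     [{"role": "system", "content": "You are a helpful AI assistant."},
+     #      {"role": "user", "content": "earlier turn"},
+     #      {"role": "assistant", "content": "earlier reply"},
+     #      {"role": "user", "content": "new message"}]
+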
+     def _process_llm_request(self, request: LLMRequest):
+         """Process a single LLM request"""
+         console.log(f"[bold green]Processing LLM request: {request.message.message_id}[/bold green]")
+         try:
+             # Build messages for LLM
+             messages = self._build_messages_from_conversation(
+                 request.message.conversation_id or "default",
+                 request.message
+             )
+
+             console.log(f"Calling LLM with {len(messages)} messages")
+
+             # Call LLM - use a sync call for thread compatibility
+             response_content = self._call_llm_sync(messages)
+
+             console.log(f"[bold green]LLM response received: {response_content[:50]}...[/bold green]")
+
+             # Create response message
+             response_message = LLMMessage(
+                 role="assistant",
+                 content=response_content,
+                 conversation_id=request.message.conversation_id,
+                 metadata={"request_id": request.message.message_id}
+             )
+
+             # Update conversation history
+             self._add_to_conversation_history(
+                 request.message.conversation_id or "default",
+                 request.message
+             )
+             self._add_to_conversation_history(
+                 request.message.conversation_id or "default",
+                 response_message
+             )
+
+             # Create and send response
+             response = LLMResponse(
+                 message=response_message,
+                 request_id=request.message.message_id,
+                 success=True
+             )
+
+             console.log(f"[bold blue]Sending internal response for: {request.message.message_id}[/bold blue]")
+             RaiseEvent("llm_internal_response", response)
+
+         except Exception as e:
+             console.log(f"[bold red]Error processing LLM request: {e}[/bold red]")
+             traceback.print_exc()
+             # Create error response
+             error_response = LLMResponse(
+                 message=LLMMessage(
+                     role="system",
+                     content=f"Error: {str(e)}",
+                     conversation_id=request.message.conversation_id
+                 ),
+                 request_id=request.message.message_id,
+                 success=False,
+                 error=str(e)
+             )
+
+             RaiseEvent("llm_internal_response", error_response)
+
+     def _call_llm_sync(self, messages: List[Dict[str, str]]) -> str:
+         """Sync call to the LLM with retry logic"""
+         console.log(f"Making LLM call to {self.model_id}")
+         for attempt in range(self.max_retries):
+             try:
+                 response = CLIENT.chat.completions.create(
+                     model=self.model_id,
+                     messages=messages,
+                     temperature=self.temperature,
+                     max_tokens=self.max_tokens
+                 )
+                 content = response.choices[0].message.content
+                 console.log(f"LLM call successful, response length: {len(content)}")
+                 return content
+             except Exception as e:
+                 console.log(f"LLM call attempt {attempt + 1} failed: {e}")
+                 if attempt == self.max_retries - 1:
+                     raise e
+                 # Wait before retry
+                 time.sleep(1.0)
+
+     def _process_queue(self):
+         """Main queue processing loop"""
+         console.log("[bold cyan]LLM Agent queue processor started[/bold cyan]")
+         while not self._stop_event.is_set():
+             try:
+                 request = self.request_queue.get(timeout=1.0)
+                 if request:
+                     console.log(f"Got request from queue: {request.message.message_id}")
+                     self._process_llm_request(request)
+                 self.request_queue.task_done()
+             except Empty:
+                 continue
+             except Exception as e:
+                 console.log(f"Error in queue processing: {e}", style="bold red")
+                 traceback.print_exc()
+         console.log("[bold cyan]LLM Agent queue processor stopped[/bold cyan]")
+
+     def send_message(
+         self,
+         content: str,
+         role: str = "user",
+         conversation_id: str = None,
+         response_event: str = None,
+         callback: Callable = None,
+         metadata: Dict = None
+     ) -> str:
+         """Send a message to the LLM and get the response via events"""
+         if not self.is_running:
+             raise RuntimeError("LLM Agent is not running. Call start() first.")
+
+         # Create message
+         message = LLMMessage(
+             role=role,
+             content=content,
+             conversation_id=conversation_id,
+             metadata=metadata or {}
+         )
+
+         # Create request
+         request = LLMRequest(
+             message=message,
+             response_event=response_event,
+             callback=callback
+         )
+
+         # Store in pending requests BEFORE adding to queue
+         with self.pending_requests_lock:
+             self.pending_requests[message.message_id] = request
+             console.log(f"Added to pending requests: {message.message_id}")
+
+         # Add to queue
+         try:
+             self.request_queue.put(request, timeout=5.0)
+             console.log(f"[bold magenta]Message queued: {message.message_id}, Content: {content[:50]}...[/bold magenta]")
+             return message.message_id
+         except queue.Full:
+             console.log("[bold red]Queue full, cannot send message[/bold red]")
+             with self.pending_requests_lock:
+                 if message.message_id in self.pending_requests:
+                     del self.pending_requests[message.message_id]
+             raise RuntimeError("LLM Agent queue is full")
+
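+     # Usage sketch for the fire-and-forget path (illustrative only): queue a
+     # message and receive the LLMResponse via a callback from the worker thread.
+     #
+     #     agent = LLMAgent()
+     #     def on_reply(resp: LLMResponse):
+     #         print(resp.message.content if resp.success else resp.error)
+     #     agent.send_message("Hello", callback=on_reply)
+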
+     async def chat(self, messages: List[Dict[str, str]]) -> str:
+         """
+         Async chat method that sends the message via the queue and returns the response string.
+         This is the main method you should use.
+         """
+         # Create future for the response
+         loop = asyncio.get_event_loop()
+         response_future = loop.create_future()
+
+         def chat_callback(response: LLMResponse):
+             """Callback when the LLM responds - thread-safe"""
+             console.log("[bold yellow]✓ CHAT CALLBACK TRIGGERED![/bold yellow]")
+
+             if not response_future.done():
+                 if response.success:
+                     content = response.message.content
+                     console.log(f"Callback received content: {content[:50]}...")
+                     # Schedule setting the future result on the main event loop
+                     loop.call_soon_threadsafe(response_future.set_result, content)
+                 else:
+                     console.log(f"Error in response: {response.error}")
+                     error_msg = f"❌ Error: {response.error}"
+                     loop.call_soon_threadsafe(response_future.set_result, error_msg)
+             else:
+                 console.log("[bold red]Future already done, ignoring callback[/bold red]")
+
+         console.log("Sending message to LLM agent...")
+
+         # Extract the actual message content from the messages list
+         user_message = ""
+         for msg in messages:
+             if msg.get("role") == "user":
+                 user_message = msg.get("content", "")
+                 break
+
+         if not user_message.strip():
+             return ""
+
+         # Send message with callback using the queue system
+         try:
+             message_id = self.send_message(
+                 content=user_message,
+                 conversation_id="default",
+                 callback=chat_callback
+             )
+
+             console.log(f"Message sent with ID: {message_id}, waiting for response...")
+
+             # Wait for the response and return it
+             try:
+                 response = await asyncio.wait_for(response_future, timeout=self.timeout)
+                 console.log(f"[bold green]✓ Chat complete! Response length: {len(response)}[/bold green]")
+                 return response
+
+             except asyncio.TimeoutError:
+                 console.log("[bold red]Response timeout[/bold red]")
+                 # Clean up the pending request
+                 with self.pending_requests_lock:
+                     if message_id in self.pending_requests:
+                         del self.pending_requests[message_id]
+                 return "❌ Response timeout - check if LLM server is running"
+
+         except Exception as e:
+             console.log(f"[bold red]Error sending message: {e}[/bold red]")
+             traceback.print_exc()
+             return f"❌ Error sending message: {e}"
+
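+     # Usage sketch for the awaitable path (illustrative only): chat() bridges
+     # the worker thread back to the caller's event loop through a Future.
+     #
+     #     async def demo():
+     #         agent = LLMAgent()
+     #         reply = await agent.chat([{"role": "user", "content": "Hi"}])
+     #         print(reply)
+     #     asyncio.run(demo())
+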
+     def start(self):
+         """Start the LLM agent"""
+         if not self.is_running:
+             self.is_running = True
+             self._stop_event.clear()
+             self.processing_thread = Thread(target=self._process_queue, daemon=True)
+             self.processing_thread.start()
+             console.log("[bold green]LLM Agent started[/bold green]")
+
+     def stop(self):
+         """Stop the LLM agent"""
+         console.log("Stopping LLM Agent...")
+         self._stop_event.set()
+         if self.processing_thread and self.processing_thread.is_alive():
+             self.processing_thread.join(timeout=10)
+         self.is_running = False
+         console.log("LLM Agent stopped")
+
+     def get_conversation_history(self, conversation_id: str = "default") -> List[LLMMessage]:
+         """Get conversation history"""
+         return self.conversations.get(conversation_id, [])[:]
+
+     def clear_conversation(self, conversation_id: str = "default"):
+         """Clear conversation history"""
+         if conversation_id in self.conversations:
+             del self.conversations[conversation_id]
+
+
+     async def _chat(self, messages: List[Dict[str, str]]) -> str:
+         return await self._generate(messages)
+
+     @staticmethod
+     async def openai_generate(messages: List[Dict[str, str]], max_tokens: int = 8096, temperature: float = 0.4, model: str = BASEMODEL_ID, tools=None) -> str:
+         """Static method for generating responses using the OpenAI API"""
+         try:
+             resp = await BASE_CLIENT.chat.completions.create(
+                 model=model,
+                 messages=messages,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 tools=tools
+             )
+             response_text = resp.choices[0].message.content or ""
+             return response_text
+         except Exception as e:
+             console.log(f"[bold red]Error in openai_generate: {e}[/bold red]")
+             return f"[LLM_Agent Error - openai_generate: {str(e)}]"
+
+     async def _call_(self, messages: List[Dict[str, str]]) -> str:
+         """Internal call method using the instance client"""
+         try:
+             resp = await self.async_client.chat.completions.create(
+                 model=self.model_id,
+                 messages=messages,
+                 temperature=self.temperature,
+                 max_tokens=self.max_tokens
+             )
+             response_text = resp.choices[0].message.content or ""
+             return response_text
+         except Exception as e:
+             console.log(f"[bold red]Error in _call_: {e}[/bold red]")
+             return f"[LLM_Agent Error - _call_: {str(e)}]"
+
+     @staticmethod
+     def CreateClient(base_url: str, api_key: str) -> AsyncOpenAI:
+         '''Create the async OpenAI client required for multitasking'''
+         return AsyncOpenAI(
+             base_url=base_url,
+             api_key=api_key
+         )
+
+     @staticmethod
+     async def fetch_available_models(base_url: str, api_key: str) -> List[str]:
+         """Fetch available models from the OpenAI API."""
+         try:
+             async_client = AsyncOpenAI(base_url=base_url, api_key=api_key)
+             models = await async_client.models.list()
+             model_choices = [model.id for model in models.data]
+             return model_choices
+         except Exception as e:
+             console.log(f"[bold red]LLM_Agent Error fetching models: {e}[/bold red]")
+             return ["LLM_Agent Error fetching models"]
+
+     def get_models(self) -> List[str]:
+         """Get available models using instance credentials"""
+         return asyncio.run(self.fetch_available_models(self.base_url, self.api_key))
+
+
+     def get_queue_size(self) -> int:
+         """Get current queue size"""
+         return self.request_queue.qsize()
+
+     def get_pending_requests_count(self) -> int:
+         """Get number of pending requests"""
+         with self.pending_requests_lock:
+             return len(self.pending_requests)
+
+     def get_status(self):
+         """Get agent status information"""
+         return str({
+             "is_running": self.is_running,
+             "queue_size": self.get_queue_size(),
+             "pending_requests": self.get_pending_requests_count(),
+             "conversations_count": len(self.conversations),
+             "model": self.model_id,
+             "BaseURL": self.base_url
+         })
+
+
+     def direct_chat(self, user_message: str, conversation_id: str = "default") -> str:
+         """
+         Send a message and get a response using a direct API call.
+         """
+         try:
+             # Create message object
+             message = LLMMessage(role="user", content=user_message, conversation_id=conversation_id)
+
+             # Build messages for LLM
+             messages = self._build_messages_from_conversation(conversation_id, message)
+             console.log(f"Calling LLM at {self.base_url} with {len(messages)} messages")
+
+             # Make the direct API call
+             response = CLIENT.chat.completions.create(
+                 model=self.model_id,
+                 messages=messages,
+                 temperature=self.temperature,
+                 max_tokens=self.max_tokens
+             )
+             response_content = response.choices[0].message.content
+             console.log(f"[bold green]LLM response received: {response_content[:50]}...[/bold green]")
+
+             # Update conversation history
+             self._add_to_conversation_history(conversation_id, message)
+             response_message = LLMMessage(role="assistant", content=response_content, conversation_id=conversation_id)
+             self._add_to_conversation_history(conversation_id, response_message)
+
+             return response_content
+
+         except Exception as e:
+             console.log(f"[bold red]Error in chat: {e}[/bold red]")
+             traceback.print_exc()
+             return f"❌ Error communicating with LLM: {str(e)}"
+
+
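+     # Usage sketch (illustrative only): direct_chat bypasses the queue and the
+     # event bus entirely, blocking the caller until the HTTP call returns.
+     #
+     #     agent = LLMAgent()
+     #     print(agent.direct_chat("Summarise the mission log."))
+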
+     # --- Canvas Methods ---
+     def add_artifact(self, conversation_id: str, artifact_type: str, content: str, title: str = "", metadata: Dict = None):
+         artifact = CanvasArtifact(
+             id=str(uuid.uuid4()),
+             type=artifact_type,
+             content=content,
+             title=title,
+             timestamp=time.time(),
+             metadata=metadata or {}
+         )
+         self.canvas_artifacts[conversation_id].append(artifact)
+
+     def get_canvas_artifacts(self, conversation_id: str = "default") -> List[CanvasArtifact]:
+         return self.canvas_artifacts.get(conversation_id, [])
+
+     def get_canvas_summary(self, conversation_id: str = "default") -> List[Dict[str, Any]]:
+         artifacts = self.get_canvas_artifacts(conversation_id)
+         return [{"id": a.id, "type": a.type, "title": a.title, "timestamp": a.timestamp} for a in artifacts]
+
+     def clear_canvas(self, conversation_id: str = "default"):
+         if conversation_id in self.canvas_artifacts:
+             self.canvas_artifacts[conversation_id] = []
+
+     def get_latest_code_artifact(self, conversation_id: str) -> Optional[str]:
+         """Get the most recent code artifact content"""
+         if conversation_id not in self.canvas_artifacts:
+             return None
+
+         for artifact in reversed(self.canvas_artifacts[conversation_id]):
+             if artifact.type == "code":
+                 return artifact.content
+         return None
+
+     def get_canvas_context(self, conversation_id: str) -> str:
+         """Get formatted canvas context for LLM prompts"""
+         if conversation_id not in self.canvas_artifacts or not self.canvas_artifacts[conversation_id]:
+             return ""
+
+         context_lines = ["\n=== COLLABORATIVE CANVAS ARTIFACTS ==="]
+         for artifact in self.canvas_artifacts[conversation_id][-10:]:  # Last 10 artifacts
+             context_lines.append(f"\n--- {artifact.title} [{artifact.type.upper()}] ---")
+             preview = artifact.content[:500] + "..." if len(artifact.content) > 500 else artifact.content
+             context_lines.append(preview)
+
+         return "\n".join(context_lines) + "\n=================================\n"
+
+     def get_artifact_by_id(self, conversation_id: str, artifact_id: str) -> Optional[CanvasArtifact]:
+         """Get a specific artifact by ID"""
+         if conversation_id not in self.canvas_artifacts:
+             return None
+
+         for artifact in self.canvas_artifacts[conversation_id]:
+             if artifact.id == artifact_id:
+                 return artifact
+         return None
+
+     def _extract_artifacts_to_canvas(self, response: str, conversation_id: str):
+         """Automatically extract code blocks and add them to the canvas"""
+         # Find all code blocks with optional language specification
+         code_blocks = re.findall(r'```(?:(\w+)\n)?(.*?)```', response, re.DOTALL)
+         for i, (lang, code_block) in enumerate(code_blocks):
+             if len(code_block.strip()) > 10:  # Only add substantial code blocks
+                 self.add_artifact(
+                     conversation_id,
+                     "code",
+                     code_block.strip(),
+                     f"code_snippet_{lang or 'unknown'}_{len(self.canvas_artifacts.get(conversation_id, [])) + 1}"
+                 )
+
937
+
938
+ async def chat_with_canvas(self, message: str, conversation_id: str, include_canvas: bool = False):
939
+ """Chat method that can optionally include canvas context."""
940
+ messages = [{"role": "user", "content": message}]
941
+
942
+ if include_canvas:
943
+ artifacts = self.get_canvas_summary(conversation_id)
944
+ if artifacts:
945
+ canvas_context = "Current Canvas Context:\\n" + "\\n".join([
946
+ f"- [{art['type'].upper()}] {art['title'] or 'Untitled'}: {art['content_preview']}"
947
+ for art in artifacts
948
+ ])
949
+ messages.insert(0, {"role": "system", "content": canvas_context})
950
+
951
+ return await self.chat(messages)
952
+
953
+
954
+
955
+
956
+
957
+ console = Console()
958
+
959
+
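+     # Usage sketch (illustrative only): seed the canvas, then let
+     # chat_with_canvas prepend the formatted artifacts as a system message.
+     #
+     #     agent = LLMAgent()
+     #     agent.add_artifact("default", "code", "print('hi')", title="hello.py")
+     #     reply = asyncio.run(agent.chat_with_canvas(
+     #         "Review the code on the canvas", "default", include_canvas=True))
+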

+ # --- Main Application ---
+ def main():
+     console.log("[bold blue]🚀 Starting LCARS Terminal...[/bold blue]")
+     is_space = os.getenv('SPACE_ID') is not None
+     if is_space:
+         console.log("[green]🌐 Detected HuggingFace Space[/green]")
+     else:
+         console.log("[blue]💻 Running locally[/blue]")
+     interface = LLMAgent()
+     demo = interface.create_interface()
+     demo.launch(
+         share=is_space
+     )

  if __name__ == "__main__":
+     main()
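+
+ # A minimal smoke test, assuming an OpenAI-compatible server (for example
+ # LM Studio) is already listening on http://localhost:1234/v1:
+ #
+ #     agent = LLMAgent()
+ #     print(agent.get_status())
+ #     print(agent.direct_chat("Hello, Starfleet!"))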