jdesiree committed on
Commit
a57855f
·
verified ·
1 Parent(s): 8cddfd8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +266 -132
app.py CHANGED
@@ -1,12 +1,16 @@
1
  import gradio as gr
2
  from graph_tool import generate_plot
3
- from metrics import EduBotMetrics
4
  import os
 
 
 
 
 
5
  import time
 
6
  import logging
7
- import json
8
  import re
9
- import requests
10
  from langchain.tools import BaseTool
11
  from langchain.agents import initialize_agent, AgentType
12
  from langchain.memory import ConversationBufferWindowMemory
@@ -14,22 +18,23 @@ from langchain.schema import SystemMessage
14
  from langchain.llms.base import LLM
15
  from typing import Optional, List, Any, Type
16
  from pydantic import BaseModel, Field
17
- from transformers import Qwen2_5OmniForConditionalGeneration, Qwen2_5OmniProcessor
18
- from qwen_omni_utils import process_mm_info
19
- import soundfile as sf
20
- import atexit
21
- import glob
 
22
 
23
  # --- Environment and Logging Setup ---
24
  logging.basicConfig(level=logging.INFO)
25
  logger = logging.getLogger(__name__)
26
 
27
  # Support both token names for flexibility
28
- hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
29
  if not hf_token:
30
  logger.warning("Neither HF_TOKEN nor HUGGINGFACEHUB_API_TOKEN is set, the application may not work.")
31
 
32
- metrics_tracker = EduBotMetrics(save_file="edu_metrics.json")
33
 
34
  # --- LangChain Tool Definition ---
35
  class GraphInput(BaseModel):
@@ -91,7 +96,7 @@ Always use proper JSON formatting with quotes around keys and string values."""
91
 
92
 
93
  # --- System Prompt ---
94
- SYSTEM_PROMPT = """You are EduBot, an expert multi-concept tutor designed to facilitate genuine learning and understanding. Your primary mission is to guide students through the learning process rather than providing direct answers to academic work.
95
 
96
  ## Core Educational Principles
97
  - Provide comprehensive, educational responses that help students truly understand concepts
@@ -161,44 +166,224 @@ def initialize_system_prompt(agent):
161
  agent.memory.chat_memory.add_message(system_message)
162
  system_prompt_initialized = True
163
 
164
- class Qwen25OmniLLM(LLM):
 
 
165
  model: Any = None
166
- processor: Any = None
167
 
168
- def __init__(self, model_path: str = "Qwen/Qwen2.5-Omni-7B"):
169
  super().__init__()
170
- self.model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
171
- model_path,
172
- torch_dtype="auto",
173
- device_map="auto"
174
- )
175
- self.processor = Qwen2_5OmniProcessor.from_pretrained(model_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
  def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
178
- # Implementation for text-only responses
179
- conversation = [
180
- {"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT}]},
181
- {"role": "user", "content": [{"type": "text", "text": prompt}]}
182
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
- text = self.processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
185
- audios, images, videos = process_mm_info(conversation, use_audio_in_video=False)
186
- inputs = self.processor(text=text, audio=audios, images=images, videos=videos, return_tensors="pt", padding=True)
187
- inputs = inputs.to(self.model.device)
 
 
 
 
188
 
189
- text_ids = self.model.generate(**inputs, return_audio=False)
190
- response = self.processor.batch_decode(text_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
191
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
  @property
194
  def _llm_type(self) -> str:
195
- return "qwen25_omni"
196
 
197
  def create_langchain_agent():
198
- # Replace HuggingFaceHub with custom LLM
199
- llm = Qwen25OmniLLM()
200
 
201
- # Rest remains the same
202
  tools = [CreateGraphTool()]
203
  memory = ConversationBufferWindowMemory(
204
  memory_key="chat_history",
@@ -228,52 +413,6 @@ def get_agent():
228
  agent = create_langchain_agent()
229
  return agent
230
 
231
- def generate_voice_response(text_response: str, voice_enabled: bool = False) -> Optional[str]:
232
- """Generate audio response if voice is enabled."""
233
- if not voice_enabled:
234
- return None
235
-
236
- try:
237
- current_agent = get_agent()
238
- model = current_agent.llm.model
239
- processor = current_agent.llm.processor
240
-
241
- if not hasattr(model, 'generate') or not hasattr(model.generate, '__code__'):
242
- logger.warning("Model may not support audio generation")
243
- return None
244
-
245
- conversation = [
246
- {"role": "system", "content": [{"type": "text", "text": "You are Qwen, a virtual human developed by the Qwen Team, Alibaba Group, capable of perceiving auditory and visual inputs, as well as generating text and speech."}]},
247
- {"role": "user", "content": [{"type": "text", "text": "Please read this response aloud: " + text_response}]}
248
- ]
249
-
250
- text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
251
- audios, images, videos = process_mm_info(conversation, use_audio_in_video=False)
252
- inputs = processor(text=text, audio=audios, images=images, videos=videos, return_tensors="pt", padding=True)
253
- inputs = inputs.to(model.device)
254
-
255
- text_ids, audio = model.generate(**inputs, speaker="Ethan")
256
-
257
- # Save audio to temporary file
258
- audio_path = f"temp_audio_{int(time.time())}.wav"
259
- sf.write(audio_path, audio.reshape(-1).detach().cpu().numpy(), samplerate=24000)
260
- return audio_path
261
-
262
- except Exception as e:
263
- logger.error(f"Error generating voice response: {e}")
264
- return None
265
-
266
- def cleanup_temp_audio():
267
- """Clean up temporary audio files on exit."""
268
- for file in glob.glob("temp_audio_*.wav"):
269
- try:
270
- os.remove(file)
271
- except:
272
- pass
273
-
274
- # Register cleanup function
275
- atexit.register(cleanup_temp_audio)
276
-
277
  # --- UI: MathJax Configuration ---
278
  mathjax_config = '''
279
  <script>
@@ -302,7 +441,7 @@ window.MathJax = {
302
  html_head_content = '''
303
  <meta charset="utf-8">
304
  <meta name="viewport" content="width=device-width, initial-scale=1">
305
- <title>EduBot - AI Educational Assistant</title>
306
  '''
307
 
308
  # --- Force Light Mode Script ---
@@ -348,7 +487,6 @@ def generate_response_with_langchain(message, max_retries=3):
348
  initialize_system_prompt(current_agent)
349
 
350
  # Use the agent directly with the message
351
- # LangChain will automatically handle adding HumanMessage and AIMessage to memory
352
  response = current_agent.run(input=message)
353
 
354
  return smart_truncate(response)
@@ -366,62 +504,58 @@ def chat_response(message, history=None):
366
  try:
367
  # Track metrics with timing context
368
  start_time = time.time()
369
-
370
- # Debug: Check message type
371
- logger.info(f"Message type: {type(message)}")
372
- logger.info(f"Message content: {message}")
 
373
 
374
  try:
375
- metrics_tracker.log_interaction(message, "user_query", "chat_start")
 
 
 
 
 
 
376
  logger.info("Metrics interaction logged successfully")
377
  except Exception as metrics_error:
378
  logger.error(f"Error in metrics_tracker.log_interaction: {metrics_error}")
379
- logger.error(f"Metrics error type: {type(metrics_error)}")
380
- # Continue without metrics if this fails
381
 
382
  # Generate response with LangChain
383
- logger.info("About to call generate_response_with_langchain")
384
- try:
385
- response = generate_response_with_langchain(message)
386
- logger.info(f"Response type: {type(response)}")
387
- logger.info(f"Response preview: {str(response)[:200]}...")
388
- except Exception as langchain_error:
389
- logger.error(f"Error in generate_response_with_langchain: {langchain_error}")
390
- raise langchain_error
391
 
392
- # Log metrics with timing context
393
  try:
394
- end_time = time.time()
395
- timing_context = f"response_time_{end_time - start_time:.2f}s"
396
- metrics_tracker.log_interaction(response, "bot_response", timing_context)
 
 
 
397
  except Exception as metrics_error:
398
  logger.error(f"Error in final metrics logging: {metrics_error}")
399
- # Continue without metrics if this fails
400
 
401
  return response
402
 
403
  except Exception as e:
404
  logger.error(f"Error in chat_response: {e}")
405
- logger.error(f"Error type: {type(e)}")
406
- import traceback
407
- logger.error(f"Full traceback: {traceback.format_exc()}")
408
  return f"I apologize, but I encountered an error while processing your message: {str(e)}"
409
 
410
- def respond_and_update(message, history, voice_enabled):
411
- """Main function to handle user submission."""
412
  if not message.strip():
413
- return history, "", None
414
 
415
  # Add user message to history
416
  history.append({"role": "user", "content": message})
417
- yield history, "", None
418
 
419
- # Generate response directly (no mock streaming)
420
  response = chat_response(message)
421
- audio_path = generate_voice_response(response, voice_enabled) if voice_enabled else None
422
 
423
  history.append({"role": "assistant", "content": response})
424
- yield history, "", audio_path
425
 
426
  def clear_chat():
427
  """Clear the chat history and reset system prompt flag."""
@@ -446,7 +580,7 @@ def create_interface():
446
  logger.warning(f"Error reading styles.css: {e}")
447
 
448
  with gr.Blocks(
449
- title="EduBot",
450
  fill_width=True,
451
  fill_height=True,
452
  theme=gr.themes.Origin()
@@ -459,7 +593,7 @@ def create_interface():
459
 
460
  with gr.Column(elem_classes=["main-container"]):
461
  # Title Section
462
- gr.HTML('<div class="title-header"><h1>🎓 EduBot</h1></div>')
463
 
464
  # Chat Section
465
  with gr.Row():
@@ -469,18 +603,18 @@ def create_interface():
469
  show_share_button=False,
470
  avatar_images=None,
471
  elem_id="main-chatbot",
472
- container=False, # Remove wrapper
473
  scale=1,
474
- height="70vh" # Explicit height instead of min_height
475
  )
476
 
477
- # Input Section - fixed height
478
  with gr.Row(elem_classes=["input-controls"]):
479
  msg = gr.Textbox(
480
  placeholder="Ask me about math, research, study strategies, or any educational topic...",
481
  show_label=False,
482
- lines=4,
483
- max_lines=6,
484
  elem_classes=["input-textbox"],
485
  container=False,
486
  scale=4
@@ -488,14 +622,10 @@ def create_interface():
488
  with gr.Column(elem_classes=["button-column"], scale=1):
489
  send = gr.Button("Send", elem_classes=["send-button"], size="sm")
490
  clear = gr.Button("Clear", elem_classes=["clear-button"], size="sm")
491
- voice_toggle = gr.Checkbox(label="Enable Voice (Ethan)", value=False, elem_classes=["voice-toggle"])
492
-
493
- # Add audio output component
494
- audio_output = gr.Audio(label="Voice Response", visible=True, autoplay=True)
495
 
496
- # Event handlers - INSIDE the Blocks context
497
- msg.submit(respond_and_update, [msg, chatbot, voice_toggle], [chatbot, msg, audio_output])
498
- send.click(respond_and_update, [msg, chatbot, voice_toggle], [chatbot, msg, audio_output])
499
  clear.click(clear_chat, outputs=[chatbot, msg])
500
 
501
  # Apply CSS at the very end
@@ -506,10 +636,14 @@ def create_interface():
506
  # --- Main Execution ---
507
  if __name__ == "__main__":
508
  try:
509
- logger.info("Starting EduBot...")
510
  interface = create_interface()
511
  interface.queue()
512
- interface.launch()
 
 
 
 
 
513
  except Exception as e:
514
- logger.error(f"Failed to launch EduBot: {e}")
515
- raise
 
1
import gradio as gr
from graph_tool import generate_plot
from metrics import MimirMetrics
import os

# Route every Hugging Face cache to /tmp (the only writable location on
# Hugging Face Spaces). NOTE(review): TRANSFORMERS_CACHE is deprecated in
# recent transformers releases — HF_HOME alone should suffice; kept for
# compatibility with older versions.
os.environ['HF_HOME'] = '/tmp/huggingface'
os.environ['TRANSFORMERS_CACHE'] = '/tmp/huggingface'
os.environ['HF_DATASETS_CACHE'] = '/tmp/huggingface'

import time
import logging
import re

from dotenv import load_dotenv
from langchain.tools import BaseTool
from langchain.agents import initialize_agent, AgentType
from langchain.memory import ConversationBufferWindowMemory
from langchain.schema import SystemMessage
from langchain.llms.base import LLM
from typing import Optional, List, Any, Type
from pydantic import BaseModel, Field
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# --- Environment and Logging Setup ---
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables from the local dotenv file.
# NOTE(review): ".evn" looks like a typo for the conventional ".env" —
# confirm the actual filename before changing it here.
load_dotenv(".evn")

# Support both token names for flexibility.
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
logger.info("Environment variables loaded.")

hf_token = HF_TOKEN
if not hf_token:
    logger.warning("Neither HF_TOKEN nor HUGGINGFACEHUB_API_TOKEN is set, the application may not work.")

# Persists per-interaction usage metrics to a local JSON file.
metrics_tracker = MimirMetrics(save_file="Mimir_metrics.json")
38
 
39
  # --- LangChain Tool Definition ---
40
  class GraphInput(BaseModel):
 
96
 
97
 
98
  # --- System Prompt ---
99
+ SYSTEM_PROMPT = """You are Mimir, an expert multi-concept tutor designed to facilitate genuine learning and understanding. Your primary mission is to guide students through the learning process rather than providing direct answers to academic work.
100
 
101
  ## Core Educational Principles
102
  - Provide comprehensive, educational responses that help students truly understand concepts
 
166
  agent.memory.chat_memory.add_message(system_message)
167
  system_prompt_initialized = True
168
 
169
class Qwen25SmallLLM(LLM):
    """LangChain LLM wrapper around a locally loaded Qwen2.5 instruct model.

    The model is loaded with BitsAndBytes 4-bit NF4 quantization by default
    (8-bit when ``use_4bit=False``); if quantized loading fails for any
    reason, a plain fp16/fp32 load is attempted so the app can still start.
    """

    # Declared as pydantic fields (the LangChain LLM base is a pydantic
    # model); both are populated in __init__.
    model: Any = None
    tokenizer: Any = None

    def __init__(self, model_path: str = "Qwen/Qwen2.5-3B-Instruct", use_4bit: bool = True):
        """Load tokenizer and quantized model.

        Args:
            model_path: Hugging Face model id or local checkpoint path.
            use_4bit: True -> 4-bit NF4 quantization; False -> 8-bit.
        """
        super().__init__()
        logger.info(f"Loading model with BitsAndBytes quantization: {model_path}")

        # Configure BitsAndBytes quantization.
        if use_4bit:
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.bfloat16,  # faster compute than fp32
                bnb_4bit_use_double_quant=True,         # extra memory savings
                bnb_4bit_quant_type="nf4"               # Normal Float 4-bit
            )
            logger.info("Using 4-bit quantization with BitsAndBytes")
        else:
            quantization_config = BitsAndBytesConfig(
                load_in_8bit=True,
                llm_int8_enable_fp32_cpu_offload=True   # offload to CPU if GPU is tight
            )
            logger.info("Using 8-bit quantization with BitsAndBytes")

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(
                model_path,
                trust_remote_code=True
            )
            self.model = AutoModelForCausalLM.from_pretrained(
                model_path,
                quantization_config=quantization_config,
                device_map="auto",              # distribute across available devices
                torch_dtype=torch.bfloat16,     # memory-efficient activations
                trust_remote_code=True,
                low_cpu_mem_usage=True,         # reduce CPU RAM spike during load
                max_memory={0: "15GB"} if torch.cuda.is_available() else None
            )

            # Generation with padding requires a pad token; reuse EOS if unset.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            logger.info("Model loaded successfully with BitsAndBytes quantization")
        except Exception as e:
            logger.error(f"Failed to load model with quantization: {e}")
            logger.info("Falling back to standard loading...")
            self._load_fallback_model(model_path)

    def _load_fallback_model(self, model_path: str):
        """Load the model without quantization (used if BitsAndBytes fails)."""
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
            self.model = AutoModelForCausalLM.from_pretrained(
                model_path,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
                trust_remote_code=True,
                low_cpu_mem_usage=True
            )
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
            logger.info("Model loaded with fallback method")
        except Exception as e:
            logger.error(f"Fallback model loading also failed: {e}")
            raise e

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Generate a text response for *prompt* with the local model.

        ``stop`` is accepted for LangChain compatibility but not applied.
        Never raises: errors are logged and returned as an apology string.
        """
        try:
            # Format the conversation with the global tutoring system prompt.
            messages = [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt}
            ]
            text = self.tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True
            )

            # Tokenize; cap input length to prevent memory blow-ups.
            model_inputs = self.tokenizer(
                [text],
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=2048
            )
            if torch.cuda.is_available():
                # Keep the BatchEncoding (don't rebuild as a plain dict) so
                # key access below still works; .to() moves all tensors.
                model_inputs = model_inputs.to(self.model.device)

            # Generate with memory-efficient settings. NOTE: attention_mask
            # is already inside model_inputs — passing it again as an
            # explicit kwarg would raise "multiple values for keyword".
            with torch.no_grad():
                generated_ids = self.model.generate(
                    **model_inputs,
                    max_new_tokens=800,      # reduced for memory efficiency
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.9,
                    top_k=50,
                    repetition_penalty=1.1,
                    pad_token_id=self.tokenizer.eos_token_id,
                    use_cache=True           # enable KV cache for efficiency
                )

            # Strip the echoed prompt: keep only the newly generated tokens.
            input_ids = model_inputs["input_ids"]
            generated_ids = [
                output_ids[len(prompt_ids):]
                for prompt_ids, output_ids in zip(input_ids, generated_ids)
            ]
            response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

            # Release cached GPU memory between requests.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            return response.strip()

        except torch.cuda.OutOfMemoryError:
            logger.error("GPU out of memory during generation")
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            return "I apologize, but I'm experiencing memory constraints. Please try a shorter message or restart the application."

        except Exception as e:
            logger.error(f"Error in model generation: {e}")
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            return f"I apologize, but I encountered an error while generating a response: {str(e)}"

    @property
    def _llm_type(self) -> str:
        return "qwen25_small_quantized"
382
 
383
  def create_langchain_agent():
384
+ # Use the smaller local model
385
+ llm = Qwen25SmallLLM()
386
 
 
387
  tools = [CreateGraphTool()]
388
  memory = ConversationBufferWindowMemory(
389
  memory_key="chat_history",
 
413
  agent = create_langchain_agent()
414
  return agent
415
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
  # --- UI: MathJax Configuration ---
417
  mathjax_config = '''
418
  <script>
 
441
  html_head_content = '''
442
  <meta charset="utf-8">
443
  <meta name="viewport" content="width=device-width, initial-scale=1">
444
+ <title>Mimir - AI Educational Assistant</title>
445
  '''
446
 
447
  # --- Force Light Mode Script ---
 
487
  initialize_system_prompt(current_agent)
488
 
489
  # Use the agent directly with the message
 
490
  response = current_agent.run(input=message)
491
 
492
  return smart_truncate(response)
 
504
  try:
505
  # Track metrics with timing context
506
  start_time = time.time()
507
+ timing_context = {
508
+ 'start_time': start_time,
509
+ 'chunk_count': 0,
510
+ 'provider_latency': 0.0
511
+ }
512
 
513
  try:
514
+ # Log start of interaction
515
+ metrics_tracker.log_interaction(
516
+ query=message,
517
+ response="",
518
+ timing_context=timing_context,
519
+ error_occurred=False
520
+ )
521
  logger.info("Metrics interaction logged successfully")
522
  except Exception as metrics_error:
523
  logger.error(f"Error in metrics_tracker.log_interaction: {metrics_error}")
 
 
524
 
525
  # Generate response with LangChain
526
+ response = generate_response_with_langchain(message)
 
 
 
 
 
 
 
527
 
528
+ # Log final metrics
529
  try:
530
+ metrics_tracker.log_interaction(
531
+ query=message,
532
+ response=response,
533
+ timing_context=timing_context,
534
+ error_occurred=False
535
+ )
536
  except Exception as metrics_error:
537
  logger.error(f"Error in final metrics logging: {metrics_error}")
 
538
 
539
  return response
540
 
541
  except Exception as e:
542
  logger.error(f"Error in chat_response: {e}")
 
 
 
543
  return f"I apologize, but I encountered an error while processing your message: {str(e)}"
544
 
545
def respond_and_update(message, history):
    """Handle a user submission (no voice parameter).

    Gradio generator callback wired to [msg, chatbot] -> [chatbot, msg].
    Yields (history, textbox_value) pairs; the textbox is cleared on every
    yield.
    """
    if not message.strip():
        # This function is a generator (it contains `yield` below), so a
        # bare `return history, ""` would emit NOTHING to Gradio — the
        # return value of a generator is discarded. Yield the unchanged
        # state instead, then stop.
        yield history, ""
        return

    # Show the user's message immediately, before the (slow) model call.
    history.append({"role": "user", "content": message})
    yield history, ""

    # Generate the assistant's reply and append it.
    response = chat_response(message)
    history.append({"role": "assistant", "content": response})
    yield history, ""
559
 
560
  def clear_chat():
561
  """Clear the chat history and reset system prompt flag."""
 
580
  logger.warning(f"Error reading styles.css: {e}")
581
 
582
  with gr.Blocks(
583
+ title="Mimir",
584
  fill_width=True,
585
  fill_height=True,
586
  theme=gr.themes.Origin()
 
593
 
594
  with gr.Column(elem_classes=["main-container"]):
595
  # Title Section
596
+ gr.HTML('<div class="title-header"><h1> Mimir 🎓</h1></div>')
597
 
598
  # Chat Section
599
  with gr.Row():
 
603
  show_share_button=False,
604
  avatar_images=None,
605
  elem_id="main-chatbot",
606
+ container=False,
607
  scale=1,
608
+ height="70vh"
609
  )
610
 
611
+ # Input Section
612
  with gr.Row(elem_classes=["input-controls"]):
613
  msg = gr.Textbox(
614
  placeholder="Ask me about math, research, study strategies, or any educational topic...",
615
  show_label=False,
616
+ lines=6,
617
+ max_lines=8,
618
  elem_classes=["input-textbox"],
619
  container=False,
620
  scale=4
 
622
  with gr.Column(elem_classes=["button-column"], scale=1):
623
  send = gr.Button("Send", elem_classes=["send-button"], size="sm")
624
  clear = gr.Button("Clear", elem_classes=["clear-button"], size="sm")
 
 
 
 
625
 
626
+ # Event handlers - no voice parameter
627
+ msg.submit(respond_and_update, [msg, chatbot], [chatbot, msg])
628
+ send.click(respond_and_update, [msg, chatbot], [chatbot, msg])
629
  clear.click(clear_chat, outputs=[chatbot, msg])
630
 
631
  # Apply CSS at the very end
 
636
# --- Main Execution ---
if __name__ == "__main__":
    try:
        logger.info("Starting Mimir...")
        interface = create_interface()
        interface.queue()
        # NOTE(review): share=True and debug=True are development settings —
        # consider disabling them for production deployments.
        interface.launch(
            server_name="0.0.0.0",
            share=True,
            debug=True,
            favicon_path="assets/favicon.ico"
        )
    except Exception as e:
        # Log with traceback, then re-raise: swallowing the exception here
        # would make a failed launch exit silently with status 0.
        logger.exception(f"Failed to launch Mimir: {e}")
        raise