jdesiree commited on
Commit
52f25ab
·
verified ·
1 Parent(s): 5ff5bc4

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +1461 -0
app.py ADDED
@@ -0,0 +1,1461 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ print(">>> ENTERING app.py (top-level) <<<")
3
+ """
4
+ Mimir Educational AI Assistant - Main Application
5
+ Architecture:
6
+ - Multi-page Gradio interface (Chatbot + Analytics with link to Mimir case study)
7
+ - Agent-based orchestration (Tool, Routing, Thinking, Response)
8
+ - Global state management with SQLite + HF dataset backup
9
+ - Prompt state tracking per turn
10
+ - LightEval for metrics tracking
11
+ - Logger for timing functions
12
+ - OPTIMIZED: Single Llama-3.2-3B model with lazy loading (loads on first use, ~1GB)
13
+ """
14
+ import os
15
+ import re
16
+ import sys
17
+ import time
18
+ import json
19
+ import base64
20
+ import logging
21
+ import sqlite3
22
+ import subprocess
23
+ import threading
24
+ import warnings
25
+ import uuid
26
+ from datetime import datetime
27
+ from pathlib import Path
28
+ from typing import Dict, List, Optional, Tuple, Any
29
+
30
+ # ============================================================================
31
+ # HUGGINGFACE CACHE SETUP - Avoid Permission Errors
32
+ # ============================================================================
33
+ # Use /tmp for all HuggingFace operations (writable at runtime)
34
HF_CACHE = "/tmp/huggingface"

# Create every cache sub-directory up front so later library imports never
# hit a read-only location.
for _subdir in ("hub", "modules", "transformers"):
    os.makedirs(f"{HF_CACHE}/{_subdir}", exist_ok=True)
del _subdir

# Configure HuggingFace cache locations
os.environ.update({
    'HF_HOME': HF_CACHE,
    'HF_HUB_CACHE': f"{HF_CACHE}/hub",
    'HF_MODULES_CACHE': f"{HF_CACHE}/modules",
    'TRANSFORMERS_CACHE': f"{HF_CACHE}/transformers",
    'HF_HUB_ENABLE_HF_TRANSFER': '1',  # Faster downloads
})

# Matplotlib cache (avoid permission warnings)
os.environ['MPLCONFIGDIR'] = "/tmp/matplotlib"
os.makedirs("/tmp/matplotlib", exist_ok=True)
49
+
50
+ # ============================================================================
51
+ # CORE DEPENDENCIES
52
+ # ============================================================================
53
+ import torch
54
+ import gradio as gr
55
+ from dotenv import load_dotenv
56
+
57
+ # Agent architecture
58
+ from agents import (
59
+ ToolDecisionAgent,
60
+ PromptRoutingAgents,
61
+ ThinkingAgents,
62
+ ResponseAgent,
63
+ )
64
+
65
+ # Lazy-loading model (optional pre-warm)
66
+ from model_manager import get_model
67
+
68
+ # State management
69
+ from state_manager import (
70
+ GlobalStateManager,
71
+ LogicalExpressions,
72
+ )
73
+
74
+ # Prompt library
75
+ from prompt_library import (
76
+ CORE_IDENTITY,
77
+ VAUGE_INPUT,
78
+ USER_UNDERSTANDING,
79
+ GENERAL_FORMATTING,
80
+ LATEX_FORMATTING,
81
+ GUIDING_TEACHING,
82
+ STRUCTURE_PRACTICE_QUESTIONS,
83
+ PRACTICE_QUESTION_FOLLOWUP,
84
+ TOOL_USE_ENHANCEMENT,
85
+ )
86
+
87
+ # LangGraph imports
88
+ from langgraph.graph import StateGraph, START, END
89
+ from langgraph.graph.message import add_messages
90
+ from langgraph.checkpoint.memory import MemorySaver
91
+ from langgraph.prebuilt import ToolNode
92
+
93
+ # LangChain Core
94
+ from langchain_core.tools import tool
95
+ from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, ToolMessage, BaseMessage
96
+
97
+ # Tool for graphing
98
+ from graph_tool import generate_plot
99
+
100
+
101
+ # ============================================================================
102
+ # LIGHTEVAL FOR METRICS
103
+ # ============================================================================
104
# LightEval is optional: the app runs without it, with reduced metrics.
try:
    from lighteval.logging.evaluation_tracker import EvaluationTracker
    from lighteval.models.transformers.transformers_model import TransformersModel
    from lighteval.metrics.metrics_sample import BertScore, ROUGE
    from lighteval.tasks.requests import Doc
    LIGHTEVAL_AVAILABLE = True
except ImportError:
    LIGHTEVAL_AVAILABLE = False
    # Use a named logger here. The module-level logging.warning() helper
    # implicitly calls logging.basicConfig() when the root logger has no
    # handlers, which would make the explicit logging.basicConfig(...) call
    # further down this file a no-op (losing the INFO level and format).
    logging.getLogger(__name__).warning("LightEval not available - metrics tracking limited")
113
+
114
+ # ============================================================================
115
+ # CONFIGURATION
116
+ # ============================================================================
117
+ # Suppress warnings
118
+ warnings.filterwarnings("ignore", category=UserWarning)
119
+ warnings.filterwarnings("ignore", category=FutureWarning)
120
+
121
+ # Load environment
122
+ load_dotenv(".env")
123
+ HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
124
+
125
+ # Debug and runtime settings
126
+ DEBUG_STATE = os.getenv("DEBUG_STATE", "false").lower() == "true"
127
+ CURRENT_YEAR = datetime.now().year
128
+
129
+
130
+ # ============================================================================
131
+ # LOGGING SETUP
132
+ # ============================================================================
133
+
134
+ logging.basicConfig(
135
+ level=logging.INFO,
136
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
137
+ )
138
+ logger = logging.getLogger(__name__)
139
+
140
+
141
def log_step(step_name: str, start_time: Optional[float] = None) -> float:
    """
    Log a pipeline step with timestamp and duration.

    Call once without ``start_time`` to log the start of a step, then pass
    the returned value back in to log its completion and elapsed time.

    Args:
        step_name: Name of the step
        start_time: Value returned by the call that started the step;
            omit (or pass None) when starting a step

    Returns:
        Current ``time.time()`` value, for use as ``start_time`` later
    """
    now = time.time()
    timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]

    # Compare against None explicitly: 0.0 is falsy but still a start time.
    if start_time is not None:
        duration = now - start_time
        logger.info(f"[{timestamp}] COMPLETED: {step_name} ({duration:.2f}s)")
    else:
        logger.info(f"[{timestamp}] STARTING: {step_name}")

    return now
162
+
163
+ # ============================================================================
164
+ # MODEL INFORMATION
165
+ # ============================================================================
166
+ print("="*60)
167
+ print("MIMIR - Using Llama-3.2-3B-Instruct")
168
+ print(" Model: meta-llama/Llama-3.2-3B-Instruct")
169
+ print(" Memory: ~1GB (4-bit quantized)")
170
+ print(" Context: 128K tokens")
171
+ print(" Architecture: Single unified model")
172
+ print("="*60)
173
+
174
+ # ============================================================================
175
+ # GLOBAL INITIALIZATION
176
+ # ============================================================================
177
+
178
+ logger.info("="*60)
179
+ logger.info("INITIALIZING MIMIR APPLICATION")
180
+ logger.info("="*60)
181
+
182
+ init_start = log_step("Global Initialization")
183
+
184
+ # Initialize state management
185
+ global_state_manager = GlobalStateManager()
186
+ logical_expressions = LogicalExpressions()
187
+ logger.info("State management initialized")
188
+
189
+ # Initialize agents (lazy loading - models load on first use)
190
+ tool_agent = ToolDecisionAgent()
191
+ routing_agents = PromptRoutingAgents()
192
+ thinking_agents = ThinkingAgents()
193
+ response_agent = ResponseAgent()
194
+ logger.info("Agents initialized (using shared get_shared_llama)")
195
+
196
+ # No pre-warm is performed here: the shared model loads lazily on the first agent call; the line below only logs that
197
+ logger.info("Shared Qwen3-Claude agent ready (loads on first use)")
198
+
199
+ log_step("Global Initialization", init_start)
200
+
201
+
202
+ # ============================================================================
203
+ # ANALYTICS & DATABASE FUNCTIONS
204
+ # ============================================================================
205
+
206
def get_trackio_database_path(project_name: str) -> Optional[str]:
    """Return the first existing metrics SQLite file for the project, else None."""
    # Candidates are probed in priority order; the shared Mimir file is last.
    candidates = (
        f"./{project_name}.db",
        f"./trackio_data/{project_name}.db",
        f"./.trackio/{project_name}.db",
        "./mimir_metrics.db",
    )
    return next(
        (candidate for candidate in candidates if os.path.exists(candidate)),
        None,
    )
220
+
221
+
222
def get_project_statistics_with_nulls(cursor, project_name: str) -> Dict:
    """
    Query the metrics table for project-level statistics.

    Each statistic is computed by its own query, so one failing query
    (for example a missing table) only nulls that statistic.

    Args:
        cursor: SQLite cursor whose rows support access by column name
            (rows are indexed with string keys below)
        project_name: Value matched against the ``project_name`` column

    Returns:
        Dict with keys ``total_conversations``, ``avg_session_length`` and
        ``success_rate``; a value is None when there is no data or when
        its query failed
    """

    def fetch_row(stat_name: str, sql: str):
        """Run one aggregate query; return its single row, or None on a DB error."""
        try:
            cursor.execute(sql, (project_name,))
            return cursor.fetchone()
        except sqlite3.Error as e:
            # Stay best-effort, but leave a trace instead of failing silently.
            logger.warning(f"Could not compute {stat_name}: {e}")
            return None

    stats = {}

    # Total conversations
    row = fetch_row("total_conversations", """
        SELECT COUNT(DISTINCT run_id) as total_runs
        FROM metrics
        WHERE project_name = ?
    """)
    stats["total_conversations"] = (
        row["total_runs"] if row and row["total_runs"] > 0 else None
    )

    # Average response time (stored under the historical key
    # "avg_session_length", which the dashboard reads)
    row = fetch_row("avg_session_length", """
        SELECT AVG(CAST(value AS FLOAT)) as avg_response_time
        FROM metrics
        WHERE project_name = ? AND metric_name = 'response_time'
    """)
    if row and row["avg_response_time"] is not None:
        stats["avg_session_length"] = round(row["avg_response_time"], 2)
    else:
        stats["avg_session_length"] = None

    # Success rate: share of quality scores above 3.5, as a percentage
    row = fetch_row("success_rate", """
        SELECT
            COUNT(*) as total_responses,
            SUM(CASE WHEN CAST(value AS FLOAT) > 3.5 THEN 1 ELSE 0 END) as successful_responses
        FROM metrics
        WHERE project_name = ? AND metric_name = 'quality_score'
    """)
    if row and row["total_responses"] > 0:
        success_rate = (row["successful_responses"] / row["total_responses"]) * 100
        stats["success_rate"] = round(success_rate, 1)
    else:
        stats["success_rate"] = None

    return stats
277
+
278
+
279
def get_recent_interactions_with_nulls(cursor, project_name: str, limit: int = 10) -> List:
    """
    Query for the most recent interactions of a project.

    One row is produced per ``conversation_start`` metric, joined by
    ``run_id`` with that run's other metrics.

    Args:
        cursor: SQLite cursor whose rows support access by column name
        project_name: Value matched against the ``project_name`` column
        limit: Maximum number of interactions to return

    Returns:
        List of ``[timestamp, response_time, prompt_mode, tools_used,
        quality_score, adapter_used]`` rows, newest first. Missing or
        unparseable values are None. Returns [] on a database error.
    """

    def to_float(raw):
        """Parse a stored TEXT value as float; None when absent or malformed."""
        if raw is None:
            return None
        try:
            return float(raw)
        except (TypeError, ValueError):
            return None

    def to_bool(raw):
        """Parse a stored flag; accepts numbers and 'True'/'False' style text."""
        if raw is None:
            return None
        text = str(raw).strip().lower()
        if text in ("true", "yes"):
            return True
        if text in ("false", "no"):
            return False
        try:
            return bool(int(float(text)))
        except ValueError:
            return None

    try:
        cursor.execute("""
            SELECT
                m1.timestamp,
                m2.value as response_time,
                m3.value as prompt_mode,
                m4.value as tools_used,
                m5.value as quality_score,
                m6.value as adapter_used,
                m1.run_id
            FROM metrics m1
            LEFT JOIN metrics m2 ON m1.run_id = m2.run_id AND m2.metric_name = 'response_time'
            LEFT JOIN metrics m3 ON m1.run_id = m3.run_id AND m3.metric_name = 'prompt_mode'
            LEFT JOIN metrics m4 ON m1.run_id = m4.run_id AND m4.metric_name = 'tools_used'
            LEFT JOIN metrics m5 ON m1.run_id = m5.run_id AND m5.metric_name = 'quality_score'
            LEFT JOIN metrics m6 ON m1.run_id = m6.run_id AND m6.metric_name = 'active_adapter'
            WHERE m1.project_name = ? AND m1.metric_name = 'conversation_start'
            ORDER BY m1.timestamp DESC
            LIMIT ?
        """, (project_name, limit))

        # Values are stored as TEXT, so every field is parsed defensively:
        # one malformed value must not abort the whole listing.
        return [
            [
                row["timestamp"][:16] if row["timestamp"] else None,
                to_float(row["response_time"]),
                row["prompt_mode"] if row["prompt_mode"] else None,
                to_bool(row["tools_used"]),
                to_float(row["quality_score"]),
                row["adapter_used"] if row["adapter_used"] else None,
            ]
            for row in cursor.fetchall()
        ]

    except sqlite3.Error as e:
        logger.error(f"Database error: {e}")
        return []
320
+
321
+
322
def create_dashboard_html_with_nulls(project_name: str, project_stats: Dict) -> str:
    """
    Create dashboard HTML with enhanced agent-based metrics.

    Note: ``project_stats`` is updated in place with the keys
    ``ml_educational_quality``, ``user_satisfaction`` and
    ``active_sessions`` (all None when the global state lookup fails).

    Args:
        project_name: Name shown in the dashboard heading
        project_stats: Statistics dict; missing or None values are
            rendered as placeholders

    Returns:
        HTML fragment for the analytics dashboard
    """
    def format_stat(value, suffix="", no_data_text="No data"):
        """Render value + suffix, or a grey italic placeholder when value is None."""
        if value is None:
            return f'<span style="color: #999; font-style: italic;">{no_data_text}</span>'
        return f"{value}{suffix}"

    def format_large_stat(value, suffix="", no_data_text="--"):
        """Render value + suffix for the large tiles, or a light placeholder."""
        if value is None:
            return f'<span style="color: #ccc;">{no_data_text}</span>'
        return f"{value}{suffix}"

    # Get evaluation metrics from global state
    try:
        eval_summary = global_state_manager.get_evaluation_summary()
        cache_status = global_state_manager.get_cache_status()

        project_stats["ml_educational_quality"] = eval_summary['aggregate_metrics']['avg_educational_quality']
        project_stats["user_satisfaction"] = eval_summary['aggregate_metrics']['user_satisfaction_rate']
        project_stats["active_sessions"] = cache_status['total_conversation_sessions']

    except Exception as e:
        logger.warning(f"Could not get global state metrics: {e}")
        project_stats["ml_educational_quality"] = None
        project_stats["user_satisfaction"] = None
        project_stats["active_sessions"] = None

    # Status determination
    success_rate = project_stats.get("success_rate")
    if success_rate is None:
        status_color, status_text = "#999", "No data"
    elif success_rate >= 80:
        status_color, status_text = "#4CAF50", "Excellent"
    elif success_rate >= 60:
        status_color, status_text = "#FF9800", "Good"
    else:
        status_color, status_text = "#F44336", "Needs Improvement"

    # The formatters drop the suffix for None themselves, so the unit is
    # always passed. Picking it by truthiness lost the unit for a genuine
    # 0 / 0.0 value (e.g. a 0% success rate rendered as a bare "0.0").
    total_sessions_html = format_large_stat(project_stats.get('total_conversations'))
    avg_time_html = format_large_stat(project_stats.get('avg_session_length'), 's')
    success_html = format_large_stat(success_rate, '%')
    quality_html = format_stat(project_stats.get('ml_educational_quality'), '', 'N/A')
    satisfaction_html = format_stat(project_stats.get('user_satisfaction'), '%', 'N/A')
    sessions_html = format_stat(project_stats.get('active_sessions'), '', 'N/A')
    model_html = format_stat(project_stats.get('model_type'), no_data_text='Unknown')
    last_updated = project_stats.get('last_updated', 'Unknown')

    # Agent-based metrics section
    agent_metrics_section = f"""
    <div style="margin: 15px 0; padding: 10px; background: #f0f8ff; border-radius: 4px; border-left: 4px solid #007bff;">
        <strong>🚀 Agent Performance (Qwen3-Claude Single Model):</strong>
        Educational Quality: {quality_html} |
        User Satisfaction: {satisfaction_html} |
        Active Sessions: {sessions_html}
    </div>
    """

    dashboard_html = f'''
    <div style="text-align: center; padding: 20px; border: 1px solid #ddd; border-radius: 8px; background: #f9f9f9;">
        <h3>📊 {project_name} Analytics</h3>

        <div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 15px; margin: 20px 0;">
            <div style="padding: 15px; background: white; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <div style="font-size: 24px; font-weight: bold; color: #2196F3;">{total_sessions_html}</div>
                <div style="color: #666; font-size: 12px;">Total Sessions</div>
            </div>
            <div style="padding: 15px; background: white; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <div style="font-size: 24px; font-weight: bold; color: #FF9800;">{avg_time_html}</div>
                <div style="color: #666; font-size: 12px;">Avg Response Time</div>
            </div>
            <div style="padding: 15px; background: white; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <div style="font-size: 24px; font-weight: bold; color: {status_color};">{success_html}</div>
                <div style="color: #666; font-size: 12px;">Success Rate ({status_text})</div>
            </div>
        </div>

        {agent_metrics_section}

        <div style="margin: 15px 0; padding: 10px; background: #fff3cd; border-radius: 4px; font-size: 14px;">
            <strong>Model:</strong> {model_html} |
            <strong>Last Updated:</strong> {last_updated}
        </div>
    </div>
    '''

    return dashboard_html
404
+
405
+
406
def calculate_response_quality(response: str) -> float:
    """
    Calculate a heuristic response quality score.

    Args:
        response: Response text to score

    Returns:
        2.0 for responses under 20 characters, 3.5 for responses over 2000
        characters, otherwise 2.5 plus up to 1.5 for length (saturating at
        200 characters) and up to 1.0 for educational keywords, clamped to
        [1.0, 5.0]. Returns the neutral score 3.0 when ``response`` is not
        text (e.g. None).
    """
    try:
        length_score = min(len(response) / 200, 1.0)
        educational_keywords = ['learn', 'understand', 'concept', 'example', 'practice']
        lowered = response.lower()
        keyword_score = sum(
            1 for keyword in educational_keywords if keyword in lowered
        ) / len(educational_keywords)

        if len(response) < 20:
            return 2.0
        elif len(response) > 2000:
            return 3.5

        base_score = 2.5 + (length_score * 1.5) + (keyword_score * 1.0)
        return min(max(base_score, 1.0), 5.0)
    except (TypeError, AttributeError):
        # Only non-text input is expected to fail here; a bare except would
        # also hide KeyboardInterrupt / SystemExit.
        return 3.0
422
+
423
+
424
def evaluate_educational_quality_with_tracking(
    user_query: str,
    response: str,
    thread_id: Optional[str] = None,
    session_id: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Educational quality evaluation with state tracking using LightEval.

    Scores the response with simple text heuristics, optionally replaces
    the semantic score with a LightEval BertScore, and records the result
    in the global state manager.

    Args:
        user_query: The user's question
        response: The generated answer being evaluated
        thread_id: Optional thread identifier (used in the LightEval task name)
        session_id: Optional session identifier passed to the state manager

    Returns:
        Dict with keys ``semantic_quality``, ``educational_score``,
        ``response_time`` and ``indicators``. If evaluation fails, neutral
        defaults are returned with an empty ``indicators`` dict.
    """
    start_time = time.time()

    try:
        lowered = response.lower()

        # Educational indicators
        educational_indicators = {
            'has_examples': 'example' in lowered,
            'structured_explanation': '##' in response or '1.' in response,
            'appropriate_length': 100 < len(response) < 1500,
            'encourages_learning': any(
                phrase in lowered
                for phrase in ['practice', 'try', 'consider', 'think about']
            ),
            'uses_latex': '$' in response,
            'has_clear_sections': response.count('\n\n') >= 2
        }

        educational_score = sum(educational_indicators.values()) / len(educational_indicators)
        semantic_quality = min(len(response) / 500, 1.0)
        # NOTE: this is the time spent computing the heuristics above, not
        # the model's generation latency.
        response_time = time.time() - start_time

        # Use LightEval if available
        if LIGHTEVAL_AVAILABLE:
            try:
                doc = Doc(
                    task_name=f"turn_{thread_id or session_id}",
                    query=user_query,
                    choices=[response],
                    gold_index=-1,
                    specific_output=response
                )

                bert_score = BertScore().compute(doc)
                semantic_quality = bert_score if bert_score else semantic_quality

            except Exception as lighteval_error:
                logger.warning(f"LightEval computation failed: {lighteval_error}")

        metrics = {
            'semantic_quality': semantic_quality,
            'educational_score': educational_score,
            'response_time': response_time,
            'indicators': educational_indicators
        }

        # Track in global state
        global_state_manager.add_educational_quality_score(
            user_query=user_query,
            response=response,
            metrics=metrics,
            session_id=session_id
        )

        logger.info(f"Educational quality evaluated: {educational_score:.3f}")
        return metrics

    except Exception as e:
        logger.error(f"Educational quality evaluation failed: {e}")
        # Same key set as the success path so callers can index uniformly.
        return {
            'educational_score': 0.5,
            'semantic_quality': 0.5,
            'response_time': 0.0,
            'indicators': {}
        }
482
+
483
def log_metrics_to_database(project_name: str, run_id: str, metrics: Dict):
    """
    Log metrics to SQLite database for dashboard.

    Writes one row per metric to the ``metrics`` table, creating the table
    if needed. Failures are logged and never raised to the caller.

    Args:
        project_name: Project the metrics belong to
        run_id: Identifier shared by all metrics of one run
        metrics: Mapping of metric name to value; values are stored as text
    """
    try:
        db_path = get_trackio_database_path(project_name)

        if db_path is None:
            db_path = "./mimir_metrics.db"

        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()

            # Create metrics table if not exists
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS metrics (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    project_name TEXT,
                    run_id TEXT,
                    metric_name TEXT,
                    value TEXT,
                    timestamp TEXT
                )
            """)

            # Insert metrics (one shared timestamp per call)
            timestamp = datetime.now().isoformat()
            cursor.executemany("""
                INSERT INTO metrics (project_name, run_id, metric_name, value, timestamp)
                VALUES (?, ?, ?, ?, ?)
            """, [
                (project_name, run_id, metric_name, str(metric_value), timestamp)
                for metric_name, metric_value in metrics.items()
            ])

            conn.commit()
        finally:
            # Close even when a statement fails, so the handle never leaks.
            conn.close()

        logger.info(f"Logged {len(metrics)} metrics to database")

    except Exception as e:
        logger.error(f"Failed to log metrics to database: {e}")
521
+
522
+
523
def sync_trackio_with_global_state():
    """Write the global state manager's aggregate evaluation metrics to the metrics database."""
    try:
        summary = global_state_manager.get_evaluation_summary()
        aggregate = summary['aggregate_metrics']

        # Log to database (agent-based metrics only)
        snapshot = {
            "educational_quality_avg": aggregate['avg_educational_quality'],
            "user_satisfaction": aggregate['user_satisfaction_rate'],
            "total_evaluations": sum(summary['total_evaluations'].values()),
        }

        # Each sync is recorded under its own freshly generated run id.
        run_id = str(uuid.uuid4())
        log_metrics_to_database("Mimir", run_id, snapshot)

        logger.info("Synced global state metrics to database")

    except Exception as e:
        logger.error(f"Failed to sync metrics to database: {e}")
541
+
542
+
543
def refresh_analytics_data_persistent():
    """
    Refresh analytics data with global state persistence.

    Returns the cached analytics when they were refreshed less than 30
    seconds ago; otherwise re-queries the metrics database, rebuilds the
    dashboard HTML and stores the result in the global state manager.

    Returns:
        Tuple ``(project_stats, recent_interactions, dashboard_html)``.
        On failure the stats carry an ``error`` key and the HTML is an
        error panel.
    """
    project_name = "Mimir"
    model_type = "Qwen3-4B-Claude GGUF (Q6_K - Single Model)"

    try:
        analytics_state = global_state_manager.get_analytics_state()
        last_refresh = analytics_state.get('last_refresh')

        # If refreshed within last 30 seconds, return cached.
        # total_seconds() is required here: timedelta.seconds ignores whole
        # days, so a refresh from 1 day + 5 s ago would look 5 s old.
        if last_refresh:
            elapsed = (datetime.now() - last_refresh).total_seconds()
            if 0 <= elapsed < 30:
                logger.info("Using cached analytics data (recent refresh)")
                return (
                    analytics_state['project_stats'],
                    analytics_state['recent_interactions'],
                    analytics_state['dashboard_html']
                )

        db_path = get_trackio_database_path(project_name)

        if db_path is None:
            logger.warning("No metrics database found")
            project_stats = {
                "total_conversations": None,
                "avg_session_length": None,
                "success_rate": None,
                "model_type": model_type,
                "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }

            dashboard_html = create_dashboard_html_with_nulls(project_name, project_stats)
            recent_interactions = []

            global_state_manager.update_analytics_state(
                project_stats=project_stats,
                recent_interactions=recent_interactions,
                dashboard_html=dashboard_html
            )

            return project_stats, recent_interactions, dashboard_html

        conn = sqlite3.connect(db_path)
        try:
            # Name-based row access is used by the query helpers.
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            project_stats = get_project_statistics_with_nulls(cursor, project_name)
            project_stats["model_type"] = model_type
            project_stats["last_updated"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            recent_data = get_recent_interactions_with_nulls(cursor, project_name, limit=10)
            dashboard_html = create_dashboard_html_with_nulls(project_name, project_stats)
        finally:
            # Close even when a query or the HTML build raises.
            conn.close()

        global_state_manager.update_analytics_state(
            project_stats=project_stats,
            recent_interactions=recent_data,
            dashboard_html=dashboard_html
        )

        logger.info("Analytics data refreshed and cached successfully")
        return project_stats, recent_data, dashboard_html

    except Exception as e:
        logger.error(f"Error refreshing analytics: {e}")

        error_stats = {
            "error": str(e),
            "total_conversations": None,
            "avg_session_length": None,
            "success_rate": None,
            "model_type": "Error",
            "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        error_html = f"""
        <div style="text-align: center; padding: 40px; border: 2px dashed #f44336; border-radius: 8px; background: #ffebee;">
            <h3 style="color: #f44336;">⚠️ Analytics Error</h3>
            <p>Could not load analytics data: {str(e)[:100]}</p>
        </div>
        """

        global_state_manager.update_analytics_state(
            project_stats=error_stats,
            recent_interactions=[],
            dashboard_html=error_html,
            error_state=str(e)
        )

        return error_stats, [], error_html
632
+
633
+
634
def export_metrics_json_persistent():
    """Write the current analytics snapshot to a timestamped JSON file and record the export."""
    try:
        stats, interactions, _dashboard = refresh_analytics_data_persistent()

        payload = dict(
            project="Mimir",
            export_timestamp=datetime.now().isoformat(),
            statistics=stats,
            recent_interactions=interactions,
        )

        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"mimir_metrics_{stamp}.json"

        # default=str stringifies any value json cannot encode natively.
        with open(filename, 'w') as out_file:
            json.dump(payload, out_file, indent=2, default=str)

        global_state_manager.add_export_record("JSON", filename, success=True)

        logger.info(f"Metrics exported to {filename}")
        gr.Info(f"Metrics exported successfully to {filename}")

    except Exception as e:
        global_state_manager.add_export_record("JSON", "failed", success=False)
        logger.error(f"Export failed: {e}")
        gr.Warning(f"Export failed: {str(e)}")
660
+
661
+
662
def export_metrics_csv_persistent():
    """Write the recent-interactions table to a timestamped CSV file and record the export."""
    try:
        import csv

        _stats, interactions, _dashboard = refresh_analytics_data_persistent()

        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"mimir_metrics_{stamp}.csv"
        header = ["Timestamp", "Response Time", "Mode", "Tools Used", "Quality Score", "Adapter"]

        # newline='' lets the csv module control line endings itself.
        with open(filename, 'w', newline='') as out_file:
            writer = csv.writer(out_file)
            writer.writerow(header)
            writer.writerows(interactions)

        global_state_manager.add_export_record("CSV", filename, success=True)

        logger.info(f"Metrics exported to {filename}")
        gr.Info(f"Metrics exported successfully to {filename}")

    except Exception as e:
        global_state_manager.add_export_record("CSV", "failed", success=False)
        logger.error(f"Export failed: {e}")
        gr.Warning(f"Export failed: {str(e)}")
687
+
688
+
689
def load_analytics_state():
    """
    Load analytics state from global manager.

    Returns:
        Tuple ``(project_stats, recent_interactions, dashboard_html)`` as
        stored in the global state manager. When no dashboard HTML has been
        stored yet, a placeholder panel prompting a refresh is returned.
    """
    analytics_state = global_state_manager.get_analytics_state()

    project_stats = analytics_state['project_stats']
    recent_interactions = analytics_state['recent_interactions']
    dashboard_html = analytics_state['dashboard_html']

    if dashboard_html is None:
        # The heading emoji is written as real UTF-8; it was mis-encoded
        # and rendered as garbage in the UI.
        dashboard_html = """
        <div style="text-align: center; padding: 40px; border: 2px dashed #ccc; border-radius: 8px; background: #f8f9fa;">
            <h3>📊 Analytics Dashboard</h3>
            <p>Click "Refresh Data" to load analytics.</p>
        </div>
        """

    return project_stats, recent_interactions, dashboard_html
706
+
707
+
708
def get_global_state_debug_info():
    """Return a snapshot of cache status, current time and all sessions for debugging."""
    # Dict entries are evaluated top to bottom, so the state manager is
    # queried for cache status first and for sessions last.
    return {
        "cache_status": global_state_manager.get_cache_status(),
        "timestamp": datetime.now().isoformat(),
        "sessions": global_state_manager.get_all_sessions(),
    }
719
+
720
+
721
+ # ============================================================================
722
+ # POST-PROCESSING
723
+ # ============================================================================
724
+
725
class ResponsePostProcessor:
    """
    Post-processing pipeline for educational responses.

    Cleans model artifacts, truncates over-long text at a sensible point,
    normalizes spacing, and falls back to a clarifying question when the
    result is too short to be useful.
    """

    def __init__(self, max_length: int = 1800, min_length: int = 10):
        """
        Args:
            max_length: Maximum number of characters kept in a response
            min_length: Minimum number of characters for a usable response
        """
        self.max_length = max_length
        self.min_length = min_length

        # Text from the first of these markers onward is dropped.
        self.logical_stop_patterns = [
            r'\n\n---\n',
            r'\n\n## Summary\b',
            r'\n\nIn conclusion\b',
            r'\n\nTo summarize\b',
        ]

    def process_response(self, raw_response: str, user_query: str = "") -> str:
        """
        Main post-processing pipeline.

        Returns the cleaned response, the fallback question when the
        cleaned text fails the quality check, or ``raw_response`` unchanged
        if processing itself raises.
        """
        try:
            cleaned = self._enhanced_token_cleanup(raw_response)
            cleaned = self._truncate_intelligently(cleaned)
            cleaned = self._enhance_readability(cleaned)

            if not self._passes_quality_check(cleaned):
                return self._generate_fallback_response(user_query)

            return cleaned.strip()

        except Exception as e:
            logger.error(f"Post-processing error: {e}")
            return raw_response

    def _enhanced_token_cleanup(self, text: str) -> str:
        """Remove model artifacts and collapse runs of blank lines."""
        artifacts = [
            r'<\|.*?\|>',
            r'###\s*$',
            r'User:\s*$',
            r'Assistant:\s*$',
        ]

        for pattern in artifacts:
            text = re.sub(pattern, '', text, flags=re.MULTILINE)

        # Collapse 3+ newlines to one paragraph break. Deleting them outright
        # glued neighbouring paragraphs together with no separator at all.
        return re.sub(r'\n\s*\n\s*\n+', '\n\n', text)

    def _truncate_intelligently(self, text: str) -> str:
        """
        Truncate at logical educational endpoints.

        Cuts at the first matching stop pattern, then enforces
        ``max_length`` by cutting after the last sentence that still fits
        (or hard-cutting when not even one sentence fits).
        """
        for pattern in self.logical_stop_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                # Fall through afterwards so max_length is still enforced.
                text = text[:match.start()].strip()
                break

        if len(text) <= self.max_length:
            return text

        # Slice the original text rather than re-joining split sentences,
        # so '?' / '!' and line breaks survive truncation.
        cut = 0
        for boundary in re.finditer(r'[.!?]+(?=\s|$)', text):
            if boundary.end() > self.max_length:
                break
            cut = boundary.end()

        if cut == 0:
            # No sentence fits: keep a hard-cut prefix rather than nothing.
            return text[:self.max_length].rstrip()

        return text[:cut].rstrip()

    def _enhance_readability(self, text: str) -> str:
        """Format for better presentation without destroying line structure."""
        # Insert the missing space after sentence punctuation ("end.Next").
        text = re.sub(r'([.!?])([A-Z])', r'\1 \2', text)
        # Collapse runs of spaces/tabs only; newlines (paragraph breaks,
        # list structure) must be preserved.
        text = re.sub(r'[ \t]{2,}', ' ', text)
        # Normalize bullet markers. Whitespace is required after the marker
        # so '**bold**' and '---' at line start are left alone.
        text = re.sub(r'\n[ \t]*[-*][ \t]+', '\n- ', text)

        return text

    def _passes_quality_check(self, text: str) -> bool:
        """Final quality validation: long enough and at least one real sentence."""
        if len(text.strip()) < self.min_length:
            return False

        sentences = re.split(r'[.!?]+', text)
        return any(len(sentence.strip()) > 5 for sentence in sentences)

    def _generate_fallback_response(self, user_query: str) -> str:
        """Generate safe fallback (``user_query`` is currently unused)."""
        return "I'd be happy to help you understand this better. Could you clarify what specific aspect you'd like me to focus on?"

    def process_and_stream_response(self, raw_response: str, user_query: str = "", delay: float = 0.015):
        """
        Process response then stream it word-by-word.

        Args:
            raw_response: Unprocessed model output
            user_query: Original user query, passed to ``process_response``
            delay: Seconds to sleep between words (0 disables the pause)

        Yields:
            The accumulated text after each word. The final value equals
            the processed response, including its line breaks.
        """
        try:
            processed_response = self.process_response(raw_response, user_query)

            current_output = ""

            # Each chunk is a word plus the whitespace following it, so
            # newlines are streamed through instead of becoming spaces.
            for chunk in re.findall(r'\S+\s*', processed_response):
                current_output += chunk
                yield current_output
                if delay > 0:
                    time.sleep(delay)

        except Exception as e:
            logger.error(f"Stream processing error: {e}")
            yield "I encountered an error processing the response."


post_processor = ResponsePostProcessor()
836
+
837
+
838
+ # ============================================================================
839
+ # TOOL FUNCTIONS
840
+ # ============================================================================
841
+
842
@tool(return_direct=False)
def Create_Graph_Tool(
    data: dict,
    plot_type: str,
    title: str = "Generated Plot",
    x_label: str = "",
    y_label: str = "",
    educational_context: str = ""
) -> str:
    """Generate educational graphs"""
    # NOTE(review): the docstring above is left untouched because a @tool
    # decorator may expose it as the tool description - confirm before editing.
    #
    # Renders a plot through generate_plot() and returns an HTML fragment with
    # the image embedded as a base64 PNG, optionally preceded by a highlighted
    # context note. On failure a red error paragraph is returned instead.
    # All caller-supplied text is HTML-escaped before being interpolated.
    import html  # local import keeps this block self-contained

    tool_start = log_step("Create_Graph_Tool")

    try:
        _content, artifact = generate_plot(
            data=data,
            plot_type=plot_type,
            title=title,
            x_label=x_label,
            y_label=y_label
        )

        if "error" in artifact:
            error_text = html.escape(str(artifact["error"]))
            return f'<p style="color:red;">Graph generation failed: {error_text}</p>'

        base64_image = artifact["base64_image"]

        context_html = ""
        if educational_context:
            # The light-bulb icon is written as a numeric character reference
            # so it does not depend on the file's encoding.
            context_html = f'<div style="margin: 10px 0; padding: 10px; background: #f8f9fa; border-left: 4px solid #007bff;">&#128161; {html.escape(educational_context)}</div>'

        # html.escape also escapes quotes, so the title is safe inside alt="".
        result = f"""{context_html}
<div style="text-align: center; margin: 20px 0;">
<img src="data:image/png;base64,{base64_image}"
style="max-width: 100%; height: auto; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1);"
alt="{html.escape(title)}" />
</div>"""

        return result

    except Exception as e:
        logger.error(f"Graph tool error: {e}")
        return f'<p style="color:red;">Error: {html.escape(str(e))}</p>'

    finally:
        # Single exit point for the timing log, whichever branch returned.
        log_step("Create_Graph_Tool", tool_start)
887
+
888
+
889
+ # ============================================================================
890
+ # MAIN ORCHESTRATION WORKFLOW
891
+ # ============================================================================
892
+
893
def orchestrate_turn(user_input: str, session_id: str = "default") -> str:
    """
    Run one conversational turn and return the post-processed reply.

    Steps (numbered as in the body):
    1. Reset prompt state
    2. Process user input (recent history)
    3. Tool decision
    4. Regex checks
    5. Routing agents
    6. Thinking agents
    7. Response prompt assembly
    8. Final prompt construction
    9. Response generation
    10. Post-processing
    11. Metrics tracking (background thread)

    Args:
        user_input: The user's current message.
        session_id: Key passed to the global state manager to look up the
            conversation history.

    Returns:
        The processed response text. If any step raises, the exception is
        logged and the string "I encountered an error: <message>" is returned.
    """
    turn_start = log_step("orchestrate_turn")
    run_id = str(uuid.uuid4())

    try:
        # ====================================================================
        # STEP 1: RESET PROMPT STATE
        # ====================================================================
        step_start = log_step("Step 1: Reset prompt state")
        global_state_manager.reset_prompt_state()
        prompt_state = global_state_manager.get_prompt_state_manager()
        log_step("Step 1: Reset prompt state", step_start)

        # ====================================================================
        # STEP 2: USER INPUT PROCESSING
        # ====================================================================
        step_start = log_step("Step 2: Process user input")

        # Keep only the eight most recent messages.
        conversation_state = global_state_manager.get_conversation_state(session_id)
        recent_history = (conversation_state['conversation_state'] or [])[-8:]

        # One "role: content" line per message, content capped at 100 chars.
        recent_history_formatted = "\n".join([
            f"{msg['role']}: {msg['content'][:100]}"
            for msg in recent_history
        ]) if recent_history else "No previous conversation"

        log_step("Step 2: Process user input", step_start)

        # ====================================================================
        # STEP 3: TOOL DECISION ENGINE
        # ====================================================================
        step_start = log_step("Step 3: Tool decision")
        tool_decision_result = tool_agent.should_use_visualization(user_input)

        # NOTE(review): these two values are never reassigned in this
        # function, so the prompt and the thinking agents always receive
        # empty tool output - confirm whether a tool call is missing here.
        tool_img_output = ""
        tool_context = ""

        if tool_decision_result:
            logger.info("Tool decision: YES - visualization needed")
            prompt_state.update("TOOL_USE_ENHANCEMENT", True)
        else:
            logger.info("Tool decision: NO - no visualization needed")

        log_step("Step 3: Tool decision", step_start)

        # ====================================================================
        # STEP 4: REGEX LOGICAL EXPRESSIONS
        # ====================================================================
        step_start = log_step("Step 4: Regex checks")
        logical_expressions.apply_all_checks(user_input, prompt_state)
        log_step("Step 4: Regex checks", step_start)

        # ====================================================================
        # STEP 5: ROUTING AGENTS
        # ====================================================================
        step_start = log_step("Step 5: Routing agents")

        # process() returns two newline-separated lists of prompt names.
        # tool_used is always falsy today because tool_img_output is "".
        response_prompts_str, thinking_prompts_str = routing_agents.process(
            user_input=user_input,
            tool_used=(tool_decision_result and bool(tool_img_output))
        )

        # Activate every response prompt the routing agents named.
        if response_prompts_str:
            for prompt_name in response_prompts_str.split('\n'):
                if prompt_name.strip():
                    prompt_state.update(prompt_name.strip(), True)
                    logger.info(f"Response prompt activated: {prompt_name.strip()}")

        # Thinking prompts are only logged here; Step 6 activates them.
        thinking_prompts_from_routing = thinking_prompts_str.split('\n') if thinking_prompts_str else []
        for prompt_name in thinking_prompts_from_routing:
            if prompt_name.strip():
                logger.info(f"Thinking prompt queued: {prompt_name.strip()}")

        log_step("Step 5: Routing agents", step_start)

        # ====================================================================
        # STEP 6: THINKING AGENT PROCESSING
        # ====================================================================
        step_start = log_step("Step 6: Thinking agents")

        thinking_prompts_list = []

        for prompt_name in thinking_prompts_from_routing:
            if prompt_name.strip():
                thinking_prompts_list.append(prompt_name.strip())
                prompt_state.update(prompt_name.strip(), True)

        # Heuristic: LaTeX formatting implies math thinking even when the
        # routing agents did not request it.
        if prompt_state.is_active("LATEX_FORMATTING") and "MATH_THINKING" not in thinking_prompts_list:
            thinking_prompts_list.append("MATH_THINKING")
            prompt_state.update("MATH_THINKING", True)

        # Run the thinking agents only when at least one prompt is active.
        thinking_context = ""
        if thinking_prompts_list:
            thinking_prompts_string = '\n'.join(thinking_prompts_list)
            logger.info(f"Active thinking agents: {thinking_prompts_list}")

            think_start = log_step("Thinking agents execution")
            thinking_context = thinking_agents.process(
                user_input=user_input,
                conversation_history=recent_history_formatted,
                thinking_prompts=thinking_prompts_string,
                tool_img_output=tool_img_output,
                tool_context=tool_context
            )
            log_step("Thinking agents execution", think_start)

        log_step("Step 6: Thinking agents", step_start)

        # ====================================================================
        # STEP 7: RESPONSE PROMPT ASSEMBLY
        # ====================================================================
        step_start = log_step("Step 7: Prompt assembly")

        response_prompt_names = prompt_state.get_active_response_prompts()

        # The core identity always comes first, then each active prompt.
        prompt_segments = [CORE_IDENTITY]

        prompt_map = {
            "VAUGE_INPUT": VAUGE_INPUT,
            "USER_UNDERSTANDING": USER_UNDERSTANDING,
            "GENERAL_FORMATTING": GENERAL_FORMATTING,
            "LATEX_FORMATTING": LATEX_FORMATTING,
            "GUIDING_TEACHING": GUIDING_TEACHING,
            "STRUCTURE_PRACTICE_QUESTIONS": STRUCTURE_PRACTICE_QUESTIONS,
            "PRACTICE_QUESTION_FOLLOWUP": PRACTICE_QUESTION_FOLLOWUP,
            "TOOL_USE_ENHANCEMENT": TOOL_USE_ENHANCEMENT,
        }

        # Names without an entry in prompt_map are silently skipped.
        for prompt_name in response_prompt_names:
            if prompt_name in prompt_map:
                prompt_segments.append(prompt_map[prompt_name])

        prompt_segments_text = "\n\n".join(prompt_segments)

        logger.info(f"Active prompts: {response_prompt_names}")
        log_step("Step 7: Prompt assembly", step_start)

        # ====================================================================
        # STEP 8: FINAL PROMPT CONSTRUCTION
        # ====================================================================
        step_start = log_step("Step 8: Final prompt construction")

        knowledge_cutoff = f"""

The current year is {CURRENT_YEAR}. Your knowledge cutoff date is October 2023. If the user asks about recent events or dynamic facts, inform them you may not have the most up-to-date information and suggest referencing direct sources."""

        complete_prompt = f"""
{prompt_segments_text}

If tools were used, context and output will be here. Ignore if empty:
Image output: {tool_img_output}
Image context: {tool_context}

Conversation history, if available:
{recent_history_formatted}

Consider any context available to you:
{thinking_context}

Here is the user's current query:
{user_input}

{knowledge_cutoff}
"""

        log_step("Step 8: Final prompt construction", step_start)

        # ====================================================================
        # STEP 9: RESPONSE GENERATION
        # ====================================================================
        step_start = log_step("Step 9: Response generation")
        raw_response = response_agent.invoke(complete_prompt)
        log_step("Step 9: Response generation", step_start)

        # ====================================================================
        # STEP 10: POST-PROCESSING
        # ====================================================================
        step_start = log_step("Step 10: Post-processing")
        processed_response = post_processor.process_response(raw_response, user_input)
        log_step("Step 10: Post-processing", step_start)

        # ====================================================================
        # STEP 11: METRICS TRACKING (BACKGROUND THREAD - NON-BLOCKING)
        # ====================================================================
        step_start = log_step("Step 11: Metrics tracking")

        # Snapshot per-turn values now. The prompt state comes from the global
        # state manager and is reset at the start of every turn, so a later
        # turn could change it before the background thread reads it. Taking
        # the end timestamp here also keeps the background quality evaluation
        # out of the reported response time.
        turn_end = time.time()
        tools_used = 1 if prompt_state.is_active("TOOL_USE_ENHANCEMENT") else 0
        thinking_prompts_snapshot = list(thinking_prompts_list)

        def track_metrics_async():
            """Evaluate and log this turn's metrics; failures are only logged."""
            try:
                logger.info("[Background] Starting metrics tracking...")

                quality_metrics = evaluate_educational_quality_with_tracking(
                    user_query=user_input,
                    response=processed_response,
                    thread_id=run_id,
                    session_id=session_id
                )

                metrics_to_log = {
                    "conversation_start": datetime.now().isoformat(),
                    "response_time": turn_end - turn_start,
                    "quality_score": calculate_response_quality(processed_response),
                    "educational_score": quality_metrics['educational_score'],
                    "prompt_mode": ",".join(response_prompt_names),
                    "tools_used": tools_used,
                    "thinking_agents": ",".join(thinking_prompts_snapshot) if thinking_prompts_snapshot else "none",
                    "active_adapter": response_agent.model_type if response_agent.model_loaded else "not_loaded"
                }

                log_metrics_to_database("Mimir", run_id, metrics_to_log)
                logger.info("[Background] βœ“ Metrics tracking completed")

            except Exception as metrics_error:
                logger.warning(f"[Background] Metrics tracking failed: {metrics_error}")

        # daemon=True so a pending metrics write never blocks shutdown.
        metrics_thread = threading.Thread(
            target=track_metrics_async,
            daemon=True,
            name="MetricsTracking"
        )
        metrics_thread.start()

        log_step("Step 11: Metrics tracking", step_start)
        logger.info("βœ“ Metrics tracking started in background - continuing immediately")

        log_step("orchestrate_turn", turn_start)
        return processed_response

    except Exception as e:
        logger.error(f"Orchestration error: {e}")
        import traceback
        logger.error(traceback.format_exc())
        log_step("orchestrate_turn", turn_start)
        return f"I encountered an error: {str(e)}"
1160
+
1161
+
1162
+ # ============================================================================
1163
+ # GRADIO CALLBACK FUNCTIONS (FIXED STATE MANAGEMENT)
1164
+ # ============================================================================
1165
+
1166
def get_loading_animation_base64():
    """Return ``loading_animation.gif`` as a base64 ``data:`` URI.

    The file is read from the current working directory.

    Returns:
        A ``data:image/gif;base64,...`` string, or ``None`` when the file is
        missing or cannot be read. Either failure is logged as a warning, so
        the caller can fall back to a placeholder instead of crashing.
    """
    try:
        with open("loading_animation.gif", "rb") as gif_file:
            gif_base64 = base64.b64encode(gif_file.read()).decode('utf-8')
    except FileNotFoundError:
        logger.warning("loading_animation.gif not found")
        return None
    except OSError as read_error:
        # Permission problems, a directory with that name, I/O errors, etc.
        logger.warning(f"loading_animation.gif could not be read: {read_error}")
        return None

    return f"data:image/gif;base64,{gif_base64}"
1176
+
1177
+
1178
def remove_loading_animations(chat_history):
    """Return a new list without the assistant loading placeholders.

    A message is dropped when its role is "assistant" and the string form of
    its content contains "loading-animation". The input list is not modified.
    """
    kept_messages = []
    for entry in chat_history:
        is_placeholder = (
            entry.get("role") == "assistant"
            and "loading-animation" in str(entry.get("content", ""))
        )
        if is_placeholder:
            continue
        kept_messages.append(entry)
    return kept_messages
1184
+
1185
+
1186
def add_user_message(message, chat_history, conversation_state):
    """
    Record a user message in both the chat display and the conversation log.

    A message that is empty after stripping is ignored and the states passed
    in are returned unchanged. Otherwise the states are re-read from the
    global state manager, the message is appended to both, and the manager is
    updated.

    Returns:
        ("", chat_history, conversation_state); the empty string is the first
        returned value in both cases.
    """
    started = log_step("add_user_message")

    if message.strip():
        # The manager's copy replaces whatever the caller passed in.
        snapshot = global_state_manager.get_conversation_state()
        chat_history = snapshot['chat_history']
        conversation_state = snapshot['conversation_state']

        # Each list gets its own dict so later edits to one do not leak
        # into the other.
        for target in (conversation_state, chat_history):
            target.append({"role": "user", "content": message})

        global_state_manager.update_conversation_state(chat_history, conversation_state)

    log_step("add_user_message", started)
    return "", chat_history, conversation_state
1213
+
1214
+
1215
def add_loading_animation(chat_history, conversation_state):
    """
    Append a loading placeholder to the chat display.

    State is always re-read from the global state manager. When the
    conversation log is empty nothing is added. Otherwise any earlier
    placeholder is removed, a new one (the GIF, or an empty 64px box when the
    GIF is unavailable) is appended as an assistant message, and the manager
    is updated.

    Returns:
        (chat_history, conversation_state)
    """
    started = log_step("add_loading_animation")

    snapshot = global_state_manager.get_conversation_state()
    chat_history = snapshot['chat_history']
    conversation_state = snapshot['conversation_state']

    if conversation_state:
        # Never stack two placeholders.
        chat_history = remove_loading_animations(chat_history)

        gif_src = get_loading_animation_base64()
        loading_html = (
            f'<div class="loading-animation" style="display: flex; align-items: center; justify-content: center; padding: 0.5px;"><img src="{gif_src}" alt="Thinking..." style="height: 64px; width: auto; max-width: 80px;" /></div>'
            if gif_src
            else '<div class="loading-animation" style="display: flex; align-items: center; justify-content: center; padding: 0.5px;"><div style="width: 64px; height: 64px;"></div></div>'
        )

        chat_history.append({"role": "assistant", "content": loading_html})
        global_state_manager.update_conversation_state(chat_history, conversation_state)

    log_step("add_loading_animation", started)
    return chat_history, conversation_state
1250
+
1251
+
1252
def generate_response(chat_history, conversation_state):
    """
    Generate the assistant reply for the latest user message and stream it.

    This is a generator: every yield is a (chat_history, conversation_state)
    pair. State is re-read from the global state manager, so the arguments
    are effectively ignored. The loading placeholder stays visible while
    orchestrate_turn runs and is removed when the first chunk arrives.

    Yields:
        The current states once per streamed chunk, then a final pair after
        the reply has been stored. When there is nothing to answer, or an
        error occurs, a single pair is still yielded; a bare
        ``return value`` inside a generator would yield nothing at all.
    """
    callback_start = log_step("generate_response")

    current_state = global_state_manager.get_conversation_state()
    chat_history = current_state['chat_history']
    conversation_state = current_state['conversation_state']

    if not conversation_state:
        log_step("generate_response", callback_start)
        yield chat_history, conversation_state
        return

    # Most recent user message, or "" when there is none.
    last_user_message = next(
        (msg["content"] for msg in reversed(conversation_state) if msg["role"] == "user"),
        "",
    )

    if not last_user_message:
        log_step("generate_response", callback_start)
        yield chat_history, conversation_state
        return

    try:
        # The loading placeholder is deliberately left in place during this
        # call, which takes time.
        orch_start = log_step("orchestrate_turn call")
        raw_response = orchestrate_turn(last_user_message)
        log_step("orchestrate_turn call", orch_start)

        # NOTE(review): orchestrate_turn already returns post-processed text
        # and the streaming helper post-processes again - confirm intended.
        final_response = raw_response
        streamed_any = False
        for chunk in post_processor.process_and_stream_response(raw_response, last_user_message):
            if not streamed_any:
                chat_history = remove_loading_animations(chat_history)
                streamed_any = True

            # Grow the assistant bubble in place, creating it on first use.
            if chat_history and chat_history[-1]["role"] == "assistant":
                chat_history[-1]["content"] = chunk
            else:
                chat_history.append({"role": "assistant", "content": chunk})

            final_response = chunk
            yield chat_history, conversation_state

        if not streamed_any:
            # Nothing was streamed: remove the placeholder anyway and show
            # the unstreamed response so both states stay in sync.
            chat_history = remove_loading_animations(chat_history)
            chat_history.append({"role": "assistant", "content": final_response})

        conversation_state.append({"role": "assistant", "content": final_response})
        global_state_manager.update_conversation_state(chat_history, conversation_state)

        yield chat_history, conversation_state

    except Exception as e:
        logger.error(f"Response generation error: {e}")
        import traceback
        logger.error(traceback.format_exc())

        error_msg = f"I encountered an error: {str(e)}"

        # Replace the placeholder with the error in both states.
        chat_history = remove_loading_animations(chat_history)
        chat_history.append({"role": "assistant", "content": error_msg})
        conversation_state.append({"role": "assistant", "content": error_msg})

        global_state_manager.update_conversation_state(chat_history, conversation_state)
        yield chat_history, conversation_state

    log_step("generate_response", callback_start)
1330
+
1331
+
1332
def reset_conversation():
    """
    Reset the conversation held by the global state manager.

    Returns:
        Two fresh empty lists: (chat_history, conversation_state).
    """
    started = log_step("reset_conversation")
    global_state_manager.reset_conversation_state()
    log_step("reset_conversation", started)

    empty_chat, empty_conversation = [], []
    return empty_chat, empty_conversation
1341
+
1342
+
1343
def load_conversation_state():
    """
    Fetch the current conversation from the global state manager.

    Returns:
        (chat_history, conversation_state) as stored by the manager.
    """
    started = log_step("load_conversation_state")
    snapshot = global_state_manager.get_conversation_state()
    log_step("load_conversation_state", started)

    chat_history = snapshot['chat_history']
    conversation_state = snapshot['conversation_state']
    return chat_history, conversation_state
1354
+
1355
+
1356
+ # ============================================================================
1357
+ # MULTI-PAGE INTERFACE
1358
+ # ============================================================================
1359
def create_interface():
    """Create multi-page Gradio interface.

    Builds the main chatbot page plus the "Analytics" and "Prompt Testing"
    routes. Each page gets the same navbar and renders the ``demo`` object
    exported by its page module.

    NOTE(review): the route blocks are placed at the same level as the main
    ``gr.Blocks`` context, following Gradio's multipage layout - verify this
    matches the file's original nesting.

    Returns:
        The assembled ``gr.Blocks`` application.
    """
    logger.info("Creating Gradio interface...")

    # Page modules are imported here, when the interface is built.
    import gradio_chatbot
    import gradio_analytics
    import gradio_prompt_testing

    def _add_navbar():
        # Must be called inside the page context it belongs to.
        gr.Navbar(
            visible=True,
            main_page_name="Mimir Chatbot",
            value=[("Case Study", "https://github.com/Jdesiree112/Technical_Portfolio/tree/main/CaseStudy_Mimir")]
        )

    with gr.Blocks(title="Mimir - Educational AI Assistant") as demo:
        _add_navbar()
        gradio_chatbot.demo.render()

    with demo.route("Analytics"):
        _add_navbar()
        gradio_analytics.demo.render()

    with demo.route("Prompt Testing"):
        _add_navbar()
        gradio_prompt_testing.demo.render()

    logger.info("Interface created successfully")
    return demo
1394
+
1395
+
1396
+ # ============================================================================
1397
+ # MAIN EXECUTION
1398
+ # ============================================================================
1399
if __name__ == "__main__":
    # Script entry point: compile/warm up the models, build the Gradio
    # interface, then serve it until the process is stopped.
    try:
        logger.info("="*60)
        logger.info("STARTING MAIN EXECUTION")
        logger.info("="*60)

        # Imported here so the import itself is covered by the error
        # handling below.
        logger.info("β†’ Importing compile_model...")
        from compile_model import compile_all

        logger.info("β†’ Starting model compilation...")
        compile_start = time.time()
        compile_all()
        compile_duration = time.time() - compile_start
        logger.info(f"βœ“ Model compilation completed in {compile_duration:.2f}s")

        logger.info("="*60)
        logger.info("MIMIR APPLICATION READY")
        logger.info("="*60)
        logger.info(f"LightEval available: {LIGHTEVAL_AVAILABLE}")
        logger.info(f"Current year: {CURRENT_YEAR}")
        logger.info("Single Qwen3-Claude model optimization: ENABLED βœ…")
        logger.info("="*60)

        logger.info("β†’ Creating Gradio interface...")
        interface_start = time.time()
        interface = create_interface()
        interface_duration = time.time() - interface_start
        logger.info(f"βœ“ Interface created in {interface_duration:.2f}s")

        logger.info("β†’ Launching Gradio server on 0.0.0.0:7860...")
        logger.info("β†’ Waiting for first user connection...")

        # With prevent_thread_lock=False, launch() blocks the main thread
        # while the server is running.
        interface.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            debug=True,
            favicon_path="favicon.ico" if os.path.exists("favicon.ico") else None,
            show_error=True,
            ssr_mode=False,
            quiet=False,
            prevent_thread_lock=False,
            max_threads=40
        )

        # Reached only after the blocking launch() call returns, so this
        # marks the end of serving rather than a successful start.
        logger.info("Gradio server stopped")

    except KeyboardInterrupt:
        logger.info("Shutting down Mimir gracefully...")
    except Exception as e:
        logger.error("="*60)
        logger.error("CRITICAL ERROR IN MAIN EXECUTION")
        logger.error("="*60)
        logger.error(f"Error type: {type(e).__name__}")
        logger.error(f"Error message: {e}")
        logger.error("="*60)
        logger.error("Full traceback:")
        import traceback
        logger.error(traceback.format_exc())
        logger.error("="*60)
        raise