jdesiree committed on
Commit
8e0d766
Β·
verified Β·
1 Parent(s): ad38d0d

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +1470 -0
app.py ADDED
@@ -0,0 +1,1470 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py
"""
Mimir Educational AI Assistant - Main Application

Architecture:
- Multi-page Gradio interface (Chatbot + Analytics with link to Mimir case study)
- Agent-based orchestration (Tool, Routing, Thinking, Response)
- Global state management with SQLite + HF dataset backup
- Prompt state tracking per turn
- LightEval for metrics tracking
- Logger for timing functions
- OPTIMIZED: Single Qwen3-4B-Claude model for all agents (3.3GB, fast startup)
"""
# Startup marker. Printed AFTER the docstring: the original printed first,
# which demoted the string above to a no-op expression and left
# ``module.__doc__`` as None.
print(">>> ENTERING app.py (top-level) <<<")
14
+ import os
15
+ import re
16
+ import sys
17
+ import time
18
+ import json
19
+ import base64
20
+ import logging
21
+ import sqlite3
22
+ import subprocess
23
+ import threading
24
+ import warnings
25
+ import uuid
26
+ from datetime import datetime
27
+ from pathlib import Path
28
+ from typing import Dict, List, Optional, Tuple, Any
29
+
30
# ============================================================================
# HUGGINGFACE CACHE SETUP - Avoid Permission Errors
# ============================================================================
# Use /tmp for all HuggingFace operations (writable at runtime).
# These env vars must be set BEFORE torch/transformers-family imports below
# read them, which is why this block precedes the core dependency imports.
HF_CACHE = "/tmp/huggingface"
os.makedirs(f"{HF_CACHE}/hub", exist_ok=True)
os.makedirs(f"{HF_CACHE}/modules", exist_ok=True)
os.makedirs(f"{HF_CACHE}/transformers", exist_ok=True)

# Configure HuggingFace cache locations
os.environ['HF_HOME'] = HF_CACHE
os.environ['HF_HUB_CACHE'] = f"{HF_CACHE}/hub"
os.environ['HF_MODULES_CACHE'] = f"{HF_CACHE}/modules"
os.environ['TRANSFORMERS_CACHE'] = f"{HF_CACHE}/transformers"
os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '1'  # Faster downloads

# Matplotlib cache (avoid permission warnings)
os.environ['MPLCONFIGDIR'] = "/tmp/matplotlib"
os.makedirs("/tmp/matplotlib", exist_ok=True)
49
+
50
+ # ============================================================================
51
+ # CORE DEPENDENCIES
52
+ # ============================================================================
53
+ import torch
54
+ import gradio as gr
55
+ from dotenv import load_dotenv
56
+
57
+ # Agent architecture (now with shared Qwen3-Claude!)
58
+ from agents import (
59
+ ToolDecisionAgent,
60
+ PromptRoutingAgents,
61
+ ThinkingAgents,
62
+ ResponseAgent,
63
+ get_shared_qwen3, # Pre-warm shared Qwen3-Claude
64
+ )
65
+
66
+ # State management
67
+ from state_manager import (
68
+ GlobalStateManager,
69
+ LogicalExpressions,
70
+ )
71
+
72
+ # Prompt library
73
+ from prompt_library import (
74
+ CORE_IDENTITY,
75
+ VAUGE_INPUT,
76
+ USER_UNDERSTANDING,
77
+ GENERAL_FORMATTING,
78
+ LATEX_FORMATTING,
79
+ GUIDING_TEACHING,
80
+ STRUCTURE_PRACTICE_QUESTIONS,
81
+ PRACTICE_QUESTION_FOLLOWUP,
82
+ TOOL_USE_ENHANCEMENT,
83
+ )
84
+
85
+ # LangGraph imports
86
+ from langgraph.graph import StateGraph, START, END
87
+ from langgraph.graph.message import add_messages
88
+ from langgraph.checkpoint.memory import MemorySaver
89
+ from langgraph.prebuilt import ToolNode
90
+
91
+ # LangChain Core
92
+ from langchain_core.tools import tool
93
+ from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, ToolMessage, BaseMessage
94
+
95
+ # Tool for graphing
96
+ from graph_tool import generate_plot
97
+
98
# ============================================================================
# LLAMA-CPP-PYTHON WHEEL INSTALLATION
# ============================================================================
wheel_url = "https://huggingface.co/spaces/jdesiree/Mimir/resolve/main/wheels/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl"

# Check if the package is already installed.
# BUGFIX: the pip distribution is named "llama-cpp-python" but the module it
# installs is "llama_cpp". Importing "llama_cpp_python" always raised
# ImportError, so the wheel was redundantly reinstalled on every startup.
try:
    import llama_cpp  # noqa: F401
    print("βœ“ llama_cpp_python is already installed.")
except ImportError:
    print("β†’ llama_cpp_python not found. Installing from wheel...")

    try:
        # List form (shell=False) keeps the install command injection-safe.
        subprocess.check_call([
            sys.executable, "-m", "pip", "install",
            "--no-cache-dir",
            wheel_url,
        ])
        print("βœ“ Installation successful.")
    except subprocess.CalledProcessError as e:
        print(f"❌ ERROR: Installation failed: {e}")
119
+
120
# ============================================================================
# LIGHTEVAL FOR METRICS
# ============================================================================
# LightEval is optional: LIGHTEVAL_AVAILABLE gates the BertScore path in
# evaluate_educational_quality_with_tracking(); without it the app falls
# back to heuristic-only scoring.
try:
    from lighteval.logging.evaluation_tracker import EvaluationTracker
    from lighteval.models.transformers.transformers_model import TransformersModel
    from lighteval.metrics.metrics_sample import BertScore, ROUGE
    from lighteval.tasks.requests import Doc
    LIGHTEVAL_AVAILABLE = True
except ImportError:
    LIGHTEVAL_AVAILABLE = False
    logging.warning("LightEval not available - metrics tracking limited")
132
+
133
# ============================================================================
# CONFIGURATION
# ============================================================================
# Suppress warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# Load environment
load_dotenv(".env")
# Either env var name is accepted for the HF API token.
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Debug and runtime settings
DEBUG_STATE = os.getenv("DEBUG_STATE", "false").lower() == "true"
CURRENT_YEAR = datetime.now().year
147
+
148
+
149
# ============================================================================
# LOGGING SETUP
# ============================================================================

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
158
+
159
+
160
def log_step(step_name: str, start_time: Optional[float] = None) -> float:
    """
    Log a pipeline step with timestamp and duration.

    Call once without ``start_time`` to mark the start of a step, then again
    with the returned value to log its completion and duration.

    Args:
        step_name: Name of the step
        start_time: Start time from previous call (if completing a step)

    Returns:
        Current time for next call
    """
    now = time.time()
    timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]

    # BUGFIX: explicit None check instead of truthiness — a start_time of 0.0
    # is falsy and would have been misreported as the start of a new step.
    if start_time is not None:
        duration = now - start_time
        logger.info(f"[{timestamp}] COMPLETED: {step_name} ({duration:.2f}s)")
    else:
        logger.info(f"[{timestamp}] STARTING: {step_name}")

    return now
181
+
182
+
183
# ============================================================================
# GLOBAL INITIALIZATION
# ============================================================================
# Module-level singletons created at import time; every handler below uses
# them directly.

logger.info("="*60)
logger.info("INITIALIZING MIMIR APPLICATION")
logger.info("="*60)

init_start = log_step("Global Initialization")

# Initialize state management
global_state_manager = GlobalStateManager()
logical_expressions = LogicalExpressions()
logger.info("State management initialized")

# Initialize agents (lazy loading - models load on first use)
tool_agent = ToolDecisionAgent()
routing_agents = PromptRoutingAgents()
thinking_agents = ThinkingAgents()
response_agent = ResponseAgent()
logger.info("Agents initialized (using shared Qwen3-Claude)")

# Pre-warm shared Qwen3-Claude (optional - happens on first agent call anyway)
logger.info("Shared Qwen3-Claude agent ready (loads on first use)")

log_step("Global Initialization", init_start)
209
+
210
+
211
+ # ============================================================================
212
+ # ANALYTICS & DATABASE FUNCTIONS
213
+ # ============================================================================
214
+
215
def get_trackio_database_path(project_name: str) -> Optional[str]:
    """Locate the metrics SQLite database for *project_name*.

    Probes a fixed list of candidate locations (project-specific files
    first, then the shared fallback) and returns the first path that exists
    on disk, or None when no database file is found.
    """
    candidates = (
        f"./{project_name}.db",
        f"./trackio_data/{project_name}.db",
        f"./.trackio/{project_name}.db",
        "./mimir_metrics.db",
    )
    return next((p for p in candidates if os.path.exists(p)), None)
229
+
230
+
231
def get_project_statistics_with_nulls(cursor, project_name: str) -> Dict:
    """Query metrics database for project statistics.

    Each statistic is computed independently; a failed or empty query yields
    None for that key instead of aborting the whole summary (hence
    "_with_nulls").

    Args:
        cursor: Open sqlite3 cursor over the metrics database. Rows are read
            by column name, so the caller must have set
            ``conn.row_factory = sqlite3.Row`` (as refresh_analytics_data_persistent does).
        project_name: Project whose metric rows are aggregated.

    Returns:
        Dict with keys "total_conversations", "avg_session_length" and
        "success_rate"; any value may be None when data is missing.
    """
    try:
        stats = {}

        # Total conversations: distinct run_ids recorded for the project.
        try:
            cursor.execute("""
                SELECT COUNT(DISTINCT run_id) as total_runs
                FROM metrics
                WHERE project_name = ?
            """, (project_name,))
            result = cursor.fetchone()
            # A count of 0 is mapped to None so the dashboard shows "no data".
            stats["total_conversations"] = result["total_runs"] if result and result["total_runs"] > 0 else None
        except sqlite3.Error:
            stats["total_conversations"] = None

        # Average response time (values are stored as TEXT; cast for AVG).
        try:
            cursor.execute("""
                SELECT AVG(CAST(value AS FLOAT)) as avg_response_time
                FROM metrics
                WHERE project_name = ? AND metric_name = 'response_time'
            """, (project_name,))
            result = cursor.fetchone()
            if result and result["avg_response_time"] is not None:
                stats["avg_session_length"] = round(result["avg_response_time"], 2)
            else:
                stats["avg_session_length"] = None
        except sqlite3.Error:
            stats["avg_session_length"] = None

        # Success rate: share of quality_score rows above the 3.5 threshold.
        try:
            cursor.execute("""
                SELECT
                    COUNT(*) as total_responses,
                    SUM(CASE WHEN CAST(value AS FLOAT) > 3.5 THEN 1 ELSE 0 END) as successful_responses
                FROM metrics
                WHERE project_name = ? AND metric_name = 'quality_score'
            """, (project_name,))
            result = cursor.fetchone()
            if result and result["total_responses"] > 0:
                success_rate = (result["successful_responses"] / result["total_responses"]) * 100
                stats["success_rate"] = round(success_rate, 1)
            else:
                stats["success_rate"] = None
        except sqlite3.Error:
            stats["success_rate"] = None

        return stats

    except sqlite3.Error as e:
        logger.error(f"Database error: {e}")
        return {"total_conversations": None, "avg_session_length": None, "success_rate": None}
286
+
287
+
288
def get_recent_interactions_with_nulls(cursor, project_name: str, limit: int = 10) -> List:
    """Query for recent interactions.

    Pivots per-run metric rows into one row per conversation: the
    'conversation_start' row anchors each run, and sibling metrics are
    LEFT JOINed in so that missing metrics surface as None.

    Args:
        cursor: sqlite3 cursor with Row-based (by-name) column access.
        project_name: Project to query.
        limit: Maximum number of most-recent conversations to return.

    Returns:
        List of [timestamp, response_time, prompt_mode, tools_used,
        quality_score, adapter_used] lists (any element may be None), or []
        on database error.
    """
    try:
        cursor.execute("""
            SELECT
                m1.timestamp,
                m2.value as response_time,
                m3.value as prompt_mode,
                m4.value as tools_used,
                m5.value as quality_score,
                m6.value as adapter_used,
                m1.run_id
            FROM metrics m1
            LEFT JOIN metrics m2 ON m1.run_id = m2.run_id AND m2.metric_name = 'response_time'
            LEFT JOIN metrics m3 ON m1.run_id = m3.run_id AND m3.metric_name = 'prompt_mode'
            LEFT JOIN metrics m4 ON m1.run_id = m4.run_id AND m4.metric_name = 'tools_used'
            LEFT JOIN metrics m5 ON m1.run_id = m5.run_id AND m5.metric_name = 'quality_score'
            LEFT JOIN metrics m6 ON m1.run_id = m6.run_id AND m6.metric_name = 'active_adapter'
            WHERE m1.project_name = ? AND m1.metric_name = 'conversation_start'
            ORDER BY m1.timestamp DESC
            LIMIT ?
        """, (project_name, limit))

        results = cursor.fetchall()
        recent_data = []

        for row in results:
            recent_data.append([
                # Trim ISO timestamp to minute precision (YYYY-MM-DDTHH:MM).
                row["timestamp"][:16] if row["timestamp"] else None,
                float(row["response_time"]) if row["response_time"] is not None else None,
                row["prompt_mode"] if row["prompt_mode"] else None,
                # Stored as "0"/"1" text; int() then bool() decodes it.
                bool(int(row["tools_used"])) if row["tools_used"] is not None else None,
                float(row["quality_score"]) if row["quality_score"] is not None else None,
                row["adapter_used"] if row["adapter_used"] else None
            ])

        return recent_data

    except sqlite3.Error as e:
        logger.error(f"Database error: {e}")
        return []
329
+
330
+
331
def create_dashboard_html_with_nulls(project_name: str, project_stats: Dict) -> str:
    """Create dashboard HTML with enhanced agent-based metrics.

    Mutates ``project_stats`` in place (adds the agent metrics pulled from
    the global state manager), then renders the full analytics panel as
    inline-styled HTML for the Gradio dashboard.

    Args:
        project_name: Display name of the project.
        project_stats: Stats dict from get_project_statistics_with_nulls;
            values may be None ("no data").

    Returns:
        HTML string for the dashboard panel.
    """
    # Render a stat value, or a styled "no data" placeholder for None.
    def format_stat(value, suffix="", no_data_text="No data"):
        if value is None:
            return f'<span style="color: #999; font-style: italic;">{no_data_text}</span>'
        return f"{value}{suffix}"

    # Same idea for the big headline numbers (muted "--" placeholder).
    def format_large_stat(value, suffix="", no_data_text="--"):
        if value is None:
            return f'<span style="color: #ccc;">{no_data_text}</span>'
        return f"{value}{suffix}"

    # Get evaluation metrics from global state
    try:
        eval_summary = global_state_manager.get_evaluation_summary()
        cache_status = global_state_manager.get_cache_status()

        project_stats["ml_educational_quality"] = eval_summary['aggregate_metrics']['avg_educational_quality']
        project_stats["user_satisfaction"] = eval_summary['aggregate_metrics']['user_satisfaction_rate']
        project_stats["active_sessions"] = cache_status['total_conversation_sessions']

    except Exception as e:
        logger.warning(f"Could not get global state metrics: {e}")
        project_stats["ml_educational_quality"] = None
        project_stats["user_satisfaction"] = None
        project_stats["active_sessions"] = None

    # Status determination: color/label bands keyed off the success rate.
    success_rate = project_stats.get("success_rate")
    if success_rate is not None:
        if success_rate >= 80:
            status_color = "#4CAF50"
            status_text = "Excellent"
        elif success_rate >= 60:
            status_color = "#FF9800"
            status_text = "Good"
        else:
            status_color = "#F44336"
            status_text = "Needs Improvement"
    else:
        status_color = "#999"
        status_text = "No data"

    # Agent-based metrics section
    agent_metrics_section = f"""
    <div style="margin: 15px 0; padding: 10px; background: #f0f8ff; border-radius: 4px; border-left: 4px solid #007bff;">
        <strong>πŸš€ Agent Performance (Qwen3-Claude Single Model):</strong>
        Educational Quality: {format_stat(project_stats.get('ml_educational_quality'), '', 'N/A')} |
        User Satisfaction: {format_stat(project_stats.get('user_satisfaction'), '%' if project_stats.get('user_satisfaction') else '', 'N/A')} |
        Active Sessions: {format_stat(project_stats.get('active_sessions'), '', 'N/A')}
    </div>
    """

    dashboard_html = f'''
    <div style="text-align: center; padding: 20px; border: 1px solid #ddd; border-radius: 8px; background: #f9f9f9;">
        <h3>πŸ“Š {project_name} Analytics</h3>

        <div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 15px; margin: 20px 0;">
            <div style="padding: 15px; background: white; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <div style="font-size: 24px; font-weight: bold; color: #2196F3;">{format_large_stat(project_stats.get('total_conversations'))}</div>
                <div style="color: #666; font-size: 12px;">Total Sessions</div>
            </div>
            <div style="padding: 15px; background: white; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <div style="font-size: 24px; font-weight: bold; color: #FF9800;">{format_large_stat(project_stats.get('avg_session_length'), 's' if project_stats.get('avg_session_length') else '')}</div>
                <div style="color: #666; font-size: 12px;">Avg Response Time</div>
            </div>
            <div style="padding: 15px; background: white; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <div style="font-size: 24px; font-weight: bold; color: {status_color};">{format_large_stat(success_rate, '%' if success_rate else '')}</div>
                <div style="color: #666; font-size: 12px;">Success Rate ({status_text})</div>
            </div>
        </div>

        {agent_metrics_section}

        <div style="margin: 15px 0; padding: 10px; background: #fff3cd; border-radius: 4px; font-size: 14px;">
            <strong>Model:</strong> {format_stat(project_stats.get('model_type'), no_data_text='Unknown')} |
            <strong>Last Updated:</strong> {project_stats.get('last_updated', 'Unknown')}
        </div>
    </div>
    '''

    return dashboard_html
413
+
414
+
415
def calculate_response_quality(response: str) -> float:
    """Heuristically score a response's quality on a 1.0-5.0 scale.

    Blends a length component (saturating at 200 chars) with the fraction
    of educational keywords present. Degenerate lengths are pinned: under
    20 chars scores 2.0, over 2000 chars scores 3.5.

    Args:
        response: The response text to score.

    Returns:
        Score in [1.0, 5.0]; 3.0 neutral fallback on unexpected errors.
    """
    try:
        # Hard pins for degenerate lengths take precedence, so compute them
        # first (the original computed both component scores and then threw
        # them away on these paths).
        if len(response) < 20:
            return 2.0
        if len(response) > 2000:
            return 3.5

        length_score = min(len(response) / 200, 1.0)
        educational_keywords = ['learn', 'understand', 'concept', 'example', 'practice']
        keyword_score = sum(1 for keyword in educational_keywords if keyword in response.lower()) / len(educational_keywords)

        base_score = 2.5 + (length_score * 1.5) + (keyword_score * 1.0)
        # Clamp into the advertised [1.0, 5.0] band.
        return min(max(base_score, 1.0), 5.0)
    except Exception:
        # BUGFIX: was a bare ``except:`` which also swallowed SystemExit and
        # KeyboardInterrupt; the neutral fallback is preserved.
        return 3.0
431
+
432
+
433
def evaluate_educational_quality_with_tracking(user_query: str, response: str, thread_id: Optional[str] = None, session_id: Optional[str] = None) -> Dict:
    """Educational quality evaluation with state tracking using LightEval.

    Scores a response with cheap text heuristics (examples, structure,
    length, LaTeX, learning prompts), optionally refines the semantic score
    with BertScore when LightEval is importable, and records the result in
    the global state manager.

    Args:
        user_query: The student's question.
        response: Mimir's answer to score.
        thread_id: Optional conversation-thread id (only used to label the
            LightEval Doc task name).
        session_id: Optional session id forwarded to the state manager.

    Returns:
        Dict with 'semantic_quality', 'educational_score', 'response_time'
        and 'indicators'; a neutral 0.5-score dict on failure.
    """
    start_time = time.time()

    try:
        # Educational indicators -- each entry is a boolean heuristic.
        educational_indicators = {
            'has_examples': 'example' in response.lower(),
            'structured_explanation': '##' in response or '1.' in response,
            'appropriate_length': 100 < len(response) < 1500,
            'encourages_learning': any(phrase in response.lower()
                                       for phrase in ['practice', 'try', 'consider', 'think about']),
            'uses_latex': '$' in response,
            'has_clear_sections': response.count('\n\n') >= 2
        }

        # Fraction of indicators satisfied, in [0, 1].
        educational_score = sum(educational_indicators.values()) / len(educational_indicators)
        # Length-based fallback for semantic quality (saturates at 500 chars).
        semantic_quality = min(len(response) / 500, 1.0)
        response_time = time.time() - start_time

        # Use LightEval if available
        if LIGHTEVAL_AVAILABLE:
            try:
                doc = Doc(
                    task_name=f"turn_{thread_id or session_id}",
                    query=user_query,
                    choices=[response],
                    gold_index=-1,
                    specific_output=response
                )

                bert_score = BertScore().compute(doc)
                # Keep the heuristic value when BertScore is falsy/unavailable.
                semantic_quality = bert_score if bert_score else semantic_quality

            except Exception as lighteval_error:
                logger.warning(f"LightEval computation failed: {lighteval_error}")

        metrics = {
            'semantic_quality': semantic_quality,
            'educational_score': educational_score,
            'response_time': response_time,
            'indicators': educational_indicators
        }

        # Track in global state
        global_state_manager.add_educational_quality_score(
            user_query=user_query,
            response=response,
            metrics=metrics,
            session_id=session_id
        )

        logger.info(f"Educational quality evaluated: {educational_score:.3f}")
        return metrics

    except Exception as e:
        logger.error(f"Educational quality evaluation failed: {e}")
        return {'educational_score': 0.5, 'semantic_quality': 0.5, 'response_time': 0.0}
491
+
492
def log_metrics_to_database(project_name: str, run_id: str, metrics: Dict):
    """Log metrics to SQLite database for dashboard.

    Creates the ``metrics`` table on first use and inserts one row per
    metric, all sharing a single batch timestamp. Failures are logged and
    never raised (best-effort persistence).

    Args:
        project_name: Project the metrics belong to.
        run_id: Identifier of the run/conversation the metrics describe.
        metrics: Mapping of metric name -> value; values are stringified.
    """
    try:
        db_path = get_trackio_database_path(project_name)

        if db_path is None:
            db_path = "./mimir_metrics.db"

        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()

            # Create metrics table if not exists
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS metrics (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    project_name TEXT,
                    run_id TEXT,
                    metric_name TEXT,
                    value TEXT,
                    timestamp TEXT
                )
            """)

            # One shared timestamp per batch; executemany performs the
            # inserts in a single C-level loop.
            timestamp = datetime.now().isoformat()
            cursor.executemany(
                """
                INSERT INTO metrics (project_name, run_id, metric_name, value, timestamp)
                VALUES (?, ?, ?, ?, ?)
                """,
                [
                    (project_name, run_id, metric_name, str(metric_value), timestamp)
                    for metric_name, metric_value in metrics.items()
                ],
            )

            conn.commit()
        finally:
            # BUGFIX: the original leaked the connection whenever an insert
            # raised; always close it.
            conn.close()

        logger.info(f"Logged {len(metrics)} metrics to database")

    except Exception as e:
        logger.error(f"Failed to log metrics to database: {e}")
530
+
531
+
532
def sync_trackio_with_global_state():
    """Sync metrics database with global state manager data.

    Pulls the aggregate evaluation summary from the global state manager
    and writes it to the SQLite dashboard DB under a fresh random run id.
    Failures are logged but never raised (best-effort sync).
    """
    try:
        eval_summary = global_state_manager.get_evaluation_summary()

        # Log to database (agent-based metrics only)
        metrics = {
            "educational_quality_avg": eval_summary['aggregate_metrics']['avg_educational_quality'],
            "user_satisfaction": eval_summary['aggregate_metrics']['user_satisfaction_rate'],
            "total_evaluations": sum(eval_summary['total_evaluations'].values())
        }

        # Each sync is recorded as its own run.
        log_metrics_to_database("Mimir", str(uuid.uuid4()), metrics)

        logger.info("Synced global state metrics to database")

    except Exception as e:
        logger.error(f"Failed to sync metrics to database: {e}")
550
+
551
+
552
def refresh_analytics_data_persistent():
    """Refresh analytics data with global state persistence.

    Serves the cached snapshot when the last refresh is under 30 seconds
    old; otherwise re-queries the metrics database, re-renders the
    dashboard, and caches the result in the global state manager.

    Returns:
        (project_stats, recent_interactions, dashboard_html) triple; on
        failure an error-stats dict, an empty list and an error panel.
    """
    project_name = "Mimir"

    try:
        analytics_state = global_state_manager.get_analytics_state()
        last_refresh = analytics_state.get('last_refresh')

        # If refreshed within last 30 seconds, return cached.
        # BUGFIX: was `.seconds`, which only holds the sub-day remainder of
        # the timedelta -- a refresh that happened N whole days (+ <30s) ago
        # wrongly hit the cache. total_seconds() measures full elapsed time.
        if last_refresh and (datetime.now() - last_refresh).total_seconds() < 30:
            logger.info("Using cached analytics data (recent refresh)")
            return (
                analytics_state['project_stats'],
                analytics_state['recent_interactions'],
                analytics_state['dashboard_html']
            )

        db_path = get_trackio_database_path(project_name)

        if db_path is None:
            # No database yet: render an all-None dashboard and cache it.
            logger.warning("No metrics database found")
            project_stats = {
                "total_conversations": None,
                "avg_session_length": None,
                "success_rate": None,
                "model_type": "Qwen3-4B-Claude GGUF (Q6_K - Single Model)",
                "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }

            dashboard_html = create_dashboard_html_with_nulls(project_name, project_stats)
            recent_interactions = []

            global_state_manager.update_analytics_state(
                project_stats=project_stats,
                recent_interactions=recent_interactions,
                dashboard_html=dashboard_html
            )

            return project_stats, recent_interactions, dashboard_html

        conn = sqlite3.connect(db_path)
        try:
            # Row factory gives the helper queries name-based column access.
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            project_stats = get_project_statistics_with_nulls(cursor, project_name)
            project_stats["model_type"] = "Qwen3-4B-Claude GGUF (Q6_K - Single Model)"
            project_stats["last_updated"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            recent_data = get_recent_interactions_with_nulls(cursor, project_name, limit=10)
            dashboard_html = create_dashboard_html_with_nulls(project_name, project_stats)
        finally:
            # BUGFIX: the connection leaked whenever a query above raised.
            conn.close()

        global_state_manager.update_analytics_state(
            project_stats=project_stats,
            recent_interactions=recent_data,
            dashboard_html=dashboard_html
        )

        logger.info("Analytics data refreshed and cached successfully")
        return project_stats, recent_data, dashboard_html

    except Exception as e:
        logger.error(f"Error refreshing analytics: {e}")

        error_stats = {
            "error": str(e),
            "total_conversations": None,
            "avg_session_length": None,
            "success_rate": None,
            "model_type": "Error",
            "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        error_html = f"""
        <div style="text-align: center; padding: 40px; border: 2px dashed #f44336; border-radius: 8px; background: #ffebee;">
            <h3 style="color: #f44336;">⚠️ Analytics Error</h3>
            <p>Could not load analytics data: {str(e)[:100]}</p>
        </div>
        """

        global_state_manager.update_analytics_state(
            project_stats=error_stats,
            recent_interactions=[],
            dashboard_html=error_html,
            error_state=str(e)
        )

        return error_stats, [], error_html
641
+
642
+
643
def export_metrics_json_persistent():
    """Export metrics as JSON file.

    Writes the current stats and recent interactions to a timestamped JSON
    file in the working directory and records the export (success or
    failure) in the global state manager. User feedback goes through
    gr.Info / gr.Warning.
    """
    try:
        project_stats, recent_data, _ = refresh_analytics_data_persistent()

        export_data = {
            "project": "Mimir",
            "export_timestamp": datetime.now().isoformat(),
            "statistics": project_stats,
            "recent_interactions": recent_data
        }

        filename = f"mimir_metrics_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

        # default=str keeps datetimes and other non-JSON values serializable.
        with open(filename, 'w') as f:
            json.dump(export_data, f, indent=2, default=str)

        global_state_manager.add_export_record("JSON", filename, success=True)

        # BUGFIX: messages hard-coded "(unknown)" instead of interpolating
        # the file that was actually written.
        logger.info(f"Metrics exported to {filename}")
        gr.Info(f"Metrics exported successfully to {filename}")

    except Exception as e:
        global_state_manager.add_export_record("JSON", "failed", success=False)
        logger.error(f"Export failed: {e}")
        gr.Warning(f"Export failed: {str(e)}")
669
+
670
+
671
def export_metrics_csv_persistent():
    """Export metrics as CSV file.

    Writes the recent-interaction rows to a timestamped CSV file in the
    working directory and records the export (success or failure) in the
    global state manager. User feedback goes through gr.Info / gr.Warning.
    """
    try:
        import csv

        _, recent_data, _ = refresh_analytics_data_persistent()

        filename = f"mimir_metrics_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"

        with open(filename, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(["Timestamp", "Response Time", "Mode", "Tools Used", "Quality Score", "Adapter"])

            for row in recent_data:
                writer.writerow(row)

        global_state_manager.add_export_record("CSV", filename, success=True)

        # BUGFIX: messages hard-coded "(unknown)" instead of interpolating
        # the file that was actually written.
        logger.info(f"Metrics exported to {filename}")
        gr.Info(f"Metrics exported successfully to {filename}")

    except Exception as e:
        global_state_manager.add_export_record("CSV", "failed", success=False)
        logger.error(f"Export failed: {e}")
        gr.Warning(f"Export failed: {str(e)}")
696
+
697
+
698
def load_analytics_state():
    """Load analytics state from the global manager.

    Returns the cached (project_stats, recent_interactions, dashboard_html)
    triple, substituting a placeholder panel when no dashboard HTML has
    been rendered yet.
    """
    state = global_state_manager.get_analytics_state()
    dashboard_html = state['dashboard_html']

    if dashboard_html is None:
        dashboard_html = """
        <div style="text-align: center; padding: 40px; border: 2px dashed #ccc; border-radius: 8px; background: #f8f9fa;">
            <h3>πŸ“Š Analytics Dashboard</h3>
            <p>Click "Refresh Data" to load analytics.</p>
        </div>
        """

    return state['project_stats'], state['recent_interactions'], dashboard_html
715
+
716
+
717
def get_global_state_debug_info():
    """Return a debug snapshot of the global state: cache status, current
    timestamp, and all known sessions."""
    return {
        "cache_status": global_state_manager.get_cache_status(),
        "timestamp": datetime.now().isoformat(),
        "sessions": global_state_manager.get_all_sessions(),
    }
728
+
729
+
730
+ # ============================================================================
731
+ # POST-PROCESSING
732
+ # ============================================================================
733
+
734
class ResponsePostProcessor:
    """Post-processing pipeline for educational responses.

    Cleans model artifacts, truncates at logical endpoints, normalizes
    whitespace/bullets, and falls back to a canned reply when the cleaned
    text fails a minimal quality check. Also supports word-by-word
    streaming of the processed result.
    """

    def __init__(self, max_length: int = 1800, min_length: int = 10):
        # max_length: hard character budget enforced by sentence-wise truncation.
        # min_length: minimum cleaned length accepted by the quality check.
        self.max_length = max_length
        self.min_length = min_length

        # Patterns marking a natural end of an answer; everything from the
        # first match onward is dropped by _truncate_intelligently.
        self.logical_stop_patterns = [
            r'\n\n---\n',
            r'\n\n## Summary\b',
            r'\n\nIn conclusion\b',
            r'\n\nTo summarize\b',
        ]

    def process_response(self, raw_response: str, user_query: str = "") -> str:
        """Main post-processing pipeline: cleanup -> truncate -> format.

        Returns a fallback message if the cleaned text fails the quality
        check, and the raw text unchanged if any step raises.
        """
        try:
            cleaned = self._enhanced_token_cleanup(raw_response)
            cleaned = self._truncate_intelligently(cleaned)
            cleaned = self._enhance_readability(cleaned)

            if not self._passes_quality_check(cleaned):
                return self._generate_fallback_response(user_query)

            return cleaned.strip()

        except Exception as e:
            logger.error(f"Post-processing error: {e}")
            return raw_response

    def _enhanced_token_cleanup(self, text: str) -> str:
        """Remove model artifacts (chat-template tokens, dangling role tags)."""
        artifacts = [
            r'<\|.*?\|>',        # special tokens like <|im_end|>
            r'###\s*$',          # trailing markdown heading stub
            r'User:\s*$',        # dangling next-turn role labels
            r'Assistant:\s*$',
            # NOTE(review): this deletes runs of 3+ newlines outright rather
            # than collapsing them to one paragraph break -- confirm intended.
            r'\n\s*\n\s*\n+',
        ]

        for pattern in artifacts:
            text = re.sub(pattern, '', text, flags=re.MULTILINE)

        return text

    def _truncate_intelligently(self, text: str) -> str:
        """Truncate at logical educational endpoints.

        Cuts at the first logical stop pattern if present; otherwise keeps
        whole sentences until max_length would be exceeded.
        """
        for pattern in self.logical_stop_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return text[:match.start()].strip()

        if len(text) <= self.max_length:
            return text

        sentences = re.split(r'[.!?]+\s+', text)
        truncated = ""

        for sentence in sentences:
            # Re-append a ". " terminator since re.split consumed it.
            test_length = len(truncated + sentence + ". ")
            if test_length <= self.max_length:
                truncated += sentence + ". "
            else:
                break

        return truncated.strip()

    def _enhance_readability(self, text: str) -> str:
        """Format for better presentation."""
        # Ensure a space after sentence-ending punctuation.
        text = re.sub(r'([.!?])([A-Z])', r'\1 \2', text)
        # NOTE(review): \s{2,} also matches "\n\n", so this collapses blank
        # lines (paragraph breaks) into single spaces -- confirm intended.
        text = re.sub(r'\s{2,}', ' ', text)
        # Normalize bullet markers to "- ".
        text = re.sub(r'\n\s*[-*]\s*', '\n- ', text)

        return text

    def _passes_quality_check(self, text: str) -> bool:
        """Final quality validation: minimum length and at least one
        sentence longer than 5 characters."""
        if len(text.strip()) < self.min_length:
            return False

        sentences = re.split(r'[.!?]+', text)
        valid_sentences = [s for s in sentences if len(s.strip()) > 5]

        return len(valid_sentences) > 0

    def _generate_fallback_response(self, user_query: str) -> str:
        """Generate safe fallback used when post-processing yields unusable text."""
        return "I'd be happy to help you understand this better. Could you clarify what specific aspect you'd like me to focus on?"

    def process_and_stream_response(self, raw_response: str, user_query: str = ""):
        """Process response then stream word-by-word.

        Yields the progressively growing output string with a ~15 ms pause
        per word to simulate token streaming in the UI.
        """
        try:
            processed_response = self.process_response(raw_response, user_query)

            words = processed_response.split()
            current_output = ""

            for i, word in enumerate(words):
                current_output += word
                if i < len(words) - 1:
                    current_output += " "

                yield current_output
                time.sleep(0.015)

        except Exception as e:
            logger.error(f"Stream processing error: {e}")
            yield "I encountered an error processing the response."
842
+
843
+
844
+ post_processor = ResponsePostProcessor()
845
+
846
+
847
+ # ============================================================================
848
+ # TOOL FUNCTIONS
849
+ # ============================================================================
850
+
851
@tool(return_direct=False)
def Create_Graph_Tool(
    data: dict,
    plot_type: str,
    title: str = "Generated Plot",
    x_label: str = "",
    y_label: str = "",
    educational_context: str = ""
) -> str:
    """Generate educational graphs.

    Delegates plot rendering to ``generate_plot`` and wraps the resulting
    base64 PNG in self-contained HTML suitable for direct display in chat.

    Args:
        data: Plot data passed through to ``generate_plot`` (schema defined
            by that helper — not visible here).
        plot_type: Plot kind identifier understood by ``generate_plot``.
        title: Plot title; also used as the image alt text.
        x_label: X-axis label.
        y_label: Y-axis label.
        educational_context: Optional explanatory note rendered in a callout
            box above the image.

    Returns:
        An HTML snippet: either the embedded image (plus optional context
        box), or a red error paragraph on failure. Errors are returned as
        HTML rather than raised so the chat UI always gets renderable output.
    """
    tool_start = log_step("Create_Graph_Tool")

    try:
        # generate_plot returns (content, artifact); only the artifact dict
        # (base64 image or error) is used here.
        content, artifact = generate_plot(
            data=data,
            plot_type=plot_type,
            title=title,
            x_label=x_label,
            y_label=y_label
        )

        if "error" in artifact:
            log_step("Create_Graph_Tool", tool_start)
            return f'<p style="color:red;">Graph generation failed: {artifact["error"]}</p>'

        base64_image = artifact["base64_image"]

        # Optional explanatory callout shown above the plot.
        context_html = ""
        if educational_context:
            context_html = f'<div style="margin: 10px 0; padding: 10px; background: #f8f9fa; border-left: 4px solid #007bff;">💡 {educational_context}</div>'

        # Inline data URI keeps the snippet self-contained (no file hosting).
        result = f"""{context_html}
<div style="text-align: center; margin: 20px 0;">
    <img src="data:image/png;base64,{base64_image}"
         style="max-width: 100%; height: auto; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1);"
         alt="{title}" />
</div>"""

        log_step("Create_Graph_Tool", tool_start)
        return result

    except Exception as e:
        logger.error(f"Graph tool error: {e}")
        log_step("Create_Graph_Tool", tool_start)
        return f'<p style="color:red;">Error: {str(e)}</p>'
896
+
897
+
898
+ # ============================================================================
899
+ # MAIN ORCHESTRATION WORKFLOW
900
+ # ============================================================================
901
+
902
def orchestrate_turn(user_input: str, session_id: str = "default") -> str:
    """
    Main orchestration function implementing the redesign workflow.

    OPTIMIZED: Uses single Qwen3-Claude GGUF (loads once, all agents share)

    Steps:
    1. Reset prompt state
    2. Process user input (history)
    3. Tool decision (Qwen3-Claude)
    4. Regex checks
    5. Agent execution (Qwen3-Claude)
    6. Thinking agents (Qwen3-Claude)
    7. Response prompt assembly
    8. Final prompt construction
    9. Response generation (Qwen3-Claude)
    10. Post-processing
    11. Metrics tracking (background thread)

    Args:
        user_input: The user's current query text.
        session_id: Conversation session key for history lookup.

    Returns:
        The post-processed assistant response, or an error message string
        on any unhandled failure (never raises).
    """
    turn_start = log_step("orchestrate_turn")
    run_id = str(uuid.uuid4())  # unique id for this turn, used as metrics thread_id

    try:
        # ====================================================================
        # STEP 1: RESET PROMPT STATE
        # ====================================================================
        # Prompt flags are per-turn; clear anything left from the last turn.
        step_start = log_step("Step 1: Reset prompt state")
        global_state_manager.reset_prompt_state()
        prompt_state = global_state_manager.get_prompt_state_manager()
        log_step("Step 1: Reset prompt state", step_start)

        # ====================================================================
        # STEP 2: USER INPUT PROCESSING
        # ====================================================================
        step_start = log_step("Step 2: Process user input")

        # Get conversation history (last 8 messages only, to bound prompt size)
        conversation_state = global_state_manager.get_conversation_state(session_id)
        recent_history = conversation_state['conversation_state'][-8:] if conversation_state['conversation_state'] else []

        # Format history for agents; each message truncated to 100 chars.
        recent_history_formatted = "\n".join([
            f"{msg['role']}: {msg['content'][:100]}"
            for msg in recent_history
        ]) if recent_history else "No previous conversation"

        log_step("Step 2: Process user input", step_start)

        # ====================================================================
        # STEP 3: TOOL DECISION ENGINE (Qwen3-Claude)
        # ====================================================================
        step_start = log_step("Step 3: Tool decision")
        tool_decision_result = tool_agent.should_use_visualization(user_input)

        # NOTE(review): tool_img_output/tool_context are initialized empty and
        # never populated in this function — Create_Graph_Tool is never invoked
        # here even when the decision is YES. Confirm whether tool execution
        # was intended between Steps 3 and 5.
        tool_img_output = ""
        tool_context = ""

        if tool_decision_result:
            logger.info("Tool decision: YES - visualization needed")
            prompt_state.update("TOOL_USE_ENHANCEMENT", True)
        else:
            logger.info("Tool decision: NO - no visualization needed")

        log_step("Step 3: Tool decision", step_start)

        # ====================================================================
        # STEP 4: REGEX LOGICAL EXPRESSIONS
        # ====================================================================
        # Cheap pattern-based checks that can flip prompt flags without an LLM call.
        step_start = log_step("Step 4: Regex checks")
        logical_expressions.apply_all_checks(user_input, prompt_state)
        log_step("Step 4: Regex checks", step_start)

        # ====================================================================
        # STEP 5: SEQUENTIAL AGENT EXECUTION (Qwen3-Claude)
        # ====================================================================
        step_start = log_step("Step 5: Routing agents")

        # Use unified process() method that handles all 4 routing agents.
        # Returns two newline-separated prompt-name lists.
        response_prompts_str, thinking_prompts_str = routing_agents.process(
            user_input=user_input,
            tool_used=(tool_decision_result and bool(tool_img_output))
        )

        # Update prompt state with response prompts
        if response_prompts_str:
            for prompt_name in response_prompts_str.split('\n'):
                if prompt_name.strip():
                    prompt_state.update(prompt_name.strip(), True)
                    logger.info(f"Response prompt activated: {prompt_name.strip()}")

        # Store thinking prompts for Step 6 (will be processed by ThinkingAgents)
        thinking_prompts_from_routing = thinking_prompts_str.split('\n') if thinking_prompts_str else []
        for prompt_name in thinking_prompts_from_routing:
            if prompt_name.strip():
                logger.info(f"Thinking prompt queued: {prompt_name.strip()}")

        log_step("Step 5: Routing agents", step_start)

        # ====================================================================
        # STEP 6: THINKING AGENT PROCESSING (Qwen3-Claude)
        # ====================================================================
        step_start = log_step("Step 6: Thinking agents")

        # Use thinking prompts identified by routing agents in Step 5
        thinking_prompts_list = []

        # Add thinking prompts from routing agents
        for prompt_name in thinking_prompts_from_routing:
            if prompt_name.strip():
                thinking_prompts_list.append(prompt_name.strip())
                prompt_state.update(prompt_name.strip(), True)

        # Additional heuristic: Add MATH_THINKING if LATEX_FORMATTING is active
        # (This ensures math thinking is triggered even if routing agents didn't detect it)
        if prompt_state.is_active("LATEX_FORMATTING") and "MATH_THINKING" not in thinking_prompts_list:
            thinking_prompts_list.append("MATH_THINKING")
            prompt_state.update("MATH_THINKING", True)

        # Execute thinking agents if any are active
        thinking_context = ""
        if thinking_prompts_list:
            thinking_prompts_string = '\n'.join(thinking_prompts_list)
            logger.info(f"Active thinking agents: {thinking_prompts_list}")

            think_start = log_step("Thinking agents execution")
            thinking_context = thinking_agents.process(
                user_input=user_input,
                conversation_history=recent_history_formatted,
                thinking_prompts=thinking_prompts_string,
                tool_img_output=tool_img_output,
                tool_context=tool_context
            )
            log_step("Thinking agents execution", think_start)

        log_step("Step 6: Thinking agents", step_start)

        # ====================================================================
        # STEP 7: RESPONSE PROMPT ASSEMBLY
        # ====================================================================
        step_start = log_step("Step 7: Prompt assembly")

        # Get active response prompts
        response_prompt_names = prompt_state.get_active_response_prompts()

        # Build prompt segments; CORE_IDENTITY is always first.
        prompt_segments = [CORE_IDENTITY]

        # Maps flag names to module-level prompt text constants.
        # NOTE: "VAUGE_INPUT" spelling matches the constant defined elsewhere
        # in this file — do not "fix" one side without the other.
        prompt_map = {
            "VAUGE_INPUT": VAUGE_INPUT,
            "USER_UNDERSTANDING": USER_UNDERSTANDING,
            "GENERAL_FORMATTING": GENERAL_FORMATTING,
            "LATEX_FORMATTING": LATEX_FORMATTING,
            "GUIDING_TEACHING": GUIDING_TEACHING,
            "STRUCTURE_PRACTICE_QUESTIONS": STRUCTURE_PRACTICE_QUESTIONS,
            "PRACTICE_QUESTION_FOLLOWUP": PRACTICE_QUESTION_FOLLOWUP,
            "TOOL_USE_ENHANCEMENT": TOOL_USE_ENHANCEMENT,
        }

        for prompt_name in response_prompt_names:
            if prompt_name in prompt_map:
                prompt_segments.append(prompt_map[prompt_name])

        prompt_segments_text = "\n\n".join(prompt_segments)

        logger.info(f"Active prompts: {response_prompt_names}")
        log_step("Step 7: Prompt assembly", step_start)

        # ====================================================================
        # STEP 8: FINAL PROMPT CONSTRUCTION
        # ====================================================================
        step_start = log_step("Step 8: Final prompt construction")

        # Knowledge cutoff
        knowledge_cutoff = f"""

The current year is {CURRENT_YEAR}. Your knowledge cutoff date is October 2023. If the user asks about recent events or dynamic facts, inform them you may not have the most up-to-date information and suggest referencing direct sources."""

        complete_prompt = f"""
{prompt_segments_text}

If tools were used, context and output will be here. Ignore if empty:
Image output: {tool_img_output}
Image context: {tool_context}

Conversation history, if available:
{recent_history_formatted}

Consider any context available to you:
{thinking_context}

Here is the user's current query:
{user_input}

{knowledge_cutoff}
"""

        log_step("Step 8: Final prompt construction", step_start)

        # ====================================================================
        # STEP 9: RESPONSE GENERATION (Qwen3-Claude)
        # (header previously said "Phi3", inconsistent with the docstring)
        # ====================================================================
        step_start = log_step("Step 9: Response generation")
        raw_response = response_agent.invoke(complete_prompt)
        log_step("Step 9: Response generation", step_start)

        # ====================================================================
        # STEP 10: POST-PROCESSING
        # ====================================================================
        step_start = log_step("Step 10: Post-processing")
        processed_response = post_processor.process_response(raw_response, user_input)
        log_step("Step 10: Post-processing", step_start)

        # ====================================================================
        # STEP 11: METRICS TRACKING (BACKGROUND THREAD - NON-BLOCKING)
        # ====================================================================
        step_start = log_step("Step 11: Metrics tracking")

        def track_metrics_async():
            """Run metrics tracking in background to avoid blocking the turn."""
            try:
                logger.info("[Background] Starting metrics tracking...")

                # Track educational quality
                quality_metrics = evaluate_educational_quality_with_tracking(
                    user_query=user_input,
                    response=processed_response,
                    thread_id=run_id,
                    session_id=session_id
                )

                # Log metrics to database.
                # NOTE(review): response_time assumes log_step() returns a
                # time.time() timestamp — verify against its definition.
                metrics_to_log = {
                    "conversation_start": datetime.now().isoformat(),
                    "response_time": time.time() - turn_start,
                    "quality_score": calculate_response_quality(processed_response),
                    "educational_score": quality_metrics['educational_score'],
                    "prompt_mode": ",".join(response_prompt_names),
                    "tools_used": 1 if prompt_state.is_active("TOOL_USE_ENHANCEMENT") else 0,
                    "thinking_agents": ",".join(thinking_prompts_list) if thinking_prompts_list else "none",
                    "active_adapter": response_agent.model_type if response_agent.model_loaded else "not_loaded"
                }

                log_metrics_to_database("Mimir", run_id, metrics_to_log)
                logger.info("[Background] ✓ Metrics tracking completed")

            except Exception as metrics_error:
                # Metrics must never break the user-facing turn.
                logger.warning(f"[Background] Metrics tracking failed: {metrics_error}")

        # Start background thread (daemon=True so it doesn't block shutdown)
        metrics_thread = threading.Thread(
            target=track_metrics_async,
            daemon=True,
            name="MetricsTracking"
        )
        metrics_thread.start()

        log_step("Step 11: Metrics tracking", step_start)
        logger.info("✓ Metrics tracking started in background - continuing immediately")

        log_step("orchestrate_turn", turn_start)
        return processed_response

    except Exception as e:
        # Catch-all boundary: return a readable error instead of raising
        # into the Gradio callback.
        logger.error(f"Orchestration error: {e}")
        import traceback
        logger.error(traceback.format_exc())
        log_step("orchestrate_turn", turn_start)
        return f"I encountered an error: {str(e)}"
1169
+
1170
+
1171
+ # ============================================================================
1172
+ # GRADIO CALLBACK FUNCTIONS (FIXED STATE MANAGEMENT)
1173
+ # ============================================================================
1174
+
1175
def get_loading_animation_base64():
    """Load the animated GIF and return it as a base64 data URI.

    Returns None (after logging a warning) when loading_animation.gif is
    not present in the working directory.
    """
    try:
        with open("loading_animation.gif", "rb") as gif_file:
            encoded = base64.b64encode(gif_file.read()).decode('utf-8')
    except FileNotFoundError:
        logger.warning("loading_animation.gif not found")
        return None
    return f"data:image/gif;base64,{encoded}"
1185
+
1186
+
1187
def remove_loading_animations(chat_history):
    """Return chat_history without assistant 'loading-animation' placeholders."""
    cleaned = []
    for msg in chat_history:
        is_loader = (
            msg.get("role") == "assistant"
            and "loading-animation" in str(msg.get("content", ""))
        )
        if not is_loader:
            cleaned.append(msg)
    return cleaned
1193
+
1194
+
1195
def add_user_message(message, chat_history, conversation_state):
    """Append the user's message to both tracked conversation states.

    Returns ("", chat_history, conversation_state) so Gradio clears the
    textbox and receives the refreshed states.

    NOTE(review): the incoming chat_history/conversation_state arguments
    are replaced by the global manager's copies — confirm this shadowing
    is intentional.
    """
    timer = log_step("add_user_message")

    # Blank input: nothing to record, return states untouched.
    if not message.strip():
        log_step("add_user_message", timer)
        return "", chat_history, conversation_state

    # Source of truth lives in the global manager, not the Gradio args.
    snapshot = global_state_manager.get_conversation_state()
    chat_history = snapshot['chat_history']
    conversation_state = snapshot['conversation_state']

    # Record the message in both representations.
    conversation_state.append({"role": "user", "content": message})
    chat_history.append({"role": "user", "content": message})

    # Persist back to the global manager.
    global_state_manager.update_conversation_state(chat_history, conversation_state)

    log_step("add_user_message", timer)

    # Empty string clears the textbox; the states update Gradio components.
    return "", chat_history, conversation_state
1222
+
1223
+
1224
def add_loading_animation(chat_history, conversation_state):
    """Append a 'thinking' placeholder message from the assistant.

    Pulls fresh state from the global manager, drops any stale loading
    placeholders, appends a new one, persists the result, and returns
    both states for Gradio.
    """
    timer = log_step("add_loading_animation")

    # Always work against the global manager's copy of the state.
    snapshot = global_state_manager.get_conversation_state()
    chat_history = snapshot['chat_history']
    conversation_state = snapshot['conversation_state']

    # No conversation yet: nothing to animate.
    if not conversation_state:
        log_step("add_loading_animation", timer)
        return chat_history, conversation_state

    # Drop any placeholder left over from a previous turn.
    chat_history = remove_loading_animations(chat_history)

    # Prefer the GIF; fall back to an empty box of the same footprint.
    gif_data = get_loading_animation_base64()
    if gif_data:
        loading_html = f'<div class="loading-animation" style="display: flex; align-items: center; justify-content: center; padding: 0.5px;"><img src="{gif_data}" alt="Thinking..." style="height: 64px; width: auto; max-width: 80px;" /></div>'
    else:
        loading_html = '<div class="loading-animation" style="display: flex; align-items: center; justify-content: center; padding: 0.5px;"><div style="width: 64px; height: 64px;"></div></div>'

    chat_history.append({"role": "assistant", "content": loading_html})

    # Persist and hand the refreshed states back to Gradio.
    global_state_manager.update_conversation_state(chat_history, conversation_state)

    log_step("add_loading_animation", timer)

    return chat_history, conversation_state
1259
+
1260
+
1261
def generate_response(chat_history, conversation_state):
    """
    Generate a response via orchestration and stream it to the chat UI.

    This is a Gradio generator callback: it yields (chat_history,
    conversation_state) tuples as chunks arrive.

    BUGFIX: the early-exit guards previously used ``return value`` inside a
    generator. In a generator function that raises StopIteration(value)
    without ever emitting an update, so Gradio received nothing and the UI
    stalled. Each guard now yields the current states once before returning.

    BUGFIX: the final response is tracked explicitly instead of via the
    fragile ``'chunk' in locals()`` check.
    """
    callback_start = log_step("generate_response")

    # Get fresh state from global manager
    current_state = global_state_manager.get_conversation_state()
    chat_history = current_state['chat_history']
    conversation_state = current_state['conversation_state']

    if not conversation_state:
        log_step("generate_response", callback_start)
        yield chat_history, conversation_state
        return

    # Find the most recent user message to respond to.
    last_user_message = ""
    for msg in reversed(conversation_state):
        if msg["role"] == "user":
            last_user_message = msg["content"]
            break

    if not last_user_message:
        log_step("generate_response", callback_start)
        yield chat_history, conversation_state
        return

    try:
        # Loading animation is intentionally left in place while the
        # (slow) orchestration runs; it is removed on the first chunk.
        orch_start = log_step("orchestrate_turn call")
        raw_response = orchestrate_turn(last_user_message)
        log_step("orchestrate_turn call", orch_start)

        # Stream the processed response word-by-word.
        final_response = raw_response
        first_chunk = True
        for chunk in post_processor.process_and_stream_response(raw_response, last_user_message):
            # Remove loading animation on FIRST chunk only.
            if first_chunk:
                chat_history = remove_loading_animations(chat_history)
                first_chunk = False

            # Update (or create) the trailing assistant bubble.
            if chat_history and chat_history[-1]["role"] == "assistant":
                chat_history[-1]["content"] = chunk
            else:
                chat_history.append({"role": "assistant", "content": chunk})

            final_response = chunk
            # Yield to update UI during streaming.
            yield chat_history, conversation_state

        # Record the complete response in the conversation state.
        conversation_state.append({"role": "assistant", "content": final_response})

        # Persist the final conversation.
        global_state_manager.update_conversation_state(chat_history, conversation_state)

        # Final yield with complete states.
        yield chat_history, conversation_state

    except Exception as e:
        logger.error(f"Response generation error: {e}")
        import traceback
        logger.error(traceback.format_exc())

        error_msg = f"I encountered an error: {str(e)}"

        # Clean up and show error
        chat_history = remove_loading_animations(chat_history)
        chat_history.append({"role": "assistant", "content": error_msg})
        conversation_state.append({"role": "assistant", "content": error_msg})

        global_state_manager.update_conversation_state(chat_history, conversation_state)
        yield chat_history, conversation_state

    log_step("generate_response", callback_start)
1339
+
1340
+
1341
def reset_conversation():
    """Clear the persisted conversation and hand Gradio fresh empty states."""
    timer = log_step("reset_conversation")
    global_state_manager.reset_conversation_state()
    log_step("reset_conversation", timer)
    # Empty chat_history and conversation_state for the UI components.
    return [], []
1350
+
1351
+
1352
def load_conversation_state():
    """Fetch the persisted conversation and return (chat_history, conversation_state)."""
    timer = log_step("load_conversation_state")
    snapshot = global_state_manager.get_conversation_state()
    log_step("load_conversation_state", timer)
    # Unpack the two states Gradio components expect.
    return snapshot['chat_history'], snapshot['conversation_state']
1363
+
1364
+
1365
+ # ============================================================================
1366
+ # MULTI-PAGE INTERFACE
1367
+ # ============================================================================
1368
def create_interface():
    """Create multi-page Gradio interface.

    Renders three pre-built Blocks apps (chatbot, analytics, prompt
    testing) as pages of a single demo using Gradio's multipage
    ``demo.route`` API, each with its own navbar carrying the same
    external "Case Study" link.

    Returns:
        The assembled gr.Blocks demo, ready to launch.

    NOTE(review): gr.Navbar and Blocks.route require a recent Gradio
    version — confirm the pinned dependency supports them.
    """
    logger.info("Creating Gradio interface...")

    # Import page modules (each exposes a pre-built `demo` Blocks object).
    # Imported lazily here so page construction happens at interface-build
    # time, not at module import.
    import gradio_chatbot
    import gradio_analytics
    import gradio_prompt_testing  # NEW

    with gr.Blocks(title="Mimir - Educational AI Assistant") as demo:
        # Main page: chatbot. Each route declares its own navbar instance.
        navbar = gr.Navbar(
            visible=True,
            main_page_name="Mimir Chatbot",
            value=[("Case Study", "https://github.com/Jdesiree112/Technical_Portfolio/tree/main/CaseStudy_Mimir")]
        )
        gradio_chatbot.demo.render()

        with demo.route("Analytics"):
            navbar = gr.Navbar(
                visible=True,
                main_page_name="Mimir Chatbot",
                value=[("Case Study", "https://github.com/Jdesiree112/Technical_Portfolio/tree/main/CaseStudy_Mimir")]
            )
            gradio_analytics.demo.render()

        with demo.route("Prompt Testing"):
            navbar = gr.Navbar(
                visible=True,
                main_page_name="Mimir Chatbot",
                value=[("Case Study", "https://github.com/Jdesiree112/Technical_Portfolio/tree/main/CaseStudy_Mimir")]
            )
            gradio_prompt_testing.demo.render()

    logger.info("Interface created successfully")
    return demo
1403
+
1404
+
1405
+ # ============================================================================
1406
+ # MAIN EXECUTION
1407
+ # ============================================================================
1408
+ if __name__ == "__main__":
1409
+ try:
1410
+ logger.info("="*60)
1411
+ logger.info("STARTING MAIN EXECUTION")
1412
+ logger.info("="*60)
1413
+
1414
+ # Warm up models first
1415
+ logger.info("β†’ Importing compile_model...")
1416
+ from compile_model import compile_all
1417
+
1418
+ logger.info("β†’ Starting model compilation...")
1419
+ compile_start = time.time()
1420
+ compile_all()
1421
+ compile_duration = time.time() - compile_start
1422
+ logger.info(f"βœ“ Model compilation completed in {compile_duration:.2f}s")
1423
+
1424
+ logger.info("="*60)
1425
+ logger.info("MIMIR APPLICATION READY")
1426
+ logger.info("="*60)
1427
+ logger.info(f"LightEval available: {LIGHTEVAL_AVAILABLE}")
1428
+ logger.info(f"Current year: {CURRENT_YEAR}")
1429
+ logger.info(f"Single Qwen3-Claude model optimization: ENABLED βœ…")
1430
+ logger.info("="*60)
1431
+
1432
+ # Create and launch interface
1433
+ logger.info("β†’ Creating Gradio interface...")
1434
+ interface_start = time.time()
1435
+ interface = create_interface()
1436
+ interface_duration = time.time() - interface_start
1437
+ logger.info(f"βœ“ Interface created in {interface_duration:.2f}s")
1438
+
1439
+ logger.info("β†’ Launching Gradio server on 0.0.0.0:7860...")
1440
+ logger.info("β†’ Waiting for first user connection...")
1441
+
1442
+ interface.launch(
1443
+ server_name="0.0.0.0",
1444
+ server_port=7860,
1445
+ share=False,
1446
+ debug=True,
1447
+ favicon_path="favicon.ico" if os.path.exists("favicon.ico") else None,
1448
+ show_error=True,
1449
+ ssr_mode=False,
1450
+ quiet=False,
1451
+ prevent_thread_lock=False,
1452
+ max_threads=40
1453
+ )
1454
+
1455
+ logger.info("βœ“ Gradio server started successfully")
1456
+
1457
+ except KeyboardInterrupt:
1458
+ logger.info("Shutting down Mimir gracefully...")
1459
+ except Exception as e:
1460
+ logger.error("="*60)
1461
+ logger.error("CRITICAL ERROR IN MAIN EXECUTION")
1462
+ logger.error("="*60)
1463
+ logger.error(f"Error type: {type(e).__name__}")
1464
+ logger.error(f"Error message: {e}")
1465
+ logger.error("="*60)
1466
+ logger.error("Full traceback:")
1467
+ import traceback
1468
+ logger.error(traceback.format_exc())
1469
+ logger.error("="*60)
1470
+ raise