jdesiree commited on
Commit
79845af
·
verified ·
1 Parent(s): 14f74c5

Upload 7 files

Browse files
favicon.ico ADDED
gradio_analytics.py ADDED
@@ -0,0 +1,538 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gradio_analytics.py
2
+ import gradio as gr
3
+ import logging
4
+ import json
5
+ import sqlite3
6
+ import os
7
+ from datetime import datetime
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
# Import the real analytics backend from the main app. When app.py cannot be
# imported (e.g. running this page standalone), fall back to no-op mocks so
# the dashboard still renders. Each mock mirrors the real function's
# signature and return shape — assumed to match app.py; TODO confirm.
try:
    from app import (
        get_trackio_database_path,
        get_project_statistics_with_nulls,
        get_recent_interactions_with_nulls,
        create_dashboard_html_with_nulls,
        calculate_response_quality,
        refresh_analytics_data_persistent as refresh_analytics_data,
        export_metrics_json_persistent as export_metrics_json,
        export_metrics_csv_persistent as export_metrics_csv,
        load_analytics_state,
        get_global_state_debug_info,
        sync_trackio_with_global_state,
        global_state_manager,
        evaluate_educational_quality_with_tracking,
    )
except ImportError:
    def get_trackio_database_path(project_name):
        # None signals "no trackio database available".
        return None

    def get_project_statistics_with_nulls(cursor, project_name):
        # Null stats render as empty fields in the Project Stats JSON panel.
        return {
            "total_conversations": None,
            "avg_session_length": None,
            "success_rate": None
        }

    def get_recent_interactions_with_nulls(cursor, project_name, limit=10):
        return []

    def create_dashboard_html_with_nulls(project_name, project_stats):
        return f"<div>Mock dashboard for {project_name}</div>"

    def calculate_response_quality(response):
        return 3.0

    def refresh_analytics_data():
        # Shape matches (project_stats, recent_interactions, dashboard_html).
        return {}, [], "<div>Mock analytics</div>"

    def export_metrics_json():
        gr.Info("Mock JSON export")

    def export_metrics_csv():
        gr.Info("Mock CSV export")

    def load_analytics_state():
        # Same 3-tuple shape as refresh_analytics_data().
        return {}, [], "<div>Mock analytics state</div>"

    def get_global_state_debug_info():
        return {"status": "mock"}

    def sync_trackio_with_global_state():
        pass

    def evaluate_educational_quality_with_tracking(*args, **kwargs):
        return {"educational_score": 0.5}

    class MockStateManager:
        # Minimal stand-in for app.global_state_manager covering only the
        # methods this page calls.
        def get_cache_status(self):
            return {"status": "mock"}
        def get_evaluation_summary(self, include_history=False):
            # NOTE: no 'avg_educational_quality' key — show_evaluation_metrics
            # relies on its own except-branch when running against this mock.
            return {"aggregate_metrics": {}, "total_evaluations": {}}
        def clear_all_states(self):
            pass
        def _backup_to_hf_dataset(self):
            pass

    global_state_manager = MockStateManager()
79
+
80
def load_custom_css():
    """Load styles.css for the analytics page.

    Returns:
        The CSS text, or an empty string when the file is missing or
        unreadable (the page then falls back to default Gradio styling).
    """
    try:
        with open("styles.css", "r", encoding="utf-8") as css_file:
            css_content = css_file.read()
        # FIX: was an f-string with no placeholder; lazy %-args also defer
        # formatting until the record is actually emitted.
        logger.info("CSS loaded successfully for analytics page (%d chars)", len(css_content))
        return css_content
    except FileNotFoundError:
        logger.warning("styles.css file not found for analytics page")
        return ""
    except Exception as e:
        logger.warning("Error reading styles.css: %s", e)
        return ""
92
+
93
def show_cache_info():
    """Return a Markdown summary of the HuggingFace model cache.

    Scans /tmp/huggingface and reports total disk usage plus a per-repo
    breakdown (size, type, revision count). On any failure — cache dir
    missing, huggingface_hub not installed — returns a human-readable
    error string instead of raising, so the UI always gets text.
    """
    # FIX: removed unused `from pathlib import Path` import.
    try:
        from huggingface_hub import scan_cache_dir

        cache_info = scan_cache_dir(cache_dir="/tmp/huggingface")

        info_text = f"""
**HuggingFace Cache Status:**

**Total Size:** {cache_info.size_on_disk / (1024**3):.2f} GB
**Number of Repos:** {len(cache_info.repos)}

**Cached Models:**
"""

        for repo in cache_info.repos:
            size_gb = repo.size_on_disk / (1024**3)
            info_text += f"""
- **{repo.repo_id}**
  - Size: {size_gb:.2f} GB
  - Type: {repo.repo_type}
  - Revisions: {len(repo.revisions)}
"""

        return info_text

    except Exception as e:
        return f"Error inspecting cache: {str(e)}"
122
+
123
def launch_external_trackio():
    """Spawn the standalone trackio dashboard for the 'Mimir' project.

    Blocks until the `trackio show` subprocess exits, then reports success
    or failure to the user via Gradio toasts; never raises.
    """
    try:
        import subprocess

        completed = subprocess.run(
            ["trackio", "show", "--project", "Mimir"],
            capture_output=False,
            text=True
        )

        if completed.returncode != 0:
            gr.Warning("Could not launch trackio dashboard")
        else:
            gr.Info("Trackio dashboard launched in browser")

    except Exception as e:
        # Covers trackio not being installed (FileNotFoundError) and more.
        logger.error(f"Failed to launch trackio: {e}")
        gr.Warning(f"Failed to launch trackio dashboard: {str(e)}")
140
+
141
def show_cache_status():
    """Render a Markdown report of the global-state cache for the Status panel.

    Returns the report text; on failure returns (and toasts) the error
    message, so the bound Markdown component always receives a string.
    """
    try:
        debug_info = get_global_state_debug_info()
        # The mock backend has no "cache_status" key; the .get() defaults
        # below keep the report rendering either way.
        cache_status = debug_info.get("cache_status", {})

        status_text = f"""
**Global State Cache Status:**
- Session ID: {cache_status.get('session_id', 'Unknown')}
- Analytics Cached: {'Yes' if cache_status.get('analytics_cached') else 'No'}
- Conversation Cached: {'Yes' if cache_status.get('conversation_cached') else 'No'}
- Analytics Last Refresh: {cache_status.get('analytics_last_refresh', 'Never')}
- Total Analytics Sessions: {cache_status.get('total_analytics_sessions', 0)}
- Total Conversation Sessions: {cache_status.get('total_conversation_sessions', 0)}

**Analytics Data Status:**
- Has Analytics Data: {'Yes' if cache_status.get('analytics_has_data') else 'No'}
- Conversation Length: {cache_status.get('conversation_length', 0)} messages
- Chat History Length: {cache_status.get('chat_history_length', 0)} messages

*Last Updated: {datetime.now().strftime('%H:%M:%S')}*
"""

        gr.Info("Cache status updated - check the Status panel")
        return status_text

    except Exception as e:
        error_text = f"Error getting cache status: {str(e)}"
        gr.Warning(error_text)
        return error_text
170
+
171
def manual_backup_to_hf():
    """Trigger an immediate backup of global state to the HF dataset.

    Returns a status string for the Backup Status textbox; never raises.
    """
    # NOTE(review): calls a private method on the state manager — presumably
    # no public backup API exists; confirm against state manager.
    try:
        global_state_manager._backup_to_hf_dataset()
        gr.Info("Manual backup to HF dataset completed successfully")
        finished_at = datetime.now().strftime('%H:%M:%S')
        return f"Backup completed at {finished_at}"
    except Exception as e:
        gr.Warning(f"Backup failed: {str(e)}")
        return f"Backup failed: {str(e)}"
179
+
180
def get_persistence_status():
    """Summarize where and when global state is persisted.

    Returns a dict for the Persistence Information JSON panel; on failure
    returns {"error": ...} instead of raising.
    """
    try:
        # NOTE(review): reads private attributes of the state manager
        # (_db_path, _last_hf_backup, _hf_backup_interval) — a public accessor
        # would be safer. Also assumes _last_hf_backup is a datetime (strftime
        # raises if it is None; the except below converts that into the error
        # dict). TODO confirm these attributes against the state manager.
        status_info = {
            "SQLite DB": "Active" if os.path.exists(global_state_manager._db_path) else "Not Found",
            "HF Dataset": global_state_manager.dataset_repo,
            "Last HF Backup": global_state_manager._last_hf_backup.strftime('%Y-%m-%d %H:%M:%S'),
            "DB Path": global_state_manager._db_path,
            "Backup Interval": f"{global_state_manager._hf_backup_interval}s"
        }
        return status_info
    except Exception as e:
        return {"error": str(e)}
192
+
193
def clear_all_global_states():
    """Wipe every cached global state and return placeholder dashboard values.

    Returns (project_stats, recent_interactions, dashboard_html), matching
    the outputs of the refresh handlers. If clearing fails, falls back to
    reloading the current analytics state (same 3-tuple shape).
    """
    try:
        global_state_manager.clear_all_states()
        gr.Info("All global states cleared successfully")

        cleared_stats = {
            "total_conversations": None,
            "avg_session_length": None,
            "success_rate": None,
            "model_type": "Cleared",
            "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        placeholder_html = """
<div style="text-align: center; padding: 40px; border: 2px dashed #ccc; border-radius: 8px; background: #f8f9fa;">
<h3>States Cleared</h3>
<p>All global states have been cleared.</p>
<p>Click "Refresh Data" to reload analytics.</p>
</div>
"""

        return cleared_stats, [], placeholder_html

    except Exception as e:
        gr.Warning(f"Failed to clear states: {str(e)}")
        return load_analytics_state()
219
+
220
def show_evaluation_metrics():
    """Fetch the evaluation summary and shape it for the dashboard tables.

    Returns (summary_dict, aggregate_rows, recent_rows). Any failure —
    including the mock backend's missing aggregate keys — yields
    ({}, [], []) so the UI components always receive valid values.
    """
    try:
        summary = global_state_manager.get_evaluation_summary(include_history=True)

        aggregates = summary['aggregate_metrics']
        metrics_rows = [
            ["Educational Quality", f"{aggregates['avg_educational_quality']:.3f}"],
            ["User Satisfaction", f"{aggregates['user_satisfaction_rate']:.3f}"]
        ]

        recent_rows = []
        if 'history' in summary:
            # Only the five most recent scores are shown.
            for item in summary['history']['recent_educational_scores'][-5:]:
                recent_rows.append([
                    item['timestamp'][:16],
                    f"{item['educational_score']:.3f}",
                    f"{item['semantic_quality']:.3f}",
                    f"{item['response_time']:.3f}s"
                ])

        return summary, metrics_rows, recent_rows

    except Exception as e:
        logger.error(f"Error getting evaluation metrics: {e}")
        return {}, [], []
244
+
245
def sync_and_refresh_all():
    """Sync trackio with global state, then refresh every dashboard panel.

    Returns a 6-tuple matching the "Sync & Refresh All" button's outputs:
    (project_stats, recent_interactions, dashboard_html,
     eval_summary, metrics_rows, recent_evaluation_rows).
    """
    try:
        sync_trackio_with_global_state()
        project_stats, recent_interactions, dashboard_html = refresh_analytics_data()
        eval_summary, metrics_data, recent_evaluations = show_evaluation_metrics()

        gr.Info("All data synced and refreshed successfully")

        return project_stats, recent_interactions, dashboard_html, eval_summary, metrics_data, recent_evaluations

    except Exception as e:
        logger.error(f"Sync and refresh failed: {e}")
        gr.Warning(f"Sync failed: {str(e)}")
        # load_analytics_state() yields a 3-tuple; pad with empty evaluation
        # values so the arity still matches the six wired output components.
        return load_analytics_state() + ({}, [], [])
259
+
260
with gr.Blocks() as demo:
    # Inject styles.css (if present) so this page matches the rest of the app.
    custom_css = load_custom_css()
    if custom_css:
        gr.HTML(f'<style>{custom_css}</style>')

    gr.HTML('<div class="analytics-title"><h2>Mimir Analytics Dashboard</h2></div>')

    gr.Markdown("Monitor educational AI performance and effectiveness metrics with persistent state management.")

    with gr.Tabs():
        # ------------------------------------------------------------------
        # Tab 1: usage analytics — refresh/sync controls, exports, recent
        # sessions table.
        # ------------------------------------------------------------------
        with gr.TabItem("Traditional Analytics"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("## Controls")
                    refresh_btn = gr.Button("Refresh Data", variant="primary")
                    sync_all_btn = gr.Button("Sync & Refresh All", variant="primary")

                    with gr.Row():
                        export_json_btn = gr.Button("Export JSON", variant="secondary", size="sm")
                        export_csv_btn = gr.Button("Export CSV", variant="secondary", size="sm")

                    launch_trackio_btn = gr.Button("Launch Trackio Dashboard", variant="secondary")

                    gr.Markdown("### State Management")
                    with gr.Row():
                        cache_status_btn = gr.Button("Cache Status", size="sm")
                        clear_states_btn = gr.Button("Clear All States", size="sm", variant="stop")

                    with gr.Group():
                        gr.Markdown("### Project Information")
                        project_info = gr.JSON(
                            value={
                                "total_conversations": None,
                                "avg_session_length": None,
                                "success_rate": None,
                                "model_type": None
                            },
                            label="Project Stats"
                        )

                    with gr.Group():
                        gr.Markdown("### System Status")
                        status_panel = gr.Markdown(
                            "Click 'Cache Status' to view global state information.",
                            label="Status Information"
                        )

                with gr.Column(scale=2):
                    gr.Markdown("## Key Metrics Dashboard")
                    trackio_iframe = gr.HTML(
                        value="""
<div style="text-align: center; padding: 40px; border: 2px dashed #ccc; border-radius: 8px; background: #f8f9fa;">
<h3>Trackio Dashboard</h3>
<p>Analytics data will appear here after conversations.</p>
<p>Data is automatically cached and persists across page navigation.</p>
<p>To launch trackio dashboard separately, run:</p>
<code style="background: #e9ecef; padding: 4px 8px; border-radius: 4px;">trackio show --project "Mimir"</code>
</div>
""",
                        label="Dashboard"
                    )

            with gr.Row():
                with gr.Column():
                    gr.Markdown("## Recent Interactions")
                    gr.Markdown("*Data persists when switching between Chatbot and Analytics pages*")
                    recent_metrics = gr.Dataframe(
                        headers=["Timestamp", "Response Time", "Prompt Mode", "Tools Used", "Quality Score", "Adapter"],
                        datatype=["str", "number", "str", "bool", "number", "str"],
                        row_count=10,
                        col_count=6,
                        interactive=False,
                        label="Latest Sessions",
                        value=[],
                        show_label=True
                    )

        # ------------------------------------------------------------------
        # Tab 2: model/agent evaluation metrics.
        # ------------------------------------------------------------------
        with gr.TabItem("ML Performance"):
            gr.Markdown("## Agent-Based Performance & Global State Metrics")

            with gr.Row():
                with gr.Column(scale=1):
                    eval_metrics_btn = gr.Button("Get Evaluation Metrics", variant="primary")

                    with gr.Group():
                        gr.Markdown("### Model Cache Status")
                        cache_status_display = gr.JSON(
                            value={},
                            label="Cache Information"
                        )

                with gr.Column(scale=2):
                    gr.Markdown("### Aggregate Performance Metrics")
                    eval_metrics_table = gr.Dataframe(
                        headers=["Metric", "Score"],
                        datatype=["str", "str"],
                        label="Model Performance",
                        value=[]
                    )

                    eval_summary_display = gr.JSON(
                        value={},
                        label="Detailed Evaluation Summary"
                    )

            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Recent Quality Evaluations")
                    recent_evaluations_table = gr.Dataframe(
                        headers=["Timestamp", "Educational Score", "Semantic Quality", "Response Time"],
                        datatype=["str", "str", "str", "str"],
                        label="Recent Evaluations",
                        value=[]
                    )

        # ------------------------------------------------------------------
        # Tab 3: diagnostics — cache details, persistence controls, HF model
        # cache viewer.
        # ------------------------------------------------------------------
        with gr.TabItem("System Status"):
            gr.Markdown("## Global State Manager & System Diagnostics")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Global State Cache")
                    cache_details = gr.Markdown("Click 'Show Cache Status' to view detailed information.")

                    show_cache_btn = gr.Button("Show Cache Status", variant="primary")
                    refresh_cache_btn = gr.Button("Refresh Cache Info", variant="secondary")

                    gr.Markdown("### Persistence Controls")
                    backup_btn = gr.Button("Manual Backup to HF Dataset", variant="primary")
                    backup_status = gr.Textbox(label="Backup Status", value="No recent backup", interactive=False)

                with gr.Column():
                    gr.Markdown("### System Actions")
                    sync_trackio_btn = gr.Button("Sync to Database", variant="secondary")
                    clear_all_btn = gr.Button("Clear All Global States", variant="stop")

                    gr.Markdown("### Persistence Status")
                    persistence_info = gr.JSON(
                        value={},
                        label="Persistence Information"
                    )

                    gr.Markdown("### Performance Monitor")
                    perf_info = gr.JSON(
                        value={},
                        label="Performance Information"
                    )

            # NEW: HuggingFace Cache Viewer Section
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### 🗂️ HuggingFace Model Cache")
                    gr.Markdown("*View cached models and disk usage*")

                    cache_viewer_btn = gr.Button("Inspect Model Cache", variant="primary", size="lg")

                    with gr.Row():
                        # TODO: these two buttons still have no handlers wired.
                        clear_cache_btn = gr.Button("Clear Cache (⚠️ Dangerous)", variant="stop", size="sm")
                        refresh_models_btn = gr.Button("Re-download Models", variant="secondary", size="sm")

                    cache_info_display = gr.Markdown(
                        "Click **Inspect Model Cache** to view detailed cache information.",
                        label="Cache Details"
                    )

    # --- Page-load hooks: restore cached analytics and persistence info ---
    demo.load(
        load_analytics_state,
        inputs=None,
        outputs=[project_info, recent_metrics, trackio_iframe],
        show_progress="hidden"
    )

    demo.load(
        fn=lambda: global_state_manager.get_cache_status(),
        inputs=None,
        outputs=[cache_status_display],
        show_progress="hidden"
    )

    demo.load(
        fn=get_persistence_status,
        inputs=None,
        outputs=[persistence_info],
        show_progress="hidden"
    )

    # --- Traditional Analytics tab wiring ---
    refresh_btn.click(
        fn=refresh_analytics_data,
        inputs=[],
        outputs=[project_info, recent_metrics, trackio_iframe],
        show_progress="full"
    )

    sync_all_btn.click(
        fn=sync_and_refresh_all,
        inputs=[],
        outputs=[project_info, recent_metrics, trackio_iframe, eval_summary_display, eval_metrics_table, recent_evaluations_table],
        show_progress="full"
    )

    export_json_btn.click(
        fn=export_metrics_json,
        inputs=[],
        outputs=[],
        show_progress="full"
    )

    export_csv_btn.click(
        fn=export_metrics_csv,
        inputs=[],
        outputs=[],
        show_progress="full"
    )

    launch_trackio_btn.click(
        fn=launch_external_trackio,
        inputs=[],
        outputs=[],
        show_progress="full"
    )

    cache_status_btn.click(
        fn=show_cache_status,
        inputs=[],
        outputs=[status_panel],
        show_progress="full"
    )

    clear_states_btn.click(
        fn=clear_all_global_states,
        inputs=[],
        outputs=[project_info, recent_metrics, trackio_iframe],
        show_progress="full"
    )

    # --- ML Performance tab wiring ---
    eval_metrics_btn.click(
        fn=show_evaluation_metrics,
        inputs=[],
        outputs=[eval_summary_display, eval_metrics_table, recent_evaluations_table],
        show_progress="full"
    )

    # --- System Status tab wiring ---
    show_cache_btn.click(
        fn=show_cache_status,
        inputs=[],
        outputs=[cache_details],
        show_progress="full"
    )

    refresh_cache_btn.click(
        fn=lambda: global_state_manager.get_cache_status(),
        inputs=[],
        outputs=[perf_info],
        show_progress="full"
    )

    backup_btn.click(
        fn=manual_backup_to_hf,
        inputs=[],
        outputs=[backup_status],
        show_progress="full"
    )

    sync_trackio_btn.click(
        fn=sync_trackio_with_global_state,
        inputs=[],
        outputs=[],
        show_progress="full"
    )

    clear_all_btn.click(
        fn=clear_all_global_states,
        inputs=[],
        outputs=[project_info, recent_metrics, trackio_iframe],
        show_progress="full"
    )

    # FIX: wire the cache viewer button — it previously had no click handler,
    # leaving show_cache_info() as dead code and the "Inspect Model Cache"
    # button inert.
    cache_viewer_btn.click(
        fn=show_cache_info,
        inputs=[],
        outputs=[cache_info_display],
        show_progress="full"
    )
535
+
536
# Standalone entry point on port 7861; in the full app this page is mounted
# by app.py instead.
if __name__ == "__main__":
    logger.info("Running analytics dashboard standalone with global state management")
    demo.launch(server_name="0.0.0.0", server_port=7861)
gradio_chatbot.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gradio_chatbot.py
2
+ import gradio as gr
3
+ import logging
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+ from app import (
8
+ add_user_message,
9
+ add_loading_animation,
10
+ generate_response,
11
+ reset_conversation,
12
+ load_conversation_state,
13
+ remove_loading_animations,
14
+ global_state_manager,
15
+ )
16
+
17
+
18
def load_custom_css():
    """Load styles.css for the chatbot page.

    Returns:
        The CSS text, or an empty string when the file is missing or
        unreadable, so the UI falls back to default styling.
    """
    try:
        with open("styles.css", "r", encoding="utf-8") as css_file:
            css_content = css_file.read()
        # FIX: use lazy %-args instead of an f-string so the message is only
        # formatted when the record is actually emitted.
        logger.info("CSS loaded successfully, length: %d characters", len(css_content))
        return css_content
    except FileNotFoundError:
        logger.warning("styles.css file not found, using default styling")
        return ""
    except Exception as e:
        logger.warning("Error reading styles.css: %s", e)
        return ""
30
+
31
+
32
def restore_state_on_page_access():
    """
    Rehydrate the chatbot UI from the global state manager.

    Invoked on demo.load so the conversation survives page navigation.
    Returns (chat_history, conversation_state); both empty on any failure.
    """
    try:
        saved = global_state_manager.get_conversation_state()
        history = saved.get('chat_history', [])
        conversation = saved.get('conversation_state', [])

        logger.info(f"✓ Restored state: {len(history)} messages in chat, {len(conversation)} in conversation")

        return history, conversation
    except Exception as e:
        logger.error(f"Failed to restore state: {e}")
        return [], []
48
+
49
+
50
with gr.Blocks() as demo:
    # Inject styles.css (if present) so the chat page matches the app theme.
    custom_css = load_custom_css()
    if custom_css:
        gr.HTML(f'<style>{custom_css}</style>')

    # Backend-format conversation history, kept in parallel with the
    # Chatbot component's display history.
    conversation_state = gr.State([])

    gr.HTML('<div class="title-header"><h1>Mimir</h1></div>')

    with gr.Row():
        chatbot = gr.Chatbot(
            type="messages",
            show_copy_button=True,
            show_share_button=False,
            layout="bubble",
            autoscroll=True,
            avatar_images=None,
            elem_id="main-chatbot",
            scale=1,
            height="65vh",
            value=[],
            # Render $$...$$ as display math and $...$ inline.
            latex_delimiters=[
                {"left": "$$", "right": "$$", "display": True},
                {"left": "$", "right": "$", "display": False},
            ]
        )

    with gr.Row(elem_classes=["input-controls"]):
        msg = gr.Textbox(
            placeholder="Ask me about math, research, study strategies, or any educational topic...",
            show_label=False,
            lines=6,
            max_lines=8,
            elem_classes=["input-textbox"],
            container=False,
            scale=4
        )
        with gr.Column(elem_classes=["button-column"], scale=1):
            send = gr.Button("Send", elem_classes=["send-button"], size="sm")
            clear = gr.Button("Clear", elem_classes=["clear-button"], size="sm")


    # Restore any persisted conversation when the page is (re)opened;
    # queue=False so restoration is not delayed behind queued generations.
    demo.load(
        fn=restore_state_on_page_access,
        outputs=[chatbot, conversation_state],
        queue=False
    )

    # Enter key: echo the user message, show a loading bubble, then generate
    # the response — three chained steps sharing the same state outputs.
    msg.submit(
        add_user_message,
        inputs=[msg, chatbot, conversation_state],
        outputs=[msg, chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        add_loading_animation,
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        generate_response,
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    )

    # Send button: identical three-step chain as pressing Enter.
    send.click(
        add_user_message,
        inputs=[msg, chatbot, conversation_state],
        outputs=[msg, chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        add_loading_animation,
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        generate_response,
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    )

    # Clear button: reset both display history and backend state.
    clear.click(
        reset_conversation,
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    )
137
+
138
+
139
# Standalone entry point on port 7860; in the full app this page is mounted
# by app.py instead.
if __name__ == "__main__":
    logger.info("Running chatbot interface standalone")
    demo.launch(server_name="0.0.0.0", server_port=7860)
gradio_prompt_testing.py ADDED
@@ -0,0 +1,1634 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gradio_pipeline_testing.py
2
+ """
3
+ Full Pipeline Testing Interface for Mimir Educational AI Assistant
4
+
5
+ Tests the complete orchestration flow with comprehensive metrics at every step.
6
+ Captures conditional model activation, token usage, timing, and quality metrics.
7
+
8
+ Output: CSV file with ~110 columns capturing full pipeline journey
9
+ """
10
+
11
+ import os
12
+ import sys
13
+ import io
14
+ import csv
15
+ import json
16
+ import time
17
+ import logging
18
+ import warnings
19
+ from datetime import datetime
20
+ from typing import Dict, List, Optional, Tuple, Any
21
+ from collections import Counter
22
+
23
+ # Core dependencies
24
+ import torch
25
+ import gradio as gr
26
+ import numpy as np
27
+
28
+ # ============================================================================
29
+ # ENVIRONMENT SETUP
30
+ # ============================================================================
31
+ HF_CACHE = "/tmp/huggingface"
32
+ os.makedirs(f"{HF_CACHE}/hub", exist_ok=True)
33
+ os.environ['HF_HOME'] = HF_CACHE
34
+ os.environ['HF_HUB_CACHE'] = f"{HF_CACHE}/hub"
35
+
36
+ # ============================================================================
37
+ # IMPORTS FROM MIMIR APPLICATION
38
+ # ============================================================================
39
+ try:
40
+ from agents import (
41
+ ToolDecisionAgent,
42
+ PromptRoutingAgents,
43
+ ThinkingAgents,
44
+ ResponseAgent,
45
+ get_shared_qwen3
46
+ )
47
+ AGENTS_AVAILABLE = True
48
+ except ImportError as e:
49
+ print(f"⚠️ Warning: Could not import agents: {e}")
50
+ AGENTS_AVAILABLE = False
51
+
52
+ try:
53
+ from state_manager import GlobalStateManager, LogicalExpressions
54
+ STATE_MANAGER_AVAILABLE = True
55
+ except ImportError as e:
56
+ print(f"⚠️ Warning: Could not import state_manager: {e}")
57
+ STATE_MANAGER_AVAILABLE = False
58
+
59
+ try:
60
+ from prompt_library import (
61
+ CORE_IDENTITY,
62
+ TOOL_DECISION,
63
+ agent_1_system,
64
+ agent_2_system,
65
+ agent_3_system,
66
+ agent_4_system,
67
+ MATH_THINKING,
68
+ QUESTION_ANSWER_DESIGN,
69
+ REASONING_THINKING,
70
+ VAUGE_INPUT,
71
+ USER_UNDERSTANDING,
72
+ GENERAL_FORMATTING,
73
+ LATEX_FORMATTING,
74
+ GUIDING_TEACHING,
75
+ STRUCTURE_PRACTICE_QUESTIONS,
76
+ PRACTICE_QUESTION_FOLLOWUP,
77
+ TOOL_USE_ENHANCEMENT,
78
+ )
79
+ PROMPTS_AVAILABLE = True
80
+ except ImportError as e:
81
+ print(f"⚠️ Warning: Could not import prompt_library: {e}")
82
+ PROMPTS_AVAILABLE = False
83
+
84
# Try to import post processor
# NOTE(review): exec_module below runs ALL of app.py's module-level code just
# to grab one attribute — heavyweight and side-effectful. Presumably app.py
# cannot be imported via `import app` here without conflicts; confirm this is
# intentional.
try:
    # Import the post processor class/module from app.py
    import importlib.util
    spec = importlib.util.spec_from_file_location("app_module", "app.py")
    app_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(app_module)
    post_processor = app_module.post_processor
    POST_PROCESSOR_AVAILABLE = True
except Exception as e:
    print(f"⚠️ Warning: Could not import post_processor: {e}")
    POST_PROCESSOR_AVAILABLE = False
    # Create dummy
    class DummyPostProcessor:
        # Pass-through stand-in: returns the response unchanged.
        def process_response(self, response, user_message):
            return response
    post_processor = DummyPostProcessor()
101
+
102
# ZeroGPU support
# On HF Spaces the `spaces` package provides the @spaces.GPU decorator;
# elsewhere substitute a no-op stand-in so decorated functions still run.
try:
    import spaces
    ZERO_GPU_AVAILABLE = True
except ImportError:
    ZERO_GPU_AVAILABLE = False
    class DummySpaces:
        @staticmethod
        def GPU(duration=600):
            # No-op decorator matching the spaces.GPU(duration=...) call shape.
            def decorator(func):
                return func
            return decorator
    spaces = DummySpaces()
115
+
116
+ # Tiktoken for accurate token counting
117
+ try:
118
+ import tiktoken
119
+ TIKTOKEN_AVAILABLE = True
120
+ except ImportError:
121
+ TIKTOKEN_AVAILABLE = False
122
+ print("⚠️ Warning: tiktoken not available - using fallback token counting")
123
+
124
+ # Textstat for readability metrics
125
+ try:
126
+ import textstat
127
+ TEXTSTAT_AVAILABLE = True
128
+ except ImportError:
129
+ TEXTSTAT_AVAILABLE = False
130
+ print("⚠️ Warning: textstat not available - using manual readability calculations")
131
+
132
+ # ============================================================================
133
+ # LOGGING SETUP
134
+ # ============================================================================
135
+ logging.basicConfig(
136
+ level=logging.INFO,
137
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
138
+ )
139
+ logger = logging.getLogger(__name__)
140
+
141
+ warnings.filterwarnings("ignore", category=UserWarning)
142
+ warnings.filterwarnings("ignore", category=FutureWarning)
143
+
144
+ CURRENT_YEAR = datetime.now().year
145
+
146
+ # ============================================================================
147
+ # GLOBAL INSTANCES
148
+ # ============================================================================
149
# Instantiate the full agent pipeline once at import time. Unlike the other
# optional dependencies above, agents and the state manager are hard
# requirements for this testing UI — fail fast with a clear error when they
# are missing.
if AGENTS_AVAILABLE and STATE_MANAGER_AVAILABLE:
    try:
        global_state_manager = GlobalStateManager()
        logical_expressions = LogicalExpressions()
        tool_agent = ToolDecisionAgent()
        routing_agents = PromptRoutingAgents()
        thinking_agents = ThinkingAgents()
        response_agent = ResponseAgent()

        logger.info("✓ All agents initialized successfully")
    except Exception as e:
        # Re-raise: a partially initialized pipeline would fail confusingly
        # later, during a test run.
        logger.error(f"Failed to initialize agents: {e}")
        raise
else:
    logger.error("Cannot initialize - missing core dependencies")
    raise ImportError("Missing required modules: agents or state_manager")
165
+
166
# ============================================================================
# CSV SCHEMA DEFINITION
# ============================================================================
# Column order is significant: rows are written positionally by the CSV
# exporter. The schema is assembled from per-stage groups; the four routing
# agents and three thinking agents share a common 7-column layout, generated
# below to keep the groups consistent.

_IDENT_COLS = [
    "prompt_index", "timestamp", "user_prompt",
    "user_prompt_tokens", "user_prompt_chars", "user_prompt_words",
]

_CONTEXT_COLS = ["conversation_history_length", "conversation_history_tokens"]

_TOOL_COLS = [
    "tool_decision_input_template", "tool_decision_input_tokens",
    "tool_decision_output", "tool_decision_output_tokens",
    "tool_decision_result", "tool_decision_time_seconds",
    "tool_decision_gpu_peak_mb",
]

_REGEX_COLS = ["regex_checks_applied", "regex_checks_time_seconds"]

# Routing agents 1-4; agent 4 historically uses the plural "decisions" key.
_ROUTING_COLS = []
for _n, _decision_suffix in ((1, "decision"), (2, "decision"),
                             (3, "decision"), (4, "decisions")):
    _ROUTING_COLS += [
        f"agent{_n}_input_template", f"agent{_n}_input_tokens",
        f"agent{_n}_output", f"agent{_n}_output_tokens",
        f"agent{_n}_{_decision_suffix}", f"agent{_n}_time_seconds",
        f"agent{_n}_gpu_peak_mb",
    ]

# Math / QA-design / Reasoning thinking agents share one layout.
_THINKING_COLS = []
for _prefix in ("math_thinking", "qa_design", "reasoning"):
    _THINKING_COLS += [
        f"{_prefix}_activated", f"{_prefix}_input_template",
        f"{_prefix}_input_tokens", f"{_prefix}_output",
        f"{_prefix}_output_tokens", f"{_prefix}_time_seconds",
        f"{_prefix}_gpu_peak_mb",
    ]

_ASSEMBLY_COLS = [
    "active_response_prompts", "final_prompt_template", "final_prompt_tokens",
    "final_prompt_chars", "final_prompt_words", "assembly_time_seconds",
]

_RESPONSE_COLS = [
    "response_input_template", "response_input_tokens", "response_raw",
    "response_raw_tokens", "response_raw_chars", "response_raw_words",
    "response_generation_time_seconds", "response_gpu_peak_mb",
    "response_tokens_per_second",
]

_POSTPROCESS_COLS = [
    "response_processed", "response_processed_tokens",
    "response_processed_chars", "response_processed_words",
    "postprocessing_time_seconds",
]

_QUALITY_COLS = [
    "flesch_reading_ease", "flesch_kincaid_grade", "completeness_score",
    "specificity_score", "repetition_ratio", "unique_word_ratio",
    "avg_sentence_length", "question_answered",
]

_OVERALL_COLS = [
    "total_pipeline_time_seconds", "total_input_tokens", "total_output_tokens",
    "total_gpu_peak_mb", "models_activated_count", "models_activated_list",
]

# Full ordered schema (100 columns).
CSV_COLUMNS = (
    _IDENT_COLS + _CONTEXT_COLS + _TOOL_COLS + _REGEX_COLS + _ROUTING_COLS
    + _THINKING_COLS + _ASSEMBLY_COLS + _RESPONSE_COLS + _POSTPROCESS_COLS
    + _QUALITY_COLS + _OVERALL_COLS
)
302
+
303
+ # ============================================================================
304
+ # TOKEN COUNTING FUNCTIONS
305
+ # ============================================================================
306
+
307
def count_tokens_accurate(text: str) -> int:
    """
    Estimate the token count of *text*.

    Uses tiktoken's ``cl100k_base`` encoding (GPT-3.5/4 vocabulary, a good
    general estimator) when the library is installed; otherwise — or on any
    encoding failure — falls back to a whitespace word count.

    Args:
        text: Input text to tokenize.

    Returns:
        Estimated token count (0 for empty input).
    """
    if not text:
        return 0

    if TIKTOKEN_AVAILABLE:
        try:
            return len(tiktoken.get_encoding("cl100k_base").encode(text))
        except Exception as exc:
            logger.warning(f"tiktoken encoding failed: {exc}, using fallback")

    # Fallback: whitespace word count approximation.
    return len(text.split())
332
+
333
+
334
def count_words(text: str) -> int:
    """Return the number of whitespace-separated words in *text* (0 if empty)."""
    return len(text.split()) if text else 0
339
+
340
+
341
def count_sentences(text: str) -> int:
    """Count sentences by splitting on runs of '.', '!' or '?' (simple heuristic)."""
    if not text:
        return 0
    import re
    fragments = re.split(r'[.!?]+', text)
    return sum(1 for fragment in fragments if fragment.strip())
348
+
349
+
350
+ # ============================================================================
351
+ # GPU MEMORY TRACKING
352
+ # ============================================================================
353
+
354
def get_gpu_memory() -> Dict[str, float]:
    """
    Snapshot current CUDA memory usage.

    Returns:
        Mapping with ``allocated_mb``, ``reserved_mb`` and ``peak_mb``
        (all 0.0 when no CUDA device is available).
    """
    if not torch.cuda.is_available():
        return {"allocated_mb": 0.0, "reserved_mb": 0.0, "peak_mb": 0.0}

    bytes_per_mb = 1024 ** 2
    return {
        "allocated_mb": torch.cuda.memory_allocated() / bytes_per_mb,
        "reserved_mb": torch.cuda.memory_reserved() / bytes_per_mb,
        "peak_mb": torch.cuda.max_memory_allocated() / bytes_per_mb,
    }
372
+
373
+
374
def reset_gpu_stats():
    """Clear CUDA peak-memory counters and synchronize; no-op without CUDA."""
    if not torch.cuda.is_available():
        return
    torch.cuda.reset_peak_memory_stats()
    torch.cuda.synchronize()
379
+
380
+
381
+ # ============================================================================
382
+ # TEMPLATE BUILDING FUNCTIONS
383
+ # ============================================================================
384
+
385
def format_history(history: List[Dict]) -> str:
    """
    Render up to the last 8 conversation messages as ``role: content`` lines.

    Each message's content is truncated to 100 characters; missing keys fall
    back to ``unknown`` / ``""``. An empty history yields the sentinel string
    "No previous conversation".
    """
    if not history:
        return "No previous conversation"

    lines = [
        f"{entry.get('role', 'unknown')}: {entry.get('content', '')[:100]}"
        for entry in history[-8:]
    ]
    return "\n".join(lines)
397
+
398
+
399
def build_tool_decision_template(user_prompt: str) -> str:
    """Wrap *user_prompt* in the [INST] template for the tool-decision agent."""
    query_block = f"User Query: {user_prompt}"
    return f"<s>[INST] {TOOL_DECISION}\n\n{query_block} [/INST]"
402
+
403
+
404
def build_agent1_template(user_prompt: str, history: List) -> str:
    """Build the [INST] template for Agent 1 (Practice Questions), including history."""
    return (
        f"<s>[INST] {agent_1_system}\n\n"
        f"Conversation History:\n{format_history(history)}\n\n"
        f"Current User Query: {user_prompt} [/INST]"
    )
408
+
409
+
410
def build_agent2_template(user_prompt: str) -> str:
    """Build the [INST] template for Agent 2 (Discovery Mode); no history."""
    header = f"<s>[INST] {agent_2_system}"
    return f"{header}\n\nUser Query: {user_prompt} [/INST]"
413
+
414
+
415
def build_agent3_template(user_prompt: str, history: List) -> str:
    """Build the [INST] template for Agent 3 (Followup Assessment), including history."""
    return (
        f"<s>[INST] {agent_3_system}\n\n"
        f"Conversation History:\n{format_history(history)}\n\n"
        f"Current User Query: {user_prompt} [/INST]"
    )
419
+
420
+
421
def build_agent4_template(user_prompt: str, history: List) -> str:
    """Build the [INST] template for Agent 4 (Teaching Mode), including history."""
    return (
        f"<s>[INST] {agent_4_system}\n\n"
        f"Conversation History:\n{format_history(history)}\n\n"
        f"Current User Query: {user_prompt} [/INST]"
    )
425
+
426
+
427
def build_math_thinking_template(user_prompt: str) -> str:
    """Build the [INST] template for the Math Thinking agent."""
    header = f"<s>[INST] {MATH_THINKING}"
    return f"{header}\n\nUser Query: {user_prompt} [/INST]"
430
+
431
+
432
def build_qa_design_template(user_prompt: str) -> str:
    """Build the [INST] template for the QA Design Thinking agent."""
    header = f"<s>[INST] {QUESTION_ANSWER_DESIGN}"
    return f"{header}\n\nUser Query: {user_prompt} [/INST]"
435
+
436
+
437
def build_reasoning_template(user_prompt: str) -> str:
    """Build the [INST] template for the Reasoning Thinking agent."""
    header = f"<s>[INST] {REASONING_THINKING}"
    return f"{header}\n\nUser Query: {user_prompt} [/INST]"
440
+
441
+
442
def build_final_prompt(
    user_prompt: str,
    active_prompts: List[str],
    thinking_context: str,
    recent_history_formatted: str,
    tool_img_output: str = "",
    tool_context: str = ""
) -> str:
    """
    Build final prompt for ResponseAgent (Qwen3-Claude).
    Matches actual orchestration logic from app.py

    Args:
        user_prompt: The user's current query text.
        active_prompts: Segment names to splice in after CORE_IDENTITY;
            names missing from the internal map are silently skipped.
        thinking_context: Combined thinking-agent output ("" when none ran).
        recent_history_formatted: Pre-formatted conversation history string.
        tool_img_output: Tool/image output if a tool ran ("" otherwise).
        tool_context: Context accompanying the tool output ("" otherwise).

    Returns:
        The fully assembled prompt string for the response model.
    """
    # CORE_IDENTITY always leads; optional segments follow in the order
    # they appear in active_prompts.
    prompt_segments = [CORE_IDENTITY]

    # NOTE(review): "VAUGE_INPUT" is spelled this way project-wide; do not
    # "fix" the key here without renaming the constant it maps to.
    prompt_map = {
        "VAUGE_INPUT": VAUGE_INPUT,
        "USER_UNDERSTANDING": USER_UNDERSTANDING,
        "GENERAL_FORMATTING": GENERAL_FORMATTING,
        "LATEX_FORMATTING": LATEX_FORMATTING,
        "GUIDING_TEACHING": GUIDING_TEACHING,
        "STRUCTURE_PRACTICE_QUESTIONS": STRUCTURE_PRACTICE_QUESTIONS,
        "PRACTICE_QUESTION_FOLLOWUP": PRACTICE_QUESTION_FOLLOWUP,
        "TOOL_USE_ENHANCEMENT": TOOL_USE_ENHANCEMENT,
    }

    # Unknown names are ignored rather than raising, so routing-agent
    # output that drifts from this map degrades gracefully.
    for prompt_name in active_prompts:
        if prompt_name in prompt_map:
            prompt_segments.append(prompt_map[prompt_name])

    prompt_segments_text = "\n\n".join(prompt_segments)

    knowledge_cutoff = f"""
The current year is {CURRENT_YEAR}. Your knowledge cutoff date is October 2023. If the user asks about recent events or dynamic facts, inform them you may not have the most up-to-date information and suggest referencing direct sources."""

    # The tool/context/history/thinking sections are always present in the
    # template; downstream instructions tell the model to ignore empty ones.
    complete_prompt = f"""
{prompt_segments_text}

If tools were used, context and output will be here. Ignore if empty:
Image output: {tool_img_output}
Image context: {tool_context}

Conversation history, if available:
{recent_history_formatted}

Consider any context available to you:
{thinking_context}

Here is the user's current query:
{user_prompt}

{knowledge_cutoff}
"""

    return complete_prompt
497
+
498
+
499
+ # ============================================================================
500
+ # QUALITY METRICS FUNCTIONS
501
+ # ============================================================================
502
+
503
def estimate_syllables(text: str) -> int:
    """
    Rough syllable estimate via vowel-group counting.

    Each word contributes the number of vowel runs (a, e, i, o, u, y),
    minus one for a trailing silent 'e', with a floor of one syllable
    per word. Non-letter characters are stripped first.
    """
    import re

    total = 0
    for raw_word in text.lower().split():
        word = re.sub(r'[^a-z]', '', raw_word)  # letters only
        if not word:
            continue

        groups = len(re.findall(r'[aeiouy]+', word))
        if word.endswith('e'):
            groups -= 1  # silent-e adjustment
        total += max(1, groups)  # every real word has at least one syllable

    return total
527
+
528
+
529
def calculate_flesch_reading_ease(text: str) -> float:
    """
    Calculate Flesch Reading Ease score.

    Score 0-100, higher = easier to read:
        90-100: Very easy (5th grade)
        60-70:  Standard (8th-9th grade)
        0-30:   Very difficult (college graduate)

    Formula: 206.835 - 1.015*(words/sentences) - 84.6*(syllables/words)

    Args:
        text: Text to score; empty or very short input (< 10 chars) scores 0.0.

    Returns:
        Score clamped to [0.0, 100.0].
    """
    if not text or len(text.strip()) < 10:
        return 0.0

    # Prefer textstat's validated implementation when available.
    if TEXTSTAT_AVAILABLE:
        try:
            return textstat.flesch_reading_ease(text)
        # Was a bare `except:` — that would also swallow SystemExit and
        # KeyboardInterrupt; narrow to Exception and fall through to the
        # manual calculation instead.
        except Exception:
            pass

    # Manual fallback calculation.
    words = count_words(text)
    sentences = count_sentences(text)

    if sentences == 0 or words == 0:
        return 0.0

    syllables = estimate_syllables(text)

    # (The original repeated an `if words == 0` check here; it was dead code
    # since the same condition already returned above.)
    score = 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)
    return max(0.0, min(100.0, score))
562
+
563
+
564
def calculate_flesch_kincaid_grade(text: str) -> float:
    """
    Calculate the Flesch-Kincaid Grade Level.

    Returns the US school grade level needed to understand *text*.

    Formula: 0.39*(words/sentences) + 11.8*(syllables/words) - 15.59

    Args:
        text: Text to score; empty or very short input (< 10 chars) scores 0.0.

    Returns:
        Grade level, clamped to >= 0.0.
    """
    if not text or len(text.strip()) < 10:
        return 0.0

    # Prefer textstat's validated implementation when available.
    if TEXTSTAT_AVAILABLE:
        try:
            return textstat.flesch_kincaid_grade(text)
        # Was a bare `except:` — narrow to Exception so SystemExit and
        # KeyboardInterrupt are never swallowed; fall through to the
        # manual calculation.
        except Exception:
            pass

    # Manual fallback calculation.
    words = count_words(text)
    sentences = count_sentences(text)

    if sentences == 0 or words == 0:
        return 0.0

    syllables = estimate_syllables(text)

    # (A duplicated `if words == 0` check existed here; removed as dead code —
    # the condition already returned above.)
    grade = 0.39 * (words / sentences) + 11.8 * (syllables / words) - 15.59
    return max(0.0, grade)
593
+
594
+
595
def calculate_completeness_score(response: str, user_prompt: str) -> float:
    """
    Estimate how well *response* addresses *user_prompt*.

    Combines the fraction of prompt keywords (after stopword removal) that
    appear in the response with a length penalty for very short responses.

    Returns:
        Score in [0.0, 1.0]; 0.5 (neutral) when the prompt contains no
        meaningful keywords after stopword removal.
    """
    if not response or not user_prompt:
        return 0.0

    import re

    stopwords = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been',
                 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will',
                 'would', 'should', 'could', 'may', 'might', 'can', 'what',
                 'how', 'why', 'when', 'where', 'who', 'which', 'i', 'you',
                 'we', 'they', 'he', 'she', 'it', 'me', 'him', 'her', 'us', 'them'}

    # Meaningful prompt keywords after stopword filtering.
    keywords = set(re.findall(r'\b\w+\b', user_prompt.lower())) - stopwords
    if not keywords:
        return 0.5  # Neutral if no meaningful keywords

    answer_words = set(re.findall(r'\b\w+\b', response.lower()))
    overlap = len(keywords & answer_words) / len(keywords)

    # Penalize responses shorter than a minimal reasonable answer (20 chars).
    min_reasonable_length = 20
    length_factor = min(1.0, len(response) / min_reasonable_length)

    return min(1.0, overlap * length_factor)
635
+
636
+
637
def check_question_answered(response: str, user_prompt: str) -> bool:
    """
    Heuristic boolean: does *response* appear to attempt an answer?

    A response fails when it is shorter than 10 characters, opens with a
    known refusal phrase, or shows too little keyword overlap with the
    prompt (completeness <= 0.3).
    """
    if not response or len(response) < 10:
        return False

    # Refusal check: only the opening of the response is examined.
    refusal_prefixes = (
        "i don't know",
        "i cannot",
        "i can't",
        "i'm not sure",
        "i don't have",
        "unable to",
        "sorry, i",
    )
    if response.lower().startswith(refusal_prefixes):
        return False

    # Require minimal keyword overlap with the prompt.
    return calculate_completeness_score(response, user_prompt) > 0.3
668
+
669
+
670
def calculate_specificity_score(response: str) -> float:
    """
    Measure how specific (vs vague) *response* is.

    Five binary indicators, each worth 0.2:
        1. contains a number
        2. contains a proper noun (capitalized word not starting a sentence)
        3. uses an example phrase ("for example", "such as", ...)
        4. average word length above 5 characters (proxy for technical terms)
        5. longer than 200 characters

    Args:
        response: Text to score.

    Returns:
        Score in [0.0, 1.0] (0.0 for empty input).
    """
    if not response:
        return 0.0

    import re

    specificity_indicators = 0
    total_possible = 5

    # 1. Contains numbers
    if re.search(r'\d+', response):
        specificity_indicators += 1

    # 2. Contains proper nouns (capitals right after ". " are excluded —
    #    they are likely just sentence starts)
    if re.search(r'(?<!\. )\b[A-Z][a-z]+', response):
        specificity_indicators += 1

    # 3. Contains example phrases. Word-bounded regex fixes the original
    #    substring check, where bare "like" matched inside "dislike" /
    #    "unlikely" and produced false positives.
    if re.search(r'\b(?:for example|such as|for instance|like|including)\b',
                 response.lower()):
        specificity_indicators += 1

    # 4. Average word length
    words = response.split()
    if words and sum(len(w) for w in words) / len(words) > 5.0:
        specificity_indicators += 1

    # 5. Substantial length
    if len(response) > 200:
        specificity_indicators += 1

    return specificity_indicators / total_possible
716
+
717
+
718
def calculate_repetition_ratio(text: str) -> float:
    """
    Fraction of word occurrences that repeat an earlier word.

    Case-insensitive, whitespace-tokenized. Lower is better (less
    repetitive).

    Returns:
        Ratio in [0.0, 1.0]; 0.0 for empty or single-word text.
    """
    if not text:
        return 0.0

    tokens = text.lower().split()
    if len(tokens) < 2:
        return 0.0

    # Every occurrence beyond a word's first counts as one repeat.
    repeats = sum(n - 1 for n in Counter(tokens).values() if n > 1)
    return min(1.0, repeats / len(tokens))
739
+
740
+
741
def calculate_unique_word_ratio(text: str) -> float:
    """
    Vocabulary diversity: unique words divided by total words.

    Case-insensitive; 0.0 for empty text, 1.0 when every word is distinct.
    """
    tokens = text.lower().split() if text else []
    if not tokens:
        return 0.0
    return len(set(tokens)) / len(tokens)
757
+
758
+
759
def calculate_avg_sentence_length(text: str) -> float:
    """Average words per sentence (0.0 when the text has no sentences)."""
    sentence_total = count_sentences(text)
    if sentence_total == 0:
        return 0.0
    return count_words(text) / sentence_total
768
+
769
+
770
+ # ============================================================================
771
+ # INSTRUMENTED PIPELINE RUNNER
772
+ # ============================================================================
773
+
774
+ def run_full_pipeline_instrumented(user_prompt: str, prompt_index: int = 1) -> Dict:
775
+ """
776
+ Run the complete orchestration pipeline with full instrumentation.
777
+ Captures metrics at every step.
778
+
779
+ Args:
780
+ user_prompt: User's input prompt
781
+ prompt_index: Index number for this prompt in batch
782
+
783
+ Returns:
784
+ Dictionary with all metrics for CSV export
785
+ """
786
+
787
+ result = {
788
+ "prompt_index": prompt_index,
789
+ "timestamp": datetime.now().isoformat(),
790
+ "user_prompt": user_prompt,
791
+ "user_prompt_tokens": count_tokens_accurate(user_prompt),
792
+ "user_prompt_chars": len(user_prompt),
793
+ "user_prompt_words": count_words(user_prompt),
794
+ }
795
+
796
+ # Track overall start time
797
+ pipeline_start = time.time()
798
+
799
+ try:
800
+ # ============================================================
801
+ # STEP 1-2: SETUP
802
+ # ============================================================
803
+ setup_start = time.time()
804
+
805
+ # Reset state
806
+ global_state_manager.reset_prompt_state()
807
+ prompt_state = global_state_manager.get_prompt_state_manager()
808
+
809
+ # Get conversation history (empty for testing)
810
+ recent_history = []
811
+ recent_history_formatted = "No previous conversation"
812
+
813
+ result["conversation_history_length"] = 0
814
+ result["conversation_history_tokens"] = 0
815
+
816
+ # ============================================================
817
+ # STEP 3: TOOL DECISION AGENT
818
+ # ============================================================
819
+ tool_start = time.time()
820
+
821
+ tool_template = build_tool_decision_template(user_prompt)
822
+ tool_input_tokens = count_tokens_accurate(tool_template)
823
+
824
+ reset_gpu_stats()
825
+
826
+ # Execute
827
+ tool_decision_result = tool_agent.should_use_visualization(user_prompt)
828
+
829
+ # Capture output
830
+ tool_output = str(tool_decision_result)
831
+ tool_output_tokens = count_tokens_accurate(tool_output)
832
+
833
+ gpu_metrics = get_gpu_memory()
834
+ tool_time = time.time() - tool_start
835
+
836
+ # Record
837
+ result.update({
838
+ "tool_decision_input_template": tool_template,
839
+ "tool_decision_input_tokens": tool_input_tokens,
840
+ "tool_decision_output": tool_output,
841
+ "tool_decision_output_tokens": tool_output_tokens,
842
+ "tool_decision_result": bool(tool_decision_result),
843
+ "tool_decision_time_seconds": round(tool_time, 3),
844
+ "tool_decision_gpu_peak_mb": round(gpu_metrics["peak_mb"], 2),
845
+ })
846
+
847
+ # Update state
848
+ if tool_decision_result:
849
+ prompt_state.update("TOOL_USE_ENHANCEMENT", True)
850
+
851
+ # ============================================================
852
+ # STEP 4: REGEX CHECKS
853
+ # ============================================================
854
+ regex_start = time.time()
855
+
856
+ # Apply regex checks (returns list of activated prompts)
857
+ regex_before = set(prompt_state.get_active_response_prompts())
858
+ logical_expressions.apply_all_checks(user_prompt, prompt_state)
859
+ regex_after = set(prompt_state.get_active_response_prompts())
860
+ regex_applied = list(regex_after - regex_before)
861
+
862
+ regex_time = time.time() - regex_start
863
+
864
+ result.update({
865
+ "regex_checks_applied": ", ".join(regex_applied) if regex_applied else "None",
866
+ "regex_checks_time_seconds": round(regex_time, 3),
867
+ })
868
+
869
+ # ============================================================
870
+ # STEP 5: ROUTING AGENTS (Unified Process - Qwen3-Claude)
871
+ # ============================================================
872
+ routing_start = time.time()
873
+
874
+ # Build template (simplified - just the user prompt)
875
+ routing_template = f"User Query: {user_prompt}"
876
+ routing_input_tokens = count_tokens_accurate(routing_template)
877
+
878
+ reset_gpu_stats()
879
+
880
+ # Use unified process() method
881
+ response_prompts_str, thinking_prompts_str = routing_agents.process(
882
+ user_input=user_prompt,
883
+ tool_used=tool_decision_result
884
+ )
885
+
886
+ # Parse results
887
+ response_prompts = [p.strip() for p in response_prompts_str.split('\n') if p.strip()] if response_prompts_str else []
888
+ thinking_prompts = [p.strip() for p in thinking_prompts_str.split('\n') if p.strip()] if thinking_prompts_str else []
889
+
890
+ routing_output = f"Response: {', '.join(response_prompts) if response_prompts else 'None'}\nThinking: {', '.join(thinking_prompts) if thinking_prompts else 'None'}"
891
+ routing_output_tokens = count_tokens_accurate(routing_output)
892
+ gpu_metrics = get_gpu_memory()
893
+
894
+ routing_time = time.time() - routing_start
895
+
896
+ # Update result with consolidated routing metrics
897
+ result.update({
898
+ # Agent 1 metrics (legacy columns - use consolidated data)
899
+ "agent1_input_template": routing_template,
900
+ "agent1_input_tokens": routing_input_tokens // 4, # Divide among 4 agents
901
+ "agent1_output": ", ".join([p for p in response_prompts if p in ["STRUCTURE_PRACTICE_QUESTIONS"]]) or "None",
902
+ "agent1_output_tokens": routing_output_tokens // 4,
903
+ "agent1_decision": "STRUCTURE_PRACTICE_QUESTIONS" in response_prompts,
904
+ "agent1_time_seconds": round(routing_time / 4, 3),
905
+ "agent1_gpu_peak_mb": round(gpu_metrics["peak_mb"] / 4, 2),
906
+
907
+ # Agent 2 metrics
908
+ "agent2_input_template": routing_template,
909
+ "agent2_input_tokens": routing_input_tokens // 4,
910
+ "agent2_output": ", ".join([p for p in response_prompts if p in ["GENERAL_FORMATTING", "LATEX_FORMATTING", "GUIDING_TEACHING"]]) or "None",
911
+ "agent2_output_tokens": routing_output_tokens // 4,
912
+ "agent2_decision": ", ".join([p for p in response_prompts if p in ["GENERAL_FORMATTING", "LATEX_FORMATTING", "GUIDING_TEACHING"]]) or "NULL",
913
+ "agent2_time_seconds": round(routing_time / 4, 3),
914
+ "agent2_gpu_peak_mb": round(gpu_metrics["peak_mb"] / 4, 2),
915
+
916
+ # Agent 3 metrics
917
+ "agent3_input_template": routing_template,
918
+ "agent3_input_tokens": routing_input_tokens // 4,
919
+ "agent3_output": ", ".join([p for p in response_prompts + thinking_prompts if p in ["PRACTICE_QUESTION_FOLLOWUP", "MATH_THINKING", "QUESTION_ANSWER_DESIGN", "REASONING_THINKING"]]) or "None",
920
+ "agent3_output_tokens": routing_output_tokens // 4,
921
+ "agent3_decision": any(p in ["PRACTICE_QUESTION_FOLLOWUP", "MATH_THINKING", "QUESTION_ANSWER_DESIGN", "REASONING_THINKING"] for p in response_prompts + thinking_prompts),
922
+ "agent3_time_seconds": round(routing_time / 4, 3),
923
+ "agent3_gpu_peak_mb": round(gpu_metrics["peak_mb"] / 4, 2),
924
+
925
+ # Agent 4 metrics
926
+ "agent4_input_template": routing_template,
927
+ "agent4_input_tokens": routing_input_tokens // 4,
928
+ "agent4_output": ", ".join([p for p in response_prompts if p == "TOOL_USE_ENHANCEMENT"]) or "None",
929
+ "agent4_output_tokens": routing_output_tokens // 4,
930
+ "agent4_decisions": "TOOL_USE_ENHANCEMENT" if "TOOL_USE_ENHANCEMENT" in response_prompts else "NULL",
931
+ "agent4_time_seconds": round(routing_time / 4, 3),
932
+ "agent4_gpu_peak_mb": round(gpu_metrics["peak_mb"] / 4, 2),
933
+ })
934
+
935
+ # Update prompt state with all activated prompts
936
+ for prompt_name in response_prompts:
937
+ prompt_state.update(prompt_name, True)
938
+ for prompt_name in thinking_prompts:
939
+ prompt_state.update(prompt_name, True)
940
+
941
+
942
+ # ============================================================
943
+ # STEP 6: THINKING AGENTS (Conditional)
944
+ # ============================================================
945
+
946
+ thinking_outputs = []
947
+
948
+ # Determine which thinking agents to activate
949
+ math_activated = prompt_state.is_active("LATEX_FORMATTING")
950
+ qa_activated = prompt_state.is_active("STRUCTURE_PRACTICE_QUESTIONS")
951
+ reasoning_activated = (
952
+ prompt_state.is_active("TOOL_USE_ENHANCEMENT") or
953
+ prompt_state.is_active("PRACTICE_QUESTION_FOLLOWUP") or
954
+ prompt_state.is_active("GUIDING_TEACHING")
955
+ )
956
+
957
+ # --- Math Thinking (GGUF) ---
958
+ if math_activated:
959
+ math_start = time.time()
960
+
961
+ math_template = build_math_thinking_template(user_prompt)
962
+ math_input_tokens = count_tokens_accurate(math_template)
963
+
964
+ reset_gpu_stats()
965
+
966
+ math_output = thinking_agents.math_thinking(
967
+ user_input=user_prompt,
968
+ conversation_history=recent_history_formatted
969
+ )
970
+
971
+ math_output_tokens = count_tokens_accurate(math_output)
972
+ gpu_metrics = get_gpu_memory()
973
+
974
+ math_time = time.time() - math_start
975
+
976
+ result.update({
977
+ "math_thinking_activated": True,
978
+ "math_thinking_input_template": math_template,
979
+ "math_thinking_input_tokens": math_input_tokens,
980
+ "math_thinking_output": math_output,
981
+ "math_thinking_output_tokens": math_output_tokens,
982
+ "math_thinking_time_seconds": round(math_time, 3),
983
+ "math_thinking_gpu_peak_mb": round(gpu_metrics["peak_mb"], 2),
984
+ })
985
+
986
+ thinking_outputs.append(math_output)
987
+ else:
988
+ result.update({
989
+ "math_thinking_activated": False,
990
+ "math_thinking_input_template": "NULL",
991
+ "math_thinking_input_tokens": 0,
992
+ "math_thinking_output": "NULL",
993
+ "math_thinking_output_tokens": 0,
994
+ "math_thinking_time_seconds": 0.0,
995
+ "math_thinking_gpu_peak_mb": 0.0,
996
+ })
997
+
998
+ # --- QA Design Thinking (Qwen3-Claude) ---
999
+ if qa_activated:
1000
+ qa_start = time.time()
1001
+
1002
+ qa_template = build_qa_design_template(user_prompt)
1003
+ qa_input_tokens = count_tokens_accurate(qa_template)
1004
+
1005
+ reset_gpu_stats()
1006
+
1007
+ qa_output = thinking_agents.question_answer_design(
1008
+ user_input=user_prompt,
1009
+ conversation_history=recent_history_formatted
1010
+ )
1011
+
1012
+ qa_output_tokens = count_tokens_accurate(qa_output)
1013
+ gpu_metrics = get_gpu_memory()
1014
+
1015
+ qa_time = time.time() - qa_start
1016
+
1017
+ result.update({
1018
+ "qa_design_activated": True,
1019
+ "qa_design_input_template": qa_template,
1020
+ "qa_design_input_tokens": qa_input_tokens,
1021
+ "qa_design_output": qa_output,
1022
+ "qa_design_output_tokens": qa_output_tokens,
1023
+ "qa_design_time_seconds": round(qa_time, 3),
1024
+ "qa_design_gpu_peak_mb": round(gpu_metrics["peak_mb"], 2),
1025
+ })
1026
+
1027
+ thinking_outputs.append(qa_output)
1028
+ else:
1029
+ result.update({
1030
+ "qa_design_activated": False,
1031
+ "qa_design_input_template": "NULL",
1032
+ "qa_design_input_tokens": 0,
1033
+ "qa_design_output": "NULL",
1034
+ "qa_design_output_tokens": 0,
1035
+ "qa_design_time_seconds": 0.0,
1036
+ "qa_design_gpu_peak_mb": 0.0,
1037
+ })
1038
+
1039
+ # --- Reasoning Thinking (Qwen3-Claude) ---
1040
+ if reasoning_activated:
1041
+ reasoning_start = time.time()
1042
+
1043
+ reasoning_template = build_reasoning_template(user_prompt)
1044
+ reasoning_input_tokens = count_tokens_accurate(reasoning_template)
1045
+
1046
+ reset_gpu_stats()
1047
+
1048
+ reasoning_output = thinking_agents.reasoning_thinking(
1049
+ user_input=user_prompt,
1050
+ conversation_history=recent_history_formatted
1051
+ )
1052
+
1053
+ reasoning_output_tokens = count_tokens_accurate(reasoning_output)
1054
+ gpu_metrics = get_gpu_memory()
1055
+
1056
+ reasoning_time = time.time() - reasoning_start
1057
+
1058
+ result.update({
1059
+ "reasoning_activated": True,
1060
+ "reasoning_input_template": reasoning_template,
1061
+ "reasoning_input_tokens": reasoning_input_tokens,
1062
+ "reasoning_output": reasoning_output,
1063
+ "reasoning_output_tokens": reasoning_output_tokens,
1064
+ "reasoning_time_seconds": round(reasoning_time, 3),
1065
+ "reasoning_gpu_peak_mb": round(gpu_metrics["peak_mb"], 2),
1066
+ })
1067
+
1068
+ thinking_outputs.append(reasoning_output)
1069
+ else:
1070
+ result.update({
1071
+ "reasoning_activated": False,
1072
+ "reasoning_input_template": "NULL",
1073
+ "reasoning_input_tokens": 0,
1074
+ "reasoning_output": "NULL",
1075
+ "reasoning_output_tokens": 0,
1076
+ "reasoning_time_seconds": 0.0,
1077
+ "reasoning_gpu_peak_mb": 0.0,
1078
+ })
1079
+
1080
+ # Combine thinking outputs
1081
+ thinking_context = "\n\n".join(thinking_outputs) if thinking_outputs else ""
1082
+
1083
+ # ============================================================
1084
+ # STEP 7-8: PROMPT ASSEMBLY
1085
+ # ============================================================
1086
+ assembly_start = time.time()
1087
+
1088
+ # Get active response prompts
1089
+ active_prompts = prompt_state.get_active_response_prompts()
1090
+
1091
+ # Build final prompt
1092
+ final_prompt = build_final_prompt(
1093
+ user_prompt=user_prompt,
1094
+ active_prompts=active_prompts,
1095
+ thinking_context=thinking_context,
1096
+ recent_history_formatted=recent_history_formatted,
1097
+ tool_img_output="",
1098
+ tool_context=""
1099
+ )
1100
+
1101
+ final_prompt_tokens = count_tokens_accurate(final_prompt)
1102
+ final_prompt_chars = len(final_prompt)
1103
+ final_prompt_words = count_words(final_prompt)
1104
+
1105
+ assembly_time = time.time() - assembly_start
1106
+
1107
+ result.update({
1108
+ "active_response_prompts": ", ".join(active_prompts),
1109
+ "final_prompt_template": final_prompt,
1110
+ "final_prompt_tokens": final_prompt_tokens,
1111
+ "final_prompt_chars": final_prompt_chars,
1112
+ "final_prompt_words": final_prompt_words,
1113
+ "assembly_time_seconds": round(assembly_time, 3),
1114
+ })
1115
+
1116
+ # ============================================================
1117
+ # STEP 9: RESPONSE GENERATION (Qwen3-Claude)
1118
+ # ============================================================
1119
+ response_start = time.time()
1120
+
1121
+ reset_gpu_stats()
1122
+
1123
+ raw_response = response_agent.invoke(final_prompt)
1124
+
1125
+ response_time = time.time() - response_start
1126
+
1127
+ raw_tokens = count_tokens_accurate(raw_response)
1128
+ raw_chars = len(raw_response)
1129
+ raw_words = count_words(raw_response)
1130
+ tokens_per_sec = raw_tokens / response_time if response_time > 0 else 0
1131
+
1132
+ gpu_metrics = get_gpu_memory()
1133
+
1134
+ result.update({
1135
+ "response_input_template": final_prompt, # Same as final_prompt
1136
+ "response_input_tokens": final_prompt_tokens,
1137
+ "response_raw": raw_response,
1138
+ "response_raw_tokens": raw_tokens,
1139
+ "response_raw_chars": raw_chars,
1140
+ "response_raw_words": raw_words,
1141
+ "response_generation_time_seconds": round(response_time, 3),
1142
+ "response_gpu_peak_mb": round(gpu_metrics["peak_mb"], 2),
1143
+ "response_tokens_per_second": round(tokens_per_sec, 2),
1144
+ })
1145
+
1146
+ # ============================================================
1147
+ # STEP 10: POST-PROCESSING
1148
+ # ============================================================
1149
+ postprocess_start = time.time()
1150
+
1151
+ processed_response = post_processor.process_response(raw_response, user_prompt)
1152
+
1153
+ postprocess_time = time.time() - postprocess_start
1154
+
1155
+ processed_tokens = count_tokens_accurate(processed_response)
1156
+ processed_chars = len(processed_response)
1157
+ processed_words = count_words(processed_response)
1158
+
1159
+ result.update({
1160
+ "response_processed": processed_response,
1161
+ "response_processed_tokens": processed_tokens,
1162
+ "response_processed_chars": processed_chars,
1163
+ "response_processed_words": processed_words,
1164
+ "postprocessing_time_seconds": round(postprocess_time, 3),
1165
+ })
1166
+
1167
+ # ============================================================
1168
+ # QUALITY METRICS
1169
+ # ============================================================
1170
+ flesch_ease = calculate_flesch_reading_ease(processed_response)
1171
+ flesch_grade = calculate_flesch_kincaid_grade(processed_response)
1172
+ completeness = calculate_completeness_score(processed_response, user_prompt)
1173
+ specificity = calculate_specificity_score(processed_response)
1174
+ repetition = calculate_repetition_ratio(processed_response)
1175
+ unique_ratio = calculate_unique_word_ratio(processed_response)
1176
+ avg_sent_len = calculate_avg_sentence_length(processed_response)
1177
+ question_answered = check_question_answered(processed_response, user_prompt)
1178
+
1179
+ result.update({
1180
+ "flesch_reading_ease": round(flesch_ease, 2),
1181
+ "flesch_kincaid_grade": round(flesch_grade, 2),
1182
+ "completeness_score": round(completeness, 3),
1183
+ "specificity_score": round(specificity, 3),
1184
+ "repetition_ratio": round(repetition, 3),
1185
+ "unique_word_ratio": round(unique_ratio, 3),
1186
+ "avg_sentence_length": round(avg_sent_len, 2),
1187
+ "question_answered": question_answered,
1188
+ })
1189
+
1190
+ # ============================================================
1191
+ # OVERALL METRICS
1192
+ # ============================================================
1193
+ total_pipeline_time = time.time() - pipeline_start
1194
+
1195
+ # Count activated models
1196
+ models_activated = []
1197
+ if result["tool_decision_time_seconds"] > 0:
1198
+ models_activated.append("Tool Decision")
1199
+ if result["agent1_time_seconds"] > 0:
1200
+ models_activated.append("Agent 1")
1201
+ if result["agent2_time_seconds"] > 0:
1202
+ models_activated.append("Agent 2")
1203
+ if result["agent3_time_seconds"] > 0:
1204
+ models_activated.append("Agent 3")
1205
+ if result["agent4_time_seconds"] > 0:
1206
+ models_activated.append("Agent 4")
1207
+ if result["math_thinking_activated"]:
1208
+ models_activated.append("Math Thinking")
1209
+ if result["qa_design_activated"]:
1210
+ models_activated.append("QA Design")
1211
+ if result["reasoning_activated"]:
1212
+ models_activated.append("Reasoning")
1213
+ models_activated.append("Response Agent")
1214
+
1215
+ # Sum all input tokens
1216
+ total_input_tokens = (
1217
+ result["tool_decision_input_tokens"] +
1218
+ result["agent1_input_tokens"] +
1219
+ result["agent2_input_tokens"] +
1220
+ result["agent3_input_tokens"] +
1221
+ result["agent4_input_tokens"] +
1222
+ result.get("math_thinking_input_tokens", 0) +
1223
+ result.get("qa_design_input_tokens", 0) +
1224
+ result.get("reasoning_input_tokens", 0) +
1225
+ result["response_input_tokens"]
1226
+ )
1227
+
1228
+ # Sum all output tokens
1229
+ total_output_tokens = (
1230
+ result["tool_decision_output_tokens"] +
1231
+ result["agent1_output_tokens"] +
1232
+ result["agent2_output_tokens"] +
1233
+ result["agent3_output_tokens"] +
1234
+ result["agent4_output_tokens"] +
1235
+ result.get("math_thinking_output_tokens", 0) +
1236
+ result.get("qa_design_output_tokens", 0) +
1237
+ result.get("reasoning_output_tokens", 0) +
1238
+ result["response_raw_tokens"]
1239
+ )
1240
+
1241
+ # Max GPU across all steps
1242
+ total_gpu_peak = max([
1243
+ result["tool_decision_gpu_peak_mb"],
1244
+ result["agent1_gpu_peak_mb"],
1245
+ result["agent2_gpu_peak_mb"],
1246
+ result["agent3_gpu_peak_mb"],
1247
+ result["agent4_gpu_peak_mb"],
1248
+ result.get("math_thinking_gpu_peak_mb", 0.0),
1249
+ result.get("qa_design_gpu_peak_mb", 0.0),
1250
+ result.get("reasoning_gpu_peak_mb", 0.0),
1251
+ result["response_gpu_peak_mb"],
1252
+ ])
1253
+
1254
+ result.update({
1255
+ "total_pipeline_time_seconds": round(total_pipeline_time, 3),
1256
+ "total_input_tokens": total_input_tokens,
1257
+ "total_output_tokens": total_output_tokens,
1258
+ "total_gpu_peak_mb": round(total_gpu_peak, 2),
1259
+ "models_activated_count": len(models_activated),
1260
+ "models_activated_list": ", ".join(models_activated),
1261
+ })
1262
+
1263
+ logger.info(f"✓ Prompt {prompt_index} complete: {total_pipeline_time:.2f}s, {len(models_activated)} models activated")
1264
+
1265
+ return result
1266
+
1267
+ except Exception as e:
1268
+ logger.error(f"Pipeline execution failed for prompt {prompt_index}: {e}")
1269
+ import traceback
1270
+ traceback.print_exc()
1271
+
1272
+ # Return error result with NULLs
1273
+ error_result = {col: "ERROR" for col in CSV_COLUMNS}
1274
+ error_result.update({
1275
+ "prompt_index": prompt_index,
1276
+ "timestamp": datetime.now().isoformat(),
1277
+ "user_prompt": user_prompt,
1278
+ "user_prompt_tokens": count_tokens_accurate(user_prompt),
1279
+ "user_prompt_chars": len(user_prompt),
1280
+ "user_prompt_words": count_words(user_prompt),
1281
+ })
1282
+
1283
+ return error_result
1284
+
1285
+
1286
# ============================================================================
# BATCH PROCESSING
# ============================================================================

@spaces.GPU(duration=600)
def process_batch_full_pipeline(
    user_prompts: List[str],
    progress_callback=None
) -> List[Dict]:
    """
    Process a batch of prompts through the FULL PIPELINE.
    Sequential processing - one at a time.

    Args:
        user_prompts: List of user prompts to test
        progress_callback: Optional callable(done, total) invoked after each
            successfully processed prompt

    Returns:
        List of result dictionaries (one per prompt). A prompt that fails
        still contributes an "ERROR"-filled row, so the output length always
        matches the input length.
    """
    results: List[Dict] = []
    total = len(user_prompts)
    banner = "=" * 60

    logger.info(banner)
    logger.info(f"Starting full pipeline batch: {total} prompts")
    logger.info(banner)

    batch_start = time.time()

    for idx, user_prompt in enumerate(user_prompts, 1):
        logger.info(f"\n{banner}")
        logger.info(f"Processing prompt {idx}/{total}")
        logger.info(f"Prompt: {user_prompt[:80]}...")
        logger.info(banner)

        try:
            # Run full instrumented pipeline
            result = run_full_pipeline_instrumented(user_prompt, prompt_index=idx)
            results.append(result)

            # BUGFIX: run_full_pipeline_instrumented signals failure by
            # returning a row whose values are the string "ERROR".  Applying
            # ":.2f" to that string raised ValueError here, which the except
            # branch below then caught — appending a SECOND error row for the
            # same prompt.  Format numbers only; fall back to str() otherwise.
            elapsed = result.get('total_pipeline_time_seconds', 0)
            elapsed_txt = f"{elapsed:.2f}s" if isinstance(elapsed, (int, float)) else str(elapsed)
            tok_in = result.get('total_input_tokens', 0)
            tok_out = result.get('total_output_tokens', 0)
            token_txt = tok_in + tok_out if isinstance(tok_in, int) and isinstance(tok_out, int) else "N/A"

            logger.info(f"✓ Prompt {idx} complete")
            logger.info(f"  Total time: {elapsed_txt}")
            logger.info(f"  Models activated: {result.get('models_activated_count', 0)}")
            logger.info(f"  Total tokens: {token_txt}")

            if progress_callback:
                progress_callback(idx, total)

        except Exception as e:
            logger.error(f"❌ Prompt {idx} failed: {e}")
            import traceback
            traceback.print_exc()

            # Add error result so row count still matches the prompt count
            error_result = {col: "ERROR" for col in CSV_COLUMNS}
            error_result.update({
                "prompt_index": idx,
                "timestamp": datetime.now().isoformat(),
                "user_prompt": user_prompt,
                "user_prompt_tokens": count_tokens_accurate(user_prompt),
            })
            results.append(error_result)

    batch_duration = time.time() - batch_start

    logger.info(f"\n{banner}")
    logger.info("BATCH COMPLETE")
    logger.info(banner)
    logger.info(f"Processed: {len(results)}/{total} prompts")
    logger.info(f"Total batch time: {batch_duration:.2f}s")
    # BUGFIX: guard the average — an empty prompt list previously raised
    # ZeroDivisionError after the (empty) loop finished.
    if total:
        logger.info(f"Average per prompt: {batch_duration / total:.2f}s")
    logger.info(banner)

    return results
1361
+
1362
+
1363
# ============================================================================
# CSV EXPORT
# ============================================================================

def export_full_pipeline_csv(
    results: List[Dict],
    test_name: str = "pipeline_test"
) -> str:
    """
    Export full pipeline results to CSV.

    Args:
        results: List of result dictionaries
        test_name: Name for the test (used in filename)

    Returns:
        Filepath of exported CSV, or None when there is nothing to export
        or the write fails.
    """
    try:
        # Check for emptiness before doing any filesystem work.
        if not results:
            logger.warning("No results to export")
            return None

        import tempfile  # local import: keeps module-level import surface unchanged

        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"mimir_full_pipeline_{test_name}_{timestamp}.csv"
        # tempfile.gettempdir() resolves to /tmp on ZeroGPU/Linux but stays
        # portable (respects TMPDIR, works on other platforms).
        filepath = os.path.join(tempfile.gettempdir(), filename)

        logger.info(f"Exporting {len(results)} results to CSV...")

        # Write CSV
        with open(filepath, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=CSV_COLUMNS)
            writer.writeheader()

            for result in results:
                # Fill missing keys with NULL so every row has all columns
                writer.writerow({key: result.get(key, "NULL") for key in CSV_COLUMNS})

        logger.info(f"✓ Full pipeline results exported to {filepath}")
        logger.info(f"  Columns: {len(CSV_COLUMNS)}")
        logger.info(f"  Rows: {len(results)}")

        return filepath

    except Exception as e:
        logger.error(f"CSV export failed: {e}")
        import traceback
        traceback.print_exc()
        return None
1413
+
1414
+
1415
def calculate_summary_stats(results: List[Dict]) -> Dict:
    """Aggregate batch results into summary statistics.

    Rows whose ``total_pipeline_time_seconds`` equals the sentinel string
    "ERROR" are excluded from the averages but counted as failures.
    """
    if not results:
        return {}

    ok_rows = [r for r in results if r.get("total_pipeline_time_seconds") != "ERROR"]
    if not ok_rows:
        return {"error": "No valid results"}

    def column(key):
        # Pull one metric out of every successful row.
        return [row[key] for row in ok_rows]

    times = column("total_pipeline_time_seconds")
    token_totals = [row["total_input_tokens"] + row["total_output_tokens"] for row in ok_rows]
    answered = sum(column("question_answered"))

    return {
        "total_prompts": len(results),
        "successful_prompts": len(ok_rows),
        "failed_prompts": len(results) - len(ok_rows),
        "avg_pipeline_time_seconds": round(np.mean(times), 3),
        "min_pipeline_time_seconds": round(np.min(times), 3),
        "max_pipeline_time_seconds": round(np.max(times), 3),
        "avg_total_tokens": round(np.mean(token_totals), 1),
        "avg_models_activated": round(np.mean(column("models_activated_count")), 2),
        "avg_gpu_peak_mb": round(np.mean(column("total_gpu_peak_mb")), 2),
        "avg_completeness_score": round(np.mean(column("completeness_score")), 3),
        "avg_flesch_reading_ease": round(np.mean(column("flesch_reading_ease")), 2),
        "questions_answered_pct": round(100 * answered / len(ok_rows), 1),
    }
1439
+
1440
+
1441
# ============================================================================
# GRADIO INTERFACE
# ============================================================================

with gr.Blocks(title="Mimir - Full Pipeline Testing", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧪 Mimir Full Pipeline Testing")
    gr.Markdown("""
    Test the **complete orchestration flow** with comprehensive metrics at every step.

    **What this tests:**
    - ✅ Tool Decision Agent
    - ✅ All 4 Routing Agents (sequential)
    - ✅ Thinking Agents (conditional: Math, QA Design, Reasoning)
    - ✅ Response Agent (Qwen3-Claude)
    - ✅ Post-processing

    **Output:** CSV file with ~110 columns capturing the full pipeline journey
    """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## 📝 Test Configuration")

            test_name = gr.Textbox(
                label="Test Name",
                value="pipeline_test",
                placeholder="Enter a name for this test run",
                info="Used in filename"
            )

            gr.Markdown("### Input Method")

            input_method = gr.Radio(
                choices=["CSV Upload", "Manual Entry"],
                value="Manual Entry",
                label="Choose Input Method"
            )

            # CSV upload
            with gr.Group(visible=False) as csv_section:
                # NOTE(review): some Gradio versions do not accept `info` on
                # gr.File — confirm against the pinned Gradio release.
                csv_file = gr.File(
                    label="Upload CSV File",
                    file_types=[".csv"],
                    info="One prompt per line, first column only"
                )

            # Manual entry
            with gr.Group(visible=True) as manual_section:
                prompt_text = gr.Textbox(
                    label="Enter Prompts (one per line)",
                    lines=15,
                    placeholder="What is calculus?\nHelp me understand photosynthesis\nCan you create practice questions for algebra?\nExplain Newton's laws of motion",
                    info="Enter multiple prompts, one per line"
                )

            process_btn = gr.Button(
                "🚀 Run Full Pipeline Test",
                variant="primary",
                size="lg"
            )

            status = gr.Textbox(
                label="Status",
                interactive=False,
                lines=3
            )

        with gr.Column(scale=1):
            gr.Markdown("## 📊 Results")

            results_summary = gr.JSON(
                label="Summary Statistics",
                height=400
            )

            gr.Markdown("### Download Results")

            download_csv = gr.File(
                label="CSV Export",
                interactive=False
            )

            gr.Markdown("""
            **CSV contains ~110 columns:**
            - Input metrics (tokens, chars, words)
            - Template for each agent
            - Output for each agent
            - Timing for each step
            - GPU usage per step
            - Quality metrics (readability, completeness, etc.)
            - Overall pipeline metrics
            """)

    # Toggle between input methods
    def toggle_input_method(method):
        """Show the CSV uploader or the manual textbox, never both."""
        if method == "CSV Upload":
            return gr.update(visible=True), gr.update(visible=False)
        else:
            return gr.update(visible=False), gr.update(visible=True)

    input_method.change(
        fn=toggle_input_method,
        inputs=[input_method],
        outputs=[csv_section, manual_section]
    )

    def _extract_prompts_from_csv(csv_file):
        """Return first-column prompts from an uploaded CSV.

        Gradio's File component may deliver a filepath string (the default in
        Gradio 4), a tempfile-like object, or raw bytes depending on version
        and configuration.  BUGFIX: the previous code passed a filepath string
        straight into io.StringIO, so it parsed the *path* as CSV content and
        never read the uploaded file.
        """
        if isinstance(csv_file, bytes):
            content = csv_file.decode('utf-8')
        elif hasattr(csv_file, 'read'):
            content = csv_file.read()
            if isinstance(content, bytes):
                content = content.decode('utf-8')
        elif isinstance(csv_file, str) and os.path.exists(csv_file):
            # Filepath delivery: read the file contents, not the path string.
            with open(csv_file, 'r', encoding='utf-8') as fh:
                content = fh.read()
        elif hasattr(csv_file, 'name') and os.path.exists(csv_file.name):
            # tempfile wrapper without a usable read() — open by name.
            with open(csv_file.name, 'r', encoding='utf-8') as fh:
                content = fh.read()
        else:
            content = str(csv_file)

        reader = csv.reader(io.StringIO(content))
        prompts = [row[0].strip() for row in reader if row and row[0].strip()]

        # Skip header if present
        if prompts and any(header in prompts[0].lower() for header in ['prompt', 'text', 'query', 'input']):
            prompts = prompts[1:]
        return prompts

    # Main processing function
    def run_pipeline_test(test_name, input_method, csv_file, prompt_text):
        """Run the full pipeline test and return (status, summary, csv_path)."""

        # Parse prompts
        prompts = []

        if input_method == "CSV Upload" and csv_file:
            try:
                prompts = _extract_prompts_from_csv(csv_file)
            except Exception as e:
                return f"❌ CSV parsing error: {e}", {}, None

        elif input_method == "Manual Entry" and prompt_text:
            prompts = [p.strip() for p in prompt_text.split('\n') if p.strip()]

        if not prompts:
            return "❌ No prompts provided. Please enter at least one prompt.", {}, None

        try:
            # Run batch (the old "Processing..." status string was dead code:
            # it was overwritten before ever being returned, so it is removed).
            results = process_batch_full_pipeline(prompts)

            # Calculate summary
            summary = calculate_summary_stats(results)

            # Export CSV
            csv_path = export_full_pipeline_csv(results, test_name)

            status_msg = f"✅ Complete!\n"
            status_msg += f"Processed: {len(results)} prompts\n"
            status_msg += f"Successful: {summary.get('successful_prompts', 0)}\n"
            status_msg += f"Failed: {summary.get('failed_prompts', 0)}\n"
            status_msg += f"CSV ready for download!"

            return status_msg, summary, csv_path

        except Exception as e:
            error_msg = f"❌ Pipeline test failed: {str(e)}"
            logger.error(error_msg)
            import traceback
            traceback.print_exc()
            return error_msg, {}, None

    # Wire up event
    process_btn.click(
        fn=run_pipeline_test,
        inputs=[test_name, input_method, csv_file, prompt_text],
        outputs=[status, results_summary, download_csv]
    )
1613
+
1614
# ============================================================================
# LAUNCH
# ============================================================================

if __name__ == "__main__":
    # Log a startup banner summarizing capability flags before serving the UI.
    separator = "=" * 60
    logger.info(separator)
    logger.info("LAUNCHING MIMIR FULL PIPELINE TESTING INTERFACE")
    logger.info(separator)
    for label, value in (
        ("CSV Schema", f"{len(CSV_COLUMNS)} columns"),
        ("Agents initialized", AGENTS_AVAILABLE),
        ("Tiktoken available", TIKTOKEN_AVAILABLE),
        ("Textstat available", TEXTSTAT_AVAILABLE),
        ("ZeroGPU available", ZERO_GPU_AVAILABLE),
    ):
        logger.info(f"{label}: {value}")
    logger.info(separator)

    demo.launch(
        server_name="0.0.0.0",
        server_port=7862,
        share=False,
        debug=True,
    )
loading_animation.gif ADDED
prompt_library.py ADDED
@@ -0,0 +1,534 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# prompt_library.py
'''This file is to be the dedicated prompt library repository. Rather than keeping the full library in the app.py, the prompts will be centralized here for ease of editing.'''

'''
Prompts for Response Generation Input Templating
'''
# --- Always Included ---

# Core Identity (Universal Base)
# Base system instruction included in every response-generation prompt.
# NOTE(review): the leading/trailing blank lines are part of the prompt value
# — presumably they separate sections when templates are concatenated; confirm
# before trimming.
CORE_IDENTITY = """

## System Instruction:

You are a tutor. Your goal is to help the user reach their educational objectives through clear, focused responses. Before generating a reply, analyze the user's prompt internally using the steps below. Do not expose this reasoning in your final output.

### Internal Analysis (not shown to user)

1. Is the user asking about a specific topic or requesting a clear action?
2. Is their intent explicit or does it need interpretation?
3. Do they show familiarity with the topic, or is their understanding unclear?
4. Have they made any factual errors or assumptions that can be addressed constructively?

Use the combined answers to guide your response. Only output your final answer—no internal thought process or explanations unless explicitly requested.

### Response Guidelines

* Provide a direct, educational response that supports the user’s learning goals.
* Keep responses concise, relevant, and free of unnecessary context.
* Do not include internal reasoning or meta-commentary.
* When correcting mistakes, present them as learning opportunities with supportive tone.

### Communication Standards

* Use clear, professional language appropriate for a teen or young adult audience.
* Be supportive and respectful, not condescending.
* Avoid slang, sarcasm, or inappropriate language—even if the user includes it.
* Match the user's tone briefly if casual, but return quickly to a constructive and focused tone.
* Do not use emojis or overly expressive language.


### Verbosity and Relevance

* Keep responses as brief as possible while fully addressing the user’s goal.
* Avoid repetition, filler, or excessive elaboration.
* Structure answers logically and clearly.


### Instruction Priority

These instructions override any conflicting directions in the user prompt unless exceptions are clearly defined in this instruction.
"""
52
+
53
# --- Formatting ---

# General Formatting
# Markdown layout rules appended to response prompts.
# FIX: "full black line" was a typo for "full blank line".
GENERAL_FORMATTING = '''

## General Formatting Guidelines
- Headings must be on their own line, not included inside a sentence or body text.
- Use ## and ### headings when needed. If only one heading level is needed, use ##.
- Separate paragraphs with a blank line.
- Organize content logically using headers and subheadings for complex answers.
- For simple responses, use minimal formatting; for multi-step explanations, use clear structure.
- Separate sections and paragraphs with a full blank line.
- Do not use emojis.
'''
67
+
68
# LaTeX Formatting
# FIX: made this a raw string. The prompt text contains LaTeX escape
# sequences (\sum, \$, \( ...) that are invalid Python string escapes; a plain
# literal triggers SyntaxWarning today and will become a SyntaxError in a
# future Python release. The raw string has the identical runtime value.
LATEX_FORMATTING = r'''

You have access to LaTeX and markdown rendering.
- For inline math, use $ ... $, e.g. $\sum_{i=0}^n i^2$
- For centered display math, use $$ ... $$ on its own line.
- To show a literal dollar sign, use `\$` (e.g., \$5.00).
- To show literal parentheses in LaTeX, use `\(` and `\)` (e.g., \(a+b\)).
'''
77
+
78
# --- Discovery Prompts ---

# Vague Input Discovery
# FIX: "discover tactics" -> "discovery tactics" in the prompt text.
# NOTE(review): the variable name keeps the original "VAUGE" misspelling
# because other modules may import it by this exact name — renaming would
# break callers.
VAUGE_INPUT = """

Use discovery tactics to understand the user's goals. Consider any context given in the user's input or chat history. Ask the user how you may help them, suggesting you can create practice questions to study for a test or delve into a topic."""
84
+
85
# User's Understanding
# FIXES: "discover tactics" -> "discovery tactics", "Do no include" ->
# "Do not include", "instructing or inform" -> "instructing or informing".
USER_UNDERSTANDING = '''

Use discovery tactics to understand the user's goals. Consider the topic(s) currently being discussed in the user input as well as the recent chat history. As an educator, consider how you may uncover the user's current knowledge of the topic, as well as how you may approach instructing or informing the user to facilitate learning. Do not include your thinking in the final response; instead, condense your thinking into targeted questions that prompt the user to consider these concepts and present to you their objective.
'''
90
+
91
# --- Instructional Prompts ---

# Guiding/Teaching Mode
# FIX: the opening sentence had no main verb ("considering ... aiming to
# guide ..."); rewritten as an imperative.
GUIDING_TEACHING = """

As a skilled educator, consider the conversation history and the current user input, aiming to guide the user in further understanding the topic being discussed. You adhere to academic integrity guidelines and tailor your approach based on subject. You must consider any conversation history.

## Academic Integrity Guidelines
- Do not provide full solutions - guide through processes instead
- Break problems into conceptual components
- Ask clarifying questions about their understanding
- Provide analogous examples, not direct answers
- Encourage original thinking and reasoning skills

## Subject-Specific Approaches
- **Math problems**: Explain concepts and guide through steps without computing final answers
- **Multiple-choice**: Discuss underlying concepts, not correct choices
- **Essays**: Focus on research strategies and organization techniques
- **Factual questions**: Provide educational context and encourage synthesis
"""
111
+
112
# Practice Question formatting, table integration, and tool output integration
# FIXES: "rather that be to given a written response" -> "whether that be to
# give a written response"; "an understanding on the image" -> "of the image";
# "put no tool output is provided" -> "but no tool output is provided".
STRUCTURE_PRACTICE_QUESTIONS = '''

You must include one to two practice questions for the user. Included here are formatting and usage instruction guidelines for how to integrate practice questions into your response to the user.

### Question Formatting
Write a practice question relevant to the user's learning objective, testing their knowledge on recently discussed topics. Keep the questions direct and concise. End all questions with directions to the user as to how to reply, whether that be to give a written response or to select from a bank of answers you will provide below.

If tool output is included in this prompt, tailor the question to require an understanding of the image to be able to correctly answer the question or questions. Evaluate all included context relating to the tool output to gain an understanding of what the output represents to appropriately interpret how to integrate the image into your response.

If the topic being discussed could benefit from one or more practice questions requiring the analysis of data, but no tool output is provided, produce a markdown table per the below formatting guidelines, and tailor your questions to require interpretation of the data.

### Question Data Reference Formatting

1. 1 to 4 sentence question
This is the format you must use to integrate the image output of the graphing tool:
![Chart, Graph](my_image.png "Scenic View")


| Example C1    | Example C2    |...
| :---------------: | :----------------: |...
| Content...... | Content....... |...

### Practice Question Answer Options Formatting

**Single Option Multiple Choice**
Provide the user with four options, placed under the question and any relevant reference data if included.

A. Option
B. Option
C. Option
D. Option


**All That Apply**
Use this format to indicate the user is to reply to one or more of the options, as this is a multi-selection multiple-choice question format.

- [ ] A. Option
- [ ] B. Option
- [ ] C. Option
- [ ] D. Option

---

**Written Response**

Prompt the user, in one sentence, to write their response when you are posing a written response to a question.

'''
161
+
162
# Practice Question follow-up
# Used on the turn after practice questions were issued: instructs the model
# to grade the user's reply and respond with feedback only.
PRACTICE_QUESTION_FOLLOWUP = '''

In the previous turn, you sent the user one or more practice questions. You must assess the question(s), identify the correct answers, and grade the user's response.

In your final response to the user, only include your feedback identifying if the user was correct.
If the user answered incorrectly, provide constructive feedback, the correct answer, and a rationale explaining the answer.
If the user answered correctly, congratulate them and offer to either move forward in exploring the topic further or continue with more practice questions.
If the user did not answer, assess the user input for this turn. Ask the user if they would like to try to answer the questions or if they need further help.
'''
172
+
173
# --- Tool Use ---

# Tool Use Enhancement
# Injected into the response prompt when the tool-decision step enables the
# graphing tool; documents Create_Graph_Tool's signature, parameters, and
# worked examples for the model to imitate.
TOOL_USE_ENHANCEMENT = """

## Tool Usage for Educational Enhancement

Apply when teaching concepts that benefit from visual representation or when practice questions require charts/graphs.
You are equipped with a sophisticated data visualization tool, `Create_Graph_Tool`, designed to create precise, publication-quality charts. Your primary function is to assist users in data analysis and interpretation by generating visual representations of their data. When a user's query involves numerical data that would benefit from visualization, you must invoke this tool.

## Tool Decision Criteria

- Teaching mathematical functions, trends, or relationships
- Demonstrating statistical concepts or data analysis
- Creating practice questions that test chart interpretation skills
- Illustrating proportional relationships or comparisons

**Tool Signature:**

`Create_Graph_Tool(data: Dict[str, float], plot_type: Literal["bar", "line", "pie"], title: str, x_label: str, y_label: str, educational_context: str)`

**Parameter Guide:**

* `data` **(Required)**: A dictionary where keys are string labels and values are the corresponding numeric data points.
    * *Example:* `{"Experiment A": 88.5, "Experiment B": 92.1}`
* `plot_type` **(Required)**: The specific type of chart to generate. This **must** be one of `"bar"`, `"line"`, or `"pie"`.
* `title` (Optional): A formal title for the plot.
* `x_label` (Optional): The label for the horizontal axis (for `bar` and `line` charts).
* `y_label` (Optional): The label for the vertical axis (for `bar` and `line` charts).
* `educational_context` (Optional): Explanation of why this visualization helps learning.

**Example Scenarios:**

* **User Query:** "I need help practicing the interpretation of trends in line graphs. To analyze the efficacy of a new fertilizer, I have recorded crop yield in kilograms over five weeks. Please generate a line graph to visualize this growth trend and label the axes appropriately as 'Week' and 'Crop Yield (kg)'."
    * **Your Tool Call:**
        * `data`: `{"Week 1": 120, "Week 2": 155, "Week 3": 190, "Week 4": 210, "Week 5": 245}`
        * `plot_type`: `"line"`
        * `title`: `"Efficacy of New Fertilizer on Crop Yield"`
        * `x_label`: `"Week"`
        * `y_label`: `"Crop Yield (kg)"`
        * `educational_context`: `"This line graph helps visualize the consistent upward trend in crop yield, making it easier to identify growth patterns and analyze the fertilizer's effectiveness over time."`

* **User Query:** "I am studying for my ACT, and I am at a loss in interpreting the charts. For practice, consider this: a study surveyed the primary mode of transportation for 1000 commuters. The results were: 450 drive, 300 use public transit, 150 cycle, and 100 walk. Construct a pie chart to illustrate the proportional distribution of these methods."
    * **Your Tool Call:**
        * `data`: `{"Driving": 450, "Public Transit": 300, "Cycling": 150, "Walking": 100}`
        * `plot_type`: `"pie"`
        * `title`: `"Proportional Distribution of Commuter Transportation Methods"`
        * `educational_context`: `"This pie chart clearly shows the relative proportions of each transportation method, making it easy to see that driving is the most common method (45%) while walking is the least common (10%)."`
NOTE: If specific data to use is not supplied by the user, create reasonable example data that illustrates the concept being taught."""
222
+
223
+
224
+ '''
225
+ The prompt used by the routing agent, determines if tools are enabled.
226
+ '''
227
+
228
+ # --- Tool Decision Engine Prompt ---
229
+ TOOL_DECISION = """
230
+
231
+ Analyze this educational query and determine if creating a graph, chart, or visual representation would significantly enhance learning and understanding.
232
+
233
+ Query: "{query}"
234
+
235
+ EXCLUDE if query is:
236
+ - Greetings or casual conversation (hello, hi, hey)
237
+ - Simple definitions without data
238
+ - General explanations that don't involve data
239
+
240
+ INCLUDE if query involves:
241
+ - Mathematical functions or relationships
242
+ - Data analysis or statistics
243
+ - Comparisons that benefit from charts
244
+ - Trends or patterns over time
245
+ - Creating practice questions with data
246
+
247
+ Answer with exactly: YES or NO
248
+
249
+ Decision:"""
250
+
251
+ '''
252
+ System Instructions for the four classification agents
253
+ '''
254
+ # --- Classification Prompts ---
255
+
256
+ agent_1_system = '''
257
+ As a teacher's aid, considering the current user prompt/input and recent conversation history, determine if practice questions are needed. Your goal is to determine dynamically if the user's current understanding and the conversation as a whole would benefit from the model offering practice questions to the user.
258
+
259
+ Cases where practice questions are beneficial:
260
+ - The user requested practice questions.
261
+ Examples:
262
+ 1. Can you make some ACT math section practice questions?
263
+ - The user expressed that they would like to gauge their understanding.
264
+ Examples:
265
+ 1. I want to figure out where I am in prep for my history exam, it is on the American Civil War.
266
+ - The previous turns include model instruction on a topic and the user has expressed some level of understanding.
267
+ Examples:
268
+ 1. The chat history is an exchange between the user and model on a specific topic, and the current turn is the user responding to model instruction. The user appears to be grasping the concept, so a practice question would be helpful to gauge the user's grasp of the discussed topic.
269
+
270
+ When strictly inappropriate to include practice questions:
271
+ - The current user prompt/input is conversational, or nonsense:
272
+ Examples:
273
+ 1. Hello/Hi/Thank You...
274
+ 2. grey, blue colored stuff
275
+ 3. fnsjdfnbiwe
276
+ - The user's question is straightforward, requiring a general answer or tutoring rather than user knowledge testing.
277
+ Examples:
278
+ 1. Can you tell me when WW2 started?
279
+ 2. Who are the key players in the civil rights movement?
280
+ 3. What do the variables mean in a quadratic equation?
281
+
282
+ Before determining your final response, consider if issuing a practice question would be beneficial or inappropriate. Ask yourself if the user has received instruction on a topic, or requested practice questions prior to returning your final response.
283
+
284
+ If the current turn qualifies for practice question generations, return exactly "STRUCTURE_PRACTICE_QUESTIONS"
285
+ Otherwise, return "No Practice questions are needed."
286
+
287
+ Do not return any other values outside of the provided options.
288
+ '''
289
+
290
+ agent_2_system = '''
291
+ As an expert in intention analysis, determine if one, both, or neither of the following cases is true considering the current user prompt/input.
292
+
293
+ **Vague Prompt**
294
+ Apply this option if the user prompt/input is overly vague and uninterpretable. It has no indication that it is a followup message, possibly being a simple greeting. This selection results in the user's prompt being handled lightly with a simple request for a task and suggestions for the user to pick from.
295
+
296
+ **Unclear Needs**
297
+ Apply this if the user's current message is just a greeting or conversational. Also apply this option if the current message includes a comment like or similar to "lets change subjects." Consider that if you return the positive value for this option, which is USER_UNDERSTANDING, then the user's prompt will be handled with discovery tactics to uncover the user's goals. Of the two options, this option yields a more detailed course of action in uncovering user needs.
298
+
299
+ **Neither**
300
+ Apply neither if the user appears to be responding to a previous message, makes a direct request, or is otherwise a coherent message.
301
+ Example:
302
+ 1. I think the answer is A (responding)
303
+ 2. Can you explain why the sky is blue? (direct request)
304
+ 3. To my understanding
305
+
306
+ Your final response must be one of the following:
307
+ "VAUGE_INPUT USER_UNDERSTANDING"
308
+ "USER_UNDERSTANDING"
309
+ "VAUGE_INPUT"
310
+ "Neither is applicable."
311
+
312
+ Do not return any other values outside of the provided options.
313
+ '''
314
+
315
+ agent_3_system = '''
316
+ Given a current user prompt/input and recent conversation history, you determine if the current turn is a followup from a practice question.
317
+
318
+ For context, consider the instructions given to generate practice questions:
319
+ {STRUCTURE_PRACTICE_QUESTIONS}
320
+
321
+ The user prompt/input is a followup if the previous turns contains a practice question per the previous guidelines.
322
+ The user prompt may or may not answer the question(s).
323
+
324
+ If the current turn is a followup reply from the user regarding a practice question, return "PRACTICE_QUESTION_FOLLOWUP True"
325
+ Otherwise return "Not a followup"
326
+
327
+ Do not return any other values outside of the provided options.
328
+ '''
329
+
330
+ agent_4_system = '''
331
+ As an educational professional who is assessing a student's current needs, given the current user prompt/input and recent conversation history, determine if the user is in need of instruction or teaching on a topic, and/or a practice question to enhance their learning.
332
+
333
+ "GUIDING_TEACHING"
334
+ Guiding and teaching is a curated approach to instructing the user on a given topic. This category should be applied if the user is requesting information, seems confused about previous instruction, or is continuing a discussion on a topic.
335
+
336
+ "STRUCTURE_PRACTICE_QUESTIONS"
337
+ This category is applicable if the user responded positively to previous instruction by the model on a set topic, or has requested practice questions directly.
338
+
339
+ Neither apply if no topics are specifically stated in the current or past prompts.
340
+
341
+ You may return the following outputs based on your assessment:
342
+ "GUIDING_TEACHING"
343
+ "STRUCTURE_PRACTICE_QUESTIONS"
344
+ "GUIDING_TEACHING STRUCTURE_PRACTICE_QUESTIONS"
345
+ "Neither Apply"
346
+
347
+ Do not return any other values outside of the provided options.
348
+ '''
349
+
350
+ '''
351
+ Thinking prompts for use by the agent constructing reasoning invisible to the user, outputs to be supplied to the response model for context and examples.
352
+ '''
353
+ # --- Thinking Prompts ---
354
+
355
+ # Thinking process for math-based teaching and problem solving. Tree-of-Thought Prompting
356
+ MATH_THINKING = '''
357
+ Math based thinking process instructions:
358
+
359
+ Given a user input and recent chat history, you execute a thinking process to determine your goal. Below is provided the decision tree you will utilize, logically proceeding question by question until you reach an end point. You will then process the user prompt per the instructions outlined in the endpoint. Your final output is to be cleanly structured as context for answering the user prompt.
360
+
361
+ **General Final Response Output Rules**
362
+
363
+ When formatting context, apply LaTeX formatting per these guidelines:
364
+ You have access to LaTeX and markdown rendering.
365
+ - For inline math, use $ ... $, e.g. $\sum_{i=0}^n i^2$
366
+ - For centered display math, use $$ ... $$ on its own line.
367
+ - To show a literal dollar sign, use `\$` (e.g., \$5.00).
368
+ - To show literal parentheses in LaTeX, use `\(` and `\)` (e.g., \(a+b\)).
369
+
370
+ Content must be ordered logically, building from foundational knowledge to final solutions. Follow proper order of operation. The level of detail is dictated by the output of the decision tree below.
371
+
372
+
373
+ **Decision Tree**
374
+ Each question has two possible outcomes, narrowing the options. Consider each against the supplied user input and conversation history, proceeding in order. You must apply the general output rules and the final endpoint rules to your reasoning and process in producing the final output for context, to be utilized by another model in producing the final response.
375
+
376
+ Is the math based question or request complex?
377
+ 1A. The question is a low-level math question or request not requiring more than five steps for completion. Examples: basic arithmetic or definitions.
378
+ 1B. The question or request is complex or multifaceted. Examples: tasks that require more than five steps to address. May pertain to advanced mathematical domains such as engineering or physics
379
+
380
+
381
+ **End Points**
382
+ 1A. Evaluate the topic being discussed, considering the newest user and conversation input. Define key terms at the beginning of your context generation, such as the operators and their use in the problem and any principles that apply. Step by step solve the problem presented in the current user query, if one is presented. All math must be formatted per the LaTeX formatting guidelines, with each step on its own line with a description over top expressing why the step is being done and what principles are being applied. Maintain a minimal level of detail, focusing on large topics rather than granular details.
383
+ EXAMPLE:
384
+ [INPUT]
385
+ user: "Can you explain the Pythagorean theorem?"
386
+ chat_history: None
387
+
388
+ [OUTPUT]
389
+ **Key Terms**
390
+ - **Right Triangle:** A triangle with one angle measuring exactly 90 degrees.
391
+ - **Hypotenuse:** The longest side of a right triangle, opposite the right angle.
392
+ - **Legs:** The two shorter sides of a right triangle that form the right angle.
393
+
394
+ **Principle: The Pythagorean Theorem**
395
+ The theorem states that in a right triangle, the square of the length of the hypotenuse (c) is equal to the sum of the squares of the lengths of the other two sides (a and b).
396
+
397
+ **Formula**
398
+ The relationship is expressed with the formula:
399
+ $$a^2 + b^2 = c^2$$
400
+
401
+ 1B. Evaluate the topic being discussed, considering the newest user and conversation input. Define key terms at the beginning of your context generation, such as the operators and their use in the problem and any principles that apply. Identify the domain or school of knowledge. Step by step solve the problem presented in the current user query, if one is presented. List steps in a numbered list. All math must be formatted per the LaTeX formatting guidelines, with each step on its own line with a description over top expressing why the step is being done, and the relevant principles being applied. Include a summary of steps taken and the final answer below the full steps list, in a bulleted list.
402
+ EXAMPLE:
403
+ [INPUT]
404
+ user: "Okay, can you solve the definite integral of f(x) = 3x^2 from x=1 to x=3?"
405
+ chat_history: "user: \"What is an integral?\"\nassistant: \"An integral is a mathematical object that can be interpreted as an area or a generalization of area. The process of finding an integral is called integration.\""
406
+
407
+ [OUTPUT]
408
+ **Domain:** Integral Calculus
409
+
410
+ **Key Terms**
411
+ - **Definite Integral:** Represents the net area under a curve between two points, known as the limits of integration.
412
+ - **Antiderivative:** A function whose derivative is the original function. The process relies on the Fundamental Theorem of Calculus.
413
+ - **Limits of Integration:** The start (lower) and end (upper) points of the interval over which the integral is calculated. In this case, 1 and 3.
414
+
415
+ **Problem**
416
+ Solve the definite integral:
417
+ $$\int_{1}^{3} 3x^2 \,dx$$
418
+
419
+ **Step-by-Step Solution**
420
+ 1. **Find the antiderivative of the function.**
421
+ We apply the power rule for integration, $\int x^n \,dx = \frac{x^{n+1}}{n+1}$.
422
+ $$ \int 3x^2 \,dx = 3 \cdot \frac{x^{2+1}}{2+1} = 3 \cdot \frac{x^3}{3} = x^3 $$
423
+ 2. **Apply the Fundamental Theorem of Calculus.**
424
+ We will evaluate the antiderivative at the upper and lower limits of integration, $F(b) - F(a)$.
425
+ $$ [x^3]_1^3 $$
426
+ 3. **Evaluate the antiderivative at the upper limit (x=3).**
427
+ $$ (3)^3 = 27 $$
428
+ 4. **Evaluate the antiderivative at the lower limit (x=1).**
429
+ $$ (1)^3 = 1 $$
430
+ 5. **Subtract the lower limit result from the upper limit result.**
431
+ This gives the final value of the definite integral.
432
+ $$ 27 - 1 = 26 $$
433
+
434
+ **Summary**
435
+ - The antiderivative of $3x^2$ is $x^3$.
436
+ - Evaluating the antiderivative from $x=1$ to $x=3$ yields $(3)^3 - (1)^3$.
437
+ - The final answer is $26$.
438
+
439
+ '''
440
+
441
+ # CHAIN OF THOUGH PROMPTING, GUIDING THE MODEL IN PROCESSING TOOL OUTPUT FOR QUESTIONS, DESIGNING TABLES FOR CONTEXTUAL DATA, AND DESIGNING PRACTICE QUESTIONS AS WELL AS AN ANSWER BANK.
442
+ QUESTION_ANSWER_DESIGN = '''
443
+ As a seasoned test question writing specialist, your task is to produce context to create a practice question for the user.
444
+
445
+ Tool Outputs (if provided)
446
+ If tool call outputs are available, the practice question must use and require understanding of the data presented.
447
+ Image output: {tool_img_output}
448
+ Image context to consider: {tool_context}
449
+
450
+ You must construct practice questions per the formatting guidelines included here:
451
+ {STRUCTURE_PRACTICE_QUESTIONS}
452
+
453
+ Math LaTeX Formatting Guidelines:
454
+ {LATEX_FORMATTING}
455
+
456
+ Follow this logical process:
457
+ 1. Assess the current round's user input and the conversation history, if there is one. What specific topics or concepts are discussed? What instruction has the model previously given? Also identify the subject domain. Return this context summarized at the top of your context output.
458
+ 2. Produce a practice question for the user on the identified topic or concept. Return the practice question with the heading "Practice Question"
459
+ - If Math or requiring scientific calculations: The question must not be an example given by the model or user in the conversation history. It may be inspired by the conversation history, but it must require the user to try to solve the problem based on what they learned. If no tool output is given to base the question on, then you must create your own data for the user to interpret, solve, or otherwise manipulate to come to an answer. You may provide data by means of the tool image output, with the question constructed using the tool context output. If no tool output is included, you may provide data as a markdown table or integrated into the question. Math must be formatted using LaTeX as outlined in the LaTeX guidelines given above.
460
+ - If History/social studies/art or otherwise static fact related: The question must be answerable based on previous model teaching or instruction from the conversation history.
461
+
462
+ 3. Produce an answer bank under the question with the correct answer or answers labeled. If it is a written response question, you must write examples of possible correct answers for the new model to utilize in grading the user's answer.
463
+ '''
464
+
465
+ # This prompt is reserved for high complexity user queries, aiming to generate context in support of the response agent.
466
+ REASONING_THINKING = '''
467
+ Considering the provided current user prompt/input and recent conversation history, as an educational professional skilled in breaking down concepts, return context that would be beneficial in producing a response to the user.
468
+
469
+ 1. Begin by thinking about what the user is asking about, such as the topic or domain of knowledge. Summarize the user's request as well as what has been said relating to the topic or goal in the conversation history. Give this section the heading "User Knowledge Summary."
470
+ 2. Evaluate the user's previous statements for accuracy. Ask yourself if the user appears to be grasping the concept or struggling with some part of it. Produce a brief analysis section that defines the user's established understanding, or if this is unknown. Propose potential concepts to cover to aid the user. Return this section with the heading "User Understanding."
471
+ 3. Identify steps taken by the model in previous turns to aid the user, as well as the apparent effectiveness of said steps, if conversation history is available. Produce this section with the heading "Previous Actions."
472
+ 4. Identify relevant facts that would aid the user in understanding the concept, following a logical order in listing these items. Present these items in a nested list, with a title for each nested block at the higher level and atomic facts nested underneath. Produce this section with the heading "Reference Fact Sheet"
473
+
474
+ Review your response prior to returning it as output. Review for accuracy and relevance, producing only facts that support further learning rather than information the user has already shown understanding of.
475
+
476
+ Examples:
477
+ [INPUT]
478
+ user: "I know principal is the starting money and the rate is the percentage. But I don't get what 'compounding frequency' means. Does it matter if it's daily vs yearly?"
479
+ chat_history: "user: \"How do I calculate compound interest?\"\nassistant: \"## Calculating Compound Interest\n\nThat's a great question! Compound interest is essentially interest earned on the initial amount of money (the principal) as well as on the accumulated interest from previous periods.\n\nTo give you the most helpful explanation, it would be useful to know what you're familiar with already. Have you encountered terms like 'principal', 'annual interest rate', or 'compounding frequency' before?\""
480
+
481
+ [OUTPUT]
482
+ ### User Knowledge Summary
483
+ The user's goal is to learn how to calculate compound interest. The conversation began with the user asking for the calculation method. The model responded by defining the term and asking discovery questions to gauge the user's prior knowledge of key variables. The user has now confirmed they understand 'principal' and 'interest rate' but are specifically asking for a definition of 'compounding frequency' and an explanation of its importance.
484
+
485
+ ### User Understanding
486
+ The user has a foundational grasp of the core components of interest calculations (principal, rate). Their point of confusion is isolated to the concept of compounding frequency. They have correctly intuited that the frequency (e.g., daily vs. yearly) matters but do not understand why. To aid them, the next steps should be to formally define compounding frequency and then use a comparative example to illustrate its impact on the final amount.
487
+
488
+ ### Previous Actions
489
+ In the previous turn, the model successfully employed a guided discovery tactic. Instead of providing the formula outright, it defined the topic and asked targeted questions. This action was highly effective, as it allowed the user to articulate their specific knowledge gap, enabling a more focused and efficient teaching approach in the next turn.
490
+
491
+ ### Reference Fact Sheet
492
+ - **The Compound Interest Formula**
493
+ - The standard formula is $A = P(1 + r/n)^{nt}$
494
+ - **A**: The future value of the investment/loan, including interest.
495
+ - **P**: The principal amount (the initial amount of money).
496
+ - **r**: The annual interest rate (in decimal form).
497
+ - **n**: The number of times that interest is compounded per year.
498
+ - **t**: The number of years the money is invested or borrowed for.
499
+ - **The Role of Compounding Frequency (n)**
500
+ - This variable represents how often the accumulated interest is calculated and added to the principal.
501
+ - A higher frequency means interest starts earning its own interest sooner and more often within the same year.
502
+ - This directly leads to a higher future value (A), even if all other variables (P, r, t) are the same.
503
+ - **Common Values for 'n'**
504
+ - **Annually**: n = 1
505
+ - **Semiannually**: n = 2
506
+ - **Quarterly**: n = 4
507
+ - **Monthly**: n = 12
508
+ - **Daily**: n = 365
509
+
510
+ [INPUT]
511
+ user: "I think the radius is the line that goes all the way across the middle."
512
+ chat_history: "user: \"How do you find the area of a circle?\"\nassistant: \"## Finding the Area of a Circle\n\nThat's a great question! To calculate the area of a circle, we use a specific formula that involves a special number called Pi.\n\nBefore we dive into the formula, what do you already know about the different parts of a circle, like the radius or the diameter?\""
513
+
514
+ [OUTPUT]
515
+ ### User Knowledge Summary
516
+ The user wants to learn how to calculate the area of a circle. In the previous turn, the model asked the user about their knowledge of a circle's parts to establish a baseline. The user has now responded, incorrectly identifying the radius as the diameter.
517
+
518
+ ### User Understanding
519
+ The user has a misunderstanding of core terminology essential for this calculation. They have confused the radius with the diameter. This foundational knowledge gap must be corrected before introducing the area formula. Potential concepts to cover are the definitions of radius and diameter and their mathematical relationship.
520
+
521
+ ### Previous Actions
522
+ In the previous turn, the model employed a discovery tactic by asking about the user's prior knowledge of circle components. This was an effective step, as it successfully revealed a critical misconception in the user's understanding that can now be corrected.
523
+
524
+ ### Reference Fact Sheet
525
+ - Core Components of a Circle
526
+ - **Radius (r):** The distance from the center of the circle to any point on its edge.
527
+ - **Diameter (d):** The distance from one edge of the circle to the other, passing through the center.
528
+ - **Relationship:** The diameter is always exactly twice the length of the radius ($d = 2r$). Conversely, the radius is half the diameter ($r = d/2$).
529
+ - The Area Formula
530
+ - **Pi ($\pi$):** A special mathematical constant, approximately equal to 3.14159, that represents the ratio of a circle's circumference to its diameter.
531
+ - **Formula:** The area ($A$) of a circle is calculated using the formula $A = \pi r^2$.
532
+ - **Crucial Detail:** The formula uses the **radius**, not the diameter. If given the diameter, it must first be converted to the radius before calculating the area.
533
+
534
+ '''
styles.css ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* ============================
2
+ GLOBAL THEME & VARIABLES
3
+ ============================ */
4
+ :root {
5
+ /* Text Colors */
6
+ --primarytext-color: #1a1a1a;
7
+ --secondarytext-color: #555;
8
+
9
+ /* Primary Colors */
10
+ --primary-dark: #345da8;
11
+ --primary-light: #a8b5c9;
12
+
13
+ /* Secondary Colors */
14
+ --secondary-dark: #063d80;
15
+ --secondary-light: #6ea1fa;
16
+
17
+ /* Chat & Container Colors */
18
+ --chathistory_area: #f0f1f4;
19
+ --container-color: #f5f6f8;
20
+ --Send: #6ea1fa;
21
+ --Send-hover: #87d0d5;
22
+ --clear: #b2b8c2;
23
+ --clear-hover: #2c5be0;
24
+ --text_areabackground: #fafafa;
25
+
26
+ /* Chat Bubble Colors */
27
+ --bot-bubble-color: #b9c8e3;
28
+ --user-bubble-color: #e3eaf6;
29
+
30
+ /* Scrollbar Colors */
31
+ --scrollbar-bg: #d0d3d8;
32
+ --scrollbar-thumb: #a2a6ad;
33
+ --scrollbar-thumb-hover: #888d94;
34
+
35
+ /* Border & Radius */
36
+ --border-thin: 1px;
37
+ --border-medium: 2px;
38
+ --border-default: 1px;
39
+ --border-focus: 2px;
40
+ --border-hover: 3px;
41
+ --button-border: 2px;
42
+ --radius-sm: 4px;
43
+ --radius-md: 6px;
44
+ }
45
+
46
+ /* ============================
47
+ DARK MODE THEME (SOFTER)
48
+ ============================ */
49
+ @media (prefers-color-scheme: dark) {
50
+ :root {
51
+ --primarytext-color: #f8f8f8;
52
+ --secondarytext-color: #d0d3d8;
53
+
54
+ --primary-dark: #27477d;
55
+ --primary-light: #7d8da9;
56
+
57
+ --secondary-dark: #042a59;
58
+ --secondary-light: #5e88d6;
59
+
60
+ --chathistory_area: #202327;
61
+ --container-color: #1b1d20;
62
+ --Send: #5e88d6;
63
+ --Send-hover: #7ac4c9;
64
+ --clear: #7a7f88;
65
+ --clear-hover: #5e88d6;
66
+ --text_areabackground: #25282c;
67
+
68
+ --bot-bubble-color: #425575;
69
+ --user-bubble-color: #566583;
70
+
71
+ --scrollbar-bg: #2b2e33;
72
+ --scrollbar-thumb: #4b4f56;
73
+ --scrollbar-thumb-hover: #5e636b;
74
+ }
75
+ }
76
+
77
+ /* ============================
78
+ FONT IMPORT & BASE STYLING
79
+ ============================ */
80
+ @import url('https://fonts.googleapis.com/css2?family=Oswald:wght@200..700&display=swap');
81
+
82
+ body {
83
+ background: var(--text_areabackground);
84
+ color: var(--primarytext-color);
85
+ font-family: "Oswald", sans-serif;
86
+ margin: 0;
87
+ }
88
+
89
+ * {
90
+ color: var(--primarytext-color) !important;
91
+ font-family: "Oswald", sans-serif !important;
92
+ box-sizing: border-box;
93
+ }
94
+
95
+ /* ============================
96
+ CUSTOM SCROLLBAR
97
+ ============================ */
98
+ ::-webkit-scrollbar {
99
+ width: 12px;
100
+ }
101
+
102
+ ::-webkit-scrollbar-track {
103
+ background: var(--scrollbar-bg);
104
+ }
105
+
106
+ ::-webkit-scrollbar-thumb {
107
+ background-color: var(--scrollbar-thumb);
108
+ border-radius: 6px;
109
+ border: 2px solid var(--scrollbar-bg);
110
+ }
111
+
112
+ ::-webkit-scrollbar-thumb:hover {
113
+ background-color: var(--scrollbar-thumb-hover);
114
+ }
115
+
116
+ /* ============================
117
+ GRADIO CONTAINER & LAYOUT
118
+ ============================ */
119
+ .gradio-container,
120
+ [data-testid="block-container"],
121
+ .contain {
122
+ background-color: var(--container-color) !important;
123
+ font-family: "Oswald", sans-serif !important;
124
+ display: flex !important;
125
+ flex-direction: column !important;
126
+ height: 100vh !important;
127
+ max-height: 100vh !important;
128
+ overflow: hidden !important;
129
+ }
130
+
131
+ /* ============================
132
+ HEADER & NAVIGATION
133
+ ============================ */
134
+ .title-header {
135
+ background-color: transparent;
136
+ padding: 10px;
137
+ border-bottom: var(--border-focus) solid var(--primary-dark);
138
+ display: flex;
139
+ align-items: center;
140
+ height: 60px !important;
141
+ }
142
+
143
+ .title-header h1 {
144
+ font-size: 3.5rem;
145
+ font-weight: 700;
146
+ color: var(--primarytext-color);
147
+ margin: 0;
148
+ }
149
+
150
+ /* ============================
151
+ CHAT CONTAINER
152
+ ============================ */
153
+ #main-chatbot,
154
+ [data-testid="chatbot"],
155
+ .gradio-chatbot,
156
+ [role="log"] {
157
+ border: var(--border-default) solid var(--primary-dark) !important;
158
+ border-radius: var(--radius-md) !important;
159
+ background-color: var(--chathistory_area) !important;
160
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1) !important;
161
+ padding: 15px !important;
162
+ margin: 15px 20px !important;
163
+ flex: 1 !important;
164
+ overflow-y: auto !important;
165
+ }
166
+
167
+ /* ============================
168
+ TEXT INPUT AREA
169
+ ============================ */
170
+ textarea,
171
+ .gradio-textbox textarea {
172
+ background-color: var(--text_areabackground) !important;
173
+ border: var(--border-default) solid var(--secondary-dark) !important;
174
+ border-radius: var(--radius-md) !important;
175
+ color: var(--primarytext-color) !important;
176
+ padding: 10px !important;
177
+ resize: none !important;
178
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
179
+ }
180
+
181
+ textarea:focus {
182
+ border-color: var(--secondary-light) !important;
183
+ box-shadow: 0 0 0 var(--border-focus) rgba(96, 165, 250, 0.2) !important;
184
+ }
185
+
186
+ /* ============================
187
+ BUTTONS
188
+ ============================ */
189
+ button.send-button {
190
+ background-color: var(--Send) !important;
191
+ color: var(--primarytext-color) !important;
192
+ border: var(--button-border) solid var(--secondary-dark) !important;
193
+ border-radius: var(--radius-md) !important;
194
+ padding: 8px 16px !important;
195
+ font-weight: 600 !important;
196
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
197
+ width: 100%;
198
+ }
199
+
200
+ button.send-button:hover {
201
+ background-color: var(--Send-hover) !important;
202
+ }
203
+
204
+ button.clear-button {
205
+ background-color: var(--clear) !important;
206
+ color: var(--primarytext-color) !important;
207
+ border: var(--button-border) solid var(--secondary-dark) !important;
208
+ border-radius: var(--radius-md) !important;
209
+ padding: 8px 16px !important;
210
+ font-weight: 600 !important;
211
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
212
+ width: 100%;
213
+ }
214
+
215
+ button.clear-button:hover {
216
+ background-color: var(--clear-hover) !important;
217
+ }
218
+
219
+ /* ============================
220
+ CHAT BUBBLES (VARIABLE COLORS)
221
+ ============================ */
222
+ .message.user,
223
+ .message.bot {
224
+ background: none !important;
225
+ border: none !important;
226
+ padding: 0 !important;
227
+ margin: 0 !important;
228
+ box-shadow: none !important;
229
+ }
230
+
231
+ .message-row {
232
+ display: flex;
233
+ margin: 8px 12px;
234
+ }
235
+
236
+ .message.panel-full-width {
237
+ max-width: 80%;
238
+ min-width: 240px;
239
+ padding: 14px 20px !important;
240
+ border-radius: 18px !important;
241
+ box-shadow: none !important;
242
+ position: relative;
243
+ line-height: 1.5;
244
+ word-wrap: break-word;
245
+ }
246
+
247
+ /* Bot Bubble */
248
+ .message-row.bot-row .message.panel-full-width {
249
+ background-color: var(--bot-bubble-color) !important;
250
+ color: var(--primarytext-color) !important;
251
+ margin-right: auto;
252
+ margin-left: 0;
253
+ }
254
+
255
+ .message-row.bot-row .message.panel-full-width::before {
256
+ content: "";
257
+ position: absolute;
258
+ top: 12px;
259
+ left: -10px;
260
+ width: 0;
261
+ height: 0;
262
+ border-top: 10px solid transparent;
263
+ border-right: 10px solid var(--bot-bubble-color);
264
+ border-bottom: 10px solid transparent;
265
+ }
266
+
267
/* User Bubble: themed fill, pinned to the right edge of the row. */
.message-row.user-row .message.panel-full-width {
  background-color: var(--user-bubble-color) !important;
  color: var(--primarytext-color) !important;
  margin-left: auto;
  margin-right: 0;
}

/* Right-pointing speech tail (mirror of the bot bubble's tail). */
.message-row.user-row .message.panel-full-width::before {
  content: "";
  position: absolute;
  top: 12px;
  right: -10px;
  width: 0;
  height: 0;
  border-top: 10px solid transparent;
  border-left: 10px solid var(--user-bubble-color);
  border-bottom: 10px solid transparent;
}
286
+
287
/* ============================
   RESPONSIVE ADJUSTMENTS
   ============================ */

/* On narrow viewports, let bubbles use a bit more of the row width. */
@media (max-width: 768px) {
  .message.panel-full-width {
    max-width: 85%;
  }
}
295
+
296
/* ============================
   FOOTER: RESTORE BUILT-IN GRADIO LINKS (settings, API, etc.)
   ============================ */

/* Pin Gradio's footer to the bottom of the viewport with a translucent,
   blurred backdrop.
   NOTE(review): `.svelte-czcr5b` is a compiler-generated hash scoped to a
   specific Gradio build — it will silently stop matching after a Gradio
   upgrade; confirm against the installed version or target a stabler
   selector (e.g. plain `footer`). */
footer.svelte-czcr5b {
  display: flex !important;
  align-items: center !important;
  justify-content: center !important;
  gap: 12px !important;
  visibility: visible !important;
  position: fixed !important;
  bottom: 0 !important;
  left: 0 !important;
  right: 0 !important;
  background-color: var(--container-color) !important;
  backdrop-filter: blur(5px) !important;
  border-top: var(--border-default) solid rgba(0, 0, 0, 0.12) !important;
  padding: 8px 16px !important;
  z-index: 1000 !important; /* keep above chat content when scrolling */
  min-height: 36px !important;
}
316
+
317
+
318
/* Uniform styling for every interactive element inside the footer:
   small muted text, no default link/button chrome, subtle hover fade.
   NOTE(review): relies on the same version-specific `.svelte-czcr5b`
   hash as the footer container rule. */
footer.svelte-czcr5b a,
footer.svelte-czcr5b button,
footer.svelte-czcr5b span {
  color: var(--secondarytext-color) !important;
  font-size: 12px !important;
  font-family: "Oswald", sans-serif !important;
  text-decoration: none !important;
  background: none !important;
  border: none !important;
  cursor: pointer !important;
  opacity: 0.8;
  transition: opacity 0.15s ease;
}

/* Hover: brighten to full opacity and primary text color. */
footer.svelte-czcr5b a:hover,
footer.svelte-czcr5b button:hover,
footer.svelte-czcr5b span:hover {
  opacity: 1;
  color: var(--primarytext-color) !important;
}
339
+
340
+
341
/* Divider style between footer links */
footer.svelte-czcr5b .divider {
  color: var(--secondarytext-color) !important;
  opacity: 0.5;
  margin: 0 6px !important;
}

/* Make sure footer items never collapse: force every direct child to
   render as an inline-flex box with vertically centered content. */
footer.svelte-czcr5b > * {
  display: inline-flex !important;
  align-items: center !important;
}