.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore DELETED
@@ -1,40 +0,0 @@
1
- # .gitignore
2
- # Database files
3
- *.db
4
- *.db-journal
5
- *.db-shm
6
- *.db-wal
7
- mimir_analytics.db
8
-
9
- # Python
10
- __pycache__/
11
- *.py[cod]
12
- *$py.class
13
- *.so
14
- .Python
15
- *.egg-info/
16
- dist/
17
- build/
18
-
19
- # Environment
20
- .env
21
- .venv/
22
- venv/
23
- ENV/
24
-
25
- # IDE
26
- .vscode/
27
- .idea/
28
- *.swp
29
- *.swo
30
- *~
31
-
32
- # OS
33
- .DS_Store
34
- Thumbs.db
35
-
36
- # Logs
37
- *.log
38
-
39
- # Git
40
- .git/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile DELETED
@@ -1,13 +0,0 @@
1
- FROM python:3.10
2
-
3
- WORKDIR /app
4
- COPY requirements.txt .
5
- RUN pip install -r requirements.txt
6
-
7
- COPY . .
8
-
9
- # Force unbuffered Python output
10
- ENV PYTHONUNBUFFERED=1
11
-
12
- # Run with explicit python -u flag
13
- CMD ["python", "-u", "app.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LightEval_Mimir.py DELETED
@@ -1,109 +0,0 @@
1
- # LightEval_Mimir.py
2
- '''This document outlines hte LightEval setu for tracking performance metrics of Mimir, to be sent to the trackio page for viszulization.'''
3
-
4
- # Imports
5
- from lighteval.metrics.metrics_sample import BertScore, ROUGE
6
- from lighteval.tasks.requests import Doc
7
-
8
- async def evaluate_educational_quality(user_query, response, thread_id):
9
- """Dynamic evaluation using LightEval metrics"""
10
- # Create ephemeral task for this turn
11
- doc = Doc(
12
- task_name=f"turn_{thread_id}",
13
- query=user_query,
14
- choices=[response],
15
- gold_index=-1, # No ground truth initially
16
- specific_output=response
17
- )
18
-
19
- # Use BertScore for semantic quality
20
- bert_score = BertScore().compute(doc)
21
-
22
- # Custom educational coherence metric
23
- educational_indicators = {
24
- 'has_examples': 'example' in response.lower(),
25
- 'structured_explanation': '##' in response or '1.' in response,
26
- 'appropriate_length': 100 < len(response) < 1500,
27
- 'encourages_learning': any(phrase in response.lower()
28
- for phrase in ['practice', 'try', 'consider', 'think about'])
29
- }
30
-
31
- return {
32
- 'semantic_quality': bert_score,
33
- 'educational_score': sum(educational_indicators.values()) / len(educational_indicators),
34
- 'response_time': time.time() - start_time
35
- }
36
-
37
- def track_rag_performance(query, retrieved_docs, used_in_response):
38
- """Evaluate RAG retrieval quality"""
39
- from lighteval.metrics.utils.metric_utils import SampleLevelMetric
40
-
41
- # Track retrieval-to-response alignment
42
- retrieval_relevance = calculate_relevance(query, retrieved_docs)
43
- retrieval_usage = len(used_in_response) / len(retrieved_docs) if retrieved_docs else 0
44
-
45
- # Log to trackio with LightEval structure
46
- metric_payload = {
47
- "evaluation_id": str(uuid.uuid4()),
48
- "task": "rag_retrieval",
49
- "metrics": {
50
- "retrieval_relevance": retrieval_relevance,
51
- "retrieval_usage_rate": retrieval_usage,
52
- "num_docs_retrieved": len(retrieved_docs)
53
- },
54
- "metadata": {
55
- "query": query[:100],
56
- "sources": [doc.metadata.get('source') for doc in retrieved_docs]
57
- }
58
- }
59
-
60
- send_evaluation_to_trackio(metric_payload)
61
-
62
- def evaluate_prompt_classification(predicted_mode, actual_conversation_outcome, thread_id):
63
- """Track prompt classifier accuracy in production"""
64
-
65
- # Did the predicted mode lead to successful interaction?
66
- success_indicators = {
67
- 'discovery_mode': lambda outcome: 'clarified_topic' in outcome,
68
- 'teaching_mode': lambda outcome: outcome.get('quality_score', 0) > 3.5,
69
- 'conversational': lambda outcome: outcome.get('user_satisfied', False)
70
- }
71
-
72
- mode_was_correct = success_indicators.get(
73
- predicted_mode,
74
- lambda x: True
75
- )(actual_conversation_outcome)
76
-
77
- # Create LightEval-style evaluation
78
- from lighteval.metrics import Metrics
79
- accuracy_metric = Metrics.ACCURACY if mode_was_correct else 0
80
-
81
- return {
82
- "prompt_classifier_accuracy": accuracy_metric,
83
- "predicted_mode": predicted_mode,
84
- "conversation_length": len(conversation_state)
85
- }
86
-
87
- def process_user_feedback(response_id, feedback_type, conversation_state):
88
- """Convert user feedback to LightEval ground truth"""
89
-
90
- last_exchange = {
91
- "query": conversation_state[-2]["content"], # User's question
92
- "response": conversation_state[-1]["content"], # Agent's response
93
- "gold_index": 0 if feedback_type == "thumbs_up" else -1
94
- }
95
-
96
- # Create retrospective evaluation with ground truth
97
- from lighteval.tasks.requests import Doc
98
- doc = Doc(
99
- task_name="user_feedback_eval",
100
- query=last_exchange["query"],
101
- choices=[last_exchange["response"]],
102
- gold_index=last_exchange["gold_index"]
103
- )
104
-
105
- # Now you have ground truth for accuracy metrics!
106
- accuracy = 1.0 if feedback_type == "thumbs_up" else 0.0
107
-
108
- return {"user_feedback_accuracy": accuracy, "response_id": response_id}
109
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
  title: Mimir
3
  emoji: 📚
4
- colorFrom: indigo
5
- colorTo: blue
6
  sdk: gradio
7
- sdk_version: 5.49.1
8
  app_file: app.py
9
  pinned: true
10
  python_version: '3.10'
@@ -12,461 +12,137 @@ short_description: Advanced prompt engineering for educational AI systems.
12
  thumbnail: >-
13
  https://cdn-uploads.huggingface.co/production/uploads/68700e7552b74a1dcbb2a87e/Z7P8DJ57rc5P1ozA5gwp3.png
14
  hardware: zero-gpu-dynamic
15
- startup_duration_timeout: 30m
 
16
  ---
17
 
18
  # Mimir: Educational AI Assistant
19
- ## Advanced Multi-Agent Architecture & Prompt Engineering Portfolio Project
20
 
21
  ### Project Overview
22
- Mimir demonstrates enterprise-grade AI system design through a sophisticated multi-agent architecture applied to educational technology. The system showcases advanced prompt engineering, intelligent decision-making pipelines, and state-persistent conversation management. Unlike simple single-model implementations, Mimir employs **four specialized agent types** working in concert: a tool decision engine, four parallel routing agents for prompt selection, three preprocessing thinking agents for complex reasoning, and a unified response generator. This architecture prioritizes pedagogical effectiveness through dynamic context assembly, ensuring responses are tailored to each unique educational interaction.
23
 
24
  ***
25
 
26
  ### Technical Architecture
27
-
28
- **Multi-Agent System:**
29
- ```
30
- User Input → Tool Decision Agent → Routing Agents (4x) → Thinking Agents (3x) → Response Agent → Output
31
- ↓ ↓ ↓ ↓
32
- Llama-3.2-3B Llama-3.2-3B (shared) Llama-3.2-3B Llama-3.2-3B
33
- ```
34
-
35
  **Core Technologies:**
36
 
37
- * **Unified Model Architecture**: Llama-3.2-3B-Instruct (3.21B parameters) for all tasks - decision-making, reasoning, and response generation
38
- * **Lazy Loading Strategy**: Model loads on first request and caches for subsequent calls (optimal for ZeroGPU)
39
- * **Custom Orchestration**: Hand-built agent coordination replacing traditional frameworks for precise control and optimization
40
- * **State Management**: Thread-safe global state with dual persistence (SQLite + HuggingFace Datasets)
41
- * **ZeroGPU Integration**: Dynamic GPU allocation with `@spaces.GPU` decorators for efficient resource usage
42
- * **Gradio**: Multi-page interface (Chatbot + Analytics Dashboard)
43
- * **Python**: Advanced backend with 4-bit quantization and streaming
44
 
45
- **Key Frameworks & Libraries:**
46
 
47
- * `transformers` & `accelerate` for model loading and inference optimization
48
- * `bitsandbytes` for 4-bit NF4 quantization (75% memory reduction)
49
- * `peft` for Parameter-Efficient Fine-Tuning support
50
- * `spaces` for HuggingFace ZeroGPU integration
51
- * `matplotlib` for dynamic visualization generation
52
- * Custom state management system with SQLite and dataset backup
53
 
54
  ***
55
 
56
- ### Advanced Agent Architecture
57
-
58
- #### Agent Pipeline Overview
59
- The system processes each user interaction through a sophisticated four-stage pipeline, with each stage making intelligent decisions that shape the final response.
60
-
61
- #### Stage 1: Tool Decision Agent
62
- **Purpose**: Determines if visualization tools enhance learning
63
-
64
- **Model**: Llama-3.2-3B-Instruct (4-bit NF4 quantized)
65
-
66
- **Prompt Engineering**:
67
- * Highly constrained binary decision prompt (YES/NO only)
68
- * Explicit INCLUDE/EXCLUDE criteria for educational contexts
69
- * Zero-shot classification with educational domain knowledge
70
-
71
- **Decision Criteria**:
72
- ```
73
- INCLUDE: Mathematical functions, data analysis, chart interpretation,
74
- trend visualization, proportional relationships
75
-
76
- EXCLUDE: Greetings, definitions, explanations without data
77
- ```
78
-
79
- **Output**: Boolean flag activating `TOOL_USE_ENHANCEMENT` prompt segment
80
-
81
- ---
82
-
83
- #### Stage 2: Prompt Routing Agents (4 Specialized Agents)
84
- **Purpose**: Intelligent prompt segment selection through parallel analysis
85
-
86
- **Model**: Shared Llama-3.2-3B-Instruct instance (memory efficient)
87
 
88
- **Agent Specializations**:
 
 
89
 
90
- 1. **Agent 1 - Practice Question Detector**
91
- - Analyzes conversation context for practice question opportunities
92
- - Considers user's expressed understanding and learning progression
93
- - Activates: `STRUCTURE_PRACTICE_QUESTIONS`
94
 
95
- 2. **Agent 2 - Discovery Mode Classifier**
96
- - Dual-classification: vague input detection + understanding assessment
97
- - Returns: `VAUGE_INPUT`, `USER_UNDERSTANDING`, or neither
98
- - Enables guided discovery and clarification strategies
99
 
100
- 3. **Agent 3 - Follow-up Assessment Agent**
101
- - Detects if user is responding to previous practice questions
102
- - Analyzes conversation history for grading opportunities
103
- - Activates: `PRACTICE_QUESTION_FOLLOWUP` (triggers grading mode)
104
 
105
- 4. **Agent 4 - Teaching Mode Assessor**
106
- - Evaluates need for direct instruction vs. structured practice
107
- - Multi-output agent (can activate multiple prompts)
108
- - Activates: `GUIDING_TEACHING`, `STRUCTURE_PRACTICE_QUESTIONS`
109
 
110
- **Prompt Engineering Innovation**:
111
- * Each agent uses a specialized system prompt with clear decision criteria
112
- * Structured output formats for reliable parsing
113
- * Context-aware analysis incorporating full conversation history
114
- * Sequential execution prevents decision conflicts
115
 
116
- ---
 
117
 
118
- #### Stage 3: Thinking Agents (Preprocessing Layer)
119
- **Purpose**: Generate reasoning context before final response (CoT/ToT)
120
-
121
- **Model**: Llama-3.2-3B-Instruct (shared instance)
122
-
123
- **Agent Specializations**:
124
-
125
- 1. **Math Thinking Agent**
126
- - **Method**: Tree-of-Thought reasoning for mathematical problems
127
- - **Activation**: When `LATEX_FORMATTING` is active
128
- - **Output Structure**:
129
- ```
130
- Key Terms → Principles → Formulas → Step-by-Step Solution → Summary
131
- ```
132
- - **Complexity Routing**: Decision tree determines detail level (1A: basic, 1B: complex)
133
-
134
- 2. **Question/Answer Design Agent**
135
- - **Method**: Chain-of-Thought for practice question formulation
136
- - **Activation**: When `STRUCTURE_PRACTICE_QUESTIONS` is active
137
- - **Formatted Inputs**: Tool context, LaTeX guidelines, practice question templates
138
- - **Output**: Question design, data formatting, answer bank generation
139
-
140
- 3. **Reasoning Thinking Agent**
141
- - **Method**: General Chain-of-Thought preprocessing
142
- - **Activation**: When tools, follow-ups, or teaching mode active
143
- - **Output Structure**:
144
- ```
145
- User Knowledge Summary → Understanding Analysis →
146
- Previous Actions → Reference Fact Sheet
147
- ```
148
-
149
- **Prompt Engineering Innovation**:
150
- * Thinking agents produce **context for ResponseAgent**, not final output
151
- * Outputs are invisible to user but inform response quality
152
- * Tree-of-Thought (ToT) for math: explores multiple solution paths
153
- * Chain-of-Thought (CoT) for others: step-by-step reasoning traces
154
 
155
- ---
 
 
156
 
157
- #### Stage 4: Response Agent (Educational Response Generation)
158
- **Purpose**: Generate pedagogically sound final response
159
-
160
- **Model**: Llama-3.2-3B-Instruct (same shared instance)
161
-
162
- **Configuration**:
163
- * 4-bit NF4 quantization (BitsAndBytes)
164
- * Mixed precision BF16 inference
165
- * Accelerate integration for distributed computation
166
- * 128K context window
167
- * Multilingual support (8 languages)
168
-
169
- **Prompt Assembly Process**:
170
- 1. **Core Identity**: Always included (defines Mimir persona)
171
- 2. **Logical Expressions**: Regex-triggered prompts (e.g., math keywords → `LATEX_FORMATTING`)
172
- 3. **Agent-Selected Prompts**: Dynamic assembly based on routing agent decisions
173
- 4. **Context Integration**: Tool outputs, thinking agent outputs, conversation history
174
- 5. **Complete Prompt**: All segments joined with proper formatting
175
-
176
- **Dynamic Prompt Library** (11 segments):
177
- ```
178
- Core: CORE_IDENTITY (always)
179
- Formatting: GENERAL_FORMATTING (always), LATEX_FORMATTING (math)
180
- Discovery: VAUGE_INPUT, USER_UNDERSTANDING
181
- Teaching: GUIDING_TEACHING
182
- Practice: STRUCTURE_PRACTICE_QUESTIONS, PRACTICE_QUESTION_FOLLOWUP
183
- Tool: TOOL_USE_ENHANCEMENT
184
- ```
185
-
186
- **Response Post-Processing**:
187
- * Artifact cleanup (remove special tokens)
188
- * Intelligent truncation at logical breakpoints
189
- * Sentence integrity preservation
190
- * Quality validation gates
191
- * Word-by-word streaming for UX
192
 
193
- ---
 
194
 
195
- ### Model Specifications
196
-
197
- **Llama-3.2-3B-Instruct Details:**
198
- * **Parameters**: 3.21 billion
199
- * **Architecture**: Optimized transformer with Grouped-Query Attention (GQA)
200
- * **Training Data**: 9 trillion tokens (December 2023 cutoff)
201
- * **Context Length**: 128,000 tokens
202
- * **Languages**: English, German, French, Italian, Portuguese, Hindi, Spanish, Thai
203
- * **Quantization**: 4-bit NF4 (~1GB VRAM)
204
- * **Training Method**: Knowledge distillation from Llama 3.1 8B/70B + SFT + RLHF
205
-
206
- **Why Single Model Architecture:**
207
- * ✅ **Consistency**: Same reasoning style across all agents
208
- * ✅ **Memory Efficient**: One model, shared instance (~1GB total)
209
- * ✅ **Instruction-Tuned**: Optimized for educational dialogue
210
- * ✅ **Fast Inference**: 3B parameters = quick responses
211
- * ✅ **ZeroGPU Friendly**: Small enough for dynamic allocation
212
- * ✅ **128K Context**: Can handle long educational conversations
213
 
214
- ---
 
215
 
216
- ### Prompt Engineering Techniques Demonstrated
217
 
218
- #### 1. Hierarchical Prompt Architecture
219
- **Three-Layer System**:
220
- - **Agent System Prompts**: Specialized instructions for each agent type
221
- - **Response Prompt Segments**: Modular components dynamically assembled
222
- - **Thinking Prompts**: Preprocessing templates for reasoning generation
223
-
224
- **Innovation**: Separates decision-making logic from response generation, enabling precise control over AI behavior at each pipeline stage.
225
-
226
- #### 2. Per-Turn Prompt State Management
227
- **PromptStateManager**:
228
- ```python
229
- # Reset at turn start - clean slate
230
- prompt_state.reset() # All 11 prompts → False
231
-
232
- # Agents activate relevant prompts
233
- prompt_state.update("LATEX_FORMATTING", True)
234
- prompt_state.update("GUIDING_TEACHING", True)
235
-
236
- # Assemble only active prompts
237
- active_prompts = prompt_state.get_active_response_prompts()
238
- # Returns: ["CORE_IDENTITY", "GENERAL_FORMATTING",
239
- # "LATEX_FORMATTING", "GUIDING_TEACHING"]
240
- ```
241
-
242
- **Benefits**:
243
- - No prompt pollution between turns
244
- - Context-appropriate responses every time
245
- - Traceable decision-making for debugging
246
-
247
- #### 3. Logical Expression System
248
- **Regex-Based Automatic Activation**:
249
- ```python
250
- # Math keyword detection
251
- math_regex = r'\b(calculus|algebra|equation|solve|derivative)\b'
252
- if re.search(math_regex, user_input, re.IGNORECASE):
253
- prompt_state.update("LATEX_FORMATTING", True)
254
- ```
255
-
256
- **Hybrid Approach**: Combines rule-based triggers with LLM decision-making for optimal reliability.
257
-
258
- #### 4. Constraint-Based Agent Prompting
259
- **Tool Decision Example**:
260
- ```
261
- System Prompt: Analyze query and determine if visualization needed.
262
-
263
- Output Format: YES or NO (nothing else)
264
-
265
- INCLUDE if: mathematical functions, data analysis, trends
266
- EXCLUDE if: greetings, simple definitions, no data
267
- ```
268
-
269
- **Result**: Reliable, parseable outputs from agents without complex post-processing.
270
-
271
- #### 5. Chain-of-Thought & Tree-of-Thought Preprocessing
272
- **CoT for Sequential Reasoning**:
273
- ```
274
- Step 1: Assess topic →
275
- Step 2: Identify user understanding →
276
- Step 3: Previous actions →
277
- Step 4: Reference facts
278
- ```
279
-
280
- **ToT for Mathematical Reasoning**:
281
- ```
282
- Question Type Assessment →
283
- Branch 1A (Simple): Minimal steps
284
- Branch 1B (Complex): Full derivation with principles
285
- ```
286
-
287
- **Innovation**: Thinking agents generate rich context that guides ResponseAgent to higher-quality outputs.
288
-
289
- #### 6. Academic Integrity by Design
290
- **Embedded in Core Prompts**:
291
- * "Do not provide full solutions - guide through processes instead"
292
- * "Break problems into conceptual components"
293
- * "Ask clarifying questions about their understanding"
294
- * Subject-specific guidelines (Math: explain concepts, not compute)
295
-
296
- **Follow-up Grading**:
297
- * Agent 3 detects practice question responses
298
- * `PRACTICE_QUESTION_FOLLOWUP` prompt activates
299
- * Automated assessment with constructive feedback
300
-
301
- #### 7. Multi-Modal Response Generation
302
- **Tool Integration**:
303
- ```python
304
- # Tool decision → JSON generation → matplotlib rendering → base64 encoding
305
- Create_Graph_Tool(
306
- data={"Week 1": 120, "Week 2": 155, ...},
307
- plot_type="line",
308
- title="Crop Yield Analysis",
309
- educational_context="Visualizes growth trend over time"
310
- )
311
- ```
312
-
313
- **Result**: In-memory graph generation with educational context, embedded directly in response.
314
 
315
- ---
316
 
317
- ### State Management & Persistence
 
 
 
 
 
318
 
319
- #### GlobalStateManager Architecture
320
- **Dual-Layer Persistence**:
321
- 1. **SQLite Database**: Fast local access, immediate writes
322
- 2. **HuggingFace Dataset**: Cloud backup, hourly sync
323
 
324
- **State Categories**:
325
- ```python
326
- - Conversation State: Full chat history + agent context
327
- - Prompt State: Per-turn activation (resets each interaction)
328
- - Analytics State: Metrics, dashboard data, export history
329
- - Evaluation State: Quality scores, classifier accuracy, user feedback
330
- - ML Model Cache: Loaded model for reuse across sessions
331
- ```
332
 
333
- **Thread Safety**: All state operations protected by `threading.Lock()`
 
 
 
 
334
 
335
- **Cleanup Strategy**:
336
- - Automatic cleanup every 60 minutes
337
- - Remove sessions older than 24 hours
338
- - Prevents memory leaks in long-running deployments
339
 
340
- ---
 
341
 
342
- ### Model Loading & Optimization Strategy
343
-
344
- #### Two-Stage Lazy Loading Pipeline
345
-
346
- **Stage 1: Build Time (Docker) - Optional Pre-caching**
347
- ```yaml
348
- # preload_from_hub in README.md
349
- preload_from_hub:
350
- - meta-llama/Llama-3.2-3B-Instruct
351
- ```
352
- * Downloads model weights during Docker build
353
- * Cached in HuggingFace hub cache directory
354
- * Reduces first-request latency (no download needed)
355
- * **Optional but recommended** for production deployments
356
-
357
- **Stage 2: Runtime (Lazy Loading with Automatic Caching)**
358
- ```python
359
- # model_manager.py - LazyLlamaModel class
360
- def _load_model(self):
361
- """Load on first generate() call"""
362
- if self.model is not None:
363
- return # Already loaded - reuse cached instance
364
-
365
- # First call: Load with 4-bit quantization
366
- self.model = AutoModelForCausalLM.from_pretrained(
367
- "meta-llama/Llama-3.2-3B-Instruct",
368
- quantization_config=quantization_config,
369
- device_map="auto",
370
- )
371
- # Model stays in memory for all future calls
372
-
373
- # All agents share this single instance
374
- @spaces.GPU(duration=120)
375
- def _load_model(self):
376
- # GPU allocated for 120 seconds during first load
377
- # Then reused without re-allocation
378
- ```
379
-
380
- **Loading Flow**:
381
- ```
382
- App starts → Instant startup (no model loading)
383
-
384
- First user request → Triggers model load (~30-60s)
385
- ├─ Download from cache (if preloaded: instant)
386
- ├─ Load with 4-bit quantization
387
- ├─ Create pipeline
388
- └─ Cache in memory
389
-
390
- All subsequent requests → Use cached model (~1s)
391
- ```
392
-
393
- **Memory Optimization**:
394
- - **4-bit NF4 Quantization**: 75% memory reduction
395
- - Llama-3.2-3B: ~6GB → ~1GB VRAM
396
- - **Shared Model Strategy**: ALL agents share one model instance
397
- - **Singleton Pattern**: Thread-safe model caching
398
- - **Device Mapping**: Automatic distribution with ZeroGPU
399
- - **128K Context**: Long conversations without truncation
400
-
401
- **ZeroGPU Integration**:
402
- ```python
403
- @spaces.GPU(duration=120) # Dynamic allocation for first load
404
- def _load_model(self):
405
- # GPU available for 120 seconds
406
- # Loads model once on first request
407
- # Cached instance reused across all agents
408
- # Automatic GPU management by ZeroGPU
409
- ```
410
-
411
- **Performance Characteristics**:
412
- * **First Request**: 30-60 seconds (one-time model load)
413
- - With `preload_from_hub`: 30-40s (just quantization)
414
- - Without preload: 50-60s (download + quantization)
415
- * **Subsequent Requests**: <1 second per agent
416
- * **Memory Footprint**: ~1GB VRAM (persistent)
417
- * **Cold Start**: Instant app startup (model loads on demand)
418
-
419
- **Why Lazy Loading?**
420
- * ✅ **Instant Startup**: App launches immediately
421
- * ✅ **ZeroGPU Optimal**: Perfect for dynamic GPU allocation
422
- * ✅ **Memory Efficient**: Only loads when needed
423
- * ✅ **Cache Persistent**: Stays loaded between requests
424
- * ✅ **Serverless Friendly**: Ideal for HuggingFace Spaces
425
 
426
- ---
427
 
428
- ### Analytics & Evaluation System
429
-
430
- #### Built-In Dashboard
431
- **Real-Time Metrics**:
432
- * Total conversations
433
- * Average response time
434
- * Success rate (quality score >3.5)
435
- * Educational quality scores (ML-evaluated)
436
- * Classifier accuracy rates
437
- * Active sessions count
438
-
439
- **LightEval Integration**:
440
- * BertScore for semantic quality
441
- * ROUGE for response completeness
442
- * Custom educational quality indicators:
443
- - Has examples
444
- - Structured explanation
445
- - Appropriate length
446
- - Encourages learning
447
- - Uses LaTeX (for math)
448
- - Clear sections
449
-
450
- **Exportable Data**:
451
- * JSON export with full metrics
452
- * CSV export of interaction history
453
- * Programmatic access via API
454
 
455
- ---
 
456
 
457
- ### Performance Benchmarks
458
-
459
- **Runtime Performance:**
460
- * **Inference Speed**: 25-40 tokens/second (with ZeroGPU)
461
- * **Memory Usage**: ~1GB VRAM (4-bit quantization)
462
- * **Context Window**: 128K tokens
463
- * **First Request**: ~30-60 seconds (one-time load)
464
- * **Warm Inference**: <1 second per agent
465
- * **Startup Time**: Instant (lazy loading)
466
-
467
- **Llama 3.2 Quality Scores:**
468
- * MMLU: 63.4 (competitive with larger models)
469
- * GSM8K (Math): 73.9
470
- * HumanEval (Coding): 59.3
471
- * Multilingual: 8 languages supported
472
- * Safety: RLHF-aligned for educational use
 
1
  ---
2
  title: Mimir
3
  emoji: 📚
4
+ colorFrom: yellow
5
+ colorTo: yellow
6
  sdk: gradio
7
+ sdk_version: 5.44.1
8
  app_file: app.py
9
  pinned: true
10
  python_version: '3.10'
 
12
  thumbnail: >-
13
  https://cdn-uploads.huggingface.co/production/uploads/68700e7552b74a1dcbb2a87e/Z7P8DJ57rc5P1ozA5gwp3.png
14
  hardware: zero-gpu-dynamic
15
+ hf_oauth: true
16
+ hf_oauth_expiration_minutes: 120
17
  ---
18
 
19
  # Mimir: Educational AI Assistant
20
+ ## Advanced Prompt Engineering Portfolio Project
21
 
22
  ### Project Overview
23
+ Mimir demonstrates sophisticated prompt engineering techniques applied to educational technology, showcasing the implementation of context-aware AI systems that prioritize pedagogical effectiveness over simple answer generation. A key feature is its ability to **dynamically generate custom data visualizations**, a capability governed by an intelligent decision engine that assesses whether a visual aid will enhance the pedagogical explanation. This project exemplifies professional-grade prompt design for educational applications, embodying the role of an educational partner that guides students to discover answers for themselves.
24
 
25
  ***
26
 
27
  ### Technical Architecture
 
 
 
 
 
 
 
 
28
  **Core Technologies:**
29
 
30
+ * **LangChain**: Prompt template management and conversation chain orchestration.
31
+ * **LangGraph**: Orchestrates the application's flow as a state machine (**StateGraph**). It manages the conditional logic for the tool-use decision engine, routing user queries between the LLM, a pre-built **ToolNode** for graph generation, and the final response node.
32
+ * **Gradio**: Full-stack web interface with custom CSS styling.
33
+ * **Hugging Face Inference API**: Model deployment and response generation.
34
+ * **Python**: Backend logic and integration layer.
35
+ * **Matplotlib**: Powers the dynamic, in-memory generation of educational graphs and charts.
 
36
 
37
+ **Key Frameworks:**
38
 
39
+ * `langchain.prompts.ChatPromptTemplate` for dynamic prompt construction.
40
+ * `langchain_huggingface.HuggingFaceEndpoint` for model interface.
41
+ * `langchain.schema` message objects (HumanMessage, AIMessage, SystemMessage).
42
+ * `langgraph.graph.StateGraph` & `langgraph.prebuilt.ToolNode` for building and executing the conditional logic graph.
43
+ * `langgraph.checkpoint.memory.MemorySaver` for persistent conversation state.
 
44
 
45
  ***
46
 
47
+ ### Prompt Engineering Techniques Demonstrated
48
+ #### 1. Unified System Prompt Architecture
49
+ Employs a single, comprehensive system prompt that establishes the AI's core persona as **Mimir, an expert multi-concept tutor**. This foundational prompt meticulously defines the AI's behavior, tone, and pedagogical mission. It integrates:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ * **Core Educational Principles**: A directive to prioritize teaching methodology, foster critical thinking, and provide comprehensive explanations over direct answers.
52
+ * **Defined Persona & Tone**: Specific instructions to maintain an engaging, supportive, and intellectually appropriate tone for high school students, while avoiding fluff and emojis.
53
+ * **Specific Response Guidelines**: Contextual rules for handling different academic tasks, such as explaining concepts in math problems instead of solving them, or discussing research strategies for essays rather than writing them.
54
 
55
+ #### 2. Instructional Design Integration
56
+ The core prompt incorporates evidence-based instructional design principles:
 
 
57
 
58
+ * **Scaffolding**: Breaking complex concepts into manageable components.
59
+ * **Socratic Method**: Guiding discovery rather than providing direct answers.
60
+ * **Metacognitive Strategies**: Teaching learning-how-to-learn approaches.
 
61
 
62
+ #### 3. Academic Integrity Constraints
63
+ Implemented ethical AI guidelines directly into the system prompt:
 
 
64
 
65
+ * Explicit instructions to avoid homework completion.
66
+ * Focus on **process over product delivery**.
67
+ * Critical thinking skill development emphasis.
 
68
 
69
+ #### 4. Two-Stage Tool-Use Prompting
70
+ A sophisticated two-stage prompting strategy governs the use of the `Create_Graph_Tool`:
 
 
 
71
 
72
+ * **Tool-Use Decision Prompt**: A highly constrained template is used by the `Tool_Decision_Engine` to determine whether a tool should be used. This prompt forces a **YES** or **NO** response based on whether a visual aid would significantly enhance learning, using explicit **INCLUDE** and **EXCLUDE** criteria.
73
+ * **Tool-Execution Guidance**: The main system prompt contains separate, explicit instructions on how to use the tool once the decision has been made. It provides the exact **JSON structure** the model must output, including fields like `data`, `plot_type`, and `educational_context`, ensuring the generated graphs are pedagogically sound.
74
 
75
+ ***
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
+ ### Advanced Implementation Features
78
+ #### Intelligent Graphing Tool Integration
79
+ A custom, dynamic visualization system was developed to provide multi-modal educational responses.
80
 
81
+ * **LLM-Powered Analysis**: For relevant queries, a targeted LLM call is made using the specialized YES/NO decision prompt.
82
+ * **Dynamic Visualization Tool (`Create_Graph_Tool`)**: Designed and implemented a custom visualization tool using **matplotlib**. The tool receives a JSON configuration from the LLM and generates high-quality bar, line, or pie charts. The entire process occurs in-memory:
83
+ * The plot is rendered into a `BytesIO` buffer.
84
+ * The image is encoded into a **base64 string**.
85
+ * The final output is an HTML `<img>` tag with the embedded base64 data, which is displayed directly in the chat interface, eliminating the need for file I/O.
86
+ * The tool's docstring provides a clear schema and usage instructions for the LLM, ensuring reliable and pedagogically sound visualizations.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
+ #### Stateful Conversation Management with LangGraph
89
+ Implements persistent, multi-turn conversations using LangGraph's **MemorySaver**. This allows the application's state, including the full message history (`add_messages`), to be saved and resumed, ensuring robust context management even when tool use is involved.
90
 
91
+ #### Response Streaming & Truncation
92
+ * Smart text truncation preserving sentence integrity.
93
+ * Real-time response streaming for improved UX.
94
+ * Error handling and fallback mechanisms.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
+ #### Template Chaining Architecture
97
+ The core logic utilizes **LangChain Expression Language (LCEL)** to pipe inputs through templates, models, and tools.
98
 
99
+ ***
100
 
101
+ ### User Interface Engineering
102
+ * **Gradio Layout & Custom Styling**: The interface is built with `gr.Blocks`, using `gr.Column` and `gr.Row` to structure the main components. A custom `styles.css` file is loaded to apply specific theming, responsive design, and layout rules, moving beyond default Gradio styling for a tailored user experience.
103
+ * **Component Architecture**: Modular Gradio component structure with custom CSS class integration and accessibility-compliant patterns.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
+ ***
106
 
107
+ ### Prompt Engineering Methodologies Applied
108
+ * **Template Parameterization**: Dynamic variable injection for contextual responses.
109
+ * **Persona-Driven Response Generation**: Crafting a detailed persona within the system prompt to guide the AI's tone, style, and pedagogical approach consistently.
110
+ * **Domain-Specific Language Modeling**: Educational vocabulary and pedagogical terminology integration.
111
+ * **Multi-Modal Response Formatting**: Structured output generation with educational formatting.
112
+ * **Agentic Tool Routing**: Designing prompts and logic that enable an AI system to intelligently decide which tool is appropriate for a given task, simulating agent-like behavior.
113
 
114
+ ***
 
 
 
115
 
116
+ ### Professional Applications
117
+ This project demonstrates competency in:
 
 
 
 
 
 
118
 
119
+ * **Enterprise-Grade Prompt Design**: Scalable template and tool-use architecture.
120
+ * **Educational Technology Integration**: Designing AI tutors with robust pedagogical frameworks and dynamic, multi-modal response capabilities.
121
+ * **Ethical AI Implementation**: Academic integrity safeguards and responsible AI practices.
122
+ * **Full-Stack AI Application Development**: End-to-end system implementation.
123
+ * **Intelligent Agent & Tool Development**: Building AI agents that can utilize custom tools to solve complex problems.
124
 
125
+ ***
 
 
 
126
 
127
+ ### Technical Specifications
128
+ **Dependencies:**
129
 
130
+ * **Core ML/AI**: `transformers`, `torch`, `accelerate`
131
+ * **LangChain & LangGraph**: `langgraph`, `langchain-core`, `langchain-community`, `langchain-huggingface`
132
+ * **UI Framework**: `gradio`
133
+ * **Visualization**: `matplotlib`, `plotly`, `pandas`, `numpy`, `scipy`
134
+ * **Utilities**: `python-dotenv`
135
+ * **Monitoring**: `langsmith` (Optional)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
+ **Deployment:**
138
 
139
+ * Hugging Face Spaces compatible.
140
+ * Environment variable configuration for API keys.
141
+ * Production-ready error handling and logging.
142
+
143
+ ***
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
+ ### Results & Impact
146
+ Mimir represents a synthesis of prompt engineering best practices with educational technology requirements. The integration of an intelligent, conditional graphing tool demonstrates the ability to create AI systems that augment and enhance human learning processes, embodying the role of an educational partner who empowers students to succeed through genuine understanding.
147
 
148
+ > **Portfolio Demonstration**: This project evidences advanced prompt engineering capabilities, full-stack AI application development, and domain-specific AI system design suitable for enterprise educational technology environments.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agents.py DELETED
@@ -1,940 +0,0 @@
1
- # agents.py
2
- """
3
- Unified agent architecture for Mimir Educational AI Assistant.
4
-
5
- LAZY-LOADING LLAMA-3.2-3B-INSTRUCT
6
-
7
- Components:
8
- - LazyLlamaModel: Singleton lazy-loading model (loads on first use, cached thereafter)
9
- - ToolDecisionAgent: Uses lazy-loaded Llama for visualization decisions
10
- - PromptRoutingAgents: Uses lazy-loaded Llama for all 4 routing agents
11
- - ThinkingAgents: Uses lazy-loaded Llama for all reasoning (including math)
12
- - ResponseAgent: Uses lazy-loaded Llama for final responses
13
-
14
- Key optimization: Model loads on first generate() call and is cached for all
15
- subsequent requests. Single model architecture with ~1GB memory footprint.
16
- No compile or warmup scripts needed - fully automatic.
17
- """
18
-
19
- import os
20
- import re
21
- import torch
22
- import logging
23
- import time
24
- import subprocess
25
- import threading
26
- from datetime import datetime
27
- from typing import Dict, List, Optional, Tuple, Type
28
- import warnings
29
-
30
- # Setup main logger first
31
- logging.basicConfig(level=logging.INFO)
32
- logger = logging.getLogger(__name__)
33
-
34
- # ============================================================================
35
- # MEMORY PROFILING UTILITIES
36
- # ============================================================================
37
-
38
def log_memory(tag=""):
    """Log current GPU memory usage (allocated/reserved/peak, MB) at INFO level.

    Never raises: any failure while reading CUDA stats is downgraded to a
    warning so profiling can't break the request path.
    """
    try:
        if not torch.cuda.is_available():
            logger.info(f"[{tag}] No CUDA available")
            return
        mb = 1024 ** 2
        allocated = torch.cuda.memory_allocated() / mb
        reserved = torch.cuda.memory_reserved() / mb
        max_allocated = torch.cuda.max_memory_allocated() / mb
        logger.info(f"[{tag}] GPU Memory - Allocated: {allocated:.2f} MB, Reserved: {reserved:.2f} MB, Peak: {max_allocated:.2f} MB")
    except Exception as e:
        logger.warning(f"[{tag}] Error logging GPU memory: {e}")
50
-
51
-
52
def log_nvidia_smi(tag=""):
    """Log system-wide GPU memory (used/total, MiB) as reported by nvidia-smi.

    Failures (binary missing, non-zero exit) are logged as warnings only —
    expected on CPU-only hosts.
    """
    query = [
        'nvidia-smi',
        '--query-gpu=memory.used,memory.total',
        '--format=csv,noheader,nounits',
    ]
    try:
        output = subprocess.check_output(query, encoding='utf-8')
        logger.info(f"[{tag}] NVIDIA-SMI: {output.strip()}")
    except Exception as e:
        logger.warning(f"[{tag}] Error running nvidia-smi: {e}")
59
-
60
-
61
def log_step(step_name, start_time=None):
    """Log a pipeline step and return the current time.

    Without `start_time`, logs "<step> starting...". With a (truthy)
    `start_time`, logs completion with elapsed seconds. The returned
    timestamp can be fed back in as `start_time` for the completion call.
    """
    now = time.time()
    timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]

    if not start_time:
        logger.info(f"[{timestamp}] → {step_name} starting...")
    else:
        duration = now - start_time
        logger.info(f"[{timestamp}] ✓ {step_name} completed in {duration:.2f}s")

    return now
73
-
74
-
75
def profile_generation(model, tokenizer, inputs, **gen_kwargs):
    """Profile wall time (and GPU memory, when available) of model.generate().

    Args:
        model: Model exposing a HF-style .generate().
        tokenizer: Unused here; kept for interface compatibility with callers.
        inputs: Keyword tensors forwarded to generate().
        **gen_kwargs: Extra generation kwargs.

    Returns:
        (outputs, duration_seconds)
    """
    # Guard CUDA-only calls so profiling also works on CPU-only hosts
    # (the original called torch.cuda.* unconditionally and crashed there,
    # unlike log_memory which already guards).
    cuda = torch.cuda.is_available()
    if cuda:
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()

    log_memory("Before generate()")
    start_time = time.time()

    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)

    duration = time.time() - start_time
    peak_memory = (torch.cuda.max_memory_allocated() / 1024**2) if cuda else 0.0

    log_memory("After generate()")
    logger.info(f"Generation completed in {duration:.2f}s. Peak GPU: {peak_memory:.2f} MB")

    return outputs, duration
94
-
95
-
96
- # ============================================================================
97
- # IMPORTS
98
- # ============================================================================
99
-
100
- # Transformers for standard models
101
- from transformers import (
102
- AutoTokenizer,
103
- AutoModelForCausalLM,
104
- BitsAndBytesConfig,
105
- )
106
-
107
# ZeroGPU support: use the real `spaces` decorator on HF Spaces, otherwise
# fall back to a no-op decorator factory so @spaces.GPU(...) stays valid.
try:
    import spaces
    HF_SPACES_AVAILABLE = True
except ImportError:
    HF_SPACES_AVAILABLE = False

    class DummySpaces:
        """No-op stand-in for the `spaces` module outside HF Spaces."""

        @staticmethod
        def GPU(duration=90):
            """Return a pass-through decorator mirroring spaces.GPU(duration=...)."""
            return lambda func: func

    spaces = DummySpaces()
120
-
121
- # Accelerate
122
- from accelerate import Accelerator
123
- from accelerate.utils import set_seed
124
-
125
- # LangChain Core for proper message handling
126
- from langchain_core.runnables import Runnable
127
- from langchain_core.runnables.utils import Input, Output
128
- from langchain_core.messages import SystemMessage, HumanMessage
129
-
130
- # Import ALL prompts from prompt library
131
- from prompt_library import (
132
- # System prompts
133
- CORE_IDENTITY,
134
- TOOL_DECISION,
135
- agent_1_system,
136
- agent_2_system,
137
- agent_3_system,
138
- agent_4_system,
139
-
140
- # Thinking agent system prompts
141
- MATH_THINKING,
142
- QUESTION_ANSWER_DESIGN,
143
- REASONING_THINKING,
144
-
145
- # Response agent prompts (dynamically applied)
146
- VAUGE_INPUT,
147
- USER_UNDERSTANDING,
148
- GENERAL_FORMATTING,
149
- LATEX_FORMATTING,
150
- GUIDING_TEACHING,
151
- STRUCTURE_PRACTICE_QUESTIONS,
152
- PRACTICE_QUESTION_FOLLOWUP,
153
- TOOL_USE_ENHANCEMENT,
154
- )
155
-
156
- # ============================================================================
157
- # MODEL MANAGER - LAZY LOADING
158
- # ============================================================================
159
- # Import the lazy-loading Llama-3.2-3B model manager
160
- from model_manager import get_model as get_shared_llama, LazyLlamaModel as LlamaSharedAgent
161
-
162
- # Backwards compatibility aliases
163
- get_shared_mistral = get_shared_llama
164
- MistralSharedAgent = LlamaSharedAgent
165
-
166
- # ============================================================================
167
- # CONFIGURATION
168
- # ============================================================================
169
-
170
- CACHE_DIR = "/tmp/compiled_models"
171
- HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
172
-
173
- # Suppress warnings
174
- warnings.filterwarnings("ignore", category=UserWarning)
175
- warnings.filterwarnings("ignore", category=FutureWarning)
176
-
177
- # Model info (for logging/diagnostics)
178
- LLAMA_MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"
179
-
180
-
181
def check_model_cache() -> Dict[str, bool]:
    """Report model readiness (legacy compatibility shim).

    The lazy-loading Llama manager needs no precompilation, so every
    entry is unconditionally True.
    """
    logger.info("✓ Llama-3.2-3B uses lazy loading (loads on first generate() call)")
    return {
        "llama": True,  # Lazy-loaded on first use
        "all_compiled": True,
    }


# Call at module load
_cache_status = check_model_cache()
log_memory("Module load complete")
196
-
197
-
198
- # ============================================================================
199
- # TOOL DECISION AGENT
200
- # ============================================================================
201
-
202
class ToolDecisionAgent:
    """
    Decides whether visualization/graphing tools should be used for a query.

    Backed by the lazy-loaded shared Llama-3.2-3B: the model loads on the
    first generate() call and is cached for subsequent requests.

    decide() returns a boolean (True = use tools, False = skip tools).
    """

    def __init__(self):
        """Bind the shared lazy-loaded Llama model."""
        self.model = get_shared_llama()
        logger.info("ToolDecisionAgent initialized (using lazy-loaded Llama)")

    def decide(self, user_query: str, conversation_history: List[Dict]) -> bool:
        """
        Decide if graphing tools should be used.

        Args:
            user_query: Current user message.
            conversation_history: Full conversation context (role/content dicts).

        Returns:
            bool: True if tools should be used; False on any model error.
        """
        logger.info("→ ToolDecisionAgent: Analyzing query for tool usage")

        # Only the most recent turns matter for the YES/NO decision.
        recent = conversation_history[-3:]
        context = "\n".join(f"{msg['role']}: {msg['content']}" for msg in recent)

        analysis_prompt = (
            f"Previous conversation:\n{context}\n\n"
            f"Current query: {user_query}\n\n"
            "Should visualization tools (graphs, charts) be used?"
        )

        try:
            started = time.time()
            response = self.model.generate(
                system_prompt=TOOL_DECISION,
                user_message=analysis_prompt,
                max_tokens=10,
                temperature=0.1,
            )
            decision_time = time.time() - started

            # The decision prompt constrains the model to YES/NO output.
            decision = "YES" in response.upper()
            logger.info(f"✓ ToolDecision: {'USE TOOLS' if decision else 'NO TOOLS'} ({decision_time:.2f}s)")
            return decision
        except Exception as e:
            logger.error(f"ToolDecisionAgent error: {e}")
            return False  # Default: no tools
267
-
268
-
269
- # ============================================================================
270
- # PROMPT ROUTING AGENTS (4 Specialized Agents)
271
- # ============================================================================
272
-
273
class PromptRoutingAgents:
    """
    Four specialized agents for prompt segment selection.
    All share the same lazy-loaded Llama-3.2-3B instance for efficiency.

    Agents:
        1. Practice Question Detector
        2. Discovery Mode Classifier
        3. Follow-up Assessment
        4. Teaching Mode Assessor
    """

    def __init__(self):
        """Bind the shared Llama model used by all four agents."""
        self.model = get_shared_llama()
        logger.info("PromptRoutingAgents initialized (4 agents, shared Llama)")

    @staticmethod
    def _format_context(conversation_history: List[Dict], turns: int) -> str:
        """Render the last `turns` messages as 'role: content' lines (shared by agents 1/2/4)."""
        return "\n".join(
            f"{msg['role']}: {msg['content']}"
            for msg in conversation_history[-turns:]
        )

    def agent_1_practice_question(self, user_query: str, conversation_history: List[Dict]) -> bool:
        """Agent 1: Detect if practice questions should be generated. False on error."""
        logger.info("→ Agent 1: Analyzing for practice question opportunity")

        context = self._format_context(conversation_history, 4)
        analysis_prompt = f"""Conversation:
{context}

New query: {user_query}

Should I create practice questions?"""

        try:
            response = self.model.generate(
                system_prompt=agent_1_system,
                user_message=analysis_prompt,
                max_tokens=10,
                temperature=0.1,
            )
            decision = "YES" in response.upper()
            logger.info(f"✓ Agent 1: {'PRACTICE QUESTIONS' if decision else 'NO PRACTICE'}")
            return decision
        except Exception as e:
            logger.error(f"Agent 1 error: {e}")
            return False

    def agent_2_discovery_mode(self, user_query: str, conversation_history: List[Dict]) -> Tuple[bool, bool]:
        """Agent 2: Classify vague input and understanding level.

        Returns:
            (is_vague, low_understanding); (False, False) on error.
        """
        logger.info("→ Agent 2: Classifying discovery mode")

        context = self._format_context(conversation_history, 3)
        analysis_prompt = f"""Conversation:
{context}

Query: {user_query}

Classification:
1. Is input vague? (VAGUE/CLEAR)
2. Understanding level? (LOW/MEDIUM/HIGH)"""

        try:
            response = self.model.generate(
                system_prompt=agent_2_system,
                user_message=analysis_prompt,
                max_tokens=20,
                temperature=0.1,
            )
            # Keyword parse of the constrained classification output.
            vague = "VAGUE" in response.upper()
            low_understanding = "LOW" in response.upper()
            logger.info(f"✓ Agent 2: Vague={vague}, LowUnderstanding={low_understanding}")
            return vague, low_understanding
        except Exception as e:
            logger.error(f"Agent 2 error: {e}")
            return False, False

    def agent_3_followup_assessment(self, user_query: str, conversation_history: List[Dict]) -> bool:
        """Agent 3: Detect if the user is responding to practice questions. False on error."""
        logger.info("→ Agent 3: Checking for practice question follow-up")

        # Need at least one prior exchange before a follow-up is possible.
        if len(conversation_history) < 2:
            return False

        last_bot_msg = None
        for msg in reversed(conversation_history):
            if msg['role'] == 'assistant':
                last_bot_msg = msg['content']
                break
        if not last_bot_msg:
            return False

        # Cheap lexical screen before spending an LLM call.
        has_practice = any(marker in last_bot_msg.lower() for marker in [
            "practice", "try this", "solve", "calculate", "what is", "question"
        ])
        if not has_practice:
            return False

        analysis_prompt = f"""Previous message (from me):
{last_bot_msg[:500]}

User response:
{user_query}

Is user answering a practice question?"""

        try:
            response = self.model.generate(
                system_prompt=agent_3_system,
                user_message=analysis_prompt,
                max_tokens=10,
                temperature=0.1,
            )
            is_followup = "YES" in response.upper()
            logger.info(f"✓ Agent 3: {'GRADING MODE' if is_followup else 'NOT FOLLOWUP'}")
            return is_followup
        except Exception as e:
            logger.error(f"Agent 3 error: {e}")
            return False

    def agent_4_teaching_mode(self, user_query: str, conversation_history: List[Dict]) -> Tuple[bool, bool]:
        """Agent 4: Assess teaching vs practice mode.

        Returns:
            (needs_teaching, needs_practice); (False, False) on error.
        """
        logger.info("→ Agent 4: Assessing teaching mode")

        context = self._format_context(conversation_history, 3)
        analysis_prompt = f"""Conversation:
{context}

Query: {user_query}

Assessment:
1. Need direct teaching? (TEACH/PRACTICE)
2. Create practice questions? (YES/NO)"""

        try:
            response = self.model.generate(
                system_prompt=agent_4_system,
                user_message=analysis_prompt,
                max_tokens=15,
                temperature=0.1,
            )
            teaching = "TEACH" in response.upper()
            # NOTE(review): "PRACTICE" here also matches the answer to Q1,
            # so practice is True whenever the model answers PRACTICE there —
            # presumably intentional; confirm against prompt_library.
            practice = "YES" in response.upper() or "PRACTICE" in response.upper()
            logger.info(f"✓ Agent 4: Teaching={teaching}, Practice={practice}")
            return teaching, practice
        except Exception as e:
            logger.error(f"Agent 4 error: {e}")
            return False, False

    def process(
        self,
        user_input: str,
        tool_used: bool = False,
        conversation_history: Optional[List[Dict]] = None
    ) -> Tuple[str, str]:
        """
        Unified process method — runs all 4 routing agents sequentially.

        Returns:
            Tuple[str, str]: (response_prompts, thinking_prompts). The second
            element is always "" — thinking prompts are decided elsewhere.
        """
        if conversation_history is None:
            conversation_history = []

        response_prompts: List[str] = []

        # Agent 1: Practice Questions
        if self.agent_1_practice_question(user_input, conversation_history):
            response_prompts.append("STRUCTURE_PRACTICE_QUESTIONS")

        # Agent 2: Discovery Mode
        is_vague, low_understanding = self.agent_2_discovery_mode(user_input, conversation_history)
        if is_vague:
            response_prompts.append("VAUGE_INPUT")
        if low_understanding:
            response_prompts.append("USER_UNDERSTANDING")

        # Agent 3: Follow-up Assessment
        if self.agent_3_followup_assessment(user_input, conversation_history):
            response_prompts.append("PRACTICE_QUESTION_FOLLOWUP")

        # Agent 4: Teaching Mode (needs_practice is currently unused here)
        needs_teaching, needs_practice = self.agent_4_teaching_mode(user_input, conversation_history)
        if needs_teaching:
            response_prompts.append("GUIDING_TEACHING")

        # Always add base formatting
        response_prompts.extend(["GENERAL_FORMATTING", "LATEX_FORMATTING"])

        # Tool enhancement if used
        if tool_used:
            response_prompts.append("TOOL_USE_ENHANCEMENT")

        return "\n".join(response_prompts), ""
514
-
515
- # ============================================================================
516
- # THINKING AGENTS (Preprocessing Layer)
517
- # ============================================================================
518
-
519
class ThinkingAgents:
    """
    Generates reasoning context before the final response.
    Uses the shared lazy-loaded Llama-3.2-3B for all thinking (including math).

    Agents:
        1. Math Thinking (Tree-of-Thought)
        2. Q&A Design (Chain-of-Thought)
        3. General Reasoning (Chain-of-Thought)
    """

    def __init__(self):
        """Bind the shared Llama model used by every thinking agent."""
        self.model = get_shared_llama()
        logger.info("ThinkingAgents initialized (using shared Llama for all thinking)")

    @staticmethod
    def _format_context(conversation_history: List[Dict], turns: int) -> str:
        """Render the last `turns` messages as 'role: content' lines (shared helper)."""
        return "\n".join(
            f"{msg['role']}: {msg['content']}"
            for msg in conversation_history[-turns:]
        )

    def math_thinking(
        self,
        user_query: str,
        conversation_history: List[Dict],
        tool_context: str = ""
    ) -> str:
        """Generate mathematical reasoning (Tree-of-Thought). Returns '' on error."""
        logger.info("→ Math Thinking Agent: Generating reasoning")

        context = self._format_context(conversation_history, 3)
        thinking_prompt = f"""Conversation context:
{context}

Current query: {user_query}

{f"Tool output: {tool_context}" if tool_context else ""}

Generate mathematical reasoning:"""

        try:
            thinking_start = time.time()
            reasoning = self.model.generate(
                system_prompt=MATH_THINKING,
                user_message=thinking_prompt,
                max_tokens=300,
                temperature=0.7,
            )
            thinking_time = time.time() - thinking_start
            logger.info(f"✓ Math Thinking: Generated {len(reasoning)} chars ({thinking_time:.2f}s)")
            return reasoning
        except Exception as e:
            logger.error(f"Math Thinking error: {e}")
            return ""

    def qa_design_thinking(
        self,
        user_query: str,
        conversation_history: List[Dict],
        tool_context: str = ""
    ) -> str:
        """Generate practice-question design reasoning. Returns '' on error."""
        logger.info("→ Q&A Design Agent: Generating question strategy")

        context = self._format_context(conversation_history, 3)
        thinking_prompt = f"""Context:
{context}

Query: {user_query}

{f"Tool data: {tool_context}" if tool_context else ""}

Design practice questions:"""

        try:
            reasoning = self.model.generate(
                system_prompt=QUESTION_ANSWER_DESIGN,
                user_message=thinking_prompt,
                max_tokens=250,
                temperature=0.7,
            )
            logger.info(f"✓ Q&A Design: Generated {len(reasoning)} chars")
            return reasoning
        except Exception as e:
            logger.error(f"Q&A Design error: {e}")
            return ""

    def process(
        self,
        user_input: str,
        conversation_history: str = "",
        thinking_prompts: str = "",
        tool_img_output: str = "",
        tool_context: str = ""
    ) -> str:
        """
        Unified process method — runs thinking agents based on active prompts.

        Args:
            user_input: User's query.
            conversation_history: Formatted "role: content" lines, one per turn.
            thinking_prompts: Newline-separated thinking prompt names to activate.
            tool_img_output: HTML output from the visualization tool (unused here).
            tool_context: Context from tool usage, forwarded to each agent.

        Returns:
            str: Combined thinking context from all activated agents ('' if none).
        """
        thinking_outputs: List[str] = []

        # Rehydrate the formatted history string into role/content dicts.
        history_list: List[Dict] = []
        if conversation_history and conversation_history != "No previous conversation":
            for line in conversation_history.split('\n'):
                if ':' in line:
                    role, content = line.split(':', 1)
                    history_list.append({'role': role.strip(), 'content': content.strip()})

        # Which thinking agents to run, keyed off the prompt names.
        prompt_list = [p.strip() for p in thinking_prompts.split('\n') if p.strip()]

        # Math Thinking
        if any('MATH' in p.upper() for p in prompt_list):
            math_output = self.math_thinking(
                user_query=user_input,
                conversation_history=history_list,
                tool_context=tool_context
            )
            if math_output:
                thinking_outputs.append(f"[Mathematical Reasoning]\n{math_output}")

        # Q&A Design Thinking
        if any('PRACTICE' in p.upper() or 'QUESTION' in p.upper() for p in prompt_list):
            qa_output = self.qa_design_thinking(
                user_query=user_input,
                conversation_history=history_list,
                tool_context=tool_context
            )
            if qa_output:
                thinking_outputs.append(f"[Practice Question Design]\n{qa_output}")

        # General Reasoning (fallback, or when explicitly requested)
        if not thinking_outputs or any('REASONING' in p.upper() for p in prompt_list):
            general_output = self.general_reasoning(
                user_query=user_input,
                conversation_history=history_list,
                tool_context=tool_context
            )
            if general_output:
                thinking_outputs.append(f"[General Reasoning]\n{general_output}")

        combined_thinking = "\n\n".join(thinking_outputs) if thinking_outputs else ""
        if combined_thinking:
            logger.info(f"✓ Thinking complete: {len(combined_thinking)} chars from {len(thinking_outputs)} agents")
        return combined_thinking

    def general_reasoning(
        self,
        user_query: str,
        conversation_history: List[Dict],
        tool_context: str = ""
    ) -> str:
        """Generate general reasoning context. Returns '' on error."""
        logger.info("→ General Reasoning Agent: Generating context")

        context = self._format_context(conversation_history, 4)
        thinking_prompt = f"""Conversation:
{context}

Query: {user_query}

{f"Context: {tool_context}" if tool_context else ""}

Analyze and provide reasoning:"""

        try:
            reasoning = self.model.generate(
                system_prompt=REASONING_THINKING,
                user_message=thinking_prompt,
                max_tokens=200,
                temperature=0.7,
            )
            logger.info(f"✓ General Reasoning: Generated {len(reasoning)} chars")
            return reasoning
        except Exception as e:
            logger.error(f"General Reasoning error: {e}")
            return ""
729
-
730
-
731
- # ============================================================================
732
- # RESPONSE AGENT (Final Response Generation)
733
- # ============================================================================
734
-
735
class ResponseAgent(Runnable):
    """
    Generates final educational responses using the lazy-loaded Llama-3.2-3B.
    The model loads automatically on first use.

    Features:
    - Dynamic system-prompt assembly based on agent decisions
    - Streaming output via stream()
    - Educational tone enforcement and LaTeX support
    - Context integration (thinking outputs, tool outputs)
    """

    def __init__(self):
        """Bind the lazy-loaded shared Llama model."""
        super().__init__()
        self.model = get_shared_llama()
        logger.info("ResponseAgent initialized (using lazy-loaded Llama)")

    def invoke(self, input_data: Dict) -> Dict:
        """
        Generate the final response (non-streaming).

        Args:
            input_data: {
                'user_query': str,
                'conversation_history': List[Dict],
                'active_prompts': List[str],
                'thinking_context': str,
                'tool_context': str,
            }

        Returns:
            {'response': str, 'metadata': Dict} — on error, a canned apology
            plus {'error': str} metadata.
        """
        logger.info("→ ResponseAgent: Generating final response")

        user_query = input_data.get('user_query', '')
        conversation_history = input_data.get('conversation_history', [])
        active_prompts = input_data.get('active_prompts', [])
        thinking_context = input_data.get('thinking_context', '')
        tool_context = input_data.get('tool_context', '')

        system_prompt = self._build_system_prompt(active_prompts)
        user_message = self._build_user_message(
            user_query,
            conversation_history,
            thinking_context,
            tool_context
        )

        try:
            response_start = time.time()

            # Generation (streaming is handled separately via stream()).
            response = self.model.generate(
                system_prompt=system_prompt,
                user_message=user_message,
                max_tokens=600,
                temperature=0.7
            )

            response_time = time.time() - response_start
            response = self._clean_response(response)

            logger.info(f"✓ ResponseAgent: Generated {len(response)} chars ({response_time:.2f}s)")

            return {
                'response': response,
                'metadata': {
                    'generation_time': response_time,
                    'model': LLAMA_MODEL_ID,
                    'active_prompts': active_prompts
                }
            }
        except Exception as e:
            logger.error(f"ResponseAgent error: {e}")
            return {
                'response': "I apologize, but I encountered an error generating a response. Please try again.",
                'metadata': {'error': str(e)}
            }

    def _build_system_prompt(self, active_prompts: List[str]) -> str:
        """Assemble the system prompt from active segment names.

        CORE_IDENTITY and GENERAL_FORMATTING are always included; active
        segments are appended in order, skipping duplicates and unknown names.
        """
        prompt_map = {
            'CORE_IDENTITY': CORE_IDENTITY,
            'GENERAL_FORMATTING': GENERAL_FORMATTING,
            'LATEX_FORMATTING': LATEX_FORMATTING,
            'VAUGE_INPUT': VAUGE_INPUT,
            'USER_UNDERSTANDING': USER_UNDERSTANDING,
            'GUIDING_TEACHING': GUIDING_TEACHING,
            'STRUCTURE_PRACTICE_QUESTIONS': STRUCTURE_PRACTICE_QUESTIONS,
            'PRACTICE_QUESTION_FOLLOWUP': PRACTICE_QUESTION_FOLLOWUP,
            'TOOL_USE_ENHANCEMENT': TOOL_USE_ENHANCEMENT,
        }

        segments = [CORE_IDENTITY, GENERAL_FORMATTING]
        for prompt_name in active_prompts:
            if prompt_name in prompt_map and prompt_map[prompt_name] not in segments:
                segments.append(prompt_map[prompt_name])

        return "\n\n".join(segments)

    def _build_user_message(
        self,
        user_query: str,
        conversation_history: List[Dict],
        thinking_context: str,
        tool_context: str
    ) -> str:
        """Build the user message: recent history, hidden reasoning/tool context, query."""
        parts = []

        # Conversation history (last 3 turns, each message capped at 200 chars)
        if conversation_history:
            history_text = "\n".join([
                f"{msg['role']}: {msg['content'][:200]}"
                for msg in conversation_history[-3:]
            ])
            parts.append(f"Recent conversation:\n{history_text}")

        # Thinking context (invisible to user, guides response)
        if thinking_context:
            parts.append(f"[Internal reasoning context]: {thinking_context}")

        # Tool context
        if tool_context:
            parts.append(f"[Tool output]: {tool_context}")

        parts.append(f"Student query: {user_query}")

        return "\n\n".join(parts)

    def _clean_response(self, response: str) -> str:
        """Strip model artifacts and drop a trailing incomplete sentence."""
        artifacts = ['<|im_end|>', '<|endoftext|>', '###', '<|end|>']
        for artifact in artifacts:
            response = response.replace(artifact, '')

        # If the response doesn't end on sentence punctuation, cut at the
        # RIGHTMOST sentence terminator. (The original checked '. ' before
        # '! '/'? ' and truncated at the last '. ' even when a later '!'/'?'
        # ended a more recent complete sentence, dropping whole sentences.)
        if response and response[-1] not in '.!?':
            cut = max(response.rfind(d) for d in ('. ', '! ', '? '))
            if cut != -1:
                response = response[:cut + 1]  # keep the punctuation char

        return response.strip()

    def stream(self, input_data: Dict):
        """
        Stream the response chunk-by-chunk.

        Yields:
            str: Response chunks; a canned apology on error.
        """
        logger.info("→ ResponseAgent: Streaming response")

        system_prompt = self._build_system_prompt(input_data.get('active_prompts', []))
        user_message = self._build_user_message(
            input_data.get('user_query', ''),
            input_data.get('conversation_history', []),
            input_data.get('thinking_context', ''),
            input_data.get('tool_context', '')
        )

        try:
            for chunk in self.model.generate_streaming(
                system_prompt=system_prompt,
                user_message=user_message,
                max_tokens=600,
                temperature=0.7
            ):
                yield chunk
        except Exception as e:
            logger.error(f"Streaming error: {e}")
            yield "I apologize, but I encountered an error. Please try again."
927
-
928
-
929
- # ============================================================================
930
- # MODULE INITIALIZATION
931
- # ============================================================================
932
-
933
- logger.info("="*60)
934
- logger.info("MIMIR AGENTS MODULE INITIALIZED")
935
- logger.info("="*60)
936
- logger.info(f" Model: Llama-3.2-3B-Instruct (lazy-loaded)")
937
- logger.info(f" Agents: Tool, Routing (4x), Thinking (3x), Response")
938
- logger.info(f" Memory: ~1GB (loads on first use)")
939
- logger.info(f" Architecture: Single unified model with caching")
940
- logger.info("="*60)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
The diff for this file is too large to render. See raw diff
 
gradio_analytics.py DELETED
@@ -1,538 +0,0 @@
1
- # gradio_analytics.py
2
- import gradio as gr
3
- import logging
4
- import json
5
- import sqlite3
6
- import os
7
- from datetime import datetime
8
-
9
- logger = logging.getLogger(__name__)
10
-
11
- try:
12
- from app import (
13
- get_trackio_database_path,
14
- get_project_statistics_with_nulls,
15
- get_recent_interactions_with_nulls,
16
- create_dashboard_html_with_nulls,
17
- calculate_response_quality,
18
- refresh_analytics_data_persistent as refresh_analytics_data,
19
- export_metrics_json_persistent as export_metrics_json,
20
- export_metrics_csv_persistent as export_metrics_csv,
21
- load_analytics_state,
22
- get_global_state_debug_info,
23
- sync_trackio_with_global_state,
24
- global_state_manager,
25
- evaluate_educational_quality_with_tracking,
26
- )
27
- except ImportError:
28
- def get_trackio_database_path(project_name):
29
- return None
30
-
31
- def get_project_statistics_with_nulls(cursor, project_name):
32
- return {
33
- "total_conversations": None,
34
- "avg_session_length": None,
35
- "success_rate": None
36
- }
37
-
38
- def get_recent_interactions_with_nulls(cursor, project_name, limit=10):
39
- return []
40
-
41
- def create_dashboard_html_with_nulls(project_name, project_stats):
42
- return f"<div>Mock dashboard for {project_name}</div>"
43
-
44
- def calculate_response_quality(response):
45
- return 3.0
46
-
47
- def refresh_analytics_data():
48
- return {}, [], "<div>Mock analytics</div>"
49
-
50
- def export_metrics_json():
51
- gr.Info("Mock JSON export")
52
-
53
- def export_metrics_csv():
54
- gr.Info("Mock CSV export")
55
-
56
- def load_analytics_state():
57
- return {}, [], "<div>Mock analytics state</div>"
58
-
59
- def get_global_state_debug_info():
60
- return {"status": "mock"}
61
-
62
- def sync_trackio_with_global_state():
63
- pass
64
-
65
- def evaluate_educational_quality_with_tracking(*args, **kwargs):
66
- return {"educational_score": 0.5}
67
-
68
- class MockStateManager:
69
- def get_cache_status(self):
70
- return {"status": "mock"}
71
- def get_evaluation_summary(self, include_history=False):
72
- return {"aggregate_metrics": {}, "total_evaluations": {}}
73
- def clear_all_states(self):
74
- pass
75
- def _backup_to_hf_dataset(self):
76
- pass
77
-
78
- global_state_manager = MockStateManager()
79
-
80
- def load_custom_css():
81
- try:
82
- with open("styles.css", "r", encoding="utf-8") as css_file:
83
- css_content = css_file.read()
84
- logger.info(f"CSS loaded successfully for analytics page")
85
- return css_content
86
- except FileNotFoundError:
87
- logger.warning("styles.css file not found for analytics page")
88
- return ""
89
- except Exception as e:
90
- logger.warning(f"Error reading styles.css: {e}")
91
- return ""
92
-
93
- def show_cache_info():
94
- try:
95
- from pathlib import Path
96
- from huggingface_hub import scan_cache_dir
97
-
98
- cache_info = scan_cache_dir(cache_dir="/tmp/huggingface")
99
-
100
- info_text = f"""
101
- **HuggingFace Cache Status:**
102
-
103
- **Total Size:** {cache_info.size_on_disk / (1024**3):.2f} GB
104
- **Number of Repos:** {len(cache_info.repos)}
105
-
106
- **Cached Models:**
107
- """
108
-
109
- for repo in cache_info.repos:
110
- size_gb = repo.size_on_disk / (1024**3)
111
- info_text += f"""
112
- - **{repo.repo_id}**
113
- - Size: {size_gb:.2f} GB
114
- - Type: {repo.repo_type}
115
- - Revisions: {len(repo.revisions)}
116
- """
117
-
118
- return info_text
119
-
120
- except Exception as e:
121
- return f"Error inspecting cache: {str(e)}"
122
-
123
- def launch_external_trackio():
124
- try:
125
- import subprocess
126
- result = subprocess.run(
127
- ["trackio", "show", "--project", "Mimir"],
128
- capture_output=False,
129
- text=True
130
- )
131
-
132
- if result.returncode == 0:
133
- gr.Info("Trackio dashboard launched in browser")
134
- else:
135
- gr.Warning("Could not launch trackio dashboard")
136
-
137
- except Exception as e:
138
- logger.error(f"Failed to launch trackio: {e}")
139
- gr.Warning(f"Failed to launch trackio dashboard: {str(e)}")
140
-
141
- def show_cache_status():
142
- try:
143
- debug_info = get_global_state_debug_info()
144
- cache_status = debug_info.get("cache_status", {})
145
-
146
- status_text = f"""
147
- **Global State Cache Status:**
148
- - Session ID: {cache_status.get('session_id', 'Unknown')}
149
- - Analytics Cached: {'Yes' if cache_status.get('analytics_cached') else 'No'}
150
- - Conversation Cached: {'Yes' if cache_status.get('conversation_cached') else 'No'}
151
- - Analytics Last Refresh: {cache_status.get('analytics_last_refresh', 'Never')}
152
- - Total Analytics Sessions: {cache_status.get('total_analytics_sessions', 0)}
153
- - Total Conversation Sessions: {cache_status.get('total_conversation_sessions', 0)}
154
-
155
- **Analytics Data Status:**
156
- - Has Analytics Data: {'Yes' if cache_status.get('analytics_has_data') else 'No'}
157
- - Conversation Length: {cache_status.get('conversation_length', 0)} messages
158
- - Chat History Length: {cache_status.get('chat_history_length', 0)} messages
159
-
160
- *Last Updated: {datetime.now().strftime('%H:%M:%S')}*
161
- """
162
-
163
- gr.Info("Cache status updated - check the Status panel")
164
- return status_text
165
-
166
- except Exception as e:
167
- error_text = f"Error getting cache status: {str(e)}"
168
- gr.Warning(error_text)
169
- return error_text
170
-
171
- def manual_backup_to_hf():
172
- try:
173
- global_state_manager._backup_to_hf_dataset()
174
- gr.Info("Manual backup to HF dataset completed successfully")
175
- return f"Backup completed at {datetime.now().strftime('%H:%M:%S')}"
176
- except Exception as e:
177
- gr.Warning(f"Backup failed: {str(e)}")
178
- return f"Backup failed: {str(e)}"
179
-
180
- def get_persistence_status():
181
- try:
182
- status_info = {
183
- "SQLite DB": "Active" if os.path.exists(global_state_manager._db_path) else "Not Found",
184
- "HF Dataset": global_state_manager.dataset_repo,
185
- "Last HF Backup": global_state_manager._last_hf_backup.strftime('%Y-%m-%d %H:%M:%S'),
186
- "DB Path": global_state_manager._db_path,
187
- "Backup Interval": f"{global_state_manager._hf_backup_interval}s"
188
- }
189
- return status_info
190
- except Exception as e:
191
- return {"error": str(e)}
192
-
193
- def clear_all_global_states():
194
- try:
195
- global_state_manager.clear_all_states()
196
- gr.Info("All global states cleared successfully")
197
-
198
- empty_stats = {
199
- "total_conversations": None,
200
- "avg_session_length": None,
201
- "success_rate": None,
202
- "model_type": "Cleared",
203
- "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
204
- }
205
-
206
- empty_html = """
207
- <div style="text-align: center; padding: 40px; border: 2px dashed #ccc; border-radius: 8px; background: #f8f9fa;">
208
- <h3>States Cleared</h3>
209
- <p>All global states have been cleared.</p>
210
- <p>Click "Refresh Data" to reload analytics.</p>
211
- </div>
212
- """
213
-
214
- return empty_stats, [], empty_html
215
-
216
- except Exception as e:
217
- gr.Warning(f"Failed to clear states: {str(e)}")
218
- return load_analytics_state()
219
-
220
- def show_evaluation_metrics():
221
- try:
222
- eval_summary = global_state_manager.get_evaluation_summary(include_history=True)
223
-
224
- metrics_data = [
225
- ["Educational Quality", f"{eval_summary['aggregate_metrics']['avg_educational_quality']:.3f}"],
226
- ["User Satisfaction", f"{eval_summary['aggregate_metrics']['user_satisfaction_rate']:.3f}"]
227
- ]
228
-
229
- recent_evaluations = []
230
- if 'history' in eval_summary:
231
- for eval_item in eval_summary['history']['recent_educational_scores'][-5:]:
232
- recent_evaluations.append([
233
- eval_item['timestamp'][:16],
234
- f"{eval_item['educational_score']:.3f}",
235
- f"{eval_item['semantic_quality']:.3f}",
236
- f"{eval_item['response_time']:.3f}s"
237
- ])
238
-
239
- return eval_summary, metrics_data, recent_evaluations
240
-
241
- except Exception as e:
242
- logger.error(f"Error getting evaluation metrics: {e}")
243
- return {}, [], []
244
-
245
- def sync_and_refresh_all():
246
- try:
247
- sync_trackio_with_global_state()
248
- project_stats, recent_interactions, dashboard_html = refresh_analytics_data()
249
- eval_summary, metrics_data, recent_evaluations = show_evaluation_metrics()
250
-
251
- gr.Info("All data synced and refreshed successfully")
252
-
253
- return project_stats, recent_interactions, dashboard_html, eval_summary, metrics_data, recent_evaluations
254
-
255
- except Exception as e:
256
- logger.error(f"Sync and refresh failed: {e}")
257
- gr.Warning(f"Sync failed: {str(e)}")
258
- return load_analytics_state() + ({}, [], [])
259
-
260
- with gr.Blocks() as demo:
261
- custom_css = load_custom_css()
262
- if custom_css:
263
- gr.HTML(f'<style>{custom_css}</style>')
264
-
265
- gr.HTML('<div class="analytics-title"><h2>Mimir Analytics Dashboard</h2></div>')
266
-
267
- gr.Markdown("Monitor educational AI performance and effectiveness metrics with persistent state management.")
268
-
269
- with gr.Tabs():
270
- with gr.TabItem("Traditional Analytics"):
271
- with gr.Row():
272
- with gr.Column(scale=1):
273
- gr.Markdown("## Controls")
274
- refresh_btn = gr.Button("Refresh Data", variant="primary")
275
- sync_all_btn = gr.Button("Sync & Refresh All", variant="primary")
276
-
277
- with gr.Row():
278
- export_json_btn = gr.Button("Export JSON", variant="secondary", size="sm")
279
- export_csv_btn = gr.Button("Export CSV", variant="secondary", size="sm")
280
-
281
- launch_trackio_btn = gr.Button("Launch Trackio Dashboard", variant="secondary")
282
-
283
- gr.Markdown("### State Management")
284
- with gr.Row():
285
- cache_status_btn = gr.Button("Cache Status", size="sm")
286
- clear_states_btn = gr.Button("Clear All States", size="sm", variant="stop")
287
-
288
- with gr.Group():
289
- gr.Markdown("### Project Information")
290
- project_info = gr.JSON(
291
- value={
292
- "total_conversations": None,
293
- "avg_session_length": None,
294
- "success_rate": None,
295
- "model_type": None
296
- },
297
- label="Project Stats"
298
- )
299
-
300
- with gr.Group():
301
- gr.Markdown("### System Status")
302
- status_panel = gr.Markdown(
303
- "Click 'Cache Status' to view global state information.",
304
- label="Status Information"
305
- )
306
-
307
- with gr.Column(scale=2):
308
- gr.Markdown("## Key Metrics Dashboard")
309
- trackio_iframe = gr.HTML(
310
- value="""
311
- <div style="text-align: center; padding: 40px; border: 2px dashed #ccc; border-radius: 8px; background: #f8f9fa;">
312
- <h3>Trackio Dashboard</h3>
313
- <p>Analytics data will appear here after conversations.</p>
314
- <p>Data is automatically cached and persists across page navigation.</p>
315
- <p>To launch trackio dashboard separately, run:</p>
316
- <code style="background: #e9ecef; padding: 4px 8px; border-radius: 4px;">trackio show --project "Mimir"</code>
317
- </div>
318
- """,
319
- label="Dashboard"
320
- )
321
-
322
- with gr.Row():
323
- with gr.Column():
324
- gr.Markdown("## Recent Interactions")
325
- gr.Markdown("*Data persists when switching between Chatbot and Analytics pages*")
326
- recent_metrics = gr.Dataframe(
327
- headers=["Timestamp", "Response Time", "Prompt Mode", "Tools Used", "Quality Score", "Adapter"],
328
- datatype=["str", "number", "str", "bool", "number", "str"],
329
- row_count=10,
330
- col_count=6,
331
- interactive=False,
332
- label="Latest Sessions",
333
- value=[],
334
- show_label=True
335
- )
336
-
337
- with gr.TabItem("ML Performance"):
338
- gr.Markdown("## Agent-Based Performance & Global State Metrics")
339
-
340
- with gr.Row():
341
- with gr.Column(scale=1):
342
- eval_metrics_btn = gr.Button("Get Evaluation Metrics", variant="primary")
343
-
344
- with gr.Group():
345
- gr.Markdown("### Model Cache Status")
346
- cache_status_display = gr.JSON(
347
- value={},
348
- label="Cache Information"
349
- )
350
-
351
- with gr.Column(scale=2):
352
- gr.Markdown("### Aggregate Performance Metrics")
353
- eval_metrics_table = gr.Dataframe(
354
- headers=["Metric", "Score"],
355
- datatype=["str", "str"],
356
- label="Model Performance",
357
- value=[]
358
- )
359
-
360
- eval_summary_display = gr.JSON(
361
- value={},
362
- label="Detailed Evaluation Summary"
363
- )
364
-
365
- with gr.Row():
366
- with gr.Column():
367
- gr.Markdown("### Recent Quality Evaluations")
368
- recent_evaluations_table = gr.Dataframe(
369
- headers=["Timestamp", "Educational Score", "Semantic Quality", "Response Time"],
370
- datatype=["str", "str", "str", "str"],
371
- label="Recent Evaluations",
372
- value=[]
373
- )
374
-
375
- with gr.TabItem("System Status"):
376
- gr.Markdown("## Global State Manager & System Diagnostics")
377
-
378
- with gr.Row():
379
- with gr.Column():
380
- gr.Markdown("### Global State Cache")
381
- cache_details = gr.Markdown("Click 'Show Cache Status' to view detailed information.")
382
-
383
- show_cache_btn = gr.Button("Show Cache Status", variant="primary")
384
- refresh_cache_btn = gr.Button("Refresh Cache Info", variant="secondary")
385
-
386
- gr.Markdown("### Persistence Controls")
387
- backup_btn = gr.Button("Manual Backup to HF Dataset", variant="primary")
388
- backup_status = gr.Textbox(label="Backup Status", value="No recent backup", interactive=False)
389
-
390
- with gr.Column():
391
- gr.Markdown("### System Actions")
392
- sync_trackio_btn = gr.Button("Sync to Database", variant="secondary")
393
- clear_all_btn = gr.Button("Clear All Global States", variant="stop")
394
-
395
- gr.Markdown("### Persistence Status")
396
- persistence_info = gr.JSON(
397
- value={},
398
- label="Persistence Information"
399
- )
400
-
401
- gr.Markdown("### Performance Monitor")
402
- perf_info = gr.JSON(
403
- value={},
404
- label="Performance Information"
405
- )
406
-
407
- # NEW: HuggingFace Cache Viewer Section
408
- with gr.Row():
409
- with gr.Column():
410
- gr.Markdown("### 🗂️ HuggingFace Model Cache")
411
- gr.Markdown("*View cached models and disk usage*")
412
-
413
- cache_viewer_btn = gr.Button("Inspect Model Cache", variant="primary", size="lg")
414
-
415
- with gr.Row():
416
- clear_cache_btn = gr.Button("Clear Cache (⚠️ Dangerous)", variant="stop", size="sm")
417
- refresh_models_btn = gr.Button("Re-download Models", variant="secondary", size="sm")
418
-
419
- cache_info_display = gr.Markdown(
420
- "Click **Inspect Model Cache** to view detailed cache information.",
421
- label="Cache Details"
422
- )
423
-
424
- demo.load(
425
- load_analytics_state,
426
- inputs=None,
427
- outputs=[project_info, recent_metrics, trackio_iframe],
428
- show_progress="hidden"
429
- )
430
-
431
- demo.load(
432
- fn=lambda: global_state_manager.get_cache_status(),
433
- inputs=None,
434
- outputs=[cache_status_display],
435
- show_progress="hidden"
436
- )
437
-
438
- demo.load(
439
- fn=get_persistence_status,
440
- inputs=None,
441
- outputs=[persistence_info],
442
- show_progress="hidden"
443
- )
444
-
445
- refresh_btn.click(
446
- fn=refresh_analytics_data,
447
- inputs=[],
448
- outputs=[project_info, recent_metrics, trackio_iframe],
449
- show_progress="full"
450
- )
451
-
452
- sync_all_btn.click(
453
- fn=sync_and_refresh_all,
454
- inputs=[],
455
- outputs=[project_info, recent_metrics, trackio_iframe, eval_summary_display, eval_metrics_table, recent_evaluations_table],
456
- show_progress="full"
457
- )
458
-
459
- export_json_btn.click(
460
- fn=export_metrics_json,
461
- inputs=[],
462
- outputs=[],
463
- show_progress="full"
464
- )
465
-
466
- export_csv_btn.click(
467
- fn=export_metrics_csv,
468
- inputs=[],
469
- outputs=[],
470
- show_progress="full"
471
- )
472
-
473
- launch_trackio_btn.click(
474
- fn=launch_external_trackio,
475
- inputs=[],
476
- outputs=[],
477
- show_progress="full"
478
- )
479
-
480
- cache_status_btn.click(
481
- fn=show_cache_status,
482
- inputs=[],
483
- outputs=[status_panel],
484
- show_progress="full"
485
- )
486
-
487
- clear_states_btn.click(
488
- fn=clear_all_global_states,
489
- inputs=[],
490
- outputs=[project_info, recent_metrics, trackio_iframe],
491
- show_progress="full"
492
- )
493
-
494
- eval_metrics_btn.click(
495
- fn=show_evaluation_metrics,
496
- inputs=[],
497
- outputs=[eval_summary_display, eval_metrics_table, recent_evaluations_table],
498
- show_progress="full"
499
- )
500
-
501
- show_cache_btn.click(
502
- fn=show_cache_status,
503
- inputs=[],
504
- outputs=[cache_details],
505
- show_progress="full"
506
- )
507
-
508
- refresh_cache_btn.click(
509
- fn=lambda: global_state_manager.get_cache_status(),
510
- inputs=[],
511
- outputs=[perf_info],
512
- show_progress="full"
513
- )
514
-
515
- backup_btn.click(
516
- fn=manual_backup_to_hf,
517
- inputs=[],
518
- outputs=[backup_status],
519
- show_progress="full"
520
- )
521
-
522
- sync_trackio_btn.click(
523
- fn=sync_trackio_with_global_state,
524
- inputs=[],
525
- outputs=[],
526
- show_progress="full"
527
- )
528
-
529
- clear_all_btn.click(
530
- fn=clear_all_global_states,
531
- inputs=[],
532
- outputs=[project_info, recent_metrics, trackio_iframe],
533
- show_progress="full"
534
- )
535
-
536
- if __name__ == "__main__":
537
- logger.info("Running analytics dashboard standalone with global state management")
538
- demo.launch(server_name="0.0.0.0", server_port=7861)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
gradio_chatbot.py DELETED
@@ -1,148 +0,0 @@
1
- # gradio_chatbot.py
2
- import gradio as gr
3
- import logging
4
-
5
- logger = logging.getLogger(__name__)
6
-
7
- from app import (
8
- add_user_message,
9
- add_loading_animation,
10
- generate_response,
11
- reset_conversation,
12
- load_conversation_state,
13
- remove_loading_animations,
14
- global_state_manager,
15
- )
16
-
17
-
18
- def load_custom_css():
19
- try:
20
- with open("styles.css", "r", encoding="utf-8") as css_file:
21
- css_content = css_file.read()
22
- logger.info(f"CSS loaded successfully, length: {len(css_content)} characters")
23
- return css_content
24
- except FileNotFoundError:
25
- logger.warning("styles.css file not found, using default styling")
26
- return ""
27
- except Exception as e:
28
- logger.warning(f"Error reading styles.css: {e}")
29
- return ""
30
-
31
-
32
- def restore_state_on_page_access():
33
- """
34
- Restore conversation state when page loads or user navigates back.
35
- This ensures persistence across page navigation.
36
- """
37
- try:
38
- current_state = global_state_manager.get_conversation_state()
39
- chat_history = current_state.get('chat_history', [])
40
- conversation_state_data = current_state.get('conversation_state', [])
41
-
42
- logger.info(f"✓ Restored state: {len(chat_history)} messages in chat, {len(conversation_state_data)} in conversation")
43
-
44
- return chat_history, conversation_state_data
45
- except Exception as e:
46
- logger.error(f"Failed to restore state: {e}")
47
- return [], []
48
-
49
-
50
- with gr.Blocks() as demo:
51
- custom_css = load_custom_css()
52
- if custom_css:
53
- gr.HTML(f'<style>{custom_css}</style>')
54
-
55
- conversation_state = gr.State([])
56
-
57
- gr.HTML('<div class="title-header"><h1>Mimir</h1></div>')
58
-
59
- with gr.Row():
60
- chatbot = gr.Chatbot(
61
- type="messages",
62
- show_copy_button=True,
63
- show_share_button=False,
64
- layout="bubble",
65
- autoscroll=True,
66
- avatar_images=None,
67
- elem_id="main-chatbot",
68
- scale=1,
69
- height="65vh",
70
- value=[],
71
- latex_delimiters=[
72
- {"left": "$$", "right": "$$", "display": True},
73
- {"left": "$", "right": "$", "display": False},
74
- ]
75
- )
76
-
77
- with gr.Row(elem_classes=["input-controls"]):
78
- msg = gr.Textbox(
79
- placeholder="Ask me about math, research, study strategies, or any educational topic...",
80
- show_label=False,
81
- lines=6,
82
- max_lines=8,
83
- elem_classes=["input-textbox"],
84
- container=False,
85
- scale=4
86
- )
87
- with gr.Column(elem_classes=["button-column"], scale=1):
88
- send = gr.Button("Send", elem_classes=["send-button"], size="sm")
89
- clear = gr.Button("Clear", elem_classes=["clear-button"], size="sm")
90
-
91
-
92
- demo.load(
93
- fn=restore_state_on_page_access,
94
- outputs=[chatbot, conversation_state],
95
- queue=False
96
- )
97
-
98
- msg.submit(
99
- add_user_message,
100
- inputs=[msg, chatbot, conversation_state],
101
- outputs=[msg, chatbot, conversation_state],
102
- show_progress="hidden",
103
- queue=True,
104
- ).then(
105
- add_loading_animation,
106
- inputs=[chatbot, conversation_state],
107
- outputs=[chatbot, conversation_state],
108
- show_progress="hidden",
109
- queue=True,
110
- ).then(
111
- generate_response,
112
- inputs=[chatbot, conversation_state],
113
- outputs=[chatbot, conversation_state],
114
- show_progress="hidden",
115
- queue=True,
116
- )
117
-
118
- send.click(
119
- add_user_message,
120
- inputs=[msg, chatbot, conversation_state],
121
- outputs=[msg, chatbot, conversation_state],
122
- show_progress="hidden",
123
- queue=True,
124
- ).then(
125
- add_loading_animation,
126
- inputs=[chatbot, conversation_state],
127
- outputs=[chatbot, conversation_state],
128
- show_progress="hidden",
129
- queue=True,
130
- ).then(
131
- generate_response,
132
- inputs=[chatbot, conversation_state],
133
- outputs=[chatbot, conversation_state],
134
- show_progress="hidden",
135
- queue=True,
136
- )
137
-
138
- clear.click(
139
- reset_conversation,
140
- outputs=[chatbot, conversation_state],
141
- show_progress="hidden"
142
- )
143
-
144
-
145
- if __name__ == "__main__":
146
- logger.info("Running chatbot interface standalone")
147
- demo.queue(default_concurrency_limit=1)
148
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
gradio_prompt_testing.py DELETED
@@ -1,1564 +0,0 @@
1
- # gradio_pipeline_testing.py
2
- """
3
- Full Pipeline Testing Interface for Mimir Educational AI Assistant
4
-
5
- Tests the complete orchestration flow with comprehensive metrics at every step.
6
- Captures conditional model activation, token usage, timing, and quality metrics.
7
-
8
- UPDATED: Now correctly mirrors app.py orchestrate_turn() process
9
- - Tool decision uses decide() method with conversation history
10
- - Response agent invoked with input_data dict (not raw string)
11
- - Thinking agents process() method matches app.py
12
- - Graph generation included when tools are used
13
-
14
- Output: CSV file with ~110 columns capturing full pipeline journey
15
- """
16
-
17
- import os
18
- import sys
19
- import io
20
- import csv
21
- import json
22
- import time
23
- import logging
24
- import warnings
25
- from datetime import datetime
26
- from typing import Dict, List, Optional, Tuple, Any
27
- from collections import Counter
28
-
29
- # Core dependencies
30
- import torch
31
- import gradio as gr
32
- import numpy as np
33
-
34
- # ============================================================================
35
- # ENVIRONMENT SETUP
36
- # ============================================================================
37
- HF_CACHE = "/tmp/huggingface"
38
- os.makedirs(f"{HF_CACHE}/hub", exist_ok=True)
39
- os.environ['HF_HOME'] = HF_CACHE
40
- os.environ['HF_HUB_CACHE'] = f"{HF_CACHE}/hub"
41
-
42
- # ============================================================================
43
- # IMPORTS FROM MIMIR APPLICATION
44
- # ============================================================================
45
- try:
46
- from agents import (
47
- ToolDecisionAgent,
48
- PromptRoutingAgents,
49
- ThinkingAgents,
50
- ResponseAgent,
51
- )
52
- AGENTS_AVAILABLE = True
53
- except ImportError as e:
54
- print(f"⚠️ Warning: Could not import agents: {e}")
55
- AGENTS_AVAILABLE = False
56
-
57
- from model_manager import get_model as get_shared_llama
58
-
59
- try:
60
- from state_manager import GlobalStateManager, LogicalExpressions
61
- STATE_MANAGER_AVAILABLE = True
62
- except ImportError as e:
63
- print(f"⚠️ Warning: Could not import state_manager: {e}")
64
- STATE_MANAGER_AVAILABLE = False
65
-
66
- try:
67
- from prompt_library import (
68
- CORE_IDENTITY,
69
- TOOL_DECISION,
70
- agent_1_system,
71
- agent_2_system,
72
- agent_3_system,
73
- agent_4_system,
74
- MATH_THINKING,
75
- QUESTION_ANSWER_DESIGN,
76
- REASONING_THINKING,
77
- VAUGE_INPUT,
78
- USER_UNDERSTANDING,
79
- GENERAL_FORMATTING,
80
- LATEX_FORMATTING,
81
- GUIDING_TEACHING,
82
- STRUCTURE_PRACTICE_QUESTIONS,
83
- PRACTICE_QUESTION_FOLLOWUP,
84
- TOOL_USE_ENHANCEMENT,
85
- )
86
- PROMPTS_AVAILABLE = True
87
- except ImportError as e:
88
- print(f"⚠️ Warning: Could not import prompt_library: {e}")
89
- PROMPTS_AVAILABLE = False
90
-
91
- # Try to import post processor
92
- try:
93
- # Import the post processor class/module from app.py
94
- import importlib.util
95
- spec = importlib.util.spec_from_file_location("app_module", "app.py")
96
- app_module = importlib.util.module_from_spec(spec)
97
- spec.loader.exec_module(app_module)
98
- post_processor = app_module.post_processor
99
- POST_PROCESSOR_AVAILABLE = True
100
- except Exception as e:
101
- print(f"⚠️ Warning: Could not import post_processor: {e}")
102
- POST_PROCESSOR_AVAILABLE = False
103
- # Create dummy
104
- class DummyPostProcessor:
105
- def process_response(self, response, user_message):
106
- return response
107
- post_processor = DummyPostProcessor()
108
-
109
- # ZeroGPU support
110
- try:
111
- import spaces
112
- ZERO_GPU_AVAILABLE = True
113
- except ImportError:
114
- ZERO_GPU_AVAILABLE = False
115
- class DummySpaces:
116
- @staticmethod
117
- def GPU(duration=600):
118
- def decorator(func):
119
- return func
120
- return decorator
121
- spaces = DummySpaces()
122
-
123
- # Tiktoken for accurate token counting
124
- try:
125
- import tiktoken
126
- TIKTOKEN_AVAILABLE = True
127
- except ImportError:
128
- TIKTOKEN_AVAILABLE = False
129
- print("⚠️ Warning: tiktoken not available - using fallback token counting")
130
-
131
- # Textstat for readability metrics
132
- try:
133
- import textstat
134
- TEXTSTAT_AVAILABLE = True
135
- except ImportError:
136
- TEXTSTAT_AVAILABLE = False
137
- print("⚠️ Warning: textstat not available - using manual readability calculations")
138
-
139
- # ============================================================================
140
- # LOGGING SETUP
141
- # ============================================================================
142
- logging.basicConfig(
143
- level=logging.INFO,
144
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
145
- )
146
- logger = logging.getLogger(__name__)
147
-
148
- warnings.filterwarnings("ignore", category=UserWarning)
149
- warnings.filterwarnings("ignore", category=FutureWarning)
150
-
151
- CURRENT_YEAR = datetime.now().year
152
-
153
- # ============================================================================
154
- # GLOBAL INSTANCES
155
- # ============================================================================
156
- if AGENTS_AVAILABLE and STATE_MANAGER_AVAILABLE:
157
- try:
158
- global_state_manager = GlobalStateManager()
159
- logical_expressions = LogicalExpressions()
160
- tool_agent = ToolDecisionAgent()
161
- routing_agents = PromptRoutingAgents()
162
- thinking_agents = ThinkingAgents()
163
- response_agent = ResponseAgent()
164
-
165
- logger.info("✓ All agents initialized successfully")
166
- except Exception as e:
167
- logger.error(f"Failed to initialize agents: {e}")
168
- raise
169
- else:
170
- logger.error("Cannot initialize - missing core dependencies")
171
- raise ImportError("Missing required modules: agents or state_manager")
172
-
173
- # ============================================================================
174
- # CSV SCHEMA DEFINITION
175
- # ============================================================================
176
- CSV_COLUMNS = [
177
- # Identification & Input
178
- "prompt_index",
179
- "timestamp",
180
- "user_prompt",
181
- "user_prompt_tokens",
182
- "user_prompt_chars",
183
- "user_prompt_words",
184
-
185
- # Conversation Context
186
- "conversation_history_length",
187
- "conversation_history_tokens",
188
-
189
- # Tool Decision Agent
190
- "tool_decision_input_template",
191
- "tool_decision_input_tokens",
192
- "tool_decision_output",
193
- "tool_decision_output_tokens",
194
- "tool_decision_result",
195
- "tool_decision_time_seconds",
196
- "tool_decision_gpu_peak_mb",
197
-
198
- # Regex Checks
199
- "regex_checks_applied",
200
- "regex_checks_time_seconds",
201
-
202
- # Routing Agent 1
203
- "agent1_input_template",
204
- "agent1_input_tokens",
205
- "agent1_output",
206
- "agent1_output_tokens",
207
- "agent1_decision",
208
- "agent1_time_seconds",
209
- "agent1_gpu_peak_mb",
210
-
211
- # Routing Agent 2
212
- "agent2_input_template",
213
- "agent2_input_tokens",
214
- "agent2_output",
215
- "agent2_output_tokens",
216
- "agent2_decision",
217
- "agent2_time_seconds",
218
- "agent2_gpu_peak_mb",
219
-
220
- # Routing Agent 3
221
- "agent3_input_template",
222
- "agent3_input_tokens",
223
- "agent3_output",
224
- "agent3_output_tokens",
225
- "agent3_decision",
226
- "agent3_time_seconds",
227
- "agent3_gpu_peak_mb",
228
-
229
- # Routing Agent 4
230
- "agent4_input_template",
231
- "agent4_input_tokens",
232
- "agent4_output",
233
- "agent4_output_tokens",
234
- "agent4_decisions",
235
- "agent4_time_seconds",
236
- "agent4_gpu_peak_mb",
237
-
238
- # Math Thinking
239
- "math_thinking_activated",
240
- "math_thinking_input_template",
241
- "math_thinking_input_tokens",
242
- "math_thinking_output",
243
- "math_thinking_output_tokens",
244
- "math_thinking_time_seconds",
245
- "math_thinking_gpu_peak_mb",
246
-
247
- # QA Design Thinking
248
- "qa_design_activated",
249
- "qa_design_input_template",
250
- "qa_design_input_tokens",
251
- "qa_design_output",
252
- "qa_design_output_tokens",
253
- "qa_design_time_seconds",
254
- "qa_design_gpu_peak_mb",
255
-
256
- # Reasoning Thinking
257
- "reasoning_activated",
258
- "reasoning_input_template",
259
- "reasoning_input_tokens",
260
- "reasoning_output",
261
- "reasoning_output_tokens",
262
- "reasoning_time_seconds",
263
- "reasoning_gpu_peak_mb",
264
-
265
- # Prompt Assembly
266
- "active_response_prompts",
267
- "final_prompt_template",
268
- "final_prompt_tokens",
269
- "final_prompt_chars",
270
- "final_prompt_words",
271
- "assembly_time_seconds",
272
-
273
- # Response Generation
274
- "response_input_template",
275
- "response_input_tokens",
276
- "response_raw",
277
- "response_raw_tokens",
278
- "response_raw_chars",
279
- "response_raw_words",
280
- "response_generation_time_seconds",
281
- "response_gpu_peak_mb",
282
- "response_tokens_per_second",
283
-
284
- # Post-processing
285
- "response_processed",
286
- "response_processed_tokens",
287
- "response_processed_chars",
288
- "response_processed_words",
289
- "postprocessing_time_seconds",
290
-
291
- # Quality Metrics
292
- "flesch_reading_ease",
293
- "flesch_kincaid_grade",
294
- "completeness_score",
295
- "specificity_score",
296
- "repetition_ratio",
297
- "unique_word_ratio",
298
- "avg_sentence_length",
299
- "question_answered",
300
-
301
- # Overall Metrics
302
- "total_pipeline_time_seconds",
303
- "total_input_tokens",
304
- "total_output_tokens",
305
- "total_gpu_peak_mb",
306
- "models_activated_count",
307
- "models_activated_list",
308
- ]
309
-
310
- # ============================================================================
311
- # TOKEN COUNTING FUNCTIONS
312
- # ============================================================================
313
-
314
- def count_tokens_accurate(text: str) -> int:
315
- """
316
- Count tokens using tiktoken library for accurate estimation.
317
-
318
- Args:
319
- text: Input text to tokenize
320
-
321
- Returns:
322
- Accurate token count
323
- """
324
- if not text:
325
- return 0
326
-
327
- if not TIKTOKEN_AVAILABLE:
328
- # Fallback: word count approximation
329
- return len(text.split())
330
-
331
- try:
332
- # Use cl100k_base encoding (used by GPT-3.5/4, good general estimator)
333
- encoding = tiktoken.get_encoding("cl100k_base")
334
- tokens = encoding.encode(text)
335
- return len(tokens)
336
- except Exception as e:
337
- logger.warning(f"tiktoken encoding failed: {e}, using fallback")
338
- return len(text.split())
339
-
340
-
341
- def count_words(text: str) -> int:
342
- """Count words in text"""
343
- if not text:
344
- return 0
345
- return len(text.split())
346
-
347
-
348
- def count_sentences(text: str) -> int:
349
- """Count sentences in text (simple heuristic)"""
350
- if not text:
351
- return 0
352
- import re
353
- sentences = re.split(r'[.!?]+', text)
354
- return len([s for s in sentences if s.strip()])
355
-
356
-
357
- # ============================================================================
358
- # GPU MEMORY TRACKING
359
- # ============================================================================
360
-
361
- def get_gpu_memory() -> Dict[str, float]:
362
- """
363
- Get current GPU memory statistics.
364
-
365
- Returns:
366
- Dictionary with allocated, reserved, and peak memory in MB
367
- """
368
- if torch.cuda.is_available():
369
- return {
370
- "allocated_mb": torch.cuda.memory_allocated() / 1024**2,
371
- "reserved_mb": torch.cuda.memory_reserved() / 1024**2,
372
- "peak_mb": torch.cuda.max_memory_allocated() / 1024**2
373
- }
374
- return {
375
- "allocated_mb": 0.0,
376
- "reserved_mb": 0.0,
377
- "peak_mb": 0.0
378
- }
379
-
380
-
381
- def reset_gpu_stats():
382
- """Reset GPU memory statistics"""
383
- if torch.cuda.is_available():
384
- torch.cuda.reset_peak_memory_stats()
385
- torch.cuda.synchronize()
386
-
387
-
388
- # ============================================================================
389
- # TEMPLATE BUILDING FUNCTIONS
390
- # ============================================================================
391
-
392
- def format_history(history: List[Dict]) -> str:
393
- """Format conversation history for templates"""
394
- if not history:
395
- return "No previous conversation"
396
-
397
- formatted = []
398
- for msg in history[-8:]: # Last 8 messages
399
- role = msg.get('role', 'unknown')
400
- content = msg.get('content', '')[:100] # Truncate
401
- formatted.append(f"{role}: {content}")
402
-
403
- return "\n".join(formatted)
404
-
405
-
406
- def build_tool_decision_template(user_prompt: str, history: List) -> str:
407
- """Build template for tool decision agent - matches app.py"""
408
- history_str = format_history(history)
409
- return f"{history_str}\n\nUser Query: {user_prompt}"
410
-
411
-
412
- def build_agent1_template(user_prompt: str, history: List) -> str:
413
- """Build template for Agent 1: Practice Questions"""
414
- history_str = format_history(history)
415
- return f"<s>[INST] {agent_1_system}\n\nConversation History:\n{history_str}\n\nCurrent User Query: {user_prompt} [/INST]"
416
-
417
-
418
- def build_agent2_template(user_prompt: str) -> str:
419
- """Build template for Agent 2: Discovery Mode"""
420
- return f"<s>[INST] {agent_2_system}\n\nUser Query: {user_prompt} [/INST]"
421
-
422
-
423
- def build_agent3_template(user_prompt: str, history: List) -> str:
424
- """Build template for Agent 3: Followup Assessment"""
425
- history_str = format_history(history)
426
- return f"<s>[INST] {agent_3_system}\n\nConversation History:\n{history_str}\n\nCurrent User Query: {user_prompt} [/INST]"
427
-
428
-
429
- def build_agent4_template(user_prompt: str, history: List) -> str:
430
- """Build template for Agent 4: Teaching Mode"""
431
- history_str = format_history(history)
432
- return f"<s>[INST] {agent_4_system}\n\nConversation History:\n{history_str}\n\nCurrent User Query: {user_prompt} [/INST]"
433
-
434
-
435
- def build_math_thinking_template(user_prompt: str) -> str:
436
- """Build template for Math Thinking"""
437
- return f"<s>[INST] {MATH_THINKING}\n\nUser Query: {user_prompt} [/INST]"
438
-
439
-
440
- def build_qa_design_template(user_prompt: str) -> str:
441
- """Build template for QA Design Thinking"""
442
- return f"<s>[INST] {QUESTION_ANSWER_DESIGN}\n\nUser Query: {user_prompt} [/INST]"
443
-
444
-
445
- def build_reasoning_template(user_prompt: str) -> str:
446
- """Build template for Reasoning Thinking"""
447
- return f"<s>[INST] {REASONING_THINKING}\n\nUser Query: {user_prompt} [/INST]"
448
-
449
-
450
- # ============================================================================
451
- # QUALITY METRICS FUNCTIONS
452
- # ============================================================================
453
-
454
- def estimate_syllables(text: str) -> int:
455
- """
456
- Estimate syllable count (rough heuristic).
457
- Counts vowel groups.
458
- """
459
- import re
460
- words = text.lower().split()
461
- syllable_count = 0
462
-
463
- for word in words:
464
- # Remove non-letters
465
- word = re.sub(r'[^a-z]', '', word)
466
- if not word:
467
- continue
468
-
469
- # Count vowel groups
470
- vowel_groups = len(re.findall(r'[aeiouy]+', word))
471
-
472
- # Ensure at least 1 syllable per word
473
- syllable_count += max(1, vowel_groups)
474
-
475
- return syllable_count
476
-
477
-
478
- def calculate_flesch_reading_ease(text: str) -> float:
479
- """
480
- Calculate Flesch Reading Ease score.
481
- Score 0-100: Higher = easier to read
482
- 90-100: Very easy (5th grade)
483
- 60-70: Standard (8th-9th grade)
484
- 0-30: Very difficult (college graduate)
485
-
486
- Formula: 206.835 - 1.015(words/sentences) - 84.6(syllables/words)
487
- """
488
- if not text or len(text.strip()) < 10:
489
- return 0.0
490
-
491
- if TEXTSTAT_AVAILABLE:
492
- try:
493
- return textstat.flesch_reading_ease(text)
494
- except:
495
- pass
496
-
497
- # Manual calculation
498
- words = count_words(text)
499
- sentences = count_sentences(text)
500
-
501
- if sentences == 0 or words == 0:
502
- return 0.0
503
-
504
- syllables = estimate_syllables(text)
505
-
506
- if words == 0:
507
- return 0.0
508
-
509
- score = 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)
510
- return max(0.0, min(100.0, score))
511
-
512
-
513
- def calculate_flesch_kincaid_grade(text: str) -> float:
514
- """
515
- Calculate Flesch-Kincaid Grade Level.
516
- Returns US grade level needed to understand text.
517
-
518
- Formula: 0.39(words/sentences) + 11.8(syllables/words) - 15.59
519
- """
520
- if not text or len(text.strip()) < 10:
521
- return 0.0
522
-
523
- if TEXTSTAT_AVAILABLE:
524
- try:
525
- return textstat.flesch_kincaid_grade(text)
526
- except:
527
- pass
528
-
529
- words = count_words(text)
530
- sentences = count_sentences(text)
531
-
532
- if sentences == 0 or words == 0:
533
- return 0.0
534
-
535
- syllables = estimate_syllables(text)
536
-
537
- if words == 0:
538
- return 0.0
539
-
540
- grade = 0.39 * (words / sentences) + 11.8 * (syllables / words) - 15.59
541
- return max(0.0, grade)
542
-
543
-
544
- def calculate_completeness_score(response: str, user_prompt: str) -> float:
545
- """
546
- Estimate if response addresses the prompt.
547
- Uses keyword overlap and length heuristics.
548
-
549
- Returns: Score 0-1 (1 = complete answer)
550
- """
551
- if not response or not user_prompt:
552
- return 0.0
553
-
554
- import re
555
-
556
- # Extract keywords from prompt
557
- prompt_words = set(re.findall(r'\b\w+\b', user_prompt.lower()))
558
-
559
- # Remove common stopwords
560
- stopwords = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been',
561
- 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will',
562
- 'would', 'should', 'could', 'may', 'might', 'can', 'what',
563
- 'how', 'why', 'when', 'where', 'who', 'which', 'i', 'you',
564
- 'we', 'they', 'he', 'she', 'it', 'me', 'him', 'her', 'us', 'them'}
565
- prompt_words -= stopwords
566
-
567
- response_words = set(re.findall(r'\b\w+\b', response.lower()))
568
-
569
- if not prompt_words:
570
- return 0.5 # Neutral if no meaningful keywords
571
-
572
- # Calculate keyword overlap
573
- overlap = len(prompt_words & response_words) / len(prompt_words)
574
-
575
- # Length factor
576
- min_reasonable_length = 20
577
- if len(response) < min_reasonable_length:
578
- length_factor = len(response) / min_reasonable_length
579
- else:
580
- length_factor = 1.0
581
-
582
- score = overlap * length_factor
583
- return min(1.0, score)
584
-
585
-
586
- def check_question_answered(response: str, user_prompt: str) -> bool:
587
- """
588
- Boolean check: does response attempt to answer the question?
589
-
590
- Heuristics:
591
- - Response has minimum length
592
- - Response doesn't start with refusal
593
- - Response contains relevant keywords
594
- """
595
- if not response or len(response) < 10:
596
- return False
597
-
598
- # Check for refusal patterns
599
- refusal_patterns = [
600
- "i don't know",
601
- "i cannot",
602
- "i can't",
603
- "i'm not sure",
604
- "i don't have",
605
- "unable to",
606
- "sorry, i"
607
- ]
608
-
609
- response_lower = response.lower()
610
- for pattern in refusal_patterns:
611
- if response_lower.startswith(pattern):
612
- return False
613
-
614
- # Check for minimum completeness
615
- completeness = calculate_completeness_score(response, user_prompt)
616
- return completeness > 0.3
617
-
618
-
619
- def calculate_specificity_score(response: str) -> float:
620
- """
621
- Measure how specific vs vague the response is.
622
-
623
- Indicators of specificity:
624
- - Numbers, dates, names
625
- - Technical terms
626
- - Examples
627
- - Concrete nouns
628
-
629
- Returns: Score 0-1 (1 = very specific)
630
- """
631
- if not response:
632
- return 0.0
633
-
634
- import re
635
-
636
- specificity_indicators = 0
637
- total_possible = 5
638
-
639
- # 1. Contains numbers
640
- if re.search(r'\d+', response):
641
- specificity_indicators += 1
642
-
643
- # 2. Contains proper nouns
644
- proper_nouns = len(re.findall(r'(?<!\. )\b[A-Z][a-z]+', response))
645
- if proper_nouns > 0:
646
- specificity_indicators += 1
647
-
648
- # 3. Contains example phrases
649
- example_phrases = ['for example', 'such as', 'for instance', 'like', 'including']
650
- if any(phrase in response.lower() for phrase in example_phrases):
651
- specificity_indicators += 1
652
-
653
- # 4. Average word length
654
- words = response.split()
655
- if words:
656
- avg_word_length = sum(len(w) for w in words) / len(words)
657
- if avg_word_length > 5.0:
658
- specificity_indicators += 1
659
-
660
- # 5. Response length
661
- if len(response) > 200:
662
- specificity_indicators += 1
663
-
664
- return specificity_indicators / total_possible
665
-
666
-
667
- def calculate_repetition_ratio(text: str) -> float:
668
- """
669
- Measure token/word repetition.
670
- Lower = better (less repetitive)
671
-
672
- Returns: Ratio of repeated tokens to total tokens (0-1)
673
- """
674
- if not text:
675
- return 0.0
676
-
677
- words = text.lower().split()
678
- if len(words) < 2:
679
- return 0.0
680
-
681
- word_counts = Counter(words)
682
-
683
- # Count words that appear more than once
684
- repeated_words = sum(count - 1 for count in word_counts.values() if count > 1)
685
-
686
- ratio = repeated_words / len(words)
687
- return min(1.0, ratio)
688
-
689
-
690
- def calculate_unique_word_ratio(text: str) -> float:
691
- """
692
- Measure vocabulary diversity.
693
- Higher = more diverse vocabulary
694
-
695
- Returns: Ratio of unique words to total words (0-1)
696
- """
697
- if not text:
698
- return 0.0
699
-
700
- words = text.lower().split()
701
- if not words:
702
- return 0.0
703
-
704
- unique_words = len(set(words))
705
- return unique_words / len(words)
706
-
707
-
708
- def calculate_avg_sentence_length(text: str) -> float:
709
- """Calculate average sentence length in words"""
710
- sentences = count_sentences(text)
711
- words = count_words(text)
712
-
713
- if sentences == 0:
714
- return 0.0
715
-
716
- return words / sentences
717
-
718
-
719
- # ============================================================================
720
- # INSTRUMENTED PIPELINE RUNNER
721
- # ============================================================================
722
-
723
- def run_full_pipeline_instrumented(user_prompt: str, prompt_index: int = 1) -> Dict:
724
- """
725
- Run the complete orchestration pipeline with full instrumentation.
726
- Captures metrics at every step.
727
-
728
- ✅ UPDATED: Now correctly mirrors app.py orchestrate_turn() process
729
-
730
- Args:
731
- user_prompt: User's input prompt
732
- prompt_index: Index number for this prompt in batch
733
-
734
- Returns:
735
- Dictionary with all metrics for CSV export
736
- """
737
-
738
- result = {
739
- "prompt_index": prompt_index,
740
- "timestamp": datetime.now().isoformat(),
741
- "user_prompt": user_prompt,
742
- "user_prompt_tokens": count_tokens_accurate(user_prompt),
743
- "user_prompt_chars": len(user_prompt),
744
- "user_prompt_words": count_words(user_prompt),
745
- }
746
-
747
- # Track overall start time
748
- pipeline_start = time.time()
749
-
750
- try:
751
- # ============================================================
752
- # STEP 1-2: SETUP
753
- # ============================================================
754
- setup_start = time.time()
755
-
756
- # Reset state
757
- global_state_manager.reset_prompt_state()
758
- prompt_state = global_state_manager.get_prompt_state_manager()
759
-
760
- # Get conversation history (empty for testing)
761
- recent_history = []
762
- recent_history_formatted = "No previous conversation"
763
-
764
- result["conversation_history_length"] = 0
765
- result["conversation_history_tokens"] = 0
766
-
767
- # ============================================================
768
- # STEP 3: TOOL DECISION AGENT (✅ FIXED: Use decide() with history)
769
- # ============================================================
770
- tool_start = time.time()
771
-
772
- tool_template = build_tool_decision_template(user_prompt, recent_history)
773
- tool_input_tokens = count_tokens_accurate(tool_template)
774
-
775
- reset_gpu_stats()
776
-
777
- # ✅ FIXED: Use decide() method with conversation history (matches app.py)
778
- tool_decision_result = tool_agent.decide(user_prompt, recent_history)
779
-
780
- # Capture output
781
- tool_output = str(tool_decision_result)
782
- tool_output_tokens = count_tokens_accurate(tool_output)
783
-
784
- gpu_metrics = get_gpu_memory()
785
- tool_time = time.time() - tool_start
786
-
787
- # Record
788
- result.update({
789
- "tool_decision_input_template": tool_template,
790
- "tool_decision_input_tokens": tool_input_tokens,
791
- "tool_decision_output": tool_output,
792
- "tool_decision_output_tokens": tool_output_tokens,
793
- "tool_decision_result": bool(tool_decision_result),
794
- "tool_decision_time_seconds": round(tool_time, 3),
795
- "tool_decision_gpu_peak_mb": round(gpu_metrics["peak_mb"], 2),
796
- })
797
-
798
- # Update state
799
- tool_img_output = ""
800
- tool_context = ""
801
- if tool_decision_result:
802
- prompt_state.update("TOOL_USE_ENHANCEMENT", True)
803
- # Note: In real app.py, graph generation happens here
804
- # For testing, we'll just note that tools would be used
805
- tool_context = "Tool usage detected (graph would be generated in production)"
806
-
807
- # ============================================================
808
- # STEP 4: REGEX CHECKS
809
- # ============================================================
810
- regex_start = time.time()
811
-
812
- # Apply regex checks (returns list of activated prompts)
813
- regex_before = set(prompt_state.get_active_response_prompts())
814
- logical_expressions.apply_all_checks(user_prompt, prompt_state)
815
- regex_after = set(prompt_state.get_active_response_prompts())
816
- regex_applied = list(regex_after - regex_before)
817
-
818
- regex_time = time.time() - regex_start
819
-
820
- result.update({
821
- "regex_checks_applied": ", ".join(regex_applied) if regex_applied else "None",
822
- "regex_checks_time_seconds": round(regex_time, 3),
823
- })
824
-
825
- # ============================================================
826
- # STEP 5: ROUTING AGENTS (✅ Unified Process - matches app.py)
827
- # ============================================================
828
- routing_start = time.time()
829
-
830
- # Build template (simplified - just the user prompt)
831
- routing_template = f"User Query: {user_prompt}"
832
- routing_input_tokens = count_tokens_accurate(routing_template)
833
-
834
- reset_gpu_stats()
835
-
836
- # ✅ Use unified process() method (matches app.py)
837
- response_prompts_str, thinking_prompts_str = routing_agents.process(
838
- user_input=user_prompt,
839
- tool_used=(tool_decision_result and bool(tool_img_output))
840
- )
841
-
842
- # Parse results
843
- response_prompts = [p.strip() for p in response_prompts_str.split('\n') if p.strip()] if response_prompts_str else []
844
- thinking_prompts = [p.strip() for p in thinking_prompts_str.split('\n') if p.strip()] if thinking_prompts_str else []
845
-
846
- routing_output = f"Response: {', '.join(response_prompts) if response_prompts else 'None'}\nThinking: {', '.join(thinking_prompts) if thinking_prompts else 'None'}"
847
- routing_output_tokens = count_tokens_accurate(routing_output)
848
- gpu_metrics = get_gpu_memory()
849
-
850
- routing_time = time.time() - routing_start
851
-
852
- # Update result with consolidated routing metrics
853
- result.update({
854
- # Agent 1 metrics (legacy columns - use consolidated data)
855
- "agent1_input_template": routing_template,
856
- "agent1_input_tokens": routing_input_tokens // 4, # Divide among 4 agents
857
- "agent1_output": ", ".join([p for p in response_prompts if p in ["STRUCTURE_PRACTICE_QUESTIONS"]]) or "None",
858
- "agent1_output_tokens": routing_output_tokens // 4,
859
- "agent1_decision": "STRUCTURE_PRACTICE_QUESTIONS" in response_prompts,
860
- "agent1_time_seconds": round(routing_time / 4, 3),
861
- "agent1_gpu_peak_mb": round(gpu_metrics["peak_mb"] / 4, 2),
862
-
863
- # Agent 2 metrics
864
- "agent2_input_template": routing_template,
865
- "agent2_input_tokens": routing_input_tokens // 4,
866
- "agent2_output": ", ".join([p for p in response_prompts if p in ["GENERAL_FORMATTING", "LATEX_FORMATTING", "GUIDING_TEACHING"]]) or "None",
867
- "agent2_output_tokens": routing_output_tokens // 4,
868
- "agent2_decision": ", ".join([p for p in response_prompts if p in ["GENERAL_FORMATTING", "LATEX_FORMATTING", "GUIDING_TEACHING"]]) or "NULL",
869
- "agent2_time_seconds": round(routing_time / 4, 3),
870
- "agent2_gpu_peak_mb": round(gpu_metrics["peak_mb"] / 4, 2),
871
-
872
- # Agent 3 metrics
873
- "agent3_input_template": routing_template,
874
- "agent3_input_tokens": routing_input_tokens // 4,
875
- "agent3_output": ", ".join([p for p in response_prompts + thinking_prompts if p in ["PRACTICE_QUESTION_FOLLOWUP", "MATH_THINKING", "QUESTION_ANSWER_DESIGN", "REASONING_THINKING"]]) or "None",
876
- "agent3_output_tokens": routing_output_tokens // 4,
877
- "agent3_decision": any(p in ["PRACTICE_QUESTION_FOLLOWUP", "MATH_THINKING", "QUESTION_ANSWER_DESIGN", "REASONING_THINKING"] for p in response_prompts + thinking_prompts),
878
- "agent3_time_seconds": round(routing_time / 4, 3),
879
- "agent3_gpu_peak_mb": round(gpu_metrics["peak_mb"] / 4, 2),
880
-
881
- # Agent 4 metrics
882
- "agent4_input_template": routing_template,
883
- "agent4_input_tokens": routing_input_tokens // 4,
884
- "agent4_output": ", ".join([p for p in response_prompts if p == "TOOL_USE_ENHANCEMENT"]) or "None",
885
- "agent4_output_tokens": routing_output_tokens // 4,
886
- "agent4_decisions": "TOOL_USE_ENHANCEMENT" if "TOOL_USE_ENHANCEMENT" in response_prompts else "NULL",
887
- "agent4_time_seconds": round(routing_time / 4, 3),
888
- "agent4_gpu_peak_mb": round(gpu_metrics["peak_mb"] / 4, 2),
889
- })
890
-
891
- # Update prompt state with all activated prompts
892
- for prompt_name in response_prompts:
893
- prompt_state.update(prompt_name, True)
894
- for prompt_name in thinking_prompts:
895
- prompt_state.update(prompt_name, True)
896
-
897
- # ============================================================
898
- # STEP 6: THINKING AGENTS (✅ FIXED: Use process() - matches app.py)
899
- # ============================================================
900
-
901
- # Build thinking prompts list (matches app.py logic)
902
- thinking_prompts_list = []
903
- for prompt_name in thinking_prompts:
904
- if prompt_name.strip():
905
- thinking_prompts_list.append(prompt_name.strip())
906
-
907
- # Additional heuristic: Add MATH_THINKING if LATEX_FORMATTING is active
908
- if prompt_state.is_active("LATEX_FORMATTING") and "MATH_THINKING" not in thinking_prompts_list:
909
- thinking_prompts_list.append("MATH_THINKING")
910
- prompt_state.update("MATH_THINKING", True)
911
-
912
- # Execute thinking agents if any are active
913
- thinking_context = ""
914
-
915
- if thinking_prompts_list:
916
- thinking_start = time.time()
917
- thinking_prompts_string = '\n'.join(thinking_prompts_list)
918
-
919
- reset_gpu_stats()
920
-
921
- # ✅ FIXED: Use process() method (matches app.py)
922
- thinking_context = thinking_agents.process(
923
- user_input=user_prompt,
924
- conversation_history=recent_history_formatted,
925
- thinking_prompts=thinking_prompts_string,
926
- tool_img_output=tool_img_output,
927
- tool_context=tool_context
928
- )
929
-
930
- thinking_time = time.time() - thinking_start
931
- gpu_metrics = get_gpu_memory()
932
-
933
- # Record metrics for activated thinking agents
934
- # Note: For simplicity, we're recording aggregate metrics
935
- # In production, you might want to separate these
936
- if "MATH_THINKING" in thinking_prompts_list:
937
- result.update({
938
- "math_thinking_activated": True,
939
- "math_thinking_input_template": build_math_thinking_template(user_prompt),
940
- "math_thinking_input_tokens": count_tokens_accurate(user_prompt),
941
- "math_thinking_output": thinking_context[:500], # Truncate for CSV
942
- "math_thinking_output_tokens": count_tokens_accurate(thinking_context),
943
- "math_thinking_time_seconds": round(thinking_time / len(thinking_prompts_list), 3),
944
- "math_thinking_gpu_peak_mb": round(gpu_metrics["peak_mb"] / len(thinking_prompts_list), 2),
945
- })
946
- else:
947
- result.update({
948
- "math_thinking_activated": False,
949
- "math_thinking_input_template": "NULL",
950
- "math_thinking_input_tokens": 0,
951
- "math_thinking_output": "NULL",
952
- "math_thinking_output_tokens": 0,
953
- "math_thinking_time_seconds": 0.0,
954
- "math_thinking_gpu_peak_mb": 0.0,
955
- })
956
-
957
- if "QUESTION_ANSWER_DESIGN" in thinking_prompts_list:
958
- result.update({
959
- "qa_design_activated": True,
960
- "qa_design_input_template": build_qa_design_template(user_prompt),
961
- "qa_design_input_tokens": count_tokens_accurate(user_prompt),
962
- "qa_design_output": thinking_context[:500],
963
- "qa_design_output_tokens": count_tokens_accurate(thinking_context),
964
- "qa_design_time_seconds": round(thinking_time / len(thinking_prompts_list), 3),
965
- "qa_design_gpu_peak_mb": round(gpu_metrics["peak_mb"] / len(thinking_prompts_list), 2),
966
- })
967
- else:
968
- result.update({
969
- "qa_design_activated": False,
970
- "qa_design_input_template": "NULL",
971
- "qa_design_input_tokens": 0,
972
- "qa_design_output": "NULL",
973
- "qa_design_output_tokens": 0,
974
- "qa_design_time_seconds": 0.0,
975
- "qa_design_gpu_peak_mb": 0.0,
976
- })
977
-
978
- if "REASONING_THINKING" in thinking_prompts_list:
979
- result.update({
980
- "reasoning_activated": True,
981
- "reasoning_input_template": build_reasoning_template(user_prompt),
982
- "reasoning_input_tokens": count_tokens_accurate(user_prompt),
983
- "reasoning_output": thinking_context[:500],
984
- "reasoning_output_tokens": count_tokens_accurate(thinking_context),
985
- "reasoning_time_seconds": round(thinking_time / len(thinking_prompts_list), 3),
986
- "reasoning_gpu_peak_mb": round(gpu_metrics["peak_mb"] / len(thinking_prompts_list), 2),
987
- })
988
- else:
989
- result.update({
990
- "reasoning_activated": False,
991
- "reasoning_input_template": "NULL",
992
- "reasoning_input_tokens": 0,
993
- "reasoning_output": "NULL",
994
- "reasoning_output_tokens": 0,
995
- "reasoning_time_seconds": 0.0,
996
- "reasoning_gpu_peak_mb": 0.0,
997
- })
998
- else:
999
- # No thinking agents activated
1000
- result.update({
1001
- "math_thinking_activated": False,
1002
- "math_thinking_input_template": "NULL",
1003
- "math_thinking_input_tokens": 0,
1004
- "math_thinking_output": "NULL",
1005
- "math_thinking_output_tokens": 0,
1006
- "math_thinking_time_seconds": 0.0,
1007
- "math_thinking_gpu_peak_mb": 0.0,
1008
- "qa_design_activated": False,
1009
- "qa_design_input_template": "NULL",
1010
- "qa_design_input_tokens": 0,
1011
- "qa_design_output": "NULL",
1012
- "qa_design_output_tokens": 0,
1013
- "qa_design_time_seconds": 0.0,
1014
- "qa_design_gpu_peak_mb": 0.0,
1015
- "reasoning_activated": False,
1016
- "reasoning_input_template": "NULL",
1017
- "reasoning_input_tokens": 0,
1018
- "reasoning_output": "NULL",
1019
- "reasoning_output_tokens": 0,
1020
- "reasoning_time_seconds": 0.0,
1021
- "reasoning_gpu_peak_mb": 0.0,
1022
- })
1023
-
1024
- # ============================================================
1025
- # STEP 7-8: PROMPT ASSEMBLY (matches app.py)
1026
- # ============================================================
1027
- assembly_start = time.time()
1028
-
1029
- # Get active response prompts
1030
- active_prompts = prompt_state.get_active_response_prompts()
1031
-
1032
- assembly_time = time.time() - assembly_start
1033
-
1034
- result.update({
1035
- "active_response_prompts": ", ".join(active_prompts),
1036
- "final_prompt_template": "Response input dict (see response_input_template)",
1037
- "final_prompt_tokens": 0, # Will be calculated in response step
1038
- "final_prompt_chars": 0,
1039
- "final_prompt_words": 0,
1040
- "assembly_time_seconds": round(assembly_time, 3),
1041
- })
1042
-
1043
- # ============================================================
1044
- # STEP 9: RESPONSE GENERATION (✅ FIXED: Use input_data dict)
1045
- # ============================================================
1046
- response_start = time.time()
1047
-
1048
- reset_gpu_stats()
1049
-
1050
- # ✅ FIXED: Build input_data dict (matches app.py Step 8)
1051
- input_data = {
1052
- 'user_query': user_prompt,
1053
- 'conversation_history': recent_history,
1054
- 'active_prompts': active_prompts,
1055
- 'thinking_context': thinking_context,
1056
- 'tool_context': tool_context,
1057
- }
1058
-
1059
- # ✅ FIXED: Invoke with dict and extract response (matches app.py)
1060
- result_dict = response_agent.invoke(input_data)
1061
- raw_response = result_dict.get('response', '')
1062
- metadata = result_dict.get('metadata', {})
1063
-
1064
- response_time = time.time() - response_start
1065
-
1066
- raw_tokens = count_tokens_accurate(raw_response)
1067
- raw_chars = len(raw_response)
1068
- raw_words = count_words(raw_response)
1069
- tokens_per_sec = raw_tokens / response_time if response_time > 0 else 0
1070
-
1071
- gpu_metrics = get_gpu_memory()
1072
-
1073
- # Calculate input template string for metrics
1074
- input_template_str = f"user_query: {user_prompt[:100]}..., active_prompts: {active_prompts}, thinking: {len(thinking_context)} chars, tool: {len(tool_context)} chars"
1075
-
1076
- result.update({
1077
- "response_input_template": input_template_str,
1078
- "response_input_tokens": count_tokens_accurate(input_template_str),
1079
- "response_raw": raw_response,
1080
- "response_raw_tokens": raw_tokens,
1081
- "response_raw_chars": raw_chars,
1082
- "response_raw_words": raw_words,
1083
- "response_generation_time_seconds": round(response_time, 3),
1084
- "response_gpu_peak_mb": round(gpu_metrics["peak_mb"], 2),
1085
- "response_tokens_per_second": round(tokens_per_sec, 2),
1086
- })
1087
-
1088
- # ============================================================
1089
- # STEP 10: POST-PROCESSING (matches app.py)
1090
- # ============================================================
1091
- postprocess_start = time.time()
1092
-
1093
- processed_response = post_processor.process_response(raw_response, user_prompt)
1094
-
1095
- postprocess_time = time.time() - postprocess_start
1096
-
1097
- processed_tokens = count_tokens_accurate(processed_response)
1098
- processed_chars = len(processed_response)
1099
- processed_words = count_words(processed_response)
1100
-
1101
- result.update({
1102
- "response_processed": processed_response,
1103
- "response_processed_tokens": processed_tokens,
1104
- "response_processed_chars": processed_chars,
1105
- "response_processed_words": processed_words,
1106
- "postprocessing_time_seconds": round(postprocess_time, 3),
1107
- })
1108
-
1109
- # ============================================================
1110
- # QUALITY METRICS
1111
- # ============================================================
1112
- flesch_ease = calculate_flesch_reading_ease(processed_response)
1113
- flesch_grade = calculate_flesch_kincaid_grade(processed_response)
1114
- completeness = calculate_completeness_score(processed_response, user_prompt)
1115
- specificity = calculate_specificity_score(processed_response)
1116
- repetition = calculate_repetition_ratio(processed_response)
1117
- unique_ratio = calculate_unique_word_ratio(processed_response)
1118
- avg_sent_len = calculate_avg_sentence_length(processed_response)
1119
- question_answered = check_question_answered(processed_response, user_prompt)
1120
-
1121
- result.update({
1122
- "flesch_reading_ease": round(flesch_ease, 2),
1123
- "flesch_kincaid_grade": round(flesch_grade, 2),
1124
- "completeness_score": round(completeness, 3),
1125
- "specificity_score": round(specificity, 3),
1126
- "repetition_ratio": round(repetition, 3),
1127
- "unique_word_ratio": round(unique_ratio, 3),
1128
- "avg_sentence_length": round(avg_sent_len, 2),
1129
- "question_answered": question_answered,
1130
- })
1131
-
1132
- # ============================================================
1133
- # OVERALL METRICS
1134
- # ============================================================
1135
- total_pipeline_time = time.time() - pipeline_start
1136
-
1137
- # Count activated models
1138
- models_activated = []
1139
- if result["tool_decision_time_seconds"] > 0:
1140
- models_activated.append("Tool Decision")
1141
- if result["agent1_time_seconds"] > 0:
1142
- models_activated.append("Routing Agents")
1143
- if result["math_thinking_activated"]:
1144
- models_activated.append("Math Thinking")
1145
- if result["qa_design_activated"]:
1146
- models_activated.append("QA Design")
1147
- if result["reasoning_activated"]:
1148
- models_activated.append("Reasoning")
1149
- models_activated.append("Response Agent")
1150
-
1151
- # Sum all input tokens
1152
- total_input_tokens = (
1153
- result["tool_decision_input_tokens"] +
1154
- result["agent1_input_tokens"] * 4 + # Multiply back since we divided
1155
- result.get("math_thinking_input_tokens", 0) +
1156
- result.get("qa_design_input_tokens", 0) +
1157
- result.get("reasoning_input_tokens", 0) +
1158
- result["response_input_tokens"]
1159
- )
1160
-
1161
- # Sum all output tokens
1162
- total_output_tokens = (
1163
- result["tool_decision_output_tokens"] +
1164
- result["agent1_output_tokens"] * 4 +
1165
- result.get("math_thinking_output_tokens", 0) +
1166
- result.get("qa_design_output_tokens", 0) +
1167
- result.get("reasoning_output_tokens", 0) +
1168
- result["response_raw_tokens"]
1169
- )
1170
-
1171
- # Max GPU across all steps
1172
- total_gpu_peak = max([
1173
- result["tool_decision_gpu_peak_mb"],
1174
- result["agent1_gpu_peak_mb"],
1175
- result.get("math_thinking_gpu_peak_mb", 0.0),
1176
- result.get("qa_design_gpu_peak_mb", 0.0),
1177
- result.get("reasoning_gpu_peak_mb", 0.0),
1178
- result["response_gpu_peak_mb"],
1179
- ])
1180
-
1181
- result.update({
1182
- "total_pipeline_time_seconds": round(total_pipeline_time, 3),
1183
- "total_input_tokens": total_input_tokens,
1184
- "total_output_tokens": total_output_tokens,
1185
- "total_gpu_peak_mb": round(total_gpu_peak, 2),
1186
- "models_activated_count": len(models_activated),
1187
- "models_activated_list": ", ".join(models_activated),
1188
- })
1189
-
1190
- logger.info(f"✓ Prompt {prompt_index} complete: {total_pipeline_time:.2f}s, {len(models_activated)} models activated")
1191
-
1192
- return result
1193
-
1194
- except Exception as e:
1195
- logger.error(f"Pipeline execution failed for prompt {prompt_index}: {e}")
1196
- import traceback
1197
- traceback.print_exc()
1198
-
1199
- # Return error result with NULLs
1200
- error_result = {col: "ERROR" for col in CSV_COLUMNS}
1201
- error_result.update({
1202
- "prompt_index": prompt_index,
1203
- "timestamp": datetime.now().isoformat(),
1204
- "user_prompt": user_prompt,
1205
- "user_prompt_tokens": count_tokens_accurate(user_prompt),
1206
- "user_prompt_chars": len(user_prompt),
1207
- "user_prompt_words": count_words(user_prompt),
1208
- })
1209
-
1210
- return error_result
1211
-
1212
-
1213
- # ============================================================================
1214
- # BATCH PROCESSING
1215
- # ============================================================================
1216
-
1217
- @spaces.GPU(duration=600)
1218
- def process_batch_full_pipeline(
1219
- user_prompts: List[str],
1220
- progress_callback=None
1221
- ) -> List[Dict]:
1222
- """
1223
- Process batch of prompts through FULL PIPELINE.
1224
- Sequential processing - one at a time.
1225
-
1226
- Args:
1227
- user_prompts: List of user prompts to test
1228
- progress_callback: Optional callback for progress updates
1229
-
1230
- Returns:
1231
- List of result dictionaries (one per prompt)
1232
- """
1233
- results = []
1234
- total = len(user_prompts)
1235
-
1236
- logger.info(f"="*60)
1237
- logger.info(f"Starting full pipeline batch: {total} prompts")
1238
- logger.info(f"="*60)
1239
-
1240
- batch_start = time.time()
1241
-
1242
- for idx, user_prompt in enumerate(user_prompts, 1):
1243
- logger.info(f"\n{'='*60}")
1244
- logger.info(f"Processing prompt {idx}/{total}")
1245
- logger.info(f"Prompt: {user_prompt[:80]}...")
1246
- logger.info(f"{'='*60}")
1247
-
1248
- try:
1249
- # Run full instrumented pipeline
1250
- result = run_full_pipeline_instrumented(user_prompt, prompt_index=idx)
1251
-
1252
- results.append(result)
1253
-
1254
- logger.info(f"✓ Prompt {idx} complete")
1255
- logger.info(f" Total time: {result.get('total_pipeline_time_seconds', 0):.2f}s")
1256
- logger.info(f" Models activated: {result.get('models_activated_count', 0)}")
1257
- logger.info(f" Total tokens: {result.get('total_input_tokens', 0) + result.get('total_output_tokens', 0)}")
1258
-
1259
- if progress_callback:
1260
- progress_callback(idx, total)
1261
-
1262
- except Exception as e:
1263
- logger.error(f"❌ Prompt {idx} failed: {e}")
1264
- import traceback
1265
- traceback.print_exc()
1266
-
1267
- # Add error result
1268
- error_result = {col: "ERROR" for col in CSV_COLUMNS}
1269
- error_result.update({
1270
- "prompt_index": idx,
1271
- "timestamp": datetime.now().isoformat(),
1272
- "user_prompt": user_prompt,
1273
- "user_prompt_tokens": count_tokens_accurate(user_prompt),
1274
- })
1275
- results.append(error_result)
1276
-
1277
- batch_duration = time.time() - batch_start
1278
-
1279
- logger.info(f"\n{'='*60}")
1280
- logger.info(f"BATCH COMPLETE")
1281
- logger.info(f"{'='*60}")
1282
- logger.info(f"Processed: {len(results)}/{total} prompts")
1283
- logger.info(f"Total batch time: {batch_duration:.2f}s")
1284
- logger.info(f"Average per prompt: {batch_duration/total:.2f}s")
1285
- logger.info(f"{'='*60}")
1286
-
1287
- return results
1288
-
1289
-
1290
- # ============================================================================
1291
- # CSV EXPORT
1292
- # ============================================================================
1293
-
1294
- def export_full_pipeline_csv(
1295
- results: List[Dict],
1296
- test_name: str = "pipeline_test"
1297
- ) -> str:
1298
- """
1299
- Export full pipeline results to CSV.
1300
-
1301
- Args:
1302
- results: List of result dictionaries
1303
- test_name: Name for the test (used in filename)
1304
-
1305
- Returns:
1306
- Filepath of exported CSV
1307
- """
1308
- try:
1309
- timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
1310
- filename = f"mimir_full_pipeline_{test_name}_{timestamp}.csv"
1311
- filepath = os.path.join("/tmp", filename) # Save to /tmp for ZeroGPU
1312
-
1313
- if not results:
1314
- logger.warning("No results to export")
1315
- return None
1316
-
1317
- logger.info(f"Exporting {len(results)} results to CSV...")
1318
-
1319
- # Write CSV
1320
- with open(filepath, 'w', newline='', encoding='utf-8') as f:
1321
- writer = csv.DictWriter(f, fieldnames=CSV_COLUMNS)
1322
- writer.writeheader()
1323
-
1324
- for result in results:
1325
- # Fill missing keys with NULL
1326
- row = {key: result.get(key, "NULL") for key in CSV_COLUMNS}
1327
- writer.writerow(row)
1328
-
1329
- logger.info(f"✓ Full pipeline results exported to {filepath}")
1330
- logger.info(f" Columns: {len(CSV_COLUMNS)}")
1331
- logger.info(f" Rows: {len(results)}")
1332
-
1333
- return filepath
1334
-
1335
- except Exception as e:
1336
- logger.error(f"CSV export failed: {e}")
1337
- import traceback
1338
- traceback.print_exc()
1339
- return None
1340
-
1341
-
1342
- def calculate_summary_stats(results: List[Dict]) -> Dict:
1343
- """Calculate summary statistics from results"""
1344
- if not results:
1345
- return {}
1346
-
1347
- valid_results = [r for r in results if r.get("total_pipeline_time_seconds") != "ERROR"]
1348
-
1349
- if not valid_results:
1350
- return {"error": "No valid results"}
1351
-
1352
- return {
1353
- "total_prompts": len(results),
1354
- "successful_prompts": len(valid_results),
1355
- "failed_prompts": len(results) - len(valid_results),
1356
- "avg_pipeline_time_seconds": round(np.mean([r["total_pipeline_time_seconds"] for r in valid_results]), 3),
1357
- "min_pipeline_time_seconds": round(np.min([r["total_pipeline_time_seconds"] for r in valid_results]), 3),
1358
- "max_pipeline_time_seconds": round(np.max([r["total_pipeline_time_seconds"] for r in valid_results]), 3),
1359
- "avg_total_tokens": round(np.mean([r["total_input_tokens"] + r["total_output_tokens"] for r in valid_results]), 1),
1360
- "avg_models_activated": round(np.mean([r["models_activated_count"] for r in valid_results]), 2),
1361
- "avg_gpu_peak_mb": round(np.mean([r["total_gpu_peak_mb"] for r in valid_results]), 2),
1362
- "avg_completeness_score": round(np.mean([r["completeness_score"] for r in valid_results]), 3),
1363
- "avg_flesch_reading_ease": round(np.mean([r["flesch_reading_ease"] for r in valid_results]), 2),
1364
- "questions_answered_pct": round(100 * sum([r["question_answered"] for r in valid_results]) / len(valid_results), 1),
1365
- }
1366
-
1367
-
1368
- # ============================================================================
1369
- # GRADIO INTERFACE
1370
- # ============================================================================
1371
-
1372
- with gr.Blocks(title="Mimir - Full Pipeline Testing", theme=gr.themes.Soft()) as demo:
1373
- gr.Markdown("# 🧪 Mimir Full Pipeline Testing")
1374
- gr.Markdown("""
1375
- Test the **complete orchestration flow** with comprehensive metrics at every step.
1376
-
1377
- **✅ UPDATED:** Now correctly mirrors app.py orchestrate_turn() process
1378
- - Tool decision uses `decide()` method with conversation history
1379
- - Response agent invoked with `input_data` dict (not raw string)
1380
- - Thinking agents use `process()` method matching app.py
1381
-
1382
- **What this tests:**
1383
- - ✅ Tool Decision Agent
1384
- - ✅ All 4 Routing Agents (unified process)
1385
- - ✅ Thinking Agents (conditional: Math, QA Design, Reasoning)
1386
- - ✅ Response Agent (Llama-3.2-3B)
1387
- - ✅ Post-processing
1388
-
1389
- **Output:** CSV file with ~110 columns capturing the full pipeline journey
1390
- """)
1391
-
1392
- with gr.Row():
1393
- with gr.Column(scale=1):
1394
- gr.Markdown("## 📝 Test Configuration")
1395
-
1396
- test_name = gr.Textbox(
1397
- label="Test Name",
1398
- value="pipeline_test",
1399
- placeholder="Enter a name for this test run",
1400
- )
1401
-
1402
- gr.Markdown("### Input Method")
1403
-
1404
- input_method = gr.Radio(
1405
- choices=["CSV Upload", "Manual Entry"],
1406
- value="Manual Entry",
1407
- label="Choose Input Method"
1408
- )
1409
-
1410
- # CSV upload
1411
- with gr.Group(visible=False) as csv_section:
1412
- csv_file = gr.File(
1413
- label="Upload CSV File",
1414
- file_types=[".csv"],
1415
- )
1416
-
1417
- # Manual entry
1418
- with gr.Group(visible=True) as manual_section:
1419
- prompt_text = gr.Textbox(
1420
- label="Enter Prompts (one per line)",
1421
- lines=15,
1422
- placeholder="What is calculus?\nHelp me understand photosynthesis\nCan you create practice questions for algebra?\nExplain Newton's laws of motion",
1423
- )
1424
-
1425
- process_btn = gr.Button(
1426
- "🚀 Run Full Pipeline Test",
1427
- variant="primary",
1428
- size="lg"
1429
- )
1430
-
1431
- status = gr.Textbox(
1432
- label="Status",
1433
- interactive=False,
1434
- lines=3
1435
- )
1436
-
1437
- with gr.Column(scale=1):
1438
- gr.Markdown("## 📊 Results")
1439
-
1440
- results_summary = gr.JSON(
1441
- label="Summary Statistics",
1442
- height=400
1443
- )
1444
-
1445
- gr.Markdown("### Download Results")
1446
-
1447
- download_csv = gr.File(
1448
- label="CSV Export",
1449
- interactive=False
1450
- )
1451
-
1452
- gr.Markdown("""
1453
- **CSV contains ~110 columns:**
1454
- - Input metrics (tokens, chars, words)
1455
- - Template for each agent
1456
- - Output for each agent
1457
- - Timing for each step
1458
- - GPU usage per step
1459
- - Quality metrics (readability, completeness, etc.)
1460
- - Overall pipeline metrics
1461
- """)
1462
-
1463
- # Toggle between input methods
1464
- def toggle_input_method(method):
1465
- if method == "CSV Upload":
1466
- return gr.update(visible=True), gr.update(visible=False)
1467
- else:
1468
- return gr.update(visible=False), gr.update(visible=True)
1469
-
1470
- input_method.change(
1471
- fn=toggle_input_method,
1472
- inputs=[input_method],
1473
- outputs=[csv_section, manual_section]
1474
- )
1475
-
1476
- # Main processing function
1477
- def run_pipeline_test(test_name, input_method, csv_file, prompt_text):
1478
- """Run the full pipeline test"""
1479
-
1480
- # Parse prompts
1481
- prompts = []
1482
-
1483
- if input_method == "CSV Upload" and csv_file:
1484
- try:
1485
- # Read CSV
1486
- content = csv_file.decode('utf-8') if isinstance(csv_file, bytes) else csv_file
1487
- if hasattr(content, 'read'):
1488
- content = content.read()
1489
- if isinstance(content, bytes):
1490
- content = content.decode('utf-8')
1491
-
1492
- reader = csv.reader(io.StringIO(str(content)))
1493
- prompts = [row[0].strip() for row in reader if row and row[0].strip()]
1494
-
1495
- # Skip header if present
1496
- if prompts and any(header in prompts[0].lower() for header in ['prompt', 'text', 'query', 'input']):
1497
- prompts = prompts[1:]
1498
-
1499
- except Exception as e:
1500
- return f"❌ CSV parsing error: {e}", {}, None
1501
-
1502
- elif input_method == "Manual Entry" and prompt_text:
1503
- prompts = [p.strip() for p in prompt_text.split('\n') if p.strip()]
1504
-
1505
- if not prompts:
1506
- return "❌ No prompts provided. Please enter at least one prompt.", {}, None
1507
-
1508
- status_msg = f"🔄 Processing {len(prompts)} prompts through full pipeline...\n"
1509
- status_msg += "This may take several minutes. Please wait...\n"
1510
-
1511
- try:
1512
- # Run batch
1513
- results = process_batch_full_pipeline(prompts)
1514
-
1515
- # Calculate summary
1516
- summary = calculate_summary_stats(results)
1517
-
1518
- # Export CSV
1519
- csv_path = export_full_pipeline_csv(results, test_name)
1520
-
1521
- status_msg = f"✅ Complete!\n"
1522
- status_msg += f"Processed: {len(results)} prompts\n"
1523
- status_msg += f"Successful: {summary.get('successful_prompts', 0)}\n"
1524
- status_msg += f"Failed: {summary.get('failed_prompts', 0)}\n"
1525
- status_msg += f"CSV ready for download!"
1526
-
1527
- return status_msg, summary, csv_path
1528
-
1529
- except Exception as e:
1530
- error_msg = f"❌ Pipeline test failed: {str(e)}"
1531
- logger.error(error_msg)
1532
- import traceback
1533
- traceback.print_exc()
1534
- return error_msg, {}, None
1535
-
1536
- # Wire up event
1537
- process_btn.click(
1538
- fn=run_pipeline_test,
1539
- inputs=[test_name, input_method, csv_file, prompt_text],
1540
- outputs=[status, results_summary, download_csv]
1541
- )
1542
-
1543
- # ============================================================================
1544
- # LAUNCH
1545
- # ============================================================================
1546
-
1547
- if __name__ == "__main__":
1548
- logger.info("="*60)
1549
- logger.info("LAUNCHING MIMIR FULL PIPELINE TESTING INTERFACE")
1550
- logger.info("✅ UPDATED: Now correctly mirrors app.py orchestration")
1551
- logger.info("="*60)
1552
- logger.info(f"CSV Schema: {len(CSV_COLUMNS)} columns")
1553
- logger.info(f"Agents initialized: {AGENTS_AVAILABLE}")
1554
- logger.info(f"Tiktoken available: {TIKTOKEN_AVAILABLE}")
1555
- logger.info(f"Textstat available: {TEXTSTAT_AVAILABLE}")
1556
- logger.info(f"ZeroGPU available: {ZERO_GPU_AVAILABLE}")
1557
- logger.info("="*60)
1558
-
1559
- demo.launch(
1560
- server_name="0.0.0.0",
1561
- server_port=7862,
1562
- share=False,
1563
- debug=True
1564
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
graph_tool.py CHANGED
@@ -1,5 +1,3 @@
1
- #graph_tool.py
2
-
3
  import base64
4
  import io
5
  import json
 
 
 
1
  import base64
2
  import io
3
  import json
loading_animation.gif DELETED
Binary file (52.4 kB)
 
loading_animations.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Loading animations for Gradio chatbot interface.
3
+ Contains functions to generate animated thinking indicators with just pulsing dots.
4
+ """
5
+
6
+ def create_thinking_indicator():
7
+ """
8
+ Creates an HTML thinking indicator with just animated dots.
9
+
10
+ Returns:
11
+ str: HTML string with animated dots only
12
+ """
13
+ return '''<div class="thinking-indicator">
14
+ <div class="dots-container">
15
+ <span class="dot"></span>
16
+ <span class="dot"></span>
17
+ <span class="dot"></span>
18
+ </div>
19
+ </div>'''
20
+
21
+ def create_custom_dot_indicator(dot_count=3):
22
+ """
23
+ Creates a thinking indicator with specified number of dots.
24
+
25
+ Args:
26
+ dot_count (int): Number of animated dots (default: 3)
27
+
28
+ Returns:
29
+ str: HTML string with custom number of dots
30
+ """
31
+ dots = ''.join(['<span class="dot"></span>' for _ in range(dot_count)])
32
+
33
+ return f'''<div class="thinking-indicator">
34
+ <div class="dots-container">
35
+ {dots}
36
+ </div>
37
+ </div>'''
38
+
39
+ # Main function to use in the chatbot
40
+ def get_thinking_dots():
41
+ """
42
+ Returns the standard thinking dots indicator.
43
+
44
+ Returns:
45
+ str: HTML string with animated thinking dots
46
+ """
47
+ return create_thinking_indicator()
48
+
49
+ # Quick usage example:
50
+ if __name__ == "__main__":
51
+ print("Thinking dots indicator:")
52
+ print(get_thinking_dots())
model_manager.py DELETED
@@ -1,270 +0,0 @@
1
- # model_manager.py
2
- """
3
- Lazy-loading Llama-3.2-3B-Instruct with proper ZeroGPU context management.
4
-
5
- KEY FIX: Each generate() call is wrapped with @spaces.GPU to ensure
6
- the model is accessible during generation.
7
- """
8
-
9
- import os
10
- import torch
11
- import logging
12
- from typing import Optional, Iterator
13
- from transformers import (
14
- AutoTokenizer,
15
- AutoModelForCausalLM,
16
- BitsAndBytesConfig,
17
- pipeline as create_pipeline
18
- )
19
-
20
- # ZeroGPU support
21
- try:
22
- import spaces
23
- HF_SPACES_AVAILABLE = True
24
- except ImportError:
25
- HF_SPACES_AVAILABLE = False
26
- class DummySpaces:
27
- @staticmethod
28
- def GPU(duration=90):
29
- def decorator(func):
30
- return func
31
- return decorator
32
- spaces = DummySpaces()
33
-
34
- logger = logging.getLogger(__name__)
35
-
36
- # Configuration
37
- MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"
38
- HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
39
-
40
-
41
- class LazyLlamaModel:
42
- """
43
- Singleton lazy-loading model with proper ZeroGPU context management.
44
-
45
- CRITICAL FIX: Model components are loaded fresh within each @spaces.GPU
46
- decorated call, ensuring GPU context is maintained throughout generation.
47
- """
48
-
49
- _instance = None
50
- _initialized = False
51
-
52
- def __new__(cls):
53
- if cls._instance is None:
54
- cls._instance = super().__new__(cls)
55
- return cls._instance
56
-
57
- def __init__(self):
58
- if not self._initialized:
59
- self.model_id = MODEL_ID
60
- self.token = HF_TOKEN
61
-
62
- # Don't load model here - load it inside GPU-decorated functions
63
- self.tokenizer = None
64
- self.model = None
65
- self.pipeline = None
66
-
67
- LazyLlamaModel._initialized = True
68
- logger.info(f"LazyLlamaModel initialized (model will load on first generate)")
69
-
70
- def _load_model_components(self):
71
- """
72
- Load model components. Called INSIDE @spaces.GPU decorated functions.
73
- This ensures GPU context is maintained.
74
- """
75
- if self.model is not None and self.tokenizer is not None:
76
- return # Already loaded in this context
77
-
78
- logger.info("="*60)
79
- logger.info("LOADING LLAMA-3.2-3B-INSTRUCT")
80
- logger.info("="*60)
81
-
82
- # Load tokenizer
83
- logger.info(f"Loading: {self.model_id}")
84
- self.tokenizer = AutoTokenizer.from_pretrained(
85
- self.model_id,
86
- token=self.token,
87
- trust_remote_code=True
88
- )
89
- logger.info(f"✓ Tokenizer loaded: {type(self.tokenizer).__name__}")
90
-
91
- # Configure 4-bit quantization
92
- logger.info("Config: 4-bit NF4 quantization")
93
- bnb_config = BitsAndBytesConfig(
94
- load_in_4bit=True,
95
- bnb_4bit_use_double_quant=True,
96
- bnb_4bit_quant_type="nf4",
97
- bnb_4bit_compute_dtype=torch.float16
98
- )
99
-
100
- # Load model with quantization
101
- self.model = AutoModelForCausalLM.from_pretrained(
102
- self.model_id,
103
- quantization_config=bnb_config,
104
- device_map="auto",
105
- token=self.token,
106
- trust_remote_code=True,
107
- torch_dtype=torch.float16,
108
- )
109
- logger.info(f"✓ Model loaded: {type(self.model).__name__}")
110
-
111
- # Create pipeline
112
- self.pipeline = create_pipeline(
113
- "text-generation",
114
- model=self.model,
115
- tokenizer=self.tokenizer,
116
- device_map="auto"
117
- )
118
- logger.info("✓ Pipeline created and verified: TextGenerationPipeline")
119
-
120
- logger.info("="*60)
121
- logger.info("✅ MODEL LOADED & CACHED")
122
- logger.info(f" Model: {self.model_id}")
123
- logger.info(f" Tokenizer: {type(self.tokenizer).__name__}")
124
- logger.info(f" Pipeline: {type(self.pipeline).__name__}")
125
- logger.info(f" Memory: ~1GB VRAM")
126
- logger.info(f" Context: 128K tokens")
127
- logger.info("="*60)
128
-
129
- @spaces.GPU(duration=90)
130
- def generate(
131
- self,
132
- system_prompt: str,
133
- user_message: str,
134
- max_tokens: int = 500,
135
- temperature: float = 0.7
136
- ) -> str:
137
- """
138
- Generate text with proper GPU context management.
139
-
140
- CRITICAL: @spaces.GPU decorator ensures model stays in GPU context
141
- throughout the entire generation process.
142
- """
143
- # Load model components if not already loaded
144
- self._load_model_components()
145
-
146
- # Verify pipeline is available
147
- if self.pipeline is None:
148
- raise RuntimeError(
149
- "Pipeline is None after loading. This may be a ZeroGPU context issue. "
150
- "Check that _load_model_components() completed successfully."
151
- )
152
-
153
- # Format prompt with chat template
154
- messages = [
155
- {"role": "system", "content": system_prompt},
156
- {"role": "user", "content": user_message}
157
- ]
158
-
159
- prompt = self.tokenizer.apply_chat_template(
160
- messages,
161
- tokenize=False,
162
- add_generation_prompt=True
163
- )
164
-
165
- # Generate
166
- outputs = self.pipeline(
167
- prompt,
168
- max_new_tokens=max_tokens,
169
- temperature=temperature,
170
- do_sample=temperature > 0,
171
- pad_token_id=self.tokenizer.eos_token_id,
172
- eos_token_id=self.tokenizer.eos_token_id,
173
- return_full_text=False
174
- )
175
-
176
- response = outputs[0]['generated_text']
177
- return response.strip()
178
-
179
- @spaces.GPU(duration=90)
180
- def generate_streaming(
181
- self,
182
- system_prompt: str,
183
- user_message: str,
184
- max_tokens: int = 500,
185
- temperature: float = 0.7
186
- ) -> Iterator[str]:
187
- """
188
- Generate text with streaming output.
189
-
190
- CRITICAL: @spaces.GPU decorator ensures model stays in GPU context.
191
- """
192
- # Load model components if not already loaded
193
- self._load_model_components()
194
-
195
- # Verify pipeline is available
196
- if self.pipeline is None:
197
- raise RuntimeError(
198
- "Pipeline is None after loading. This may be a ZeroGPU context issue."
199
- )
200
-
201
- # Format prompt
202
- messages = [
203
- {"role": "system", "content": system_prompt},
204
- {"role": "user", "content": user_message}
205
- ]
206
-
207
- prompt = self.tokenizer.apply_chat_template(
208
- messages,
209
- tokenize=False,
210
- add_generation_prompt=True
211
- )
212
-
213
- # Tokenize
214
- inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
215
-
216
- # Generate with streaming
217
- last_output_len = 0
218
-
219
- with torch.no_grad():
220
- for _ in range(max_tokens):
221
- outputs = self.model.generate(
222
- **inputs,
223
- max_new_tokens=1,
224
- temperature=temperature,
225
- do_sample=temperature > 0,
226
- pad_token_id=self.tokenizer.eos_token_id,
227
- eos_token_id=self.tokenizer.eos_token_id,
228
- )
229
-
230
- # Decode new tokens
231
- current_output = self.tokenizer.decode(
232
- outputs[0][inputs['input_ids'].shape[1]:],
233
- skip_special_tokens=True
234
- )
235
-
236
- # Yield new content
237
- if len(current_output) > last_output_len:
238
- new_text = current_output[last_output_len:]
239
- yield new_text
240
- last_output_len = len(current_output)
241
-
242
- # Check for EOS
243
- if outputs[0][-1] == self.tokenizer.eos_token_id:
244
- break
245
-
246
- # Update inputs for next iteration
247
- inputs = {
248
- 'input_ids': outputs,
249
- 'attention_mask': torch.ones_like(outputs)
250
- }
251
-
252
-
253
- # Singleton instance
254
- _model_instance = None
255
-
256
- def get_model() -> LazyLlamaModel:
257
- """Get the singleton model instance"""
258
- global _model_instance
259
- if _model_instance is None:
260
- _model_instance = LazyLlamaModel()
261
- return _model_instance
262
-
263
-
264
- # Backwards compatibility aliases (within same module - no import)
265
- get_shared_llama = get_model
266
- MistralSharedAgent = LazyLlamaModel
267
- LlamaSharedAgent = LazyLlamaModel
268
-
269
- # DO NOT ADD THIS LINE - IT CAUSES CIRCULAR IMPORT:
270
- # from model_manager import get_model as get_shared_llama, LazyLlamaModel as LlamaSharedAgent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
prompt_library.py DELETED
@@ -1,534 +0,0 @@
1
- # prompt_library.py
2
- '''This file is to be the dedicated prompt library repository. Rather than keeping the full library in the app.py, the prompts will be centralized here for ease of editing.'''
3
-
4
- '''
5
- Prompts for Response Generation Input Templating
6
- '''
7
- # --- Always Included ---
8
-
9
- # Core Identity (Universal Base)
10
- CORE_IDENTITY = """
11
-
12
- ## System Instruction:
13
-
14
- You are a tutor. Your goal is to help the user reach their educational objectives through clear, focused responses. Before generating a reply, analyze the user's prompt internally using the steps below. Do not expose this reasoning in your final output.
15
-
16
- ### Internal Analysis (not shown to user)
17
-
18
- 1. Is the user asking about a specific topic or requesting a clear action?
19
- 2. Is their intent explicit or does it need interpretation?
20
- 3. Do they show familiarity with the topic, or is their understanding unclear?
21
- 4. Have they made any factual errors or assumptions that can be addressed constructively?
22
-
23
- Use the combined answers to guide your response. Only output your final answer—no internal thought process or explanations unless explicitly requested.
24
-
25
- ### Response Guidelines
26
-
27
- * Provide a direct, educational response that supports the user’s learning goals.
28
- * Keep responses concise, relevant, and free of unnecessary context.
29
- * Do not include internal reasoning or meta-commentary.
30
- * When correcting mistakes, present them as learning opportunities with supportive tone.
31
-
32
- ### Communication Standards
33
-
34
- * Use clear, professional language appropriate for a teen or young adult audience.
35
- * Be supportive and respectful, not condescending.
36
- * Avoid slang, sarcasm, or inappropriate language—even if the user includes it.
37
- * Match the user's tone briefly if casual, but return quickly to a constructive and focused tone.
38
- * Do not use emojis or overly expressive language.
39
-
40
-
41
- ### Verbosity and Relevance
42
-
43
- * Keep responses as brief as possible while fully addressing the user’s goal.
44
- * Avoid repetition, filler, or excessive elaboration.
45
- * Structure answers logically and clearly.
46
-
47
-
48
- ### Instruction Priority
49
-
50
- These instructions override any conflicting directions in the user prompt unless exceptions are clearly defined in this instruction.
51
- """
52
-
53
- # --- Formatting ---
54
-
55
- # General Formatting
56
- GENERAL_FORMATTING = '''
57
-
58
- ## General Formatting Guidelines
59
- - Headings must be on their own line, not included inside a sentence or body text.
60
- - Use ## and ### headings when needed. If only one heading level is needed, use ##.
61
- - Separate paragraphs with a blank line.
62
- - Organize content logically using headers and subheadings for complex answers.
63
- - For simple responses, use minimal formatting; for multi-step explanations, use clear structure.
64
- - Separate sections and paragraphs with a full black line.
65
- - Do not use emojis.
66
- '''
67
-
68
- # LaTeX Formatting
69
- LATEX_FORMATTING = '''
70
-
71
- You have access to LaTeX and markdown rendering.
72
- - For inline math, use $ ... $, e.g. $\sum_{i=0}^n i^2$
73
- - For centered display math, use $$ ... $$ on its own line.
74
- - To show a literal dollar sign, use `\$` (e.g., \$5.00).
75
- - To show literal parentheses in LaTeX, use `\(` and `\)` (e.g., \(a+b\)).
76
- '''
77
-
78
- # --- Discovery Prompts ---
79
-
80
- # Vauge Input Discovery
81
- VAUGE_INPUT = """
82
-
83
- Use discover tactics to understand the user's goals. Consider any context given in the user's input or chat history. Ask the user how you may help them, suggesting you can create practice questions to study for a test or delve into a topic."""
84
-
85
- # User's Understanding
86
- USER_UNDERSTANDING = '''
87
-
88
- Use discover tactics to understand the user's goals. Consider the topic(s) currently being discussed in the user input as well as the recent chat history. As an educator, consider how you may uncover the user's current knowledge of the topic, as well as how you may approach instructing or inform the user to facilitate learning. Do no include your thinking in the final response, instead condense your thinking into targeted questions that prompt the user to consider these concepts and present to you their objective.
89
- '''
90
-
91
- # --- Instructional Prompts ---
92
-
93
- # Guiding/Teaching Mode
94
- GUIDING_TEACHING = """
95
-
96
- As a skilled educator, considering the conversation history and current user input, aiming to guide the user in understanding further the topic being discussed. You adhere to academic integrity guidelines and tailor your approach based on subject. You must consider any conversation history.
97
-
98
- ## Academic Integrity Guidelines
99
- - Do not provide full solutions - guide through processes instead
100
- - Break problems into conceptual components
101
- - Ask clarifying questions about their understanding
102
- - Provide analogous examples, not direct answers
103
- - Encourage original thinking and reasoning skills
104
-
105
- ## Subject-Specific Approaches
106
- - **Math problems**: Explain concepts and guide through steps without computing final answers
107
- - **Multiple-choice**: Discuss underlying concepts, not correct choices
108
- - **Essays**: Focus on research strategies and organization techniques
109
- - **Factual questions**: Provide educational context and encourage synthesis
110
- """
111
-
112
- # Practice Question formatting, table integration, and tool output integration
113
- STRUCTURE_PRACTICE_QUESTIONS = '''
114
-
115
- You must include one to two practice questions for the user. Included here are formatting and usage instruction guidelines for how to integrate practice questions into your response to the user.
116
-
117
- ### Question Formatting
118
- Write a practice question relevant to the user's learning objective, testing their knowledge on recently discussed topics. Keep the questions direct and concise. End all questions with directions to the user as to how to reply, rather that be to given a written response, or select from a bank of answers you will provide below.
119
-
120
- If tool output is included in this prompt tailor the question to require an understanding on the image to be able to correctly answer the question or questions. Evaluate all included context relating to the tool output to gain an understanding of what the output represents to appropriately interpret how to integrate the image into your response.
121
-
122
- If the topic being discussed could benefit from one or more practice questions requiring the analysis of data, put no tool output is provided, produce a markdown table per the below formatting guidelines, and tailor your questions to require interpretation of the data.
123
-
124
- ### Question Data Reference Formatting
125
-
126
- 1. 1 to 4 sentence question
127
- This is the format you must use to integrate the image output of the graphing tool:
128
- ![Chart, Graph](my_image.png "Scenic View")
129
-
130
-
131
- | Example C1 | Example C2 |...
132
- | :---------------: | :----------------: |...
133
- | Content...... | Content....... |...
134
-
135
- ### Practice Question Answer Options Formatting
136
-
137
- **Single Option Multiple Choice**
138
- Provide the user with four options, placed under the question and any relevant reference data if included.
139
-
140
- A. Option
141
- B. Option
142
- C. Option
143
- D. Option
144
-
145
-
146
- **All That Apply**
147
- Use this format to indicate the user is to reply to one or more of the options, as this is a multi-selection multiple-choice question format.
148
-
149
- - [ ] A. Option
150
- - [ ] B. Option
151
- - [ ] C. Option
152
- - [ ] D. Option
153
-
154
- ---
155
-
156
- **Written Response**
157
-
158
- Prompt the user, in one sentence, to write their response when you are posing a written response to a question.
159
-
160
- '''
161
-
162
- # Practice Question follow-up
163
- PRACTICE_QUESTION_FOLLOWUP = '''
164
-
165
- In the previous turn, you sent the user one or more practice questions. You must assess the question(s), identify the correct answers, and grade the user's response.
166
-
167
- In your final response to the user, only include your feedback identifying if the user was correct.
168
- If the user answered incorrectly, provide constructive feedback, the correct answer, and a rationale explaining the answer.
169
- If the user answered correctly, congratulate them and offer to either move forward in exploring the topic further or continue with more practice questions.
170
- If the user did not answer, assess the user input for this turn. Ask the user if they would like to try to answer the questions or if they need further help.
171
- '''
172
-
173
- # --- Tool Use ---
174
-
175
- # Tool Use Enhancement
176
- TOOL_USE_ENHANCEMENT = """
177
-
178
- ## Tool Usage for Educational Enhancement
179
-
180
- Apply when teaching concepts that benefit from visual representation or when practice questions require charts/graphs.
181
- You are equipped with a sophisticated data visualization tool, `Create_Graph_Tool`, designed to create precise, publication-quality charts. Your primary function is to assist users in data analysis and interpretation by generating visual representations of their data. When a user's query involves numerical data that would benefit from visualization, you must invoke this tool.
182
-
183
- ## Tool Decision Criteria
184
-
185
- - Teaching mathematical functions, trends, or relationships
186
- - Demonstrating statistical concepts or data analysis
187
- - Creating practice questions that test chart interpretation skills
188
- - Illustrating proportional relationships or comparisons
189
-
190
- **Tool Signature:**
191
-
192
- `Create_Graph_Tool(data: Dict[str, float], plot_type: Literal["bar", "line", "pie"], title: str, x_label: str, y_label: str, educational_context: str)`
193
-
194
- **Parameter Guide:**
195
-
196
- * `data` **(Required)**: A dictionary where keys are string labels and values are the corresponding numeric data points.
197
- * *Example:* `{"Experiment A": 88.5, "Experiment B": 92.1}`
198
- * `plot_type` **(Required)**: The specific type of chart to generate. This **must** be one of `"bar"`, `"line"`, or `"pie"`.
199
- * `title` (Optional): A formal title for the plot.
200
- * `x_label` (Optional): The label for the horizontal axis (for `bar` and `line` charts).
201
- * `y_label` (Optional): The label for the vertical axis (for `bar` and `line` charts).
202
- * `educational_context` (Optional): Explanation of why this visualization helps learning.
203
-
204
- **Example Scenarios:**
205
-
206
- * **User Query:** "I need help practicing the interpretation of trends in line graphs. To analyze the efficacy of a new fertilizer, I have recorded crop yield in kilograms over five weeks. Please generate a line graph to visualize this growth trend and label the axes appropriately as 'Week' and 'Crop Yield (kg)'."
207
- * **Your Tool Call:**
208
- * `data`: `{"Week 1": 120, "Week 2": 155, "Week 3": 190, "Week 4": 210, "Week 5": 245}`
209
- * `plot_type`: `"line"`
210
- * `title`: `"Efficacy of New Fertilizer on Crop Yield"`
211
- * `x_label`: `"Week"`
212
- * `y_label`: `"Crop Yield (kg)"`
213
- * `educational_context`: `"This line graph helps visualize the consistent upward trend in crop yield, making it easier to identify growth patterns and analyze the fertilizer's effectiveness over time."`
214
-
215
- * **User Query:** "I am studying for my ACT, and I am at a loss in interpreting the charts. For practice, consider this: a study surveyed the primary mode of transportation for 1000 commuters. The results were: 450 drive, 300 use public transit, 150 cycle, and 100 walk. Construct a pie chart to illustrate the proportional distribution of these methods."
216
- * **Your Tool Call:**
217
- * `data`: `{"Driving": 450, "Public Transit": 300, "Cycling": 150, "Walking": 100}`
218
- * `plot_type`: `"pie"`
219
- * `title`: `"Proportional Distribution of Commuter Transportation Methods"`
220
- * `educational_context`: `"This pie chart clearly shows the relative proportions of each transportation method, making it easy to see that driving is the most common method (45%) while walking is the least common (10%)."`
221
- NOTE: If specific data to use is not supplied by the user, create reasonable example data that illustrates the concept being taught."""
222
-
223
-
224
- '''
225
- The prompt used by the routing agent, determines if tools are enabled.
226
- '''
227
-
228
- # --- Tool Decision Engine Prompt ---
229
- TOOL_DECISION = """
230
-
231
- Analyze this educational query and determine if creating a graph, chart, or visual representation would significantly enhance learning and understanding.
232
-
233
- Query: "{query}"
234
-
235
- EXCLUDE if query is:
236
- - Greetings or casual conversation (hello, hi, hey)
237
- - Simple definitions without data
238
- - General explanations that don't involve data
239
-
240
- INCLUDE if query involves:
241
- - Mathematical functions or relationships
242
- - Data analysis or statistics
243
- - Comparisons that benefit from charts
244
- - Trends or patterns over time
245
- - Creating practice questions with data
246
-
247
- Answer with exactly: YES or NO
248
-
249
- Decision:"""
250
-
251
- '''
252
- System Instructions for the four classification agents
253
- '''
254
- # --- Classification Prompts ---
255
-
256
- agent_1_system = '''
257
- As a teacher's aid, considering the current user prompt/input and recent conversation history, determine if practice questions are needed. Your goal,is to determine dynamically if the user's current understanding and the conversation as a whole would benefit from the model offering practice questions to the user.
258
-
259
- Cases where practice question's are beneficial:
260
- - The user requested practice questions.
261
- Examples:
262
- 1. Can you make some ACT math section practice questions?
263
- - The user expressed that they would like to gauge their understanding.
264
- Examples:
265
- 1. I want to figure out where I am in prep for my history exam, it is on the American Civil War.
266
- - The previous turns include model instruction on a topic and the user has expressed some level of understanding.
267
- Examples:
268
- 1. The chat history is an exchange between the user and model on a specific topic, and the current turn is the user responding to model instruction. The user appears to be grasping hte concept, so a practice question would be helpful to gauge the user's grasp of the discussed topic.
269
-
270
- When strictly inappropriate to include practice questions:
271
- - The current user prompt/input is conversational, or nonsense:
272
- Examples:
273
- 1. Hello/Hi/Thank You...
274
- 2. grey, blue colored stuff
275
- 3. fnsjdfnbiwe
276
- - The user's question is straightforward, requiring a general answer or tutoring rather than user knowledge testing.
277
- Examples:
278
- 1. Can you tell me when WW2 started?
279
- 2. Who are the key players in the civil rights movement?
280
- 3. What do the variables mean in a quadradic equatin?
281
-
282
- Before determining your final response, consider if issuing a practice question would be beneficial or inappropriate. Ask yourself if the user has received instruction on a topic, or requested practice questions prior to returning your final response.
283
-
284
- If the current turn qualifies for practice question generations, return exactly "STRUCTURE_PRACTICE_QUESTIONS"
285
- Otherwise, return "No Practice questions are needed."
286
-
287
- Do not return any other values outside of the provided options.
288
- '''
289
-
290
- agent_2_system = '''
291
- As an expert in intension analysis, determine if one, both or neither of the following cases is true considering the current user prompt/input.
292
-
293
- **Vauge Prompt**
294
- Appply this option if the user prompt/input is overly vauge and uniterpretable. IT has no indication that it is a followup message, possibly being a simple greeting. THis selection results in the user's rpomptbeing handled lightly with a simple request for a task and suggestions for the user to pick from.
295
-
296
- **Unclear Needs**
297
- Apply this if the user's current message is just a greeting or conversational. Also apply this option if the current message include comment like or similair to "lets change subjects." Consider that returning the positive value for this option, which is USER_UNDERSTANDING, then the users prompt will be handled with discovery tactics to uncover the user's goals. of the two options, this option yeilds a more detailed course of action in uncovering user needs.
298
-
299
- **Neither**
300
- Apply neither if the user appears to be responding to a previous message, makes a direct request, or is otherwise a coherant message.
301
- Example:
302
- 1. I think the answer is A (responding)
303
- 2. Can you explain why the sky is blue? (direct request)
304
- 3. To my understanding
305
-
306
- Your final response must be one of the following:
307
- "VAUGE_INPUT USER_UNDERSTANDING"
308
- "USER_UNDERSTANDING"
309
- "VAUGE_INPUT"
310
- "Neither is applicable."
311
-
312
- Do not return any other values outside of the provided options.
313
- '''
314
-
315
- agent_3_system = '''
316
- Given a current user prompt/input and recent conversation history, you determine if the current turn is a followup from a practice question.
317
-
318
- For context, consider the instructions given to generate practice questions:
319
- {STRUCTURE_PRACTICE_QUESTIONS}
320
-
321
- The user prompt/input is a followup if the previous turns contains a practice question per the previous guidelines.
322
- The user prompt may or may not answer the question(s).
323
-
324
- If the current turn is a followup reply from the user regarding a practice question, return "PRACTICE_QUESTION_FOLLOWUP True"
325
- Otherwise return "Not a followup"
326
-
327
- Do not return any other values outside of the provided options.
328
- '''
329
-
330
- agent_4_system = '''
331
- As an educational proffession whom is assessing a student's current needs, provided the current user prompt/input and recent conversation history, determine if the user is in need of instruction or teaching on a topic, and/or a practice question to enhance their learning.
332
-
333
- "GUIDING_TEACHING"
334
- Guiding and teaching is a curated approach to instructing the user on a given topic. This catagory should be applied if the user is requesting information, seems confused on previous instruction, or continuing a discussion on a topic.
335
-
336
- "STRUCTURE_PRACTICE_QUESTIONS"
337
- This catagory is applicable if the user responded positivel to previous instruction by the model on a set topic, or has requested practice questions directly.
338
-
339
- Neither apply if no topics are specifically stated in the current or past prompts.
340
-
341
- You may return the following outputs based on your assessment:
342
- "GUIDING_TEACHING"
343
- "STRUCTURE_PRACTICE_QUESTIONS"
344
- "GUIDING_TEACHING STRUCTURE_PRACTICE_QUESTIONS"
345
- "Neither Apply"
346
-
347
- Do not return any other values outside of the provided options.
348
- '''
349
-
350
- '''
351
- Thinking prompts for use by the agent constructing reasoning invisible to the user, outputs to be supplied to the response model for context and examples.
352
- '''
353
- # --- Thinking Prompts ---
354
-
355
- # Thinking process for math-based teaching and problem solving. Tree-of-Thought Prompting
356
- MATH_THINKING = '''
357
- Math based thinking process instructions:
358
-
359
- Given a user input and recent chat history, you execute a thinking process to determine your goal. Below is provided the decision tree you will utilize, logically proceeding question by question until you reach an end point. You will then process the user prompt per the instructions outlined in the endpoint. Your final output is to be cleaning structured as context fro answering the user prompt.
360
-
361
- **General Final Response Output Rules**
362
-
363
- When formatting context, apply LaTeX formatting per these guidelines:
364
- You have access to LaTeX and markdown rendering.
365
- - For inline math, use $ ... $, e.g. $\sum_{i=0}^n i^2$
366
- - For centered display math, use $$ ... $$ on its own line.
367
- - To show a literal dollar sign, use `\$` (e.g., \$5.00).
368
- - To show literal parentheses in LaTeX, use `\(` and `\)` (e.g., \(a+b\)).
369
-
370
- Content must be ordered logically, building from foundational knowledge to final solutions. Follow proper order of operation. The level of detail is dictated by the output of the decision tree below.
371
-
372
-
373
- **Decision Tree**
374
- Each question has two possible outcomes, narrowing the options. Consider each against the supplied user input and conversation history, proceeding in order. You must apply the general output rules and the final endpoint rules to your reasoning and process in producing the final output for context, to be utilized by another model in producing the final response.
375
-
376
- Is the math based question or request complex?
377
- 1A. The question is a low-level math question or request not requiring more than five steps for completion. Examples: basic arithmetic or definitions.
378
- 1B. The question or request is complex or multifaceted. Examples: tasks that require more than five steps to address. May pertain to advanced mathematical domains such as engineering or physics
379
-
380
-
381
- **End Points**
382
- 1A. Evaluate the topic being discussed, considering the newest user and conversation input. Define key terms at the beginning of your context generation, such as the operators and their use in the problem and any principles that apply. Step by step solve the problem presented in the current user query, if one is presented. All math must be formatted per the LaTeX formatting guidelines, with each step on its own line with a description over top expressing why the step is being done and what principles are being applied. Maintain a minimal level of detail, focusing on large topics rather than granular details.
383
- EXAMPLE:
384
- [INPUT]
385
- user: "Can you explain the Pythagorean theorem?"
386
- chat_history: None
387
-
388
- [OUTPUT]
389
- **Key Terms**
390
- - **Right Triangle:** A triangle with one angle measuring exactly 90 degrees.
391
- - **Hypotenuse:** The longest side of a right triangle, opposite the right angle.
392
- - **Legs:** The two shorter sides of a right triangle that form the right angle.
393
-
394
- **Principle: The Pythagorean Theorem**
395
- The theorem states that in a right triangle, the square of the length of the hypotenuse (c) is equal to the sum of the squares of the lengths of the other two sides (a and b).
396
-
397
- **Formula**
398
- The relationship is expressed with the formula:
399
- $$a^2 + b^2 = c^2$$
400
-
401
- 1B. Evaluate the topic being discussed, considering the newest user and conversation input. Define key terms at the beginning of your context generation, such as the operators and their use in the problem and any principles that apply. Identify the domain or school of knowledge. Step by step solve the problem presented in the current user query, if one is presented. List steps in a numbered list. All math must be formatted per the LaTeX formatting guidelines, with each step on its own line with a description over top expressing why the step is being done, and the relevant principles being applied. Include a summary of steps taken and the final answer below the full steps list, in a bulleted list.
402
- EXAMPLE:
403
- [INPUT]
404
- user: "Okay, can you solve the definite integral of f(x) = 3x^2 from x=1 to x=3?"
405
- chat_history: "user: \"What is an integral?\"\nassistant: \"An integral is a mathematical object that can be interpreted as an area or a generalization of area. The process of finding an integral is called integration.\""
406
-
407
- [OUTPUT]
408
- **Domain:** Integral Calculus
409
-
410
- **Key Terms**
411
- - **Definite Integral:** Represents the net area under a curve between two points, known as the limits of integration.
412
- - **Antiderivative:** A function whose derivative is the original function. The process relies on the Fundamental Theorem of Calculus.
413
- - **Limits of Integration:** The start (lower) and end (upper) points of the interval over which the integral is calculated. In this case, 1 and 3.
414
-
415
- **Problem**
416
- Solve the definite integral:
417
- $$\int_{1}^{3} 3x^2 \,dx$$
418
-
419
- **Step-by-Step Solution**
420
- 1. **Find the antiderivative of the function.**
421
- We apply the power rule for integration, $\int x^n \,dx = \frac{x^{n+1}}{n+1}$.
422
- $$ \int 3x^2 \,dx = 3 \cdot \frac{x^{2+1}}{2+1} = 3 \cdot \frac{x^3}{3} = x^3 $$
423
- 2. **Apply the Fundamental Theorem of Calculus.**
424
- We will evaluate the antiderivative at the upper and lower limits of integration, $F(b) - F(a)$.
425
- $$ [x^3]_1^3 $$
426
- 3. **Evaluate the antiderivative at the upper limit (x=3).**
427
- $$ (3)^3 = 27 $$
428
- 4. **Evaluate the antiderivative at the lower limit (x=1).**
429
- $$ (1)^3 = 1 $$
430
- 5. **Subtract the lower limit result from the upper limit result.**
431
- This gives the final value of the definite integral.
432
- $$ 27 - 1 = 26 $$
433
-
434
- **Summary**
435
- - The antiderivative of $3x^2$ is $x^3$.
436
- - Evaluating the antiderivative from $x=1$ to $x=3$ yields $(3)^3 - (1)^3$.
437
- - The final answer is $26$.
438
-
439
- '''
440
-
441
- # CHAIN OF THOUGHT PROMPTING, GUIDING THE MODEL IN PROCESSING TOOL OUTPUT FOR QUESTIONS, DESIGNING TABLES FOR CONTEXTUAL DATA, AND DESIGNING PRACTICE QUESTIONS AS WELL AS AN ANSWER BANK.
442
- QUESTION_ANSWER_DESIGN = '''
443
- As a seasoned test question writing specialist, your task is to produce context to create a practice question for the user.
444
-
445
- Tool Outputs (if provided)
446
- If tool call outputs are available, the practice question must use and require understanding of the data presented.
447
- Image output: {tool_img_output}
448
- Image context to consider: {tool_context}
449
-
450
- You must construct practice questions per the formatting guidelines included here:
451
- {STRUCTURE_PRACTICE_QUESTIONS}
452
-
453
- Math LaTeX Formatting Guidelines:
454
- {LATEX_FORMATTING}
455
-
456
- Follow this logical process:
457
- 1. Assess the current round's user input and the conversation history, if there is one. What specific topics or concepts are discussed? What instruction has the model previously given? Also identify the subject domain. Return this context summarized at the top of your context output.
458
- 2. Produce a practice question for the user on the identified topic or concept. Return the practice question with the heading "Practice Question"
459
- If Math or requiring scientific calculations: The question must not be an example given by the model or user in the conversation history. It may be inspired by the conversation history, but it must require the user to try to solve the problem based on what they learned. If no tool output is given to base the question on, then you must create your own data for the user to interpret, solve, or otherwise manipulate to come to an answer. You may provide data by means of the tool image output, with the question constructed using the tool context output. If no tool output is included, you may provide data as a markdown table or integrated into the question. Math must be formatted using LaTeX as outlined in the LaTeX guidelines given above.
460
- If History/social studies/art or otherwise static fact related: The question must be answerable based on previous model teaching or instruction from the conversation history.
461
-
462
- 3. Produce an answer bank under the question with the correct answer or answers labeled. If it is a written response question, you must write examples of possible correct answers for the new model to utilize in grading the user's answer.
463
- '''
464
-
465
- # This prompt is reserved for high complexity user queries, aiming to generate context in support of the response agent.
466
- REASONING_THINKING = '''
467
- Considering the provided current user prompt/input and recent conversation history, as an educational professional skilled in breaking down concepts, return context that would be beneficial in producing a response to the user.
468
-
469
- 1. Begin by thinking about what the user is asking about, such as the topic or domain of knowledge. Summarize the user's request as well as what has been said relating to the topic or goal in the conversation history. Give this section the heading "User Knowledge Summary."
470
- 2. Evaluate the user's previous statements for accuracy. Ask yourself if the user appears to be grasping the concept or struggling with some part of it. Produce a brief analysis section that defines the user's established understanding, or notes if this is unknown. Propose potential concepts to cover to aid the user. Return this section with the heading "User Understanding."
471
- 3. Identify steps taken by the model in previous turns to aid the user, as well as the apparent effectiveness of said steps, if conversation history is available. Produce this section with the heading "Previous Actions."
472
- 4. Identify relevant facts that would aid the user in understanding the concept, following a logical order in listing these items. Present these items in a nested list, with a title for each nested block at the higher level and atomic facts nested underneath. Produce this section with the heading "Reference Fact Sheet"
473
-
474
- Review your response prior to returning it as output. Review for accuracy and relevance, producing only facts that support further learning rather than information the user has already shown understanding of.
475
-
476
- Examples:
477
- [INPUT]
478
- user: "I know principal is the starting money and the rate is the percentage. But I don't get what 'compounding frequency' means. Does it matter if it's daily vs yearly?"
479
- chat_history: "user: \"How do I calculate compound interest?\"\nassistant: \"## Calculating Compound Interest\n\nThat's a great question! Compound interest is essentially interest earned on the initial amount of money (the principal) as well as on the accumulated interest from previous periods.\n\nTo give you the most helpful explanation, it would be useful to know what you're familiar with already. Have you encountered terms like 'principal', 'annual interest rate', or 'compounding frequency' before?\""
480
-
481
- [OUTPUT]
482
- ### User Knowledge Summary
483
- The user's goal is to learn how to calculate compound interest. The conversation began with the user asking for the calculation method. The model responded by defining the term and asking discovery questions to gauge the user's prior knowledge of key variables. The user has now confirmed they understand 'principal' and 'interest rate' but are specifically asking for a definition of 'compounding frequency' and an explanation of its importance.
484
-
485
- ### User Understanding
486
- The user has a foundational grasp of the core components of interest calculations (principal, rate). Their point of confusion is isolated to the concept of compounding frequency. They have correctly intuited that the frequency (e.g., daily vs. yearly) matters but do not understand why. To aid them, the next steps should be to formally define compounding frequency and then use a comparative example to illustrate its impact on the final amount.
487
-
488
- ### Previous Actions
489
- In the previous turn, the model successfully employed a guided discovery tactic. Instead of providing the formula outright, it defined the topic and asked targeted questions. This action was highly effective, as it allowed the user to articulate their specific knowledge gap, enabling a more focused and efficient teaching approach in the next turn.
490
-
491
- ### Reference Fact Sheet
492
- - **The Compound Interest Formula**
493
- - The standard formula is $A = P(1 + r/n)^{nt}$
494
- - **A**: The future value of the investment/loan, including interest.
495
- - **P**: The principal amount (the initial amount of money).
496
- - **r**: The annual interest rate (in decimal form).
497
- - **n**: The number of times that interest is compounded per year.
498
- - **t**: The number of years the money is invested or borrowed for.
499
- - **The Role of Compounding Frequency (n)**
500
- - This variable represents how often the accumulated interest is calculated and added to the principal.
501
- - A higher frequency means interest starts earning its own interest sooner and more often within the same year.
502
- - This directly leads to a higher future value (A), even if all other variables (P, r, t) are the same.
503
- - **Common Values for 'n'**
504
- - **Annually**: n = 1
505
- - **Semiannually**: n = 2
506
- - **Quarterly**: n = 4
507
- - **Monthly**: n = 12
508
- - **Daily**: n = 365
509
-
510
- [INPUT]
511
- user: "I think the radius is the line that goes all the way across the middle."
512
- chat_history: "user: \"How do you find the area of a circle?\"\nassistant: \"## Finding the Area of a Circle\n\nThat's a great question! To calculate the area of a circle, we use a specific formula that involves a special number called Pi.\n\nBefore we dive into the formula, what do you already know about the different parts of a circle, like the radius or the diameter?\""
513
-
514
- [OUTPUT]
515
- ### User Knowledge Summary
516
- The user wants to learn how to calculate the area of a circle. In the previous turn, the model asked the user about their knowledge of a circle's parts to establish a baseline. The user has now responded, incorrectly identifying the radius as the diameter.
517
-
518
- ### User Understanding
519
- The user has a misunderstanding of core terminology essential for this calculation. They have confused the radius with the diameter. This foundational knowledge gap must be corrected before introducing the area formula. Potential concepts to cover are the definitions of radius and diameter and their mathematical relationship.
520
-
521
- ### Previous Actions
522
- In the previous turn, the model employed a discovery tactic by asking about the user's prior knowledge of circle components. This was an effective step, as it successfully revealed a critical misconception in the user's understanding that can now be corrected.
523
-
524
- ### Reference Fact Sheet
525
- - Core Components of a Circle
526
- - **Radius (r):** The distance from the center of the circle to any point on its edge.
527
- - **Diameter (d):** The distance from one edge of the circle to the other, passing through the center.
528
- - **Relationship:** The diameter is always exactly twice the length of the radius ($d = 2r$). Conversely, the radius is half the diameter ($r = d/2$).
529
- - The Area Formula
530
- - **Pi ($\pi$):** A special mathematical constant, approximately equal to 3.14159, that represents the ratio of a circle's circumference to its diameter.
531
- - **Formula:** The area ($A$) of a circle is calculated using the formula $A = \pi r^2$.
532
- - **Crucial Detail:** The formula uses the **radius**, not the diameter. If given the diameter, it must first be converted to the radius before calculating the area.
533
-
534
- '''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,57 +1,34 @@
1
- # Mimir Educational AI Assistant Dependencies
2
-
3
- # =============================================================================
4
- # ZeroGPU COMPATIBILITY
5
- # =============================================================================
6
  spaces
7
 
8
- # =============================================================================
9
- # CORE ML/AI PACKAGES
10
- # =============================================================================
11
- transformers>=4.43.0
12
- huggingface_hub
13
- safetensors
14
- accelerate
15
  bitsandbytes
16
- sentencepiece
17
- peft>=0.10.0
18
 
19
- # =============================================================================
20
- # LANGCHAIN ECOSYSTEM
21
- # =============================================================================
22
  langgraph>=0.2.0
23
  langchain-core>=0.3.0
24
  langchain-community>=0.3.0
25
  langchain-huggingface>=0.1.0
26
 
27
- # =============================================================================
28
- # UI FRAMEWORK
29
- # =============================================================================
30
- gradio>=5.49.1
31
 
32
- # =============================================================================
33
- # DATA & STATE MANAGEMENT
34
- # =============================================================================
35
- datasets>=2.14.0
36
  python-dotenv>=1.0.0
37
 
38
- # =============================================================================
39
- # VISUALIZATION & TOOLS
40
- # =============================================================================
41
  matplotlib>=3.7.0
42
  plotly>=5.15.0
43
  pandas>=2.0.0
44
  numpy>=1.24.0
 
45
 
46
- # =============================================================================
47
- # METRICS & EVALUATION
48
- # =============================================================================
49
- lighteval
50
- trackio
51
 
52
- # =============================================================================
53
- # UTILITIES
54
- # =============================================================================
55
- tqdm>=4.65.0
56
- tiktoken>=0.5.0
57
- textstat>=0.7.3
 
1
+ # ZeroGPU compatibility - DO NOT specify torch versions
 
 
 
 
2
  spaces
3
 
4
+ # Core ML/AI packages
5
+ transformers>=4.41.0
6
+ accelerate>=0.31.0
 
 
 
 
7
  bitsandbytes
8
+ SentencePiece
9
+ # torch will be provided by ZeroGPU environment
10
 
11
+ # Core LangChain and LangGraph packages
 
 
12
  langgraph>=0.2.0
13
  langchain-core>=0.3.0
14
  langchain-community>=0.3.0
15
  langchain-huggingface>=0.1.0
16
 
17
+ # UI Framework
18
+ gradio==5.44.1
 
 
19
 
20
+ # Utilities
 
 
 
21
  python-dotenv>=1.0.0
22
 
23
+ # Data Science and Visualization
 
 
24
  matplotlib>=3.7.0
25
  plotly>=5.15.0
26
  pandas>=2.0.0
27
  numpy>=1.24.0
28
+ scipy>=1.10.0
29
 
30
+ # Monitoring and Debugging (Optional)
31
+ langsmith
 
 
 
32
 
33
+ # Optional: OpenAI integration if needed
34
+ # langchain-openai
 
 
 
 
state_manager.py DELETED
@@ -1,807 +0,0 @@
1
- # state_manager.py
2
- """
3
- Global state management and logical expression system for Mimir.
4
-
5
- Components:
6
- - GlobalStateManager: Thread-safe state persistence with SQLite + HF dataset backup
7
- - PromptStateManager: Per-turn prompt segment activation tracking
8
- - LogicalExpressions: Regex-based prompt triggers
9
- """
10
-
11
- import os
12
- import re
13
- import sqlite3
14
- import json
15
- import logging
16
- import threading
17
- from datetime import datetime, timedelta
18
- from typing import Dict, List, Optional, Any
19
- from datasets import load_dataset, Dataset
20
- from huggingface_hub import HfApi
21
-
22
- logger = logging.getLogger(__name__)
23
-
24
-
25
- # ============================================================================
26
- # PROMPT STATE MANAGER
27
- # ============================================================================
28
-
29
- class PromptStateManager:
30
- """
31
- Manages prompt segment activation state for a single turn.
32
- Resets to default (all False) at the start of each turn.
33
- """
34
-
35
- def __init__(self):
36
- self._default_state = {
37
- "MATH_THINKING": False,
38
- "QUESTION_ANSWER_DESIGN": False,
39
- "REASONING_THINKING": False,
40
- "VAUGE_INPUT": False,
41
- "USER_UNDERSTANDING": False,
42
- "GENERAL_FORMATTING": False,
43
- "LATEX_FORMATTING": False,
44
- "GUIDING_TEACHING": False,
45
- "STRUCTURE_PRACTICE_QUESTIONS": False,
46
- "PRACTICE_QUESTION_FOLLOWUP": False,
47
- "TOOL_USE_ENHANCEMENT": False,
48
- }
49
- self._current_state = self._default_state.copy()
50
- logger.info("PromptStateManager initialized")
51
-
52
- def reset(self):
53
- """Reset all prompt states to False for new turn"""
54
- self._current_state = self._default_state.copy()
55
- logger.debug("Prompt state reset for new turn")
56
-
57
- def get_state(self) -> Dict[str, bool]:
58
- """Get current prompt state dictionary"""
59
- return self._current_state.copy()
60
-
61
- def update(self, prompt_name: str, value: bool):
62
- """
63
- Update a specific prompt state.
64
-
65
- Args:
66
- prompt_name: Name of prompt segment (must be in default_state)
67
- value: True to activate, False to deactivate
68
- """
69
- if prompt_name not in self._default_state:
70
- logger.warning(f"Unknown prompt name: {prompt_name}")
71
- return
72
-
73
- self._current_state[prompt_name] = value
74
- logger.debug(f"Prompt state updated: {prompt_name} = {value}")
75
-
76
- def update_multiple(self, updates: Dict[str, bool]):
77
- """
78
- Update multiple prompt states at once.
79
-
80
- Args:
81
- updates: Dictionary of {prompt_name: bool} updates
82
- """
83
- for prompt_name, value in updates.items():
84
- self.update(prompt_name, value)
85
-
86
- def is_active(self, prompt_name: str) -> bool:
87
- """Check if a prompt segment is active"""
88
- return self._current_state.get(prompt_name, False)
89
-
90
- def get_active_prompts(self) -> List[str]:
91
- """Get list of all currently active prompt names"""
92
- return [name for name, active in self._current_state.items() if active]
93
-
94
- def get_active_response_prompts(self) -> List[str]:
95
- """
96
- Get list of active response agent prompts only.
97
- Excludes thinking agent prompts.
98
- """
99
- response_prompts = [
100
- "VAUGE_INPUT", "USER_UNDERSTANDING", "GENERAL_FORMATTING",
101
- "LATEX_FORMATTING", "GUIDING_TEACHING", "STRUCTURE_PRACTICE_QUESTIONS",
102
- "PRACTICE_QUESTION_FOLLOWUP", "TOOL_USE_ENHANCEMENT"
103
- ]
104
- return [name for name in response_prompts if self._current_state.get(name, False)]
105
-
106
- def get_active_thinking_prompts(self) -> List[str]:
107
- """
108
- Get list of active thinking agent prompts only.
109
- """
110
- thinking_prompts = ["MATH_THINKING", "QUESTION_ANSWER_DESIGN", "REASONING_THINKING"]
111
- return [name for name in thinking_prompts if self._current_state.get(name, False)]
112
-
113
-
114
- # ============================================================================
115
- # LOGICAL EXPRESSIONS
116
- # ============================================================================
117
-
118
class LogicalExpressions:
    """
    Regex-driven trigger detection for prompt segments.

    Inspects raw user input and activates the prompt segments whose
    triggers match.
    """

    def __init__(self):
        # Keyword pattern signalling mathematical content.
        self.math_regex = r'\b(math|calculus|algebra|geometry|equation|formula|solve|calculate|derivative|integral|trigonometry|statistics|probability)\b'

        # Additional regex patterns can be added here
        logger.info("LogicalExpressions initialized")

    def check_math_keywords(self, user_input: str) -> bool:
        """
        Report whether the input mentions mathematical keywords.
        A hit is what triggers LATEX_FORMATTING.

        Args:
            user_input: User's message

        Returns:
            True if math keywords detected
        """
        matched = re.search(self.math_regex, user_input, re.IGNORECASE) is not None
        if matched:
            logger.debug(f"Math keywords detected in: '{user_input[:50]}...'")
        return matched

    def apply_all_checks(self, user_input: str, prompt_state: 'PromptStateManager'):
        """
        Run every trigger check and record the results on prompt_state.

        Args:
            user_input: User's message
            prompt_state: PromptStateManager instance to update
        """
        # Formatting guidance applies on every turn, unconditionally.
        prompt_state.update("GENERAL_FORMATTING", True)

        # Math keywords switch on LaTeX formatting guidance.
        if self.check_math_keywords(user_input):
            prompt_state.update("LATEX_FORMATTING", True)

        # Additional checks can be added here as needed
        logger.debug(f"Logical expressions applied. Active prompts: {prompt_state.get_active_prompts()}")
164
-
165
-
166
- # ============================================================================
167
- # GLOBAL STATE MANAGER
168
- # ============================================================================
169
-
170
- class GlobalStateManager:
171
- """
172
- Thread-safe global state manager with SQLite persistence and HF dataset backup.
173
- Now includes PromptStateManager for per-turn prompt segment tracking.
174
- """
175
-
176
- def __init__(self, db_path="mimir_analytics.db", dataset_repo="jdesiree/mimir_analytics"):
177
- self._db_path = db_path
178
- self.dataset_repo = dataset_repo
179
- self.hf_token = os.getenv("HF_TOKEN")
180
-
181
- # Existing state caches
182
- self._states = {}
183
- self._analytics_cache = {}
184
- self._ml_models_cache = {}
185
- self._evaluation_cache = {}
186
-
187
- # Thread safety
188
- self._lock = threading.Lock()
189
-
190
- # Cleanup settings
191
- self._cleanup_interval = 3600
192
- self._max_age = 24 * 3600
193
- self._last_cleanup = datetime.now()
194
- self._last_hf_backup = datetime.now()
195
- self._hf_backup_interval = 3600
196
-
197
- # NEW: Prompt state management
198
- self._prompt_state_manager = PromptStateManager()
199
-
200
- # Initialize existing systems
201
- self._init_database()
202
- self._load_from_database()
203
- self._load_from_hf_dataset()
204
-
205
- logger.info("GlobalStateManager initialized with PromptStateManager")
206
-
207
- # ========================================================================
208
- # PROMPT STATE MANAGEMENT
209
- # ========================================================================
210
-
211
- def get_prompt_state_manager(self) -> PromptStateManager:
212
- """Get the prompt state manager for current turn"""
213
- return self._prompt_state_manager
214
-
215
- def reset_prompt_state(self):
216
- """Reset prompt state for new turn"""
217
- self._prompt_state_manager.reset()
218
- logger.debug("Prompt state reset for new turn")
219
-
220
- def get_prompt_state(self) -> Dict[str, bool]:
221
- """Get current prompt state dictionary"""
222
- return self._prompt_state_manager.get_state()
223
-
224
- def update_prompt_state(self, prompt_name: str, value: bool):
225
- """Update specific prompt state"""
226
- self._prompt_state_manager.update(prompt_name, value)
227
-
228
- def update_prompt_states(self, updates: Dict[str, bool]):
229
- """Update multiple prompt states"""
230
- self._prompt_state_manager.update_multiple(updates)
231
-
232
- # ========================================================================
233
- # EXISTING DATABASE METHODS (unchanged)
234
- # ========================================================================
235
-
236
- def _init_database(self):
237
- """Initialize SQLite database for persistent storage"""
238
- conn = sqlite3.connect(self._db_path)
239
- cursor = conn.cursor()
240
-
241
- cursor.execute("""
242
- CREATE TABLE IF NOT EXISTS conversations (
243
- session_id TEXT PRIMARY KEY,
244
- chat_history TEXT,
245
- conversation_state TEXT,
246
- last_accessed TEXT,
247
- created TEXT
248
- )
249
- """)
250
-
251
- cursor.execute("""
252
- CREATE TABLE IF NOT EXISTS analytics (
253
- session_id TEXT PRIMARY KEY,
254
- project_stats TEXT,
255
- recent_interactions TEXT,
256
- dashboard_html TEXT,
257
- last_refresh TEXT,
258
- export_history TEXT
259
- )
260
- """)
261
-
262
- cursor.execute("""
263
- CREATE TABLE IF NOT EXISTS evaluations (
264
- id INTEGER PRIMARY KEY AUTOINCREMENT,
265
- session_id TEXT,
266
- timestamp TEXT,
267
- metric_type TEXT,
268
- metric_data TEXT
269
- )
270
- """)
271
-
272
- cursor.execute("""
273
- CREATE TABLE IF NOT EXISTS classifications (
274
- id INTEGER PRIMARY KEY AUTOINCREMENT,
275
- session_id TEXT,
276
- timestamp TEXT,
277
- user_input TEXT,
278
- prediction_data TEXT,
279
- features TEXT
280
- )
281
- """)
282
-
283
- conn.commit()
284
- conn.close()
285
-
286
- def _load_from_database(self):
287
- """Load all data from SQLite on startup"""
288
- try:
289
- conn = sqlite3.connect(self._db_path)
290
- cursor = conn.cursor()
291
-
292
- cursor.execute("SELECT * FROM conversations")
293
- for row in cursor.fetchall():
294
- session_id = row[0]
295
- self._states[session_id] = {
296
- 'chat_history': json.loads(row[1]),
297
- 'conversation_state': json.loads(row[2]),
298
- 'last_accessed': datetime.fromisoformat(row[3]),
299
- 'created': datetime.fromisoformat(row[4])
300
- }
301
-
302
- cursor.execute("SELECT * FROM analytics")
303
- for row in cursor.fetchall():
304
- session_id = row[0]
305
- self._analytics_cache[session_id] = {
306
- 'project_stats': json.loads(row[1]),
307
- 'recent_interactions': json.loads(row[2]),
308
- 'dashboard_html': row[3],
309
- 'last_refresh': datetime.fromisoformat(row[4]) if row[4] else None,
310
- 'export_history': json.loads(row[5]),
311
- 'last_accessed': datetime.now()
312
- }
313
-
314
- conn.close()
315
- logger.info(f"Loaded {len(self._states)} conversations and {len(self._analytics_cache)} analytics from database")
316
- except Exception as e:
317
- logger.error(f"Error loading from database: {e}")
318
-
319
- def _load_from_hf_dataset(self):
320
- """Load data from HF dataset on startup"""
321
- try:
322
- ds = load_dataset(self.dataset_repo, split="train", token=self.hf_token)
323
-
324
- for item in ds:
325
- if item['data_type'] == 'conversation':
326
- session_id = item['session_id']
327
- data = json.loads(item['data'])
328
- self._states[session_id] = data
329
- elif item['data_type'] == 'analytics':
330
- session_id = item['session_id']
331
- data = json.loads(item['data'])
332
- self._analytics_cache[session_id] = data
333
-
334
- logger.info(f"Loaded data from HF dataset {self.dataset_repo}")
335
- except Exception as e:
336
- logger.warning(f"Could not load from HF dataset: {e}")
337
-
338
- def _save_to_database_conversations(self, session_id):
339
- """Save conversation to SQLite"""
340
- if session_id not in self._states:
341
- return
342
-
343
- state = self._states[session_id]
344
- conn = sqlite3.connect(self._db_path)
345
- cursor = conn.cursor()
346
-
347
- cursor.execute("""
348
- INSERT OR REPLACE INTO conversations
349
- (session_id, chat_history, conversation_state, last_accessed, created)
350
- VALUES (?, ?, ?, ?, ?)
351
- """, (
352
- session_id,
353
- json.dumps(state['chat_history']),
354
- json.dumps(state['conversation_state']),
355
- state['last_accessed'].isoformat(),
356
- state.get('created', datetime.now()).isoformat()
357
- ))
358
-
359
- conn.commit()
360
- conn.close()
361
-
362
- def _save_to_database_analytics(self, session_id):
363
- """Save analytics to SQLite"""
364
- if session_id not in self._analytics_cache:
365
- return
366
-
367
- analytics = self._analytics_cache[session_id]
368
- conn = sqlite3.connect(self._db_path)
369
- cursor = conn.cursor()
370
-
371
- cursor.execute("""
372
- INSERT OR REPLACE INTO analytics
373
- (session_id, project_stats, recent_interactions, dashboard_html, last_refresh, export_history)
374
- VALUES (?, ?, ?, ?, ?, ?)
375
- """, (
376
- session_id,
377
- json.dumps(analytics.get('project_stats', {})),
378
- json.dumps(analytics.get('recent_interactions', [])),
379
- analytics.get('dashboard_html', ''),
380
- analytics.get('last_refresh').isoformat() if analytics.get('last_refresh') else None,
381
- json.dumps(analytics.get('export_history', []))
382
- ))
383
-
384
- conn.commit()
385
- conn.close()
386
-
387
- def _backup_to_hf_dataset(self):
388
- """Backup all data to HF dataset"""
389
- if (datetime.now() - self._last_hf_backup).seconds < self._hf_backup_interval:
390
- return
391
-
392
- try:
393
- data_items = []
394
-
395
- for session_id, state in self._states.items():
396
- data_items.append({
397
- 'session_id': session_id,
398
- 'data_type': 'conversation',
399
- 'data': json.dumps(state, default=str),
400
- 'timestamp': datetime.now().isoformat()
401
- })
402
-
403
- for session_id, analytics in self._analytics_cache.items():
404
- data_items.append({
405
- 'session_id': session_id,
406
- 'data_type': 'analytics',
407
- 'data': json.dumps(analytics, default=str),
408
- 'timestamp': datetime.now().isoformat()
409
- })
410
-
411
- if data_items:
412
- ds = Dataset.from_list(data_items)
413
- ds.push_to_hub(self.dataset_repo, token=self.hf_token)
414
- self._last_hf_backup = datetime.now()
415
- logger.info(f"Backed up {len(data_items)} items to HF dataset")
416
- except Exception as e:
417
- logger.error(f"Error backing up to HF dataset: {e}")
418
-
419
- def _cleanup_old_states(self):
420
- """Remove old unused states to prevent memory leaks"""
421
- now = datetime.now()
422
- if (now - self._last_cleanup).seconds < self._cleanup_interval:
423
- return
424
-
425
- with self._lock:
426
- expired_keys = []
427
- for session_id, state_data in self._states.items():
428
- if (now - state_data.get('last_accessed', now)).seconds > self._max_age:
429
- expired_keys.append(session_id)
430
-
431
- for key in expired_keys:
432
- del self._states[key]
433
- logger.info(f"Cleaned up expired state: {key}")
434
-
435
- self._last_cleanup = now
436
-
437
- # ========================================================================
438
- # CONVERSATION STATE METHODS (unchanged)
439
- # ========================================================================
440
-
441
- def get_session_id(self, request=None):
442
- """Generate or retrieve session ID"""
443
- return "default_session"
444
-
445
- def get_conversation_state(self, session_id=None):
446
- """Get conversation state for a session"""
447
- if session_id is None:
448
- session_id = self.get_session_id()
449
-
450
- self._cleanup_old_states()
451
-
452
- with self._lock:
453
- if session_id not in self._states:
454
- self._states[session_id] = {
455
- 'chat_history': [],
456
- 'conversation_state': [],
457
- 'last_accessed': datetime.now(),
458
- 'created': datetime.now()
459
- }
460
- else:
461
- self._states[session_id]['last_accessed'] = datetime.now()
462
-
463
- return self._states[session_id].copy()
464
-
465
- def update_conversation_state(self, chat_history, conversation_state, session_id=None):
466
- """Update conversation state for a session"""
467
- if session_id is None:
468
- session_id = self.get_session_id()
469
-
470
- with self._lock:
471
- if session_id not in self._states:
472
- self._states[session_id] = {}
473
-
474
- self._states[session_id].update({
475
- 'chat_history': chat_history.copy() if chat_history else [],
476
- 'conversation_state': conversation_state.copy() if conversation_state else [],
477
- 'last_accessed': datetime.now()
478
- })
479
-
480
- # self._save_to_database_conversations(session_id)
481
- # self._backup_to_hf_dataset()
482
- threading.Thread(target=self._save_to_database_conversations,
483
- args=(session_id,), daemon=True).start()
484
-
485
- if (datetime.now() - self._last_hf_backup).seconds >= self._hf_backup_interval:
486
- threading.Thread(target=self._backup_to_hf_dataset,
487
- daemon=True).start()
488
-
489
- def reset_conversation_state(self, session_id=None):
490
- """Reset conversation state for a session"""
491
- if session_id is None:
492
- session_id = self.get_session_id()
493
-
494
- with self._lock:
495
- if session_id in self._states:
496
- self._states[session_id].update({
497
- 'chat_history': [],
498
- 'conversation_state': [],
499
- 'last_accessed': datetime.now()
500
- })
501
- self._save_to_database_conversations(session_id)
502
-
503
- def get_all_sessions(self):
504
- """Get all active sessions (for analytics)"""
505
- self._cleanup_old_states()
506
- with self._lock:
507
- return list(self._states.keys())
508
-
509
- # ========================================================================
510
- # ANALYTICS STATE METHODS (unchanged)
511
- # ========================================================================
512
-
513
- def get_analytics_state(self, session_id=None):
514
- """Get analytics state for a session"""
515
- if session_id is None:
516
- session_id = self.get_session_id()
517
-
518
- self._cleanup_old_states()
519
-
520
- with self._lock:
521
- if session_id not in self._analytics_cache:
522
- self._analytics_cache[session_id] = {
523
- 'project_stats': {
524
- "total_conversations": None,
525
- "avg_session_length": None,
526
- "success_rate": None,
527
- "model_type": "Phi-3-mini (Fine-tuned)",
528
- "last_updated": None
529
- },
530
- 'recent_interactions': [],
531
- 'dashboard_html': None,
532
- 'last_refresh': None,
533
- 'export_history': [],
534
- 'database_status': 'unknown',
535
- 'error_state': None,
536
- 'last_accessed': datetime.now()
537
- }
538
- else:
539
- self._analytics_cache[session_id]['last_accessed'] = datetime.now()
540
-
541
- return self._analytics_cache[session_id].copy()
542
-
543
- def update_analytics_state(self, project_stats=None, recent_interactions=None,
544
- dashboard_html=None, error_state=None, session_id=None):
545
- """Update analytics state for a session"""
546
- if session_id is None:
547
- session_id = self.get_session_id()
548
-
549
- with self._lock:
550
- if session_id not in self._analytics_cache:
551
- self._analytics_cache[session_id] = {}
552
-
553
- current_time = datetime.now()
554
-
555
- if project_stats is not None:
556
- self._analytics_cache[session_id]['project_stats'] = project_stats.copy()
557
- self._analytics_cache[session_id]['last_refresh'] = current_time
558
-
559
- if recent_interactions is not None:
560
- self._analytics_cache[session_id]['recent_interactions'] = recent_interactions.copy()
561
-
562
- if dashboard_html is not None:
563
- self._analytics_cache[session_id]['dashboard_html'] = dashboard_html
564
-
565
- if error_state is not None:
566
- self._analytics_cache[session_id]['error_state'] = error_state
567
-
568
- self._analytics_cache[session_id]['last_accessed'] = current_time
569
-
570
- self._save_to_database_analytics(session_id)
571
- self._backup_to_hf_dataset()
572
-
573
- def add_export_record(self, export_type, filename, success=True, session_id=None):
574
- """Add export record to analytics state"""
575
- if session_id is None:
576
- session_id = self.get_session_id()
577
-
578
- with self._lock:
579
- if session_id not in self._analytics_cache:
580
- self.get_analytics_state(session_id)
581
-
582
- export_record = {
583
- 'timestamp': datetime.now().isoformat(),
584
- 'type': export_type,
585
- 'filename': filename,
586
- 'success': success
587
- }
588
-
589
- if 'export_history' not in self._analytics_cache[session_id]:
590
- self._analytics_cache[session_id]['export_history'] = []
591
-
592
- self._analytics_cache[session_id]['export_history'].append(export_record)
593
-
594
- if len(self._analytics_cache[session_id]['export_history']) > 20:
595
- self._analytics_cache[session_id]['export_history'] = \
596
- self._analytics_cache[session_id]['export_history'][-20:]
597
-
598
- self._save_to_database_analytics(session_id)
599
-
600
- # ========================================================================
601
- # ML MODEL CACHE METHODS (unchanged)
602
- # ========================================================================
603
-
604
- def get_ml_model_cache(self, model_type: str = "prompt_classifier"):
605
- """Get cached ML model"""
606
- with self._lock:
607
- return self._ml_models_cache.get(model_type, None)
608
-
609
- def cache_ml_model(self, model, model_type: str = "prompt_classifier", metadata: dict = None):
610
- """Cache a trained ML model"""
611
- with self._lock:
612
- self._ml_models_cache[model_type] = {
613
- 'model': model,
614
- 'cached_at': datetime.now(),
615
- 'metadata': metadata or {},
616
- 'access_count': 0
617
- }
618
- logger.info(f"ML model '{model_type}' cached successfully")
619
-
620
- # ========================================================================
621
- # EVALUATION STATE METHODS (unchanged)
622
- # ========================================================================
623
-
624
- def get_evaluation_state(self, session_id=None):
625
- """Get evaluation state for a session"""
626
- if session_id is None:
627
- session_id = self.get_session_id()
628
-
629
- with self._lock:
630
- if session_id not in self._evaluation_cache:
631
- self._evaluation_cache[session_id] = {
632
- 'educational_quality_scores': [],
633
- 'rag_performance_metrics': [],
634
- 'prompt_classification_accuracy': [],
635
- 'user_feedback_history': [],
636
- 'aggregate_metrics': {
637
- 'avg_educational_quality': 0.0,
638
- 'avg_rag_relevance': 0.0,
639
- 'classifier_accuracy_rate': 0.0,
640
- 'user_satisfaction_rate': 0.0
641
- },
642
- 'evaluation_session_count': 0,
643
- 'last_updated': datetime.now()
644
- }
645
-
646
- return self._evaluation_cache[session_id].copy()
647
-
648
- def add_educational_quality_score(self, user_query: str, response: str, metrics: dict, session_id=None):
649
- """Add educational quality evaluation result"""
650
- if session_id is None:
651
- session_id = self.get_session_id()
652
-
653
- with self._lock:
654
- if session_id not in self._evaluation_cache:
655
- self.get_evaluation_state(session_id)
656
-
657
- quality_record = {
658
- 'timestamp': datetime.now().isoformat(),
659
- 'user_query': user_query[:100],
660
- 'response_length': len(response),
661
- 'semantic_quality': metrics.get('semantic_quality', 0.0),
662
- 'educational_score': metrics.get('educational_score', 0.0),
663
- 'response_time': metrics.get('response_time', 0.0),
664
- 'overall_score': (metrics.get('semantic_quality', 0.0) + metrics.get('educational_score', 0.0)) / 2
665
- }
666
-
667
- self._evaluation_cache[session_id]['educational_quality_scores'].append(quality_record)
668
- self._update_aggregate_metrics(session_id)
669
-
670
- def add_prompt_classification_result(self, predicted_mode: str, was_successful: bool, metadata: dict = None, session_id=None):
671
- """Add prompt classification accuracy result"""
672
- if session_id is None:
673
- session_id = self.get_session_id()
674
-
675
- with self._lock:
676
- if session_id not in self._evaluation_cache:
677
- self.get_evaluation_state(session_id)
678
-
679
- classification_record = {
680
- 'timestamp': datetime.now().isoformat(),
681
- 'predicted_mode': predicted_mode,
682
- 'was_successful': was_successful,
683
- 'accuracy_score': 1.0 if was_successful else 0.0,
684
- 'metadata': metadata or {}
685
- }
686
-
687
- self._evaluation_cache[session_id]['prompt_classification_accuracy'].append(classification_record)
688
- self._update_aggregate_metrics(session_id)
689
-
690
- def add_user_feedback(self, response_id: str, feedback_type: str, conversation_context: dict = None, session_id=None):
691
- """Add user feedback result"""
692
- if session_id is None:
693
- session_id = self.get_session_id()
694
-
695
- with self._lock:
696
- if session_id not in self._evaluation_cache:
697
- self.get_evaluation_state(session_id)
698
-
699
- feedback_record = {
700
- 'timestamp': datetime.now().isoformat(),
701
- 'response_id': response_id,
702
- 'feedback_type': feedback_type,
703
- 'satisfaction_score': 1.0 if feedback_type == 'thumbs_up' else 0.0,
704
- 'conversation_context': conversation_context or {}
705
- }
706
-
707
- self._evaluation_cache[session_id]['user_feedback_history'].append(feedback_record)
708
- self._update_aggregate_metrics(session_id)
709
-
710
- def _update_aggregate_metrics(self, session_id: str):
711
- """Update aggregate metrics for a session"""
712
- eval_state = self._evaluation_cache[session_id]
713
-
714
- if eval_state['educational_quality_scores']:
715
- avg_educational = sum(score['overall_score'] for score in eval_state['educational_quality_scores']) / len(eval_state['educational_quality_scores'])
716
- eval_state['aggregate_metrics']['avg_educational_quality'] = avg_educational
717
-
718
- if eval_state['prompt_classification_accuracy']:
719
- accuracy_rate = sum(result['accuracy_score'] for result in eval_state['prompt_classification_accuracy']) / len(eval_state['prompt_classification_accuracy'])
720
- eval_state['aggregate_metrics']['classifier_accuracy_rate'] = accuracy_rate
721
-
722
- if eval_state['user_feedback_history']:
723
- satisfaction_rate = sum(feedback['satisfaction_score'] for feedback in eval_state['user_feedback_history']) / len(eval_state['user_feedback_history'])
724
- eval_state['aggregate_metrics']['user_satisfaction_rate'] = satisfaction_rate
725
-
726
- eval_state['last_updated'] = datetime.now()
727
- eval_state['evaluation_session_count'] += 1
728
-
729
- def get_evaluation_summary(self, session_id=None, include_history: bool = False):
730
- """Get evaluation summary for analytics"""
731
- if session_id is None:
732
- session_id = self.get_session_id()
733
-
734
- eval_state = self.get_evaluation_state(session_id)
735
-
736
- summary = {
737
- 'aggregate_metrics': eval_state['aggregate_metrics'],
738
- 'total_evaluations': {
739
- 'educational_quality': len(eval_state['educational_quality_scores']),
740
- 'classification_accuracy': len(eval_state['prompt_classification_accuracy']),
741
- 'user_feedback': len(eval_state['user_feedback_history'])
742
- },
743
- 'last_updated': eval_state['last_updated'],
744
- 'session_evaluation_count': eval_state['evaluation_session_count']
745
- }
746
-
747
- if include_history:
748
- summary['history'] = {
749
- 'recent_educational_scores': eval_state['educational_quality_scores'][-10:],
750
- 'recent_classification_results': eval_state['prompt_classification_accuracy'][-10:],
751
- 'recent_user_feedback': eval_state['user_feedback_history'][-10:]
752
- }
753
-
754
- return summary
755
-
756
- # ========================================================================
757
- # UTILITY METHODS
758
- # ========================================================================
759
-
760
- def get_cache_status(self, session_id=None):
761
- """Get cache status for debugging"""
762
- if session_id is None:
763
- session_id = self.get_session_id()
764
-
765
- with self._lock:
766
- analytics_cached = session_id in self._analytics_cache
767
- conversation_cached = session_id in self._states
768
-
769
- cache_info = {
770
- 'session_id': session_id,
771
- 'analytics_cached': analytics_cached,
772
- 'conversation_cached': conversation_cached,
773
- 'total_analytics_sessions': len(self._analytics_cache),
774
- 'total_conversation_sessions': len(self._states),
775
- 'prompt_state_active_count': len(self._prompt_state_manager.get_active_prompts())
776
- }
777
-
778
- if analytics_cached:
779
- analytics_state = self._analytics_cache[session_id]
780
- cache_info['analytics_last_refresh'] = analytics_state.get('last_refresh')
781
- cache_info['analytics_has_data'] = bool(analytics_state.get('project_stats', {}).get('total_conversations'))
782
-
783
- if conversation_cached:
784
- conversation_state = self._states[session_id]
785
- cache_info['conversation_length'] = len(conversation_state.get('conversation_state', []))
786
- cache_info['chat_history_length'] = len(conversation_state.get('chat_history', []))
787
-
788
- return cache_info
789
-
790
- def reset_analytics_state(self, session_id=None):
791
- """Reset analytics state for a session"""
792
- if session_id is None:
793
- session_id = self.get_session_id()
794
-
795
- with self._lock:
796
- if session_id in self._analytics_cache:
797
- del self._analytics_cache[session_id]
798
-
799
- def clear_all_states(self):
800
- """Clear all states - use with caution"""
801
- with self._lock:
802
- self._states.clear()
803
- self._analytics_cache.clear()
804
- self._ml_models_cache.clear()
805
- self._evaluation_cache.clear()
806
- self._prompt_state_manager.reset()
807
- logger.info("All global states cleared")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
styles.css CHANGED
@@ -1,353 +1,760 @@
1
- /* ============================
2
- GLOBAL THEME & VARIABLES
3
- ============================ */
4
  :root {
5
- /* Text Colors */
6
- --primarytext-color: #1a1a1a;
7
- --secondarytext-color: #555;
8
-
9
- /* Primary Colors */
10
- --primary-dark: #345da8;
11
- --primary-light: #a8b5c9;
12
-
13
- /* Secondary Colors */
14
- --secondary-dark: #063d80;
15
- --secondary-light: #6ea1fa;
16
-
17
- /* Chat & Container Colors */
18
- --chathistory_area: #f0f1f4;
19
- --container-color: #f5f6f8;
20
- --Send: #6ea1fa;
21
- --Send-hover: #87d0d5;
22
- --clear: #b2b8c2;
23
- --clear-hover: #2c5be0;
24
- --text_areabackground: #fafafa;
25
-
26
- /* Chat Bubble Colors */
27
- --bot-bubble-color: #b9c8e3;
28
- --user-bubble-color: #e3eaf6;
29
-
30
- /* Scrollbar Colors */
31
- --scrollbar-bg: #d0d3d8;
32
- --scrollbar-thumb: #a2a6ad;
33
- --scrollbar-thumb-hover: #888d94;
34
-
35
- /* Border & Radius */
36
  --border-thin: 1px;
37
  --border-medium: 2px;
 
 
38
  --border-default: 1px;
39
  --border-focus: 2px;
40
  --border-hover: 3px;
 
 
41
  --button-border: 2px;
42
- --radius-sm: 4px;
43
- --radius-md: 6px;
44
  }
45
 
46
- /* ============================
47
- DARK MODE THEME (SOFTER)
48
- ============================ */
49
- @media (prefers-color-scheme: dark) {
50
- :root {
51
- --primarytext-color: #f8f8f8;
52
- --secondarytext-color: #d0d3d8;
53
 
54
- --primary-dark: #27477d;
55
- --primary-light: #7d8da9;
 
 
 
 
 
 
 
56
 
57
- --secondary-dark: #042a59;
58
- --secondary-light: #5e88d6;
 
 
59
 
60
- --chathistory_area: #202327;
61
- --container-color: #1b1d20;
62
- --Send: #5e88d6;
63
- --Send-hover: #7ac4c9;
64
- --clear: #7a7f88;
65
- --clear-hover: #5e88d6;
66
- --text_areabackground: #25282c;
67
 
68
- --bot-bubble-color: #425575;
69
- --user-bubble-color: #566583;
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- --scrollbar-bg: #2b2e33;
72
- --scrollbar-thumb: #4b4f56;
73
- --scrollbar-thumb-hover: #5e636b;
74
- }
 
 
75
  }
76
 
77
- /* ============================
78
- FONT IMPORT & BASE STYLING
79
- ============================ */
80
- @import url('https://fonts.googleapis.com/css2?family=Oswald:wght@200..700&display=swap');
 
 
 
 
81
 
82
- body {
83
- background: var(--text_areabackground);
84
- color: var(--primarytext-color);
85
- font-family: "Oswald", sans-serif;
86
- margin: 0;
 
 
 
 
 
87
  }
88
 
89
- * {
90
- color: var(--primarytext-color) !important;
91
- font-family: "Oswald", sans-serif !important;
92
- box-sizing: border-box;
 
 
 
 
 
 
93
  }
94
 
95
- /* ============================
96
- CUSTOM SCROLLBAR
97
- ============================ */
98
- ::-webkit-scrollbar {
99
- width: 12px;
 
 
 
 
 
100
  }
101
 
102
- ::-webkit-scrollbar-track {
103
- background: var(--scrollbar-bg);
 
 
 
 
104
  }
105
 
106
- ::-webkit-scrollbar-thumb {
107
- background-color: var(--scrollbar-thumb);
108
- border-radius: 6px;
109
- border: 2px solid var(--scrollbar-bg);
 
 
110
  }
111
 
112
- ::-webkit-scrollbar-thumb:hover {
113
- background-color: var(--scrollbar-thumb-hover);
 
 
 
 
 
 
 
 
 
 
114
  }
115
 
116
- /* ============================
117
- GRADIO CONTAINER & LAYOUT
118
- ============================ */
119
- .gradio-container,
120
- [data-testid="block-container"],
121
- .contain {
122
- background-color: var(--container-color) !important;
123
- font-family: "Oswald", sans-serif !important;
124
- display: flex !important;
125
- flex-direction: column !important;
126
- height: 100vh !important;
127
- max-height: 100vh !important;
128
- overflow: hidden !important;
129
- }
130
-
131
- /* ============================
132
- HEADER & NAVIGATION
133
- ============================ */
134
- .title-header {
135
- background-color: transparent;
136
- padding: 10px;
137
- border-bottom: var(--border-focus) solid var(--primary-dark);
138
- display: flex;
139
- align-items: center;
140
- height: 60px !important;
141
  }
142
 
143
- .title-header h1 {
144
- font-size: 3.5rem;
145
- font-weight: 700;
146
- color: var(--primarytext-color);
147
- margin: 0;
148
  }
149
 
150
- /* ============================
151
- CHAT CONTAINER
152
- ============================ */
153
- #main-chatbot,
154
- [data-testid="chatbot"],
155
- .gradio-chatbot,
156
- [role="log"] {
157
- border: var(--border-default) solid var(--primary-dark) !important;
158
- border-radius: var(--radius-md) !important;
159
- background-color: var(--chathistory_area) !important;
160
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1) !important;
161
- padding: 15px !important;
162
- margin: 15px 20px !important;
163
- flex: 1 !important;
164
- overflow-y: auto !important;
165
- }
166
-
167
- /* ============================
168
- TEXT INPUT AREA
169
- ============================ */
170
- textarea,
171
- .gradio-textbox textarea {
172
- background-color: var(--text_areabackground) !important;
173
- border: var(--border-default) solid var(--secondary-dark) !important;
174
- border-radius: var(--radius-md) !important;
175
- color: var(--primarytext-color) !important;
176
- padding: 10px !important;
177
- resize: none !important;
178
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
179
  }
180
 
181
- textarea:focus {
182
- border-color: var(--secondary-light) !important;
183
- box-shadow: 0 0 0 var(--border-focus) rgba(96, 165, 250, 0.2) !important;
 
 
 
184
  }
185
 
186
- /* ============================
187
- BUTTONS
188
- ============================ */
189
- button.send-button {
190
- background-color: var(--Send) !important;
191
- color: var(--primarytext-color) !important;
192
- border: var(--button-border) solid var(--secondary-dark) !important;
193
- border-radius: var(--radius-md) !important;
194
- padding: 8px 16px !important;
195
- font-weight: 600 !important;
196
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
197
- width: 100%;
198
  }
199
 
200
- button.send-button:hover {
201
- background-color: var(--Send-hover) !important;
202
  }
203
 
204
- button.clear-button {
205
- background-color: var(--clear) !important;
206
- color: var(--primarytext-color) !important;
207
- border: var(--button-border) solid var(--secondary-dark) !important;
208
- border-radius: var(--radius-md) !important;
209
- padding: 8px 16px !important;
210
- font-weight: 600 !important;
211
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
212
- width: 100%;
213
  }
214
 
215
- button.clear-button:hover {
216
- background-color: var(--clear-hover) !important;
 
 
 
 
 
 
 
 
 
 
 
 
217
  }
218
 
219
- /* ============================
220
- CHAT BUBBLES (VARIABLE COLORS)
221
- ============================ */
222
- .message.user,
223
  .message.bot {
224
- background: none !important;
225
- border: none !important;
226
- padding: 0 !important;
227
- margin: 0 !important;
228
- box-shadow: none !important;
229
- }
230
-
231
- .message-row {
232
- display: flex;
233
- margin: 8px 12px;
234
- }
235
-
236
- .message.panel-full-width {
237
- max-width: 80%;
238
- min-width: 240px;
239
- padding: 14px 20px !important;
240
- border-radius: 18px !important;
241
- box-shadow: none !important;
242
- position: relative;
243
- line-height: 1.5;
244
- word-wrap: break-word;
245
- }
246
-
247
- /* Bot Bubble */
248
- .message-row.bot-row .message.panel-full-width {
249
- background-color: var(--bot-bubble-color) !important;
250
- color: var(--primarytext-color) !important;
251
- margin-right: auto;
252
- margin-left: 0;
253
- }
254
-
255
- .message-row.bot-row .message.panel-full-width::before {
256
- content: "";
257
- position: absolute;
258
- top: 12px;
259
- left: -10px;
260
- width: 0;
261
- height: 0;
262
- border-top: 10px solid transparent;
263
- border-right: 10px solid var(--bot-bubble-color);
264
- border-bottom: 10px solid transparent;
265
- }
266
-
267
- /* User Bubble */
268
- .message-row.user-row .message.panel-full-width {
269
- background-color: var(--user-bubble-color) !important;
270
- color: var(--primarytext-color) !important;
271
- margin-left: auto;
272
- margin-right: 0;
273
- }
274
-
275
- .message-row.user-row .message.panel-full-width::before {
276
- content: "";
277
- position: absolute;
278
- top: 12px;
279
- right: -10px;
280
- width: 0;
281
- height: 0;
282
- border-top: 10px solid transparent;
283
- border-left: 10px solid var(--user-bubble-color);
284
- border-bottom: 10px solid transparent;
285
- }
286
-
287
- /* ============================
288
- RESPONSIVE ADJUSTMENTS
289
- ============================ */
290
- @media (max-width: 768px) {
291
- .message.panel-full-width {
292
- max-width: 85%;
293
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  }
295
 
296
- /* ============================
297
- FOOTER: RESTORE BUILT-IN GRADIO LINKS (settings, API, etc.)
298
- ============================ */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  footer.svelte-czcr5b {
300
- display: flex !important;
301
- align-items: center !important;
302
- justify-content: center !important;
303
- gap: 12px !important;
304
- visibility: visible !important;
305
- position: fixed !important;
306
- bottom: 0 !important;
307
- left: 0 !important;
308
- right: 0 !important;
309
- background-color: var(--container-color) !important;
310
- backdrop-filter: blur(5px) !important;
311
- border-top: var(--border-default) solid rgba(0, 0, 0, 0.12) !important;
312
- padding: 8px 16px !important;
313
- z-index: 1000 !important;
314
- min-height: 36px !important;
315
- }
316
-
317
-
318
- footer.svelte-czcr5b a,
319
- footer.svelte-czcr5b button,
320
- footer.svelte-czcr5b span {
321
- color: var(--secondarytext-color) !important;
322
- font-size: 12px !important;
323
- font-family: "Oswald", sans-serif !important;
324
- text-decoration: none !important;
325
- background: none !important;
326
- border: none !important;
327
- cursor: pointer !important;
328
- opacity: 0.8;
329
- transition: opacity 0.15s ease;
330
- }
331
-
332
-
333
- footer.svelte-czcr5b a:hover,
334
- footer.svelte-czcr5b button:hover,
335
- footer.svelte-czcr5b span:hover {
336
- opacity: 1;
337
- color: var(--primarytext-color) !important;
338
- }
339
-
340
-
341
- /* Divider style between footer links */
342
  footer.svelte-czcr5b .divider {
343
- color: var(--secondarytext-color) !important;
344
- opacity: 0.5;
345
- margin: 0 6px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  }
347
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
- /* Make sure footer items never collapse */
350
- footer.svelte-czcr5b > * {
351
- display: inline-flex !important;
352
- align-items: center !important;
 
 
 
 
 
 
 
 
 
353
  }
 
 
 
 
1
  :root {
2
+
3
+ /* Text colors */
4
+ --primarytext-color: #0f0e09;
5
+ --secondarytext-color: #696966;
6
+
7
+ /* Primary colors - Blue theme */
8
+ --primary-dark: #1e3a8a; /* Deep blue */
9
+ --primary-light: #3b82f6; /* Medium blue */
10
+
11
+ /* Secondary colors - Blue shades */
12
+ --secondary-dark: #1d4ed8; /* Darker blue */
13
+ --secondary-light: #60a5fa; /* Light blue */
14
+
15
+ /* Chat colors */
16
+ --user_message: #bfdbfe; /* Light blue bubble for user */
17
+ --ai_message: #14b8a6; /* Medium teal for AI */
18
+ --chathistory_area: #f3f4f6; /* Very light grey for chat history */
19
+
20
+ /* Text, Chat, UI */
21
+ --Send: #3b82f6; /* Send button - medium blue (lighter than clear) */
22
+ --clear: #1e40af; /* Clear button - darker blue */
23
+ --Send-hover: #2563eb; /* Send button hover */
24
+ --clear-hover: #1d4ed8; /* Clear button hover */
25
+ --text_areabackground: #f3f4f6; /* Very light grey for text areas */
26
+
27
+
28
+ /* Border thickness variables */
 
 
 
 
29
  --border-thin: 1px;
30
  --border-medium: 2px;
31
+
32
+ /* Semantic border variables */
33
  --border-default: 1px;
34
  --border-focus: 2px;
35
  --border-hover: 3px;
36
+
37
+ /* Component-specific borders */
38
  --button-border: 2px;
39
+ --input-border: 1px;
40
+ --card-border: 1px;
41
  }
42
 
43
+ /* Import Oswald font - Google Fonts */
44
+ @import url('https://fonts.googleapis.com/css2?family=Oswald:wght@200..700&display=swap');
 
 
 
 
 
45
 
46
+ /* HIDE the HTML components that create scrollbars */
47
+ #component-1, #component-2 {
48
+ display: none !important;
49
+ height: 0 !important;
50
+ min-height: 0 !important;
51
+ padding: 0 !important;
52
+ margin: 0 !important;
53
+ visibility: hidden !important;
54
+ }
55
 
56
+ /* Specific text elements - ENSURE VISIBILITY */
57
+ body, p, span, div, h1, h2, h3, h4, h5, h6, label, a {
58
+ color: var(--primarytext-color) !important;
59
+ }
60
 
61
+ /* All Gradio text elements - OVERRIDE GRADIO DEFAULTS */
62
+ .gradio-container,
63
+ .gradio-container *:not(textarea):not(input):not(button) {
64
+ color: var(--primarytext-color) !important;
65
+ }
 
 
66
 
67
+ /* Title header with transparent background */
68
+ .title-header {
69
+ background-color: transparent;
70
+ padding: 10px 20px;
71
+ margin: 0 !important;
72
+ border-bottom: var(--border-focus) solid var(--primary-dark);
73
+ text-align: left;
74
+ flex-shrink: 0 !important;
75
+ height: 60px !important;
76
+ display: flex !important;
77
+ align-items: center !important;
78
+ width: 100% !important;
79
+ }
80
 
81
+ .title-header h1 {
82
+ font-size: 1.5rem;
83
+ font-weight: 600 !important;
84
+ color: var(--primarytext-color) !important;
85
+ margin: 0;
86
+ padding: 0;
87
  }
88
 
89
+ /* More aggressive Gradio overrides - keep current background */
90
+ .gradio-container,
91
+ .gradio-container *,
92
+ [data-testid="block-container"],
93
+ .contain {
94
+ background-color: rgb(240, 236, 230) !important;
95
+ font-family: "Oswald", sans-serif !important;
96
+ }
97
 
98
+ /* Chat container - target all possible selectors */
99
+ [data-testid="chatbot"],
100
+ .chatbot,
101
+ .gradio-chatbot,
102
+ #main-chatbot,
103
+ [role="log"] {
104
+ background-color: var(--chathistory_area) !important;
105
+ border: var(--border-default) solid var(--primary-dark) !important;
106
+ border-radius: 6px !important;
107
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1) !important;
108
  }
109
 
110
+ /* Text input - target all possible selectors */
111
+ [data-testid="textbox"] textarea,
112
+ .gradio-textbox textarea,
113
+ textarea {
114
+ background-color: var(--text_areabackground) !important;
115
+ border: var(--input-border) solid var(--secondary-dark) !important;
116
+ border-radius: 6px !important;
117
+ color: var(--primarytext-color) !important;
118
+ font-family: "Oswald", sans-serif !important;
119
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
120
  }
121
 
122
+ /* Buttons - target all possible selectors */
123
+ [data-testid="button"],
124
+ .gradio-button,
125
+ button.send-button,
126
+ button.clear-button {
127
+ border-radius: 6px !important;
128
+ font-family: "Oswald", sans-serif !important;
129
+ font-weight: 500 !important;
130
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
131
+ color: var(--primarytext-color) !important;
132
  }
133
 
134
+ /* Send button specific */
135
+ button.send-button,
136
+ [data-testid="button"]:nth-of-type(1) {
137
+ background-color: var(--Send) !important;
138
+ color: var(--primarytext-color) !important;
139
+ border: var(--button-border) solid var(--secondary-dark) !important;
140
  }
141
 
142
+ /* Clear button specific */
143
+ button.clear-button,
144
+ [data-testid="button"]:nth-of-type(2) {
145
+ background-color: var(--clear) !important;
146
+ color: var(--primarytext-color) !important;
147
+ border: var(--button-border) solid var(--secondary-dark) !important;
148
  }
149
 
150
+ /* Background area behind everything */
151
+ .gradio-container {
152
+ background-color: rgb(240, 236, 230) !important;
153
+ font-family: "Oswald", sans-serif !important;
154
+ color: var(--primarytext-color) !important;
155
+ padding: 0 !important;
156
+ margin: 0 !important;
157
+ height: 100vh !important;
158
+ max-height: 100vh !important;
159
+ overflow: hidden !important;
160
+ display: flex !important;
161
+ flex-direction: column !important;
162
  }
163
 
164
+ /* Target Gradio's internal structure */
165
+ .gradio-container > div {
166
+ height: 95% !important;
167
+ display: flex !important;
168
+ flex-direction: column !important;
169
+ padding-top: 0 !important;
170
+ margin-top: 0 !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  }
172
 
173
+ /* Main container wrapper */
174
+ .main-container {
175
+ padding-bottom: 50px !important;
 
 
176
  }
177
 
178
/* Chat history panel: the scrollable flex child between header and input row. */
#main-chatbot {
  flex: 1 !important;
  min-height: 0 !important;
  overflow-y: auto !important;
  margin: 15px 20px !important;
  padding: 15px !important;
  border: var(--border-default) solid var(--primary-dark);
  border-radius: 6px !important;
  background-color: var(--chathistory_area);
  box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
  color: var(--primarytext-color) !important;
}

/* Gradio's own chatbot wrapper gets the same frame. */
.gradio-container .gradio-chatbot {
  background-color: var(--chathistory_area) !important;
  border: var(--border-default) solid var(--primary-dark) !important;
  border-radius: 6px !important;
  box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1) !important;
}

.gradio-container .gradio-chatbot > div {
  background-color: var(--chathistory_area) !important;
}

/* Let the per-message bubbles paint their own backgrounds. */
.gradio-container .gradio-chatbot .message {
  background-color: transparent !important;
}
207
 
208
/* User message bubble. */
.message.user {
  border: 1pt solid var(--secondary-dark);
  background-color: var(--user_message);
}

/* User message content, pushed to the right edge.
   NOTE(review): the `.markdown *` arm applies border/padding/max-width to
   EVERY descendant element, not just the bubble — likely only color and
   background were meant to cascade; confirm before narrowing the selector. */
.message.user .markdown,
.message.user .markdown * {
  background-color: var(--user_message) !important;
  color: var(--primarytext-color) !important;
  border: var(--border-default) solid var(--primary-dark) !important;
  border-radius: 8px !important;
  padding: 12px 16px !important;
  max-width: 70%;
  margin-left: auto;
  margin-right: 0;
  margin-bottom: 10px !important;
  word-wrap: break-word;
  font-weight: 400 !important;
}

/* AI message bubble. */
.message.bot {
  border: 1pt solid var(--secondary-dark);
  background-color: var(--ai_message);
}

/* AI message content, pushed to the left edge (mirror of the user rule,
   same `.markdown *` caveat as above). */
.message.bot .markdown,
.message.bot .markdown * {
  background-color: var(--ai_message) !important;
  color: var(--primarytext-color) !important;
  border: var(--border-default) solid var(--secondary-dark) !important;
  border-radius: 8px !important;
  padding: 12px 16px !important;
  max-width: 70%;
  margin-left: 0;
  margin-right: auto;
  margin-bottom: 10px !important;
  word-wrap: break-word;
  font-weight: 400 !important;
}

/* Force readable text inside both bubble types (fixes invisible text). */
.message.user .markdown p,
.message.user .markdown span,
.message.user .markdown div,
.message.bot .markdown p,
.message.bot .markdown span,
.message.bot .markdown div {
  color: var(--primarytext-color) !important;
}
261
+
262
/* Input textbox wrapper. */
.input-textbox {
  border: var(--border-default) solid var(--primary-dark);
  background-color: var(--text_areabackground);
}

/* The textarea inside the chat input. */
.input-textbox textarea {
  background-color: var(--text_areabackground) !important;
  border: var(--input-border) solid var(--secondary-dark) !important;
  border-radius: 6px !important;
  color: var(--primarytext-color) !important;
  font-family: "Oswald", sans-serif !important;
  padding: 10px !important;
  resize: none !important;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

/* Broader Gradio textarea selectors, kept so the skin wins at every
   specificity level Gradio uses. */
.gradio-container textarea {
  background-color: var(--text_areabackground) !important;
  border: var(--input-border) solid var(--secondary-dark) !important;
  border-radius: 6px !important;
  color: var(--primarytext-color) !important;
  font-family: "Oswald", sans-serif !important;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
}

.gradio-container .gradio-textbox textarea {
  background-color: var(--text_areabackground) !important;
  border: var(--input-border) solid var(--secondary-dark) !important;
  border-radius: 6px !important;
  color: var(--primarytext-color) !important;
  font-family: "Oswald", sans-serif !important;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
}

/* The textbox block itself stays chromeless; the textarea carries the frame. */
.gradio-container .gradio-textbox {
  background-color: transparent !important;
  border: none !important;
}

/* Focus ring for the chat input. */
.input-textbox textarea:focus {
  border-color: var(--secondary-light) !important;
  box-shadow: 0 0 0 var(--border-focus) rgba(96, 165, 250, 0.2) !important;
}
309
+
310
/* Any other textarea: same skin as the chat input.
   FIX: the original declared `background-color` and `border` twice in this
   rule — a non-!important pair immediately shadowed by the !important pair.
   The dead declarations are removed; the winning values are unchanged. */
textarea {
  background-color: var(--text_areabackground) !important;
  border: var(--input-border) solid var(--secondary-dark) !important;
  border-radius: 6px !important;
  color: var(--primarytext-color) !important;
  font-family: "Oswald", sans-serif !important;
  padding: 10px !important;
  resize: none !important;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

/* Generic focus ring. */
textarea:focus {
  border-color: var(--secondary-light) !important;
  box-shadow: 0 0 0 var(--border-focus) rgba(96, 165, 250, 0.2) !important;
}

/* Keep typed text visible everywhere. */
textarea, input {
  color: var(--primarytext-color) !important;
}

/* Muted placeholder text. */
textarea::placeholder,
input::placeholder {
  color: var(--secondarytext-color) !important;
}
340
+
341
/* Send button.
   FIX: duplicate `border`/`background-color` declarations removed — the
   original declared each twice, with the non-!important copy shadowed by
   the !important one; the winning values are unchanged. */
.send-button {
  background-color: var(--Send) !important;
  color: var(--primarytext-color) !important;
  border: var(--button-border) solid var(--secondary-dark) !important;
  border-radius: 6px !important;
  font-weight: 500 !important;
  padding: 8px 16px !important;
  margin-bottom: 5px !important;
  width: 100% !important;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

/* Higher-specificity copy so Gradio's own button styles can't win. */
.gradio-container button.send-button {
  background-color: var(--Send) !important;
  color: var(--primarytext-color) !important;
  border: var(--button-border) solid var(--secondary-dark) !important;
  border-radius: 6px !important;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
  font-family: "Oswald", sans-serif !important;
}

/* Send hover (shadowed duplicate background-color removed). */
.send-button:hover {
  background-color: var(--Send-hover) !important;
  border-color: var(--secondary-dark) !important;
}

/* Clear button (same dedupe as .send-button). */
.clear-button {
  background-color: var(--clear) !important;
  color: var(--primarytext-color) !important;
  border: var(--button-border) solid var(--secondary-dark) !important;
  border-radius: 6px !important;
  font-weight: 500 !important;
  padding: 8px 16px !important;
  margin-bottom: 5px !important;
  width: 100% !important;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

/* Higher-specificity copy for the clear button. */
.gradio-container button.clear-button {
  background-color: var(--clear) !important;
  color: var(--primarytext-color) !important;
  border: var(--button-border) solid var(--secondary-dark) !important;
  border-radius: 6px !important;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
  font-family: "Oswald", sans-serif !important;
}

/* Clear hover (shadowed duplicate background-color removed). */
.clear-button:hover {
  background-color: var(--clear-hover) !important;
  border-color: var(--secondary-dark) !important;
}

/* Last-resort button text visibility. */
button, .gradio-button {
  color: var(--primarytext-color) !important;
}
409
+
410
+
411
/* Row holding the textbox plus the button column; never shrinks away. */
.input-controls {
  width: 100% !important;
  padding: 15px 20px !important;
  background-color: transparent !important;
  flex-shrink: 0 !important;
}

/* Narrow column for the stacked send/clear buttons. */
.button-column {
  min-width: 80px !important;
  margin-left: 10px !important;
}
424
+
425
/* Pin Gradio's footer to the viewport bottom with a translucent blur.
   NOTE(review): `svelte-czcr5b` is a build-generated class hash — it will
   change when Gradio is upgraded; confirm on each dependency bump. */
footer.svelte-czcr5b {
  display: flex !important;
  visibility: visible !important;
  position: fixed !important;
  bottom: 0 !important;
  left: 0 !important;
  right: 0 !important;
  background-color: transparent !important;
  backdrop-filter: blur(5px) !important;
  border-top: var(--border-default) solid #59524f !important;
  padding: 8px 16px !important;
  z-index: 1000 !important;
  height: auto !important;
  min-height: 40px !important;
}

/* De-emphasize the footer's own buttons. */
footer.svelte-czcr5b button {
  background-color: transparent !important;
  color: var(--secondarytext-color) !important;
  border: none !important;
  font-family: "Oswald", sans-serif !important;
  font-size: 12px !important;
}

/* Footer divider matches the muted footer text. */
footer.svelte-czcr5b .divider {
  color: var(--secondarytext-color) !important;
}

/* Small screens: wider chat bubbles, tighter input row. */
@media (max-width: 768px) {
  .message.bot .markdown,
  .message.user .markdown {
    max-width: 85%;
  }

  .input-controls {
    padding: 10px !important;
  }
}
467
+
468
/* Gradio theme variables captured from the live app's dark mode, remapped
   onto this project's palette so rendering is consistent regardless of the
   user's system color-scheme setting.
   NOTE(review): `:root .dark` only matches elements carrying class `dark`
   that are DESCENDANTS of the root — it does not fire when no `.dark`
   class is present. If these variables are meant to apply unconditionally,
   the intended selector may be `:root, .dark`; confirm against the DOM. */
:root .dark {
  /* Body / global */
  --body-background-fill: var(--text_areabackground);
  --body-text-color: var(--primarytext-color);
  --color-accent-soft: var(--primary-light);
  --background-fill-primary: var(--chathistory_area);
  --background-fill-secondary: var(--text_areabackground);
  --border-color-accent: var(--primary-dark);
  --border-color-primary: var(--primary-dark);
  --link-text-color-active: var(--secondary-light);
  --link-text-color: var(--secondary-light);
  --link-text-color-hover: var(--secondary-dark);
  --link-text-color-visited: var(--secondary-dark);
  --body-text-color-subdued: var(--secondarytext-color);
  --accordion-text-color: var(--primarytext-color);
  --table-text-color: var(--primarytext-color);
  --shadow-spread: 1px;

  /* Blocks and panels.
     NOTE(review): some names below use underscores (--block_border_width)
     where Gradio's canonical names use hyphens — copied verbatim from the
     captured HTML; verify which spelling Gradio actually reads. */
  --block-background-fill: var(--chathistory_area);
  --block-border-color: var(--primary-dark);
  --block_border_width: 1.5px;
  --block-info-text-color: var(--secondarytext-color);
  --block-label-background-fill: var(--text_areabackground);
  --block-label-border-color: var(--primary-dark);
  --block_label_border_width: 1.5px;
  --block-label-text-color: var(--primarytext-color);
  --block_shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  --block_title_background_fill: transparent;
  --block_title_border_color: var(--primary-dark);
  --block_title_border_width: 1.5px;
  --block-title-text-color: var(--primarytext-color);
  --panel-background-fill: var(--text_areabackground);
  --panel-border-color: var(--primary-dark);
  --panel_border_width: 1.5px;
  --border-color-accent-subdued: var(--secondary-dark);
  --code-background-fill: var(--primary-light);

  /* Checkboxes */
  --checkbox-background-color: var(--text_areabackground);
  --checkbox-background-color-focus: var(--text_areabackground);
  --checkbox-background-color-hover: var(--text_areabackground);
  --checkbox-background-color-selected: var(--primary-light);
  --checkbox-border-color: var(--primary-dark);
  --checkbox-border-color-focus: var(--secondary-light);
  --checkbox-border-color-hover: var(--secondary-dark);
  --checkbox-border-color-selected: var(--secondary-light);
  --checkbox-border-width: 1.5px;
  --checkbox-label-background-fill: var(--text_areabackground);
  --checkbox-label-background-fill-hover: var(--text_areabackground);
  --checkbox-label-background-fill-selected: var(--primary-light);
  --checkbox-label-border-color: var(--primary-dark);
  --checkbox-label-border-color-hover: var(--secondary-dark);
  --checkbox-label-border-color-selected: var(--secondary-light);
  --checkbox-label-border-width: 1.5px;
  --checkbox-label-text-color: var(--primarytext-color);
  --checkbox-label-text-color-selected: var(--primarytext-color);

  /* Errors */
  --error-background-fill: var(--text_areabackground);
  --error-border-color: #ef4444;
  --error_border_width: 1.5px;
  --error-text-color: #ef4444;
  --error-icon-color: #ef4444;

  /* Inputs */
  --input-background-fill: var(--text_areabackground);
  --input_background_fill_focus: var(--text_areabackground);
  --input-background-fill-hover: var(--text_areabackground);
  --input-border-color: var(--secondary-dark);
  --input-border-color-focus: var(--secondary-light);
  --input-border-color-hover: var(--secondary-dark);
  --input_border_width: 1.5px;
  --input-placeholder-color: var(--secondarytext-color);
  --input_shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  --input-shadow-focus: 0 0 0 var(--shadow-spread) rgba(96, 165, 250, 0.2), var(--shadow-inset);

  /* Misc widgets */
  --loader_color: var(--secondary-light);
  --slider_color: var(--secondary-light);
  --stat-background-fill: linear-gradient(to right, var(--primary-light), var(--primary-dark));
  --table-border-color: var(--primary-dark);
  --table-even-background-fill: var(--text_areabackground);
  --table-odd-background-fill: var(--chathistory_area);
  --table-row-focus: var(--secondary-light);

  /* Buttons */
  --button-border-width: 1.5px;
  --button-cancel-background-fill: var(--clear);
  --button-cancel-background-fill-hover: var(--clear-hover);
  --button-cancel-border-color: var(--secondary-dark);
  --button-cancel-border-color-hover: var(--secondary-dark);
  --button-cancel-text-color: var(--primarytext-color);
  --button-cancel-text-color-hover: var(--primarytext-color);
  --button-primary-background-fill: var(--Send);
  --button-primary-background-fill-hover: var(--Send-hover);
  --button-primary-border-color: var(--secondary-dark);
  --button-primary-border-color-hover: var(--secondary-dark);
  --button-primary-text-color: var(--primarytext-color);
  --button-primary-text-color-hover: var(--primarytext-color);
  --button_primary_shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  --button-primary-shadow-hover: 0 2px 4px rgba(0, 0, 0, 0.1);
  --button-primary-shadow-active: 0 2px 4px rgba(0, 0, 0, 0.1);
  --button-secondary-background-fill: var(--clear);
  --button-secondary-background-fill-hover: var(--clear-hover);
  --button-secondary-border-color: var(--secondary-dark);
  --button-secondary-border-color-hover: var(--secondary-dark);
  --button-secondary-text-color: var(--primarytext-color);
  --button-secondary-text-color-hover: var(--primarytext-color);
  --button_secondary_shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  --button-secondary-shadow-hover: 0 2px 4px rgba(0, 0, 0, 0.1);
  --button-secondary-shadow-active: 0 2px 4px rgba(0, 0, 0, 0.1);

  /* Base palette scales (Gradio defaults, kept verbatim) */
  --name: origin;
  --primary-50: #eff6ff;
  --primary-100: #dbeafe;
  --primary-200: #bfdbfe;
  --primary-300: #93c5fd;
  --primary-400: #60a5fa;
  --primary-500: #3b82f6;
  --primary-600: #2563eb;
  --primary-700: #1d4ed8;
  --primary-800: #1e40af;
  --primary-900: #1e3a8a;
  --primary-950: #172554;
  --secondary-50: #f0f9ff;
  --secondary-100: #e0f2fe;
  --secondary-200: #bae6fd;
  --secondary-300: #7dd3fc;
  --secondary-400: #38bdf8;
  --secondary-500: #0ea5e9;
  --secondary-600: #0284c7;
  --secondary-700: #0369a1;
  --secondary-800: #075985;
  --secondary-900: #0c4a6e;
  --secondary-950: #082f49;
  --neutral-50: #f9fafb;
  --neutral-100: #f3f4f6;
  --neutral-200: #e5e7eb;
  --neutral-300: #d1d5db;
  --neutral-400: #9ca3af;
  --neutral-500: #6b7280;
  --neutral-600: #4b5563;
  --neutral-700: #374151;
  --neutral-800: #1f2937;
  --neutral-900: #111827;
  --neutral-950: #0b0f19;

  /* Spacing / radius / type scales */
  --spacing-xxs: 1px;
  --spacing-xs: 2px;
  --spacing-sm: 4px;
  --spacing-md: 6px;
  --spacing-lg: 8px;
  --spacing-xl: 10px;
  --spacing-xxl: 16px;
  --radius-xxs: 1px;
  --radius-xs: 2px;
  --radius-sm: 4px;
  --radius-md: 6px;
  --radius-lg: 8px;
  --radius-xl: 12px;
  --radius-xxl: 22px;
  --text-xxs: 9px;
  --text-xs: 10px;
  --text-sm: 12px;
  --text-md: 14px;
  --text-lg: 16px;
  --text-xl: 22px;
  --text-xxl: 26px;
  --font: 'Oswald', ui-sans-serif, system-ui, sans-serif;
  --font-mono: 'IBM Plex Mono', ui-monospace, Consolas, monospace;
  --body-text-size: var(--text-md);
  --body-text-weight: 400;
  --embed-radius: var(--radius-sm);
  --color-accent: var(--secondary-light);
  --shadow-drop: 0 2px 4px rgba(0, 0, 0, 0.1);
  --shadow-drop-lg: 0 4px 8px rgba(0, 0, 0, 0.1);
  --shadow-inset: 0 2px 4px rgba(0, 0, 0, 0.1) inset;

  /* Block details */
  --block-border-width: 1.5px;
  --block-info-text-size: var(--text-sm);
  --block-info-text-weight: 400;
  --block-label-border-width: 1.5px;
  --block-label-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  --block-label-margin: 0;
  --block-label-padding: var(--spacing-sm) var(--spacing-lg);
  --block-label-radius: calc(var(--radius-sm) - 1px) 0 calc(var(--radius-sm) - 1px) 0;
  --block-label-right-radius: 0 calc(var(--radius-sm) - 1px) 0 calc(var(--radius-sm) - 1px);
  --block-label-text-size: var(--text-sm);
  --block-label-text-weight: 400;
  --block-padding: var(--spacing-xl) calc(var(--spacing-xl) + 2px);
  --block-radius: var(--radius-sm);
  --block-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  --block-title-background-fill: transparent;
  --block-title-border-color: var(--primary-dark);
  --block-title-border-width: 1.5px;
  --block-title-padding: 0;
  --block-title-radius: var(--radius-sm);
  --block-title-text-size: var(--text-md);
  --block-title-text-weight: 400;
  --container-radius: var(--radius-sm);
  --form-gap-width: 1.5px;
  --layout-gap: var(--spacing-xxl);
  --panel-border-width: 1.5px;
  --section-header-text-size: var(--text-md);
  --section-header-text-weight: 400;
  --chatbot-text-size: var(--text-lg);

  /* Checkbox details */
  --checkbox-border-radius: var(--radius-sm);
  --checkbox-label-gap: var(--spacing-lg);
  --checkbox-label-padding: var(--spacing-md) calc(2 * var(--spacing-md));
  --checkbox-label-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  --checkbox-label-text-size: var(--text-md);
  --checkbox-label-text-weight: 400;
  --checkbox-check: url(data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='%230f0e09' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e);
  --radio-circle: url(data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='%230f0e09' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e);
  --checkbox-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);

  /* Input details */
  --error-border-width: 1.5px;
  --input-background-fill-focus: var(--text_areabackground);
  --input-border-width: 1.5px;
  --input-padding: var(--spacing-xl);
  --input-radius: var(--radius-sm);
  --input-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  --input-text-size: var(--text-md);
  --input-text-weight: 400;
  --loader-color: var(--secondary-light);
  --prose-text-size: var(--text-md);
  --prose-text-weight: 400;
  --prose-header-text-weight: 600;
  --slider-color: var(--secondary-light);
  --table-radius: var(--radius-sm);

  /* Button sizing */
  --button-transform-hover: none;
  --button-transform-active: none;
  --button-transition: none;
  --button-large-padding: var(--spacing-lg) calc(2 * var(--spacing-lg));
  --button-large-radius: var(--radius-md);
  --button-large-text-size: var(--text-lg);
  --button-large-text-weight: 600;
  --button-primary-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  --button-secondary-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  --button-small-padding: var(--spacing-sm) calc(1.5 * var(--spacing-sm));
  --button-small-radius: var(--radius-md);
  --button-small-text-size: var(--text-sm);
  --button-small-text-weight: 400;
  --button-medium-padding: var(--spacing-md) calc(2 * var(--spacing-md));
  --button-medium-radius: var(--radius-md);
  --button-medium-text-size: var(--text-md);
  --button-medium-text-weight: 600;
}
703
+
704
/* Global text override — the sledgehammer that fixes white-on-white text.
   NOTE(review): `* { … !important }` also recolors icons, links, and code
   highlighting; a narrower selector list would be safer long-term. */
* {
  color: var(--primarytext-color) !important;
  font-family: "Oswald", sans-serif !important;
}

/* Keep the page background consistent even when the OS prefers dark. */
@media (prefers-color-scheme: dark) {
  body {
    background: var(--text_areabackground);
    color: var(--primarytext-color);
  }
}

/* Default body skin (light mode and no-preference case). */
body {
  background: var(--text_areabackground);
  color: var(--primarytext-color);
  font-family: "Oswald", sans-serif;
}
722
+
723
/* "Thinking…" indicator shown while the bot composes a reply. */
.thinking-indicator {
  display: inline-flex;
  align-items: center;
  margin: 5px 0;
  padding: 8px 12px;
  background-color: transparent;
}

/* Horizontal row holding the three animated dots. */
.dots-container {
  display: inline-flex;
  align-items: center;
  gap: 3px;
}
737
/* A single pulsing dot; the three siblings are phase-shifted below so the
   pulse appears to travel left-to-right. */
.dot {
  display: inline-block;
  width: 4px;
  height: 4px;
  border-radius: 50%;
  background-color: var(--primary-light);
  animation: pulse 1.5s infinite ease-in-out;
}

.dot:nth-child(1) { animation-delay: 0s; }
.dot:nth-child(2) { animation-delay: 0.3s; }
.dot:nth-child(3) { animation-delay: 0.6s; }

/* Grow-and-brighten pulse: dim/small at rest, bright/large at the peak. */
@keyframes pulse {
  0%, 70%, 100% {
    transform: scale(0.8);
    opacity: 0.4;
  }
  35% {
    transform: scale(1.2);
    opacity: 1;
  }
}