jdesiree commited on
Commit
d8b2b50
·
0 Parent(s):

Update requirements.txt

Browse files
.gitattributes ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ loading_animation.gif.gif filter=lfs diff=lfs merge=lfs -text
37
+ loading_animation.gif filter=lfs diff=lfs merge=lfs -text
.huggingface/config.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # .huggingface/config.yaml
2
+ # Pre-download models during build
3
+ models:
4
+ - jdesiree/Mimir-Phi-3.5
5
+ - microsoft/Phi-3-mini-4k-instruct
6
+ - microsoft/Phi-3-mini-128k-instruct
7
+ - thenlper/gte-small
8
+
9
+ # Linked models
LightEval_Mimir.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LightEval_Mimir.py
2
+ '''This module outlines the LightEval setup for tracking performance metrics of Mimir, to be sent to the trackio page for visualization.'''
3
+
4
+ # Imports
5
+ from lighteval.metrics.metrics_sample import BertScore, ROUGE
6
+ from lighteval.tasks.requests import Doc
7
+
8
async def evaluate_educational_quality(user_query, response, thread_id, start_time=None):
    """Evaluate one conversational turn with LightEval-style metrics.

    Args:
        user_query: The student's question for this turn.
        response: The model's generated answer text.
        thread_id: Conversation thread identifier, used to name the ephemeral task.
        start_time: Optional ``time.time()`` captured when generation began.
            Backward-compatible addition: the original body referenced an
            undefined ``start_time`` (a guaranteed NameError); callers that
            want latency reporting now pass it explicitly.

    Returns:
        dict with 'semantic_quality', 'educational_score', and 'response_time'
        (None when no start_time was supplied).
    """
    import time  # fix: `time` is not imported at module level

    # Ephemeral single-turn task; gold_index=-1 because there is no ground truth yet.
    doc = Doc(
        task_name=f"turn_{thread_id}",
        query=user_query,
        choices=[response],
        gold_index=-1,
        specific_output=response,
    )

    # Semantic quality via BERTScore over the single candidate response.
    bert_score = BertScore().compute(doc)

    # Lightweight heuristic signals for pedagogical quality.
    lowered = response.lower()
    educational_indicators = {
        'has_examples': 'example' in lowered,
        'structured_explanation': '##' in response or '1.' in response,
        'appropriate_length': 100 < len(response) < 1500,
        'encourages_learning': any(phrase in lowered
                                   for phrase in ['practice', 'try', 'consider', 'think about']),
    }

    return {
        'semantic_quality': bert_score,
        'educational_score': sum(educational_indicators.values()) / len(educational_indicators),
        # None when the caller did not supply a generation start timestamp.
        'response_time': (time.time() - start_time) if start_time is not None else None,
    }
36
+
37
def track_rag_performance(query, retrieved_docs, used_in_response):
    """Evaluate RAG retrieval quality and ship the result to trackio.

    Args:
        query: The user query that triggered retrieval.
        retrieved_docs: Documents returned by the retriever; each is assumed
            to expose a ``.metadata`` mapping with a 'source' key — TODO confirm.
        used_in_response: Subset of retrieved docs actually used in the answer.
    """
    import uuid  # fix: uuid was referenced below but never imported (NameError)

    # Retrieval-to-response alignment.
    retrieval_relevance = calculate_relevance(query, retrieved_docs)
    # Guard against empty retrieval to avoid ZeroDivisionError.
    retrieval_usage = len(used_in_response) / len(retrieved_docs) if retrieved_docs else 0

    # LightEval-shaped payload for the trackio dashboard.
    metric_payload = {
        "evaluation_id": str(uuid.uuid4()),
        "task": "rag_retrieval",
        "metrics": {
            "retrieval_relevance": retrieval_relevance,
            "retrieval_usage_rate": retrieval_usage,
            "num_docs_retrieved": len(retrieved_docs),
        },
        "metadata": {
            "query": query[:100],  # truncated to keep the payload small
            "sources": [doc.metadata.get('source') for doc in retrieved_docs],
        },
    }

    send_evaluation_to_trackio(metric_payload)
61
+
62
def evaluate_prompt_classification(predicted_mode, actual_conversation_outcome, thread_id):
    """Track prompt-classifier accuracy in production.

    Checks whether the mode the classifier predicted led to a successful
    interaction, judged by per-mode heuristics over the outcome dict.

    Args:
        predicted_mode: Mode the classifier chose ('discovery_mode',
            'teaching_mode', 'conversational', or anything else).
        actual_conversation_outcome: Outcome dict for the conversation;
            keys read: 'clarified_topic', 'quality_score', 'user_satisfied'.
        thread_id: Conversation thread identifier, echoed in the result.

    Returns:
        dict with 'prompt_classifier_accuracy' (1.0 or 0.0),
        'predicted_mode', and 'thread_id'.
    """
    # Per-mode success criteria.
    success_indicators = {
        'discovery_mode': lambda outcome: 'clarified_topic' in outcome,
        'teaching_mode': lambda outcome: outcome.get('quality_score', 0) > 3.5,
        'conversational': lambda outcome: outcome.get('user_satisfied', False),
    }

    # Unknown modes default to "correct" so new modes are not penalized.
    mode_was_correct = success_indicators.get(
        predicted_mode,
        lambda outcome: True,
    )(actual_conversation_outcome)

    # Fix: the original mixed a `Metrics.ACCURACY` enum object with the
    # integer 0; use a plain numeric accuracy instead.
    accuracy_metric = 1.0 if mode_was_correct else 0.0

    return {
        "prompt_classifier_accuracy": accuracy_metric,
        "predicted_mode": predicted_mode,
        # Fix: the original read undefined `conversation_state` (NameError on
        # every call); report the thread id instead.
        "thread_id": thread_id,
    }
86
+
87
def process_user_feedback(response_id, feedback_type, conversation_state):
    """Convert explicit user feedback into LightEval-style ground truth.

    Assumes the last two entries of conversation_state are the user's
    question and the agent's reply, in that order.
    """
    from lighteval.tasks.requests import Doc

    # Pull the most recent question/answer pair out of the transcript.
    query_text = conversation_state[-2]["content"]
    response_text = conversation_state[-1]["content"]
    gold = 0 if feedback_type == "thumbs_up" else -1

    # Retrospective evaluation document, now that we have a label.
    doc = Doc(
        task_name="user_feedback_eval",
        query=query_text,
        choices=[response_text],
        gold_index=gold,
    )

    # A thumbs-up counts as a correct response for accuracy purposes.
    accuracy = 1.0 if feedback_type == "thumbs_up" else 0.0

    return {"user_feedback_accuracy": accuracy, "response_id": response_id}
109
+
README.md ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Mimir
3
+ emoji: 📚
4
+ colorFrom: indigo
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 5.47.0
8
+ app_file: app.py
9
+ pinned: true
10
+ python_version: '3.10'
11
+ short_description: Advanced prompt engineering for educational AI systems.
12
+ thumbnail: >-
13
+ https://cdn-uploads.huggingface.co/production/uploads/68700e7552b74a1dcbb2a87e/Z7P8DJ57rc5P1ozA5gwp3.png
14
+ hardware: zero-gpu-dynamic
15
+ hf_oauth: true
16
+ hf_oauth_expiration_minutes: 120
17
+ ---
18
+
19
+ # Mimir: Educational AI Assistant
20
+ ## Advanced Prompt Engineering Portfolio Project
21
+
22
+ ### Project Overview
23
+ Mimir demonstrates sophisticated prompt engineering techniques applied to educational technology, showcasing the implementation of context-aware AI systems that prioritize pedagogical effectiveness over simple answer generation. A key feature is its ability to **dynamically generate custom data visualizations**, determined by an intelligent decision engine that assesses whether a visual aid will enhance the pedagogical explanation. This project exemplifies professional-grade prompt design for educational applications, embodying the role of an educational partner that guides students to discover answers for themselves.
24
+
25
+ ***
26
+
27
+ ### Technical Architecture
28
+ **Core Technologies:**
29
+
30
+ * **LangChain**: Prompt template management and conversation chain orchestration.
31
+ * **LangGraph**: Orchestrates the application's flow as a state machine (**StateGraph**). It manages the conditional logic for the tool-use decision engine, routing user queries between the LLM, a pre-built **ToolNode** for graph generation, and the final response node.
32
+ * **Gradio**: Full-stack web interface with custom CSS styling.
33
+ * **Hugging Face Inference API**: Model deployment and response generation.
34
+ * **Python**: Backend logic and integration layer.
35
+ * **Matplotlib**: Powers the dynamic, in-memory generation of educational graphs and charts.
36
+
37
+ **Key Frameworks:**
38
+
39
+ * `langchain.prompts.ChatPromptTemplate` for dynamic prompt construction.
40
+ * `langchain_huggingface.HuggingFaceEndpoint` for model interface.
41
+ * `langchain.schema` message objects (HumanMessage, AIMessage, SystemMessage).
42
+ * `langgraph.graph.StateGraph` & `langgraph.prebuilt.ToolNode` for building and executing the conditional logic graph.
43
+ * `langgraph.checkpoint.memory.MemorySaver` for persistent conversation state.
44
+
45
+ ***
46
+
47
+ ### Prompt Engineering Techniques Demonstrated
48
+ #### 1. Unified System Prompt Architecture
49
+ Employs a single, comprehensive system prompt that establishes the AI's core persona as **Mimir, an expert multi-concept tutor**. This foundational prompt meticulously defines the AI's behavior, tone, and pedagogical mission. It integrates:
50
+
51
+ * **Core Educational Principles**: A directive to prioritize teaching methodology, foster critical thinking, and provide comprehensive explanations over direct answers.
52
+ * **Defined Persona & Tone**: Specific instructions to maintain an engaging, supportive, and intellectually appropriate tone for high school students, while avoiding fluff and emojis.
53
+ * **Specific Response Guidelines**: Contextual rules for handling different academic tasks, such as explaining concepts in math problems instead of solving them, or discussing research strategies for essays rather than writing them.
54
+
55
+ #### 2. Instructional Design Integration
56
+ The core prompt incorporates evidence-based instructional design principles:
57
+
58
+ * **Scaffolding**: Breaking complex concepts into manageable components.
59
+ * **Socratic Method**: Guiding discovery rather than providing direct answers.
60
+ * **Metacognitive Strategies**: Teaching learning-how-to-learn approaches.
61
+
62
+ #### 3. Academic Integrity Constraints
63
+ Implemented ethical AI guidelines directly into the system prompt:
64
+
65
+ * Explicit instructions to avoid homework completion.
66
+ * Focus on **process over product delivery**.
67
+ * Critical thinking skill development emphasis.
68
+
69
+ #### 4. Two-Stage Tool-Use Prompting
70
+ A sophisticated two-stage prompting strategy governs the use of the `Create_Graph_Tool`:
71
+
72
+ * **Tool-Use Decision Prompt**: A highly-constrained template is used by the `Tool_Decision_Engine` to determine if a tool should be used. This prompt forces a **YES** or **NO** response based on whether a visual aid would significantly enhance learning, using explicit **INCLUDE** and **EXCLUDE** criteria.
73
+ * **Tool-Execution Guidance**: The main system prompt contains separate, explicit instructions on how to use the tool once the decision has been made. It provides the exact **JSON structure** the model must output, including fields like `data`, `plot_type`, and `educational_context`, ensuring the generated graphs are pedagogically sound.
74
+
75
+ ***
76
+
77
+ ### Advanced Implementation Features
78
+ #### Intelligent Graphing Tool Integration
79
+ A custom, dynamic visualization system was developed to provide multi-modal educational responses.
80
+
81
+ * **LLM-Powered Analysis**: For relevant queries, a targeted LLM call is made using the specialized YES/NO decision prompt.
82
+ * **Dynamic Visualization Tool (`Create_Graph_Tool`)**: Designed and implemented a custom visualization tool using **matplotlib**. The tool receives a JSON configuration from the LLM and generates high-quality bar, line, or pie charts. The entire process occurs in-memory:
83
+ * The plot is rendered into a `BytesIO` buffer.
84
+ * The image is encoded into a **base64 string**.
85
+ * The final output is an HTML `<img>` tag with the embedded base64 data, which is displayed directly in the chat interface, eliminating the need for file I/O.
86
+ * The tool's docstring provides a clear schema and usage instructions for the LLM, ensuring reliable and pedagogically sound visualizations.
87
+
88
+ #### Stateful Conversation Management with LangGraph
89
+ Implements persistent, multi-turn conversations using LangGraph's **MemorySaver**. This allows the application's state, including the full message history (`add_messages`), to be saved and resumed, ensuring robust context management even when tool use is involved.
90
+
91
+ #### Response Streaming & Truncation
92
+ * Smart text truncation preserving sentence integrity.
93
+ * Real-time response streaming for improved UX.
94
+ * Error handling and fallback mechanisms.
95
+
96
+ #### Template Chaining Architecture
97
+ The core logic utilizes **LangChain Expression Language (LCEL)** to pipe inputs through templates, models, and tools.
98
+
99
+ ***
100
+
101
+ ### User Interface Engineering
102
+ * **Gradio Layout & Custom Styling**: The interface is built with `gr.Blocks`, using `gr.Column` and `gr.Row` to structure the main components. A custom `styles.css` file is loaded to apply specific theming, responsive design, and layout rules, moving beyond default Gradio styling for a tailored user experience.
103
+ * **Component Architecture**: Modular Gradio component structure with custom CSS class integration and accessibility-compliant patterns.
104
+
105
+ ***
106
+
107
+ ### Prompt Engineering Methodologies Applied
108
+ * **Template Parameterization**: Dynamic variable injection for contextual responses.
109
+ * **Persona-Driven Response Generation**: Crafting a detailed persona within the system prompt to guide the AI's tone, style, and pedagogical approach consistently.
110
+ * **Domain-Specific Language Modeling**: Educational vocabulary and pedagogical terminology integration.
111
+ * **Multi-Modal Response Formatting**: Structured output generation with educational formatting.
112
+ * **Agentic Tool Routing**: Designing prompts and logic that enable an AI system to intelligently decide which tool is appropriate for a given task, simulating agent-like behavior.
113
+
114
+ ***
115
+
116
+ ### Professional Applications
117
+ This project demonstrates competency in:
118
+
119
+ * **Enterprise-Grade Prompt Design**: Scalable template and tool-use architecture.
120
+ * **Educational Technology Integration**: Designing AI tutors with robust pedagogical frameworks and dynamic, multi-modal response capabilities.
121
+ * **Ethical AI Implementation**: Academic integrity safeguards and responsible AI practices.
122
+ * **Full-Stack AI Application Development**: End-to-end system implementation.
123
+ * **Intelligent Agent & Tool Development**: Building AI agents that can utilize custom tools to solve complex problems.
124
+
125
+ ***
126
+
127
+ ### Technical Specifications
128
+ **Dependencies:**
129
+
130
+ * **Core ML/AI**: `transformers`, `torch`, `accelerate`
131
+ * **LangChain & LangGraph**: `langgraph`, `langchain-core`, `langchain-community`, `langchain-huggingface`
132
+ * **UI Framework**: `gradio`
133
+ * **Visualization**: `matplotlib`, `plotly`, `pandas`, `numpy`, `scipy`
134
+ * **Utilities**: `python-dotenv`
135
+ * **Monitoring**: `langsmith` (Optional)
136
+
137
+ **Deployment:**
138
+
139
+ * Hugging Face Spaces compatible.
140
+ * Environment variable configuration for API keys.
141
+ * Production-ready error handling and logging.
142
+
143
+ ***
144
+
145
+ ### Results & Impact
146
+ Mimir represents a synthesis of prompt engineering best practices with educational technology requirements. The integration of an intelligent, conditional graphing tool demonstrates the ability to create AI systems that augment and enhance human learning processes, embodying the role of an educational partner who empowers students to succeed through genuine understanding.
147
+
148
+ > **Portfolio Demonstration**: This project evidences advanced prompt engineering capabilities, full-stack AI application development, and domain-specific AI system design suitable for enterprise educational technology environments.
agents.py ADDED
@@ -0,0 +1,1021 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # agents.py
2
+ """
3
+ Unified agent architecture for Mimir Educational AI Assistant.
4
+
5
+ Components:
6
+ - ToolDecisionAgent: Determines visualization tool necessity
7
+ - PromptRoutingAgents: 4 decision agents for library_state management
8
+ - ThinkingAgents: Preprocessing agents for complex reasoning
9
+ - ResponseAgent: Main educational response generation (Phi3)
10
+
11
+ All agents use proper LangChain SystemMessage/HumanMessage architecture.
12
+ """
13
+
14
import logging
import os
import re
import time

import torch
18
+
19
+ # Setup main logger first
20
+ logging.basicConfig(level=logging.INFO)
21
+ logger = logging.getLogger(__name__)
22
+
23
def log_step(step_name, start_time=None):
    """Log the start or completion of a pipeline step.

    Returns the current time so callers can pass it back as ``start_time``
    on the matching completion call to get a duration logged.
    NOTE(review): relies on module-level ``time``/``datetime`` imports being
    present — confirm `time` is imported at the top of this file.
    """
    now = time.time()
    stamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]

    if not start_time:
        logger.info(f"[{stamp}] → {step_name} starting...")
    else:
        logger.info(f"[{stamp}] ✓ {step_name} completed in {now - start_time:.2f}s")

    return now
35
+
36
+ agent = None
37
+
38
+ from typing import Dict, List, Optional, Tuple, Type
39
+ from datetime import datetime
40
+ import warnings
41
+
42
+ # Transformers for standard models
43
+ from transformers import (
44
+ AutoTokenizer,
45
+ AutoModelForCausalLM,
46
+ BitsAndBytesConfig,
47
+ )
48
+
49
# For GGUF model loading: llama-cpp-python is optional; flag its presence.
LLAMA_CPP_AVAILABLE = True
try:
    from llama_cpp import Llama
except ImportError:
    LLAMA_CPP_AVAILABLE = False
    logging.warning("llama-cpp-python not available - GGUF models will not load")
56
+
57
# ZeroGPU support: on HF Spaces the real `spaces` package provides the GPU
# decorator; elsewhere we install a no-op stand-in so decorated code still runs.
try:
    import spaces
    HF_SPACES_AVAILABLE = True
except ImportError:
    HF_SPACES_AVAILABLE = False

    class DummySpaces:
        """No-op replacement so `@spaces.GPU(duration=...)` decorations work off-Spaces."""

        @staticmethod
        def GPU(duration=90):
            # Return the function unchanged — there is no GPU to reserve.
            def decorator(func):
                return func
            return decorator

    spaces = DummySpaces()
70
+
71
+ # Accelerate
72
+ from accelerate import Accelerator
73
+ from accelerate.utils import set_seed
74
+
75
+ # LangChain Core for proper message handling
76
+ from langchain_core.runnables import Runnable
77
+ from langchain_core.runnables.utils import Input, Output
78
+ from langchain_core.messages import SystemMessage, HumanMessage
79
+
80
+ # Import ALL prompts from prompt library
81
+ from prompt_library import (
82
+ # System prompts
83
+ CORE_IDENTITY,
84
+ TOOL_DECISION,
85
+ agent_1_system,
86
+ agent_2_system,
87
+ agent_3_system,
88
+ agent_4_system,
89
+
90
+ # Thinking agent system prompts
91
+ MATH_THINKING,
92
+ QUESTION_ANSWER_DESIGN,
93
+ REASONING_THINKING,
94
+
95
+ # Response agent prompts (dynamically applied)
96
+ VAUGE_INPUT,
97
+ USER_UNDERSTANDING,
98
+ GENERAL_FORMATTING,
99
+ LATEX_FORMATTING,
100
+ GUIDING_TEACHING,
101
+ STRUCTURE_PRACTICE_QUESTIONS,
102
+ PRACTICE_QUESTION_FOLLOWUP,
103
+ TOOL_USE_ENHANCEMENT,
104
+ )
105
+
106
+ CACHE_DIR = "/data/compiled_models"
107
+
108
+ from huggingface_hub import hf_hub_download
109
+
110
def check_model_cache() -> Dict[str, bool]:
    """Report which model artifacts already have pre-compiled cache markers."""
    # Marker filenames written by the build step, keyed by logical model name.
    markers = {
        "phi3": "PHI3_READY",
        "mistral_reasoning": "MISTRAL_REASONING_READY",
        "mistral_math_gguf": "MISTRAL_MATH_GGUF_READY",
        "rag_embeddings": "RAG_EMBEDDINGS_READY",
        "all_compiled": "COMPILED_READY",
    }
    cache_status = {
        name: os.path.exists(f"{CACHE_DIR}/{marker}")
        for name, marker in markers.items()
    }

    if cache_status["all_compiled"]:
        logger.info("✓ All models pre-compiled and cached")
    else:
        logger.warning("⚠️ Some models not pre-compiled - first load will be slower")

    return cache_status
126
+
127
+ # Call at module load:
128
+ _cache_status = check_model_cache()
129
+
130
+ logger = logging.getLogger(__name__)
131
+
132
+ # Suppress warnings
133
+ warnings.filterwarnings("ignore", category=UserWarning)
134
+ warnings.filterwarnings("ignore", category=FutureWarning)
135
+
136
+ # Model paths
137
+ MISTRAL_REASONING = "yentinglin/Mistral-Small-24B-Instruct-2501-reasoning"
138
+ MISTRAL_MATH_GGUF = "brittlewis12/Mistral-Small-24B-Instruct-2501-reasoning-GGUF"
139
+ FINE_TUNED_PHI3 = "jdesiree/Mimir-Phi-3.5"
140
+ BASE_PHI3 = "microsoft/Phi-3-mini-4k-instruct"
141
+
142
+ HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
143
+
144
def get_cached_gguf_path() -> Optional[str]:
    """Get GGUF model path from cache marker if available.

    NOTE(review): an almost identical function is re-defined later in this
    module and overrides this one at import time — delete one of the copies.
    """
    marker_file = f"{CACHE_DIR}/MISTRAL_MATH_GGUF_READY"
    if os.path.exists(marker_file):
        try:
            content = open(marker_file, 'r').read()
            # The marker stores a line like "GGUF model path: /path/to/model.gguf".
            if "GGUF model path:" in content:
                candidate = content.split("GGUF model path:")[-1].strip()
                if os.path.exists(candidate):
                    logger.info(f"Found cached GGUF model: {candidate}")
                    return candidate
        except Exception as e:
            logger.warning(f"Could not read GGUF cache marker: {e}")
    return None
160
+
161
def get_cached_gguf_path() -> Optional[str]:
    """Return the cached GGUF model path recorded in the cache marker, or None.

    The build step writes ``{CACHE_DIR}/MISTRAL_MATH_GGUF_READY`` containing a
    line of the form "GGUF model path: /path/to/model.gguf"; this parses that
    line and verifies the file still exists.

    Fix: this is a duplicate of a function defined earlier in the module; this
    copy (the one that wins at import time) is kept as the canonical version,
    and the broad ``except Exception`` is narrowed to ``OSError`` since only
    file I/O can fail here.
    """
    marker_file = f"{CACHE_DIR}/MISTRAL_MATH_GGUF_READY"
    if os.path.exists(marker_file):
        try:
            with open(marker_file, 'r') as f:
                content = f.read()
            if "GGUF model path:" in content:
                path = content.split("GGUF model path:")[-1].strip()
                if os.path.exists(path):
                    logger.info(f"✓ Found cached GGUF model at: {path}")
                    return path
        except OSError as e:
            logger.warning(f"Could not read GGUF cache marker: {e}")
    return None
176
+
177
+ # ============================================================================
178
+ # TOOL DECISION AGENT
179
+ # ============================================================================
180
+
181
class ToolDecisionAgent:
    """
    Determines if visualization tools are needed for a given query.

    Uses Mistral-Small-24B (4-bit NF4 quantized) with the TOOL_DECISION
    system prompt. The model is loaded lazily on first use.
    """

    def __init__(self):
        # Populated by _load_model() on first call.
        self.model = None
        self.tokenizer = None
        self.model_loaded = False
        logger.info("ToolDecisionAgent initialized (lazy loading)")

    @spaces.GPU(duration=50)
    def _load_model(self):
        """Load the Mistral decision model (4-bit NF4 quantized) on first use."""
        if self.model_loaded:
            return

        logger.info(f"Loading tool decision model: {MISTRAL_REASONING}")

        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )

        self.tokenizer = AutoTokenizer.from_pretrained(
            MISTRAL_REASONING,
            trust_remote_code=True,
            token=HF_TOKEN
        )

        self.model = AutoModelForCausalLM.from_pretrained(
            MISTRAL_REASONING,
            quantization_config=quantization_config,
            torch_dtype=torch.float16,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            token=HF_TOKEN,
            device_map="auto",
        )

        self.model_loaded = True
        logger.info("Tool decision model loaded successfully")

    @spaces.GPU(duration=50)
    def should_use_visualization(self, query: str) -> bool:
        """
        Determine if query requires visualization tools.
        Uses TOOL_DECISION as system prompt.

        Args:
            query: User's question/prompt

        Returns:
            bool: True if visualization needed, False otherwise
        """
        self._load_model()

        try:
            # Fix: removed a dead `messages` list (SystemMessage/HumanMessage
            # objects that were built but never used); the chat template below
            # is what the model actually consumes.
            formatted_prompt = self.tokenizer.apply_chat_template(
                [{"role": "system", "content": TOOL_DECISION},
                 {"role": "user", "content": f"Query: {query}"}],
                tokenize=False,
                add_generation_prompt=True
            )

            inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.model.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=10,
                    temperature=0.1,  # NOTE: ignored when do_sample=False (greedy decode)
                    do_sample=False,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            decision_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Keep only the text after the final "Decision:" marker, if present.
            decision_text = decision_text.split("Decision:")[-1].strip().upper()

            # Require an unambiguous YES; a stray "NO" anywhere vetoes it.
            result = "YES" in decision_text and "NO" not in decision_text
            logger.info(f"Tool decision for '{query[:50]}...': {'YES' if result else 'NO'}")

            return result

        except Exception as e:
            logger.error(f"Tool decision error: {e}")
            # Fallback to keyword check
            return any(kw in query.lower() for kw in ['graph', 'chart', 'plot', 'visualize'])
280
+
281
+
282
+ # ============================================================================
283
+ # PROMPT ROUTING AGENTS
284
+ # ============================================================================
285
+
286
+ class PromptRoutingAgents:
287
+ """
288
+ Four specialized agents for library_state decision-making.
289
+ All share a single Mistral-Small-24B model for efficiency.
290
+ Each uses its corresponding agent_X_system prompt as SystemMessage.
291
+ """
292
+
293
+ def __init__(self):
294
+ self.model = None
295
+ self.tokenizer = None
296
+ self.model_loaded = False
297
+ logger.info("PromptRoutingAgents initialized (lazy loading)")
298
+
299
+ @spaces.GPU(duration=50)
300
+ def _load_model(self):
301
+ """Load shared Mistral model on first use"""
302
+ if self.model_loaded:
303
+ return
304
+
305
+ logger.info(f"Loading routing agents model: {MISTRAL_REASONING}")
306
+
307
+ quantization_config = BitsAndBytesConfig(
308
+ load_in_4bit=True,
309
+ bnb_4bit_compute_dtype=torch.float16,
310
+ bnb_4bit_quant_type="nf4",
311
+ bnb_4bit_use_double_quant=True,
312
+ )
313
+
314
+ self.tokenizer = AutoTokenizer.from_pretrained(
315
+ MISTRAL_REASONING,
316
+ trust_remote_code=True,
317
+ token=HF_TOKEN
318
+ )
319
+
320
+ self.model = AutoModelForCausalLM.from_pretrained(
321
+ MISTRAL_REASONING,
322
+ quantization_config=quantization_config,
323
+ torch_dtype=torch.float16,
324
+ trust_remote_code=True,
325
+ low_cpu_mem_usage=True,
326
+ token=HF_TOKEN,
327
+ device_map="auto",
328
+ )
329
+
330
+ self.model_loaded = True
331
+ logger.info("Routing agents model loaded successfully")
332
+
333
+ def _run_agent(self, system_prompt: str, user_message: str, max_tokens: int = 50) -> str:
334
+ """Execute agent with system prompt and user message using LangChain format"""
335
+ self._load_model()
336
+
337
+ # Format using tokenizer's chat template
338
+ formatted_prompt = self.tokenizer.apply_chat_template(
339
+ [{"role": "system", "content": system_prompt},
340
+ {"role": "user", "content": user_message}],
341
+ tokenize=False,
342
+ add_generation_prompt=True
343
+ )
344
+
345
+ inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.model.device)
346
+
347
+ with torch.no_grad():
348
+ outputs = self.model.generate(
349
+ **inputs,
350
+ max_new_tokens=max_tokens,
351
+ temperature=0.1,
352
+ do_sample=True,
353
+ pad_token_id=self.tokenizer.eos_token_id
354
+ )
355
+
356
+ response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
357
+ # Extract assistant response (after the prompt)
358
+ if "<|assistant|>" in response:
359
+ response = response.split("<|assistant|>")[-1].strip()
360
+
361
+ return response
362
+
363
+ @spaces.GPU(duration=50)
364
+ def agent_1_practice_questions(self, user_input: str, recent_history: List) -> bool:
365
+ """
366
+ Agent 1: Determine if practice questions are needed.
367
+ Uses agent_1_system as SystemMessage.
368
+ """
369
+ # Format history
370
+ history_text = "\n".join([f"{msg.get('role', 'unknown')}: {msg.get('content', '')[:100]}"
371
+ for msg in recent_history[-4:]]) if recent_history else "No history"
372
+
373
+ # User message per redesign document format
374
+ user_message = f"""Current user input: {user_input}
375
+
376
+ Recent conversation:
377
+ {history_text}
378
+
379
+ Determine if practice questions should be provided:"""
380
+
381
+ # Use agent_1_system as system prompt
382
+ result = self._run_agent(agent_1_system, user_message, max_tokens=30)
383
+ decision = "STRUCTURE_PRACTICE_QUESTIONS" in result
384
+
385
+ logger.info(f"Agent 1 (practice questions): {decision}")
386
+ return decision
387
+
388
+ @spaces.GPU(duration=50)
389
+ def agent_2_discovery_mode(self, user_input: str) -> Optional[str]:
390
+ """
391
+ Agent 2: Detect if vague input or understanding check needed.
392
+ Uses agent_2_system as SystemMessage.
393
+
394
+ Returns:
395
+ - "VAUGE_INPUT" if input is unclear/ambiguous
396
+ - "USER_UNDERSTANDING" if checking student's knowledge level
397
+ - None if neither applies
398
+ """
399
+ # User message per redesign document format
400
+ user_message = f"""Student query: {user_input}
401
+
402
+ Classification:"""
403
+
404
+ # Use agent_2_system as system prompt
405
+ result = self._run_agent(agent_2_system, user_message, max_tokens=30)
406
+ result_upper = result.upper()
407
+
408
+ # Parse result per agent_2_system expected outputs
409
+ if "VAUGE_INPUT" in result_upper or "VAGUE" in result_upper:
410
+ if "USER_UNDERSTANDING" not in result_upper:
411
+ logger.info("Agent 2: VAUGE_INPUT detected")
412
+ return "VAUGE_INPUT"
413
+ else:
414
+ # Both detected - agent should return both
415
+ logger.info("Agent 2: Both detected (should not happen per prompt)")
416
+ return "VAUGE_INPUT" # Prioritize first
417
+ elif "USER_UNDERSTANDING" in result_upper:
418
+ logger.info("Agent 2: USER_UNDERSTANDING detected")
419
+ return "USER_UNDERSTANDING"
420
+ else:
421
+ logger.info("Agent 2: Neither condition detected")
422
+ return None
423
+
424
+ @spaces.GPU(duration=50)
425
+ def agent_3_followup_assessment(self, user_input: str, recent_history: List) -> bool:
426
+ """
427
+ Agent 3: Determine if practice question follow-up is needed.
428
+ Uses agent_3_system (formatted) as SystemMessage.
429
+ """
430
+ # Format history
431
+ history_text = "\n".join([f"{msg.get('role', 'unknown')}: {msg.get('content', '')[:100]}"
432
+ for msg in recent_history[-4:]]) if recent_history else "No history"
433
+
434
+ # User message per redesign document format
435
+ user_message = f"""Current user response: {user_input}
436
+
437
+ Recent conversation:
438
+ {history_text}
439
+
440
+ Is this a follow-up to a practice question?"""
441
+
442
+ # Format agent_3_system with STRUCTURE_PRACTICE_QUESTIONS placeholder
443
+ formatted_system = agent_3_system.format(
444
+ STRUCTURE_PRACTICE_QUESTIONS=STRUCTURE_PRACTICE_QUESTIONS
445
+ )
446
+
447
+ # Use formatted agent_3_system as system prompt
448
+ result = self._run_agent(formatted_system, user_message, max_tokens=20)
449
+ decision = "PRACTICE_QUESTION_FOLLOWUP" in result or "TRUE" in result.upper()
450
+
451
+ logger.info(f"Agent 3 (followup assessment): {decision}")
452
+ return decision
453
+
454
+ @spaces.GPU(duration=50)
455
+ def agent_4_teaching_mode(self, user_input: str, recent_history: List) -> Dict[str, bool]:
456
+ """
457
+ Agent 4: Assess teaching mode and practice structure needs.
458
+ Uses agent_4_system as SystemMessage.
459
+
460
+ Returns dict with:
461
+ - "GUIDING_TEACHING": True if direct pedagogical guidance needed
462
+ - "STRUCTURE_PRACTICE_QUESTIONS": True if structured practice needed
463
+ """
464
+ # Format history
465
+ history_text = "\n".join([f"{msg.get('role', 'unknown')}: {msg.get('content', '')[:100]}"
466
+ for msg in recent_history[-4:]]) if recent_history else "No history"
467
+
468
+ # User message per redesign document format
469
+ user_message = f"""Current query: {user_input}
470
+
471
+ Recent conversation:
472
+ {history_text}
473
+
474
+ Teaching mode assessment:"""
475
+
476
+ # Use agent_4_system as system prompt
477
+ result = self._run_agent(agent_4_system, user_message, max_tokens=50)
478
+ result_upper = result.upper()
479
+
480
+ # Parse result per agent_4_system expected outputs
481
+ decisions = {
482
+ "GUIDING_TEACHING": "GUIDING_TEACHING" in result_upper,
483
+ "STRUCTURE_PRACTICE_QUESTIONS": "STRUCTURE_PRACTICE_QUESTIONS" in result_upper
484
+ }
485
+
486
+ logger.info(f"Agent 4 decisions: {decisions}")
487
+ return decisions
488
+
489
+
490
+ # ============================================================================
491
+ # THINKING AGENTS
492
+ # ============================================================================
493
+
494
class ThinkingAgents:
    """
    Preprocessing agents for complex reasoning.
    Each uses its corresponding thinking prompt as SystemMessage:
    - Math Thinking: MATH_THINKING (GGUF Mistral)
    - Question/Answer Design: QUESTION_ANSWER_DESIGN (Standard Mistral)
    - Reasoning: REASONING_THINKING (Standard Mistral)

    All models load lazily, so construction is cheap; GPU time is only
    claimed inside @spaces.GPU-decorated methods. Every thinking method
    degrades to an empty string on failure rather than raising.
    """

    def __init__(self):
        # Handles populated on demand by _load_math_model / _load_reasoning_model.
        self.math_model = None  # llama_cpp.Llama wrapping the Mistral GGUF
        self.reasoning_model = None  # Standard Mistral (transformers, 4-bit)
        self.reasoning_tokenizer = None
        self.math_model_loaded = False
        self.reasoning_model_loaded = False
        logger.info("ThinkingAgents initialized (lazy loading)")

    def _load_math_model(self):
        """Load GGUF math thinking model - cache-aware.

        No-op when already loaded or when llama-cpp-python is missing; on
        any failure math_model stays None and math_thinking returns "".
        """
        if self.math_model_loaded:
            return

        if not LLAMA_CPP_AVAILABLE:
            logger.error("llama-cpp-python not available - math thinking disabled")
            return

        logger.info(f"Loading GGUF math model: {MISTRAL_MATH_GGUF}")

        try:
            from huggingface_hub import hf_hub_download

            # Check for cached model first (avoids the multi-GB download)
            cached_path = get_cached_gguf_path()

            if cached_path:
                logger.info("Using pre-cached GGUF model (fast path)")
                model_path = cached_path
            else:
                logger.info("Downloading GGUF model from HuggingFace...")
                model_path = hf_hub_download(
                    repo_id=MISTRAL_MATH_GGUF,
                    filename="mistral-small-24b-instruct-2501-reasoning-Q4_K_M.gguf",
                    token=HF_TOKEN
                )
                logger.info(f"Downloaded GGUF to: {model_path}")

            self.math_model = Llama(
                model_path=model_path,
                n_ctx=4096,       # context window in tokens
                n_threads=4,      # CPU threads for layers not on GPU
                n_gpu_layers=35,  # layers offloaded to GPU
            )

            self.math_model_loaded = True
            logger.info("✓ GGUF math model ready")

        except Exception as e:
            # Best-effort: leave math_model None; callers handle the gap.
            logger.error(f"Failed to load GGUF math model: {e}")

    @spaces.GPU(duration=60)
    def _load_reasoning_model(self):
        """Load standard Mistral for reasoning/QA design.

        4-bit NF4 quantization keeps the model within Space memory limits.
        Raises on failure (no try/except here) — callers' try blocks catch it.
        """
        if self.reasoning_model_loaded:
            return

        logger.info(f"Loading reasoning model: {MISTRAL_REASONING}")

        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )

        self.reasoning_tokenizer = AutoTokenizer.from_pretrained(
            MISTRAL_REASONING,
            trust_remote_code=True,
            token=HF_TOKEN
        )

        self.reasoning_model = AutoModelForCausalLM.from_pretrained(
            MISTRAL_REASONING,
            quantization_config=quantization_config,
            torch_dtype=torch.float16,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            token=HF_TOKEN,
            device_map="auto",
        )

        self.reasoning_model_loaded = True
        logger.info("Reasoning model loaded successfully")

    @spaces.GPU(duration=60)
    def math_thinking(self, user_input: str, conversation_history: str) -> str:
        """
        Math-specific Tree-of-Thought reasoning preprocessing.
        Uses MATH_THINKING as system prompt with GGUF Mistral model.

        Returns the generated thinking text, or "" if the model is
        unavailable or generation fails.
        """
        self._load_math_model()

        if self.math_model is None:
            logger.warning("Math model not available, returning empty context")
            return ""

        try:
            # User message per redesign document format
            user_message = f"""Conversation History:
{conversation_history}

Current User Query:
{user_input}

Provide mathematical thinking context:"""

            # Combine system (MATH_THINKING) + user message
            # For GGUF/llama-cpp, we format manually
            # NOTE(review): <|system|>/<|user|> markers are Phi-3-style, not
            # Mistral's [INST] template — confirm the GGUF model expects these.
            full_prompt = f"""<|system|>
{MATH_THINKING}
<|end|>
<|user|>
{user_message}
<|end|>
<|assistant|>
"""

            response = self.math_model(
                full_prompt,
                max_tokens=512,
                temperature=0.7,
                # Stop markers terminate generation early on any of these.
                stop=["</thinking>", "\n\n---", "<|end|>"],
            )

            thinking_output = response['choices'][0]['text'].strip()
            logger.info(f"Math thinking generated: {len(thinking_output)} chars")

            return thinking_output

        except Exception as e:
            logger.error(f"Math thinking error: {e}")
            return ""

    @spaces.GPU(duration=60)
    def question_answer_design(self, user_input: str, conversation_history: str,
                               tool_img_output: str = "", tool_context: str = "") -> str:
        """
        Chain-of-Thought for question formulation and response design.
        Uses QUESTION_ANSWER_DESIGN (formatted) as system prompt.

        Args:
            user_input: Current user query.
            conversation_history: Pre-formatted recent conversation text.
            tool_img_output: Tool image output if available.
            tool_context: Tool context if available.

        Returns:
            Generated design context, or "" on any failure.
        """
        self._load_reasoning_model()

        try:
            # Format QUESTION_ANSWER_DESIGN with required variables
            formatted_qa_system = QUESTION_ANSWER_DESIGN.format(
                tool_img_output=tool_img_output if tool_img_output else "No tool output provided",
                tool_context=tool_context if tool_context else "No tool context available",
                STRUCTURE_PRACTICE_QUESTIONS=STRUCTURE_PRACTICE_QUESTIONS,
                LATEX_FORMATTING=LATEX_FORMATTING
            )

            # User message per redesign document format
            user_message = f"""Conversation History:
{conversation_history}

Current Query: {user_input}

Design question/answer approach:"""

            # Format using tokenizer's chat template
            formatted_prompt = self.reasoning_tokenizer.apply_chat_template(
                [{"role": "system", "content": formatted_qa_system},
                 {"role": "user", "content": user_message}],
                tokenize=False,
                add_generation_prompt=True
            )

            inputs = self.reasoning_tokenizer(formatted_prompt, return_tensors="pt").to(self.reasoning_model.device)

            with torch.no_grad():
                outputs = self.reasoning_model.generate(
                    **inputs,
                    max_new_tokens=512,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.reasoning_tokenizer.eos_token_id
                )

            # outputs[0] contains prompt + completion tokens.
            thinking_output = self.reasoning_tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Extract only new tokens
            # NOTE(review): Mistral chat templates use [INST] delimiters, not
            # <|assistant|> — this split may never match, leaving the prompt
            # text in the output. Consider slicing by input token length.
            if "<|assistant|>" in thinking_output:
                thinking_output = thinking_output.split("<|assistant|>")[-1].strip()

            logger.info(f"QA design thinking generated: {len(thinking_output)} chars")
            return thinking_output

        except Exception as e:
            logger.error(f"QA design thinking error: {e}")
            return ""

    @spaces.GPU(duration=60)
    def reasoning_thinking(self, user_input: str, conversation_history: str) -> str:
        """
        General Chain-of-Thought reasoning preprocessing.
        Uses REASONING_THINKING as system prompt.

        Returns the generated reasoning context, or "" on any failure.
        """
        self._load_reasoning_model()

        try:
            # User message per redesign document format
            user_message = f"""Conversation History:
{conversation_history}

Current Query: {user_input}

Provide reasoning context:"""

            # Format using tokenizer's chat template
            formatted_prompt = self.reasoning_tokenizer.apply_chat_template(
                [{"role": "system", "content": REASONING_THINKING},
                 {"role": "user", "content": user_message}],
                tokenize=False,
                add_generation_prompt=True
            )

            inputs = self.reasoning_tokenizer(formatted_prompt, return_tensors="pt").to(self.reasoning_model.device)

            with torch.no_grad():
                outputs = self.reasoning_model.generate(
                    **inputs,
                    max_new_tokens=512,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.reasoning_tokenizer.eos_token_id
                )

            # outputs[0] contains prompt + completion tokens.
            thinking_output = self.reasoning_tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Extract only new tokens
            # NOTE(review): same <|assistant|> caveat as question_answer_design.
            if "<|assistant|>" in thinking_output:
                thinking_output = thinking_output.split("<|assistant|>")[-1].strip()

            logger.info(f"Reasoning thinking generated: {len(thinking_output)} chars")
            return thinking_output

        except Exception as e:
            logger.error(f"Reasoning thinking error: {e}")
            return ""

    @spaces.GPU(duration=90)
    def process(self, user_input: str, conversation_history: str,
                thinking_prompts: str, tool_img_output: str = "",
                tool_context: str = "") -> str:
        """
        Execute appropriate thinking agents based on active prompts.
        Per redesign document orchestration.

        Args:
            user_input: Current user query
            conversation_history: Formatted recent conversation
            thinking_prompts: Newline-joined string of active thinking prompt names
            tool_img_output: Tool image output if available
            tool_context: Tool context if available

        Returns:
            Combined thinking context from all active agents
        """
        thinking_outputs = []

        # Execute thinking agents based on which prompts are active.
        # Membership is a substring test on the joined prompt-name string.
        if "MATH_THINKING" in thinking_prompts:
            math_output = self.math_thinking(user_input, conversation_history)
            if math_output:
                thinking_outputs.append(f"=== Mathematical Thinking Context ===\n{math_output}")

        if "QUESTION_ANSWER_DESIGN" in thinking_prompts:
            qa_output = self.question_answer_design(
                user_input,
                conversation_history,
                tool_img_output,
                tool_context
            )
            if qa_output:
                thinking_outputs.append(f"=== Question Design Context ===\n{qa_output}")

        if "REASONING_THINKING" in thinking_prompts:
            reasoning_output = self.reasoning_thinking(user_input, conversation_history)
            if reasoning_output:
                thinking_outputs.append(f"=== Reasoning Context ===\n{reasoning_output}")

        combined_context = "\n\n".join(thinking_outputs)
        logger.info(f"Total thinking context: {len(combined_context)} chars from {len(thinking_outputs)} agents")

        return combined_context
786
+
787
+
788
+ # ============================================================================
789
+ # RESPONSE AGENT (Phi3 with Fine-tuned + Fallback)
790
+ # ============================================================================
791
+
792
class ResponseAgent(Runnable):
    """
    PEFT-enabled Phi3 LLM for educational response generation.
    Uses CORE_IDENTITY as base system prompt.
    Additional prompts dynamically added to user message based on library_state.

    Features:
    - Fine-tuned model: jdesiree/Mimir-Phi-3.5
    - Fallback to base: microsoft/Phi-3-mini-4k-instruct
    - 4-bit quantization for memory efficiency
    - ZeroGPU decorators for on-demand GPU allocation
    """

    def __init__(self, model_path: str = FINE_TUNED_PHI3, base_model: str = BASE_PHI3):
        super().__init__()
        logger.info(f"Initializing ResponseAgent (Phi3)...")

        # Created lazily in _load_and_prepare_model (needs GPU context).
        self.accelerator = None

        self.model_path = model_path            # fine-tuned checkpoint id
        self.base_model_path = base_model       # fallback checkpoint id
        self.tokenizer = None
        self.base_model = None                  # whichever model actually loaded
        self.model_loaded = False
        self.model_type = None                  # "fine-tuned" | "base-fallback"

        # Tokenizer is CPU-only, so it is safe to load eagerly here.
        self._initialize_tokenizer()
        logger.info("ResponseAgent initialized (model will load on first GPU call)")

    def _initialize_tokenizer(self):
        """Initialize tokenizer (CPU operation, safe to do at init).

        Always loads from the base model path so the vocabulary matches the
        fallback path too. Raises on failure — the agent is unusable without it.
        """
        try:
            logger.info(f"Loading tokenizer from base model: {self.base_model_path}")
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.base_model_path,
                trust_remote_code=True,
                token=HF_TOKEN,
                use_fast=False
            )
            self._configure_special_tokens()
        except Exception as e:
            logger.error(f"Failed to initialize tokenizer: {e}")
            raise

    def _configure_special_tokens(self):
        """Configure special tokens for Phi-3.

        Ensures the <|end|> turn delimiter is registered and that a pad
        token exists (Phi-3 ships without one; eos is the usual stand-in).
        """
        special_tokens_dict = {}
        if "<|end|>" not in self.tokenizer.all_special_tokens:
            if hasattr(self.tokenizer, 'additional_special_tokens'):
                additional_tokens = self.tokenizer.additional_special_tokens or []
                if "<|end|>" not in additional_tokens:
                    additional_tokens.append("<|end|>")
                    special_tokens_dict["additional_special_tokens"] = additional_tokens
            else:
                special_tokens_dict["additional_special_tokens"] = ["<|end|>"]

        if special_tokens_dict:
            self.tokenizer.add_special_tokens(special_tokens_dict)

        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    @spaces.GPU(duration=120)
    def _load_and_prepare_model(self):
        """Load model with ZeroGPU + Accelerate integration.

        Tries the fine-tuned checkpoint first, then falls back to the base
        model; both are 4-bit NF4 quantized. Idempotent: returns immediately
        once a model has been loaded.
        """
        if self.model_loaded:
            return

        logger.info("Loading ResponseAgent model with ZeroGPU + Accelerate...")

        self.accelerator = Accelerator(
            mixed_precision="fp16",
            gradient_accumulation_steps=1,
            log_with=None,
            project_dir=None
        )
        # Fixed seed for reproducible sampling across restarts.
        set_seed(42)

        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )

        model = None
        try:
            logger.info(f"Attempting to load fine-tuned model: {self.model_path}")
            model = AutoModelForCausalLM.from_pretrained(
                self.model_path,
                quantization_config=quantization_config,
                torch_dtype=torch.float16,
                trust_remote_code=True,
                low_cpu_mem_usage=True,
                token=HF_TOKEN,
                attn_implementation="eager",
                device_map="auto",
            )
            self.model_type = "fine-tuned"
            logger.info("✓ Fine-tuned model loaded")
        except Exception as e:
            # Any load failure (missing repo, auth, OOM) falls back to base.
            logger.warning(f"Fine-tuned model failed: {e}, using base model")
            model = AutoModelForCausalLM.from_pretrained(
                self.base_model_path,
                quantization_config=quantization_config,
                torch_dtype=torch.float16,
                trust_remote_code=True,
                low_cpu_mem_usage=True,
                token=HF_TOKEN,
                attn_implementation="eager",
                device_map="auto",
            )
            self.model_type = "base-fallback"
            logger.info("✓ Base model loaded")

        self.base_model = self.accelerator.prepare(model)
        self.model_loaded = True

        logger.info(f"ResponseAgent ready: {self.model_type} on {self.accelerator.device}")

    def _format_chat_template(self, complete_prompt: str) -> str:
        """Format prompt using Phi-3's chat template.

        Falls back to a hand-written <|user|>/<|assistant|> frame if the
        tokenizer's template application fails for any reason.
        """
        try:
            messages = [{"role": "user", "content": complete_prompt}]
            formatted_text = self.tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True
            )
            return formatted_text
        except Exception as e:
            logger.warning(f"Chat template failed, using fallback: {e}")
            return f"<|user|>\n{complete_prompt}<|end|>\n<|assistant|>\n"

    @spaces.GPU(duration=180)
    def invoke(self, input: Input, config=None) -> Output:
        """
        Main inference method.
        Expects input formatted per redesign document:
        - CORE_IDENTITY (always included)
        - prompt_segments (from library_state)
        - tool outputs
        - conversation history
        - thinking context
        - user query

        Args:
            input: Complete formatted prompt string (or dict with 'input' key;
                   parameter name mandated by the Runnable interface)

        Returns:
            Generated response string; a fallback sentence or error message
            on failure (never raises to the caller)
        """
        # Accept both plain strings and LangChain-style {'input': ...} dicts.
        if isinstance(input, dict):
            complete_prompt = input.get('input', str(input))
        else:
            complete_prompt = str(input)

        try:
            self._load_and_prepare_model()

            text = self._format_chat_template(complete_prompt)

            # Truncate long prompts to leave headroom for 350 new tokens
            # within Phi-3-mini's 4k context.
            max_input_length = 3500
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=max_input_length
            )

            # Place tensors on whatever device the (device_map="auto") model
            # actually landed on.
            model_device = next(self.base_model.parameters()).device
            inputs = {k: v.to(model_device) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = self.base_model.generate(
                    input_ids=inputs['input_ids'],
                    attention_mask=inputs.get('attention_mask', None),
                    max_new_tokens=350,
                    do_sample=True,
                    temperature=0.7,
                    repetition_penalty=1.15,
                    pad_token_id=self.tokenizer.eos_token_id,
                    use_cache=False,
                    num_beams=1,
                )

            # Slice off the prompt so only newly generated tokens are decoded.
            new_tokens = outputs[0][len(inputs['input_ids'][0]):].cpu()

            if len(new_tokens) == 0:
                logger.error("Model generated zero tokens!")
                return "I'm still learning how to respond properly."

            result = self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

            # Trim at the first stop marker, if the model ran past its turn.
            for stop_word in ["User:", "<|end|>", "<|assistant|>"]:
                if stop_word in result:
                    result = result.split(stop_word)[0].strip()
                    break

            if not result:
                logger.error("Empty result after processing!")
                return "I'm still learning how to respond properly."

            logger.info(f"ResponseAgent completed: {len(result)} chars using {self.model_type}")
            return result

        except Exception as e:
            logger.error(f"ResponseAgent error: {e}")
            import traceback
            logger.error(f"Traceback: {traceback.format_exc()}")
            return f"I encountered an error: {str(e)}"

    def get_model_info(self):
        """Get model information for diagnostics.

        Returns a dict describing load status, which checkpoint is active,
        and whether Accelerate/ZeroGPU wiring is in place.
        """
        return {
            "status": "loaded" if self.model_loaded else "not_loaded",
            "model_type": self.model_type,
            "using_fallback": self.model_type == "base-fallback" if self.model_type else False,
            "zerogpu_ready": True,
            "accelerate_ready": self.accelerator is not None,
        }

    @property
    def InputType(self) -> Type[Input]:
        # Runnable contract: this agent consumes plain strings.
        return str

    @property
    def OutputType(self) -> Type[Output]:
        # Runnable contract: this agent produces plain strings.
        return str
app.py ADDED
@@ -0,0 +1,1360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ """
3
+ Mimir Educational AI Assistant - Main Application
4
+
5
+ Architecture:
6
+ - Multi-page Gradio interface (Chatbot + Analytics with link to Mimir case study)
7
+ - Agent-based orchestration (Tool, Routing, Thinking, Response)
8
+ - Global state management with SQLite + HF dataset backup
9
+ - Prompt state tracking per turn
10
+ - LightEval for metrics tracking
11
+ - Logger for timing functions
12
+ """
13
+
14
+ import os
15
+ import re
16
+ import sys
17
+ import time
18
+ import json
19
+ import base64
20
+ import logging
21
+ import sqlite3
22
+ import subprocess
23
+ import threading
24
+ import warnings
25
+ import uuid
26
+ from datetime import datetime
27
+ from typing import Dict, List, Optional, Tuple, Any
28
+
29
+ # Core dependencies
30
+ import torch
31
+ import gradio as gr
32
+ from dotenv import load_dotenv
33
+
34
+ # Agent architecture
35
+ from agents import (
36
+ ToolDecisionAgent,
37
+ PromptRoutingAgents,
38
+ ThinkingAgents,
39
+ ResponseAgent,
40
+ )
41
+
42
+ # State management
43
+ from state_manager import (
44
+ GlobalStateManager,
45
+ LogicalExpressions,
46
+ )
47
+
48
+ # Prompt library
49
+ from prompt_library import (
50
+ CORE_IDENTITY,
51
+ VAUGE_INPUT,
52
+ USER_UNDERSTANDING,
53
+ GENERAL_FORMATTING,
54
+ LATEX_FORMATTING,
55
+ GUIDING_TEACHING,
56
+ STRUCTURE_PRACTICE_QUESTIONS,
57
+ PRACTICE_QUESTION_FOLLOWUP,
58
+ TOOL_USE_ENHANCEMENT,
59
+ )
60
+
61
+ # LangGraph imports
62
+ from langgraph.graph import StateGraph, START, END
63
+ from langgraph.graph.message import add_messages
64
+ from langgraph.checkpoint.memory import MemorySaver
65
+ from langgraph.prebuilt import ToolNode
66
+
67
+ # LangChain Core
68
+ from langchain_core.tools import tool
69
+ from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, ToolMessage, BaseMessage
70
+
71
+ # LightEval for metrics
72
+ try:
73
+ from lighteval.logging.evaluation_tracker import EvaluationTracker
74
+ from lighteval.models.transformers.transformers_model import TransformersModel
75
+ from lighteval.metrics.metrics_sample import BertScore, ROUGE
76
+ from lighteval.tasks.requests import Doc
77
+ LIGHTEVAL_AVAILABLE = True
78
+ except ImportError:
79
+ LIGHTEVAL_AVAILABLE = False
80
+ logging.warning("LightEval not available - metrics tracking limited")
81
+
82
+ # Tool for graphing
83
+ from graph_tool import generate_plot
84
+
85
+ # Suppress warnings
86
+ warnings.filterwarnings("ignore", category=UserWarning)
87
+ warnings.filterwarnings("ignore", category=FutureWarning)
88
+
89
+ # Load environment
90
+ load_dotenv(".env")
91
+ HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
92
+
93
+ # Configuration
94
+ DEBUG_STATE = os.getenv("DEBUG_STATE", "false").lower() == "true"
95
+ CURRENT_YEAR = datetime.now().year
96
+
97
+
98
+ # ============================================================================
99
+ # LOGGING SETUP
100
+ # ============================================================================
101
+
102
# Root logging configuration for the whole app: INFO level with
# timestamped, module-tagged records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used throughout app.py.
logger = logging.getLogger(__name__)
107
+
108
+
109
def log_step(step_name: str, start_time: Optional[float] = None) -> float:
    """
    Log a pipeline step with timestamp and duration.

    Call once with no start_time to mark a step as starting; call again
    passing the returned value to mark it completed with its duration.

    Args:
        step_name: Name of the step
        start_time: Start time from previous call (if completing a step)

    Returns:
        Current time for next call
    """
    now = time.time()
    timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]

    # Explicit None check (not truthiness): a start_time of 0.0 is falsy
    # and would otherwise be misreported as the start of a new step.
    if start_time is not None:
        duration = now - start_time
        logger.info(f"[{timestamp}] COMPLETED: {step_name} ({duration:.2f}s)")
    else:
        logger.info(f"[{timestamp}] STARTING: {step_name}")

    return now
130
+
131
+
132
+ # ============================================================================
133
+ # GLOBAL INITIALIZATION
134
+ # ============================================================================
135
+
136
+ logger.info("="*60)
137
+ logger.info("INITIALIZING MIMIR APPLICATION")
138
+ logger.info("="*60)
139
+
140
+ init_start = log_step("Global Initialization")
141
+
142
+ # Initialize state management
143
+ global_state_manager = GlobalStateManager()
144
+ logical_expressions = LogicalExpressions()
145
+ logger.info("State management initialized")
146
+
147
+ # Initialize agents (lazy loading - models load on first use)
148
+ tool_agent = ToolDecisionAgent()
149
+ routing_agents = PromptRoutingAgents()
150
+ thinking_agents = ThinkingAgents()
151
+ response_agent = ResponseAgent()
152
+ logger.info("Agents initialized (lazy loading)")
153
+
154
+ log_step("Global Initialization", init_start)
155
+
156
+
157
+ # ============================================================================
158
+ # ANALYTICS & DATABASE FUNCTIONS
159
+ # ============================================================================
160
+
161
def get_trackio_database_path(project_name: str) -> Optional[str]:
    """
    Return the path of the first existing metrics SQLite database.

    Probes the project-named locations first, then the shared
    ./mimir_metrics.db fallback. Returns None when nothing exists.
    """
    candidate_paths = (
        f"./{project_name}.db",
        f"./trackio_data/{project_name}.db",
        f"./.trackio/{project_name}.db",
        "./mimir_metrics.db",
    )
    # First existing candidate wins; None when none exist.
    return next((path for path in candidate_paths if os.path.exists(path)), None)
175
+
176
+
177
def get_project_statistics_with_nulls(cursor, project_name: str) -> Dict:
    """
    Query the metrics database for project-level statistics.

    Each statistic is computed independently; a failure in one query
    degrades only that value to None instead of aborting the whole call.

    Args:
        cursor: sqlite3 cursor whose rows support access by column name.
        project_name: Project whose metrics should be aggregated.

    Returns:
        Dict with keys total_conversations, avg_session_length and
        success_rate; each value is a number or None when no data exists.
    """
    def _one_row(sql: str):
        # Run a single-row aggregate query scoped to this project.
        cursor.execute(sql, (project_name,))
        return cursor.fetchone()

    try:
        summary = {}

        # Total conversations = distinct run ids recorded for this project.
        try:
            row = _one_row("""
                SELECT COUNT(DISTINCT run_id) as total_runs
                FROM metrics
                WHERE project_name = ?
            """)
            summary["total_conversations"] = row["total_runs"] if row and row["total_runs"] > 0 else None
        except sqlite3.Error:
            summary["total_conversations"] = None

        # Average response time, stored under metric_name 'response_time'.
        try:
            row = _one_row("""
                SELECT AVG(CAST(value AS FLOAT)) as avg_response_time
                FROM metrics
                WHERE project_name = ? AND metric_name = 'response_time'
            """)
            if row and row["avg_response_time"] is not None:
                summary["avg_session_length"] = round(row["avg_response_time"], 2)
            else:
                summary["avg_session_length"] = None
        except sqlite3.Error:
            summary["avg_session_length"] = None

        # Success rate = percentage of quality_score entries above 3.5.
        try:
            row = _one_row("""
                SELECT
                    COUNT(*) as total_responses,
                    SUM(CASE WHEN CAST(value AS FLOAT) > 3.5 THEN 1 ELSE 0 END) as successful_responses
                FROM metrics
                WHERE project_name = ? AND metric_name = 'quality_score'
            """)
            if row and row["total_responses"] > 0:
                pct = (row["successful_responses"] / row["total_responses"]) * 100
                summary["success_rate"] = round(pct, 1)
            else:
                summary["success_rate"] = None
        except sqlite3.Error:
            summary["success_rate"] = None

        return summary

    except sqlite3.Error as e:
        logger.error(f"Database error: {e}")
        return {"total_conversations": None, "avg_session_length": None, "success_rate": None}
232
+
233
+
234
def get_recent_interactions_with_nulls(cursor, project_name: str, limit: int = 10) -> List:
    """
    Query for the most recent interactions of a project.

    One row per conversation (anchored on the 'conversation_start' metric),
    left-joined against per-run metrics so that any missing metric surfaces
    as None in the returned row.

    Returns:
        List of [timestamp, response_time, prompt_mode, tools_used,
        quality_score, adapter_used] rows, newest first; [] on DB error.
    """
    def _normalize(row):
        # Convert one joined DB row to the dashboard row shape; every
        # missing or empty metric becomes None.
        ts = row["timestamp"]
        rt = row["response_time"]
        tools = row["tools_used"]
        quality = row["quality_score"]
        return [
            ts[:16] if ts else None,  # trim timestamp to minute resolution
            float(rt) if rt is not None else None,
            row["prompt_mode"] if row["prompt_mode"] else None,
            bool(int(tools)) if tools is not None else None,
            float(quality) if quality is not None else None,
            row["adapter_used"] if row["adapter_used"] else None,
        ]

    try:
        cursor.execute("""
            SELECT
                m1.timestamp,
                m2.value as response_time,
                m3.value as prompt_mode,
                m4.value as tools_used,
                m5.value as quality_score,
                m6.value as adapter_used,
                m1.run_id
            FROM metrics m1
            LEFT JOIN metrics m2 ON m1.run_id = m2.run_id AND m2.metric_name = 'response_time'
            LEFT JOIN metrics m3 ON m1.run_id = m3.run_id AND m3.metric_name = 'prompt_mode'
            LEFT JOIN metrics m4 ON m1.run_id = m4.run_id AND m4.metric_name = 'tools_used'
            LEFT JOIN metrics m5 ON m1.run_id = m5.run_id AND m5.metric_name = 'quality_score'
            LEFT JOIN metrics m6 ON m1.run_id = m6.run_id AND m6.metric_name = 'active_adapter'
            WHERE m1.project_name = ? AND m1.metric_name = 'conversation_start'
            ORDER BY m1.timestamp DESC
            LIMIT ?
        """, (project_name, limit))

        return [_normalize(row) for row in cursor.fetchall()]

    except sqlite3.Error as e:
        logger.error(f"Database error: {e}")
        return []
275
+
276
+
277
def create_dashboard_html_with_nulls(project_name: str, project_stats: Dict) -> str:
    """Create dashboard HTML with enhanced metrics.

    Renders the analytics card for *project_name* from *project_stats*,
    showing explicit "No data"/"--" placeholders wherever a stat is None.

    NOTE: mutates *project_stats* in place by adding the
    ml_educational_quality, ml_classifier_accuracy and active_sessions keys
    read from the global state manager.
    """
    # Small formatter: italic grey "No data" span for missing values.
    def format_stat(value, suffix="", no_data_text="No data"):
        if value is None:
            return f'<span style="color: #999; font-style: italic;">{no_data_text}</span>'
        return f"{value}{suffix}"

    # Variant for the large stat tiles: lighter "--" placeholder.
    def format_large_stat(value, suffix="", no_data_text="--"):
        if value is None:
            return f'<span style="color: #ccc;">{no_data_text}</span>'
        return f"{value}{suffix}"

    # Get evaluation metrics from global state
    # NOTE(review): assumes get_evaluation_summary() exposes
    # 'aggregate_metrics' and get_cache_status() exposes
    # 'total_conversation_sessions' — confirm against the state manager.
    try:
        eval_summary = global_state_manager.get_evaluation_summary()
        cache_status = global_state_manager.get_cache_status()

        project_stats["ml_educational_quality"] = eval_summary['aggregate_metrics']['avg_educational_quality']
        project_stats["ml_classifier_accuracy"] = eval_summary['aggregate_metrics']['classifier_accuracy_rate']
        project_stats["active_sessions"] = cache_status['total_conversation_sessions']

    except Exception as e:
        # Missing global state degrades gracefully to "N/A" placeholders.
        logger.warning(f"Could not get global state metrics: {e}")
        project_stats["ml_educational_quality"] = None
        project_stats["ml_classifier_accuracy"] = None
        project_stats["active_sessions"] = None

    # Status determination: colour-code the success-rate tile.
    success_rate = project_stats.get("success_rate")
    if success_rate is not None:
        if success_rate >= 80:
            status_color = "#4CAF50"
            status_text = "Excellent"
        elif success_rate >= 60:
            status_color = "#FF9800"
            status_text = "Good"
        else:
            status_color = "#F44336"
            status_text = "Needs Improvement"
    else:
        status_color = "#999"
        status_text = "No data"

    # ML metrics section
    ml_metrics_section = f"""
    <div style="margin: 15px 0; padding: 10px; background: #f0f8ff; border-radius: 4px; border-left: 4px solid #007bff;">
        <strong>ML Performance:</strong>
        Educational Quality: {format_stat(project_stats.get('ml_educational_quality'), '', 'N/A')} |
        Classifier Accuracy: {format_stat(project_stats.get('ml_classifier_accuracy'), '%' if project_stats.get('ml_classifier_accuracy') else '', 'N/A')} |
        Active Sessions: {format_stat(project_stats.get('active_sessions'), '', 'N/A')}
    </div>
    """

    dashboard_html = f'''
    <div style="text-align: center; padding: 20px; border: 1px solid #ddd; border-radius: 8px; background: #f9f9f9;">
        <h3>{project_name} Analytics</h3>

        <div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 15px; margin: 20px 0;">
            <div style="padding: 15px; background: white; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <div style="font-size: 24px; font-weight: bold; color: #2196F3;">{format_large_stat(project_stats.get('total_conversations'))}</div>
                <div style="color: #666; font-size: 12px;">Total Sessions</div>
            </div>
            <div style="padding: 15px; background: white; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <div style="font-size: 24px; font-weight: bold; color: #FF9800;">{format_large_stat(project_stats.get('avg_session_length'), 's' if project_stats.get('avg_session_length') else '')}</div>
                <div style="color: #666; font-size: 12px;">Avg Response Time</div>
            </div>
            <div style="padding: 15px; background: white; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <div style="font-size: 24px; font-weight: bold; color: {status_color};">{format_large_stat(success_rate, '%' if success_rate else '')}</div>
                <div style="color: #666; font-size: 12px;">Success Rate ({status_text})</div>
            </div>
        </div>

        {ml_metrics_section}

        <div style="margin: 15px 0; padding: 10px; background: #fff3cd; border-radius: 4px; font-size: 14px;">
            <strong>Model:</strong> {format_stat(project_stats.get('model_type'), no_data_text='Unknown')} |
            <strong>Last Updated:</strong> {project_stats.get('last_updated', 'Unknown')}
        </div>
    </div>
    '''

    return dashboard_html
359
+
360
+
361
def calculate_response_quality(response: str) -> float:
    """Heuristically score a response's quality on a 1.0–5.0 scale.

    Scoring:
      * under 20 chars  -> flat 2.0 (too short to be useful)
      * over 2000 chars -> flat 3.5 (likely rambling)
      * otherwise: 2.5 base + up to 1.5 for length (saturates at 200 chars)
        + up to 1.0 for educational-keyword coverage, clamped to [1.0, 5.0].

    Returns 3.0 (neutral) if scoring fails for any reason.
    """
    try:
        # Degenerate lengths get flat scores; doing these checks first also
        # avoids the keyword scan the original wasted on them.
        if len(response) < 20:
            return 2.0
        if len(response) > 2000:
            return 3.5

        length_score = min(len(response) / 200, 1.0)
        educational_keywords = ['learn', 'understand', 'concept', 'example', 'practice']
        lowered = response.lower()  # lowercase once, not per keyword
        keyword_score = sum(1 for keyword in educational_keywords if keyword in lowered) / len(educational_keywords)

        base_score = 2.5 + (length_score * 1.5) + (keyword_score * 1.0)
        return min(max(base_score, 1.0), 5.0)
    except Exception:  # was a bare except: must not swallow KeyboardInterrupt/SystemExit
        return 3.0
377
+
378
+
379
def evaluate_educational_quality_with_tracking(user_query: str, response: str, thread_id: str = None, session_id: str = None):
    """Educational quality evaluation with state tracking using LightEval.

    Scores *response* with cheap structural heuristics, optionally refines
    the semantic score via LightEval's BertScore, records the result in the
    global state manager, and returns the metrics dict
    {'semantic_quality', 'educational_score', 'response_time', 'indicators'}.
    On any failure a neutral fallback dict is returned instead.
    """
    # NOTE: this clock measures the evaluation itself, not model latency.
    start_time = time.time()

    try:
        # Educational indicators — six boolean heuristics, each weighted equally.
        educational_indicators = {
            'has_examples': 'example' in response.lower(),
            'structured_explanation': '##' in response or '1.' in response,
            'appropriate_length': 100 < len(response) < 1500,
            'encourages_learning': any(phrase in response.lower()
                                       for phrase in ['practice', 'try', 'consider', 'think about']),
            'uses_latex': '$' in response,
            'has_clear_sections': response.count('\n\n') >= 2
        }

        # Fraction of indicators satisfied, in [0, 1].
        educational_score = sum(educational_indicators.values()) / len(educational_indicators)
        # Length-based fallback for semantic quality; saturates at 500 chars.
        semantic_quality = min(len(response) / 500, 1.0)
        response_time = time.time() - start_time

        # Use LightEval if available; failure keeps the length-based fallback.
        if LIGHTEVAL_AVAILABLE:
            try:
                doc = Doc(
                    task_name=f"turn_{thread_id or session_id}",
                    query=user_query,
                    choices=[response],
                    gold_index=-1,  # no gold answer — scoring free-form output
                    specific_output=response
                )

                bert_score = BertScore().compute(doc)
                # A falsy score (None/0) also falls back — presumably intentional;
                # TODO confirm 0.0 should not override the heuristic.
                semantic_quality = bert_score if bert_score else semantic_quality

            except Exception as lighteval_error:
                logger.warning(f"LightEval computation failed: {lighteval_error}")

        metrics = {
            'semantic_quality': semantic_quality,
            'educational_score': educational_score,
            'response_time': response_time,
            'indicators': educational_indicators
        }

        # Track in global state so the dashboard aggregates can see this turn.
        global_state_manager.add_educational_quality_score(
            user_query=user_query,
            response=response,
            metrics=metrics,
            session_id=session_id
        )

        logger.info(f"Educational quality evaluated: {educational_score:.3f}")
        return metrics

    except Exception as e:
        logger.error(f"Educational quality evaluation failed: {e}")
        # Neutral fallback keeps callers (metrics logging) working.
        return {'educational_score': 0.5, 'semantic_quality': 0.5, 'response_time': 0.0}
437
+
438
+
439
def make_classification_with_tracking(user_input: str, conversation_length: int, is_first_turn: bool,
                                      input_character_count: int, is_short_input: bool,
                                      recent_discovery_count: int, contains_greeting: bool,
                                      contains_educational_keywords: bool, requires_visualization: bool,
                                      topic_change_detected: bool, session_id: str = None):
    """Deprecated shim kept only for call-site compatibility.

    Classification is now handled by the agent-based routing pipeline;
    this stub merely records that a legacy caller still invokes it and
    returns None unconditionally.
    """
    logger.info("ML classifier tracking called (legacy - now using agent-based routing)")
    return None
447
+
448
+
449
def log_metrics_to_database(project_name: str, run_id: str, metrics: Dict):
    """Persist a batch of metrics for one run into the SQLite dashboard DB.

    Creates the metrics table on demand, stringifies every value, and stamps
    all rows of the batch with one shared ISO timestamp so they group as a
    single run. Failures are logged and swallowed so metrics logging can
    never take down the main request path.
    """
    try:
        db_path = get_trackio_database_path(project_name)

        if db_path is None:
            db_path = "./mimir_metrics.db"  # local fallback when no tracked DB exists

        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()

            # Create metrics table if not exists
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS metrics (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    project_name TEXT,
                    run_id TEXT,
                    metric_name TEXT,
                    value TEXT,
                    timestamp TEXT
                )
            """)

            # One shared timestamp so all metrics of this run group together.
            timestamp = datetime.now().isoformat()
            for metric_name, metric_value in metrics.items():
                cursor.execute("""
                    INSERT INTO metrics (project_name, run_id, metric_name, value, timestamp)
                    VALUES (?, ?, ?, ?, ?)
                """, (project_name, run_id, metric_name, str(metric_value), timestamp))

            conn.commit()
        finally:
            # Bug fix: previously the connection leaked whenever an execute
            # raised before conn.close() was reached.
            conn.close()

        logger.info(f"Logged {len(metrics)} metrics to database")

    except Exception as e:
        logger.error(f"Failed to log metrics to database: {e}")
487
+
488
+
489
def sync_trackio_with_global_state():
    """Push aggregate evaluation metrics from the global state manager
    into the metrics database so the dashboard reflects them.

    Errors are logged and swallowed; syncing is best-effort.
    """
    try:
        summary = global_state_manager.get_evaluation_summary()
        aggregates = summary['aggregate_metrics']

        # Snapshot the aggregates under the names the dashboard queries.
        snapshot = {
            "educational_quality_avg": aggregates['avg_educational_quality'],
            "classifier_accuracy": aggregates['classifier_accuracy_rate'],
            "user_satisfaction": aggregates['user_satisfaction_rate'],
            "total_evaluations": sum(summary['total_evaluations'].values()),
        }

        # Each sync is logged as its own run.
        log_metrics_to_database("Mimir", str(uuid.uuid4()), snapshot)

        logger.info("Synced global state metrics to database")

    except Exception as e:
        logger.error(f"Failed to sync metrics to database: {e}")
508
+
509
+
510
def refresh_analytics_data_persistent():
    """Refresh analytics data with global state persistence.

    Returns a (project_stats, recent_interactions, dashboard_html) triple.
    Results are cached in the global state manager; a refresh from less
    than 30 seconds ago is returned from cache. On any error an error-stats
    triple is returned and the error is persisted to the analytics state.
    """
    project_name = "Mimir"

    try:
        analytics_state = global_state_manager.get_analytics_state()
        last_refresh = analytics_state.get('last_refresh')

        # If refreshed within last 30 seconds, return cached.
        # Bug fix: timedelta.seconds wraps every 24h (a refresh 1 day + 5s
        # ago reported 5), so use total_seconds() for true elapsed time.
        if last_refresh and (datetime.now() - last_refresh).total_seconds() < 30:
            logger.info("Using cached analytics data (recent refresh)")
            return (
                analytics_state['project_stats'],
                analytics_state['recent_interactions'],
                analytics_state['dashboard_html']
            )

        db_path = get_trackio_database_path(project_name)

        if db_path is None:
            # No database yet: publish a stats shell so the dashboard still renders.
            logger.warning("No metrics database found")
            project_stats = {
                "total_conversations": None,
                "avg_session_length": None,
                "success_rate": None,
                "model_type": "Phi-3-mini (Fine-tuned)",
                "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }

            dashboard_html = create_dashboard_html_with_nulls(project_name, project_stats)
            recent_interactions = []

            global_state_manager.update_analytics_state(
                project_stats=project_stats,
                recent_interactions=recent_interactions,
                dashboard_html=dashboard_html
            )

            return project_stats, recent_interactions, dashboard_html

        conn = sqlite3.connect(db_path)
        try:
            conn.row_factory = sqlite3.Row  # name-based column access for the query helpers
            cursor = conn.cursor()

            project_stats = get_project_statistics_with_nulls(cursor, project_name)
            project_stats["model_type"] = "Phi-3-mini (Fine-tuned)"
            project_stats["last_updated"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            recent_data = get_recent_interactions_with_nulls(cursor, project_name, limit=10)
            dashboard_html = create_dashboard_html_with_nulls(project_name, project_stats)
        finally:
            # Bug fix: close the connection even when a query raises.
            conn.close()

        global_state_manager.update_analytics_state(
            project_stats=project_stats,
            recent_interactions=recent_data,
            dashboard_html=dashboard_html
        )

        logger.info("Analytics data refreshed and cached successfully")
        return project_stats, recent_data, dashboard_html

    except Exception as e:
        logger.error(f"Error refreshing analytics: {e}")

        error_stats = {
            "error": str(e),
            "total_conversations": None,
            "avg_session_length": None,
            "success_rate": None,
            "model_type": "Error",
            "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        error_html = f"""
        <div style="text-align: center; padding: 40px; border: 2px dashed #f44336; border-radius: 8px; background: #ffebee;">
            <h3 style="color: #f44336;">Analytics Error</h3>
            <p>Could not load analytics data: {str(e)[:100]}</p>
        </div>
        """

        # Persist the error so subsequent loads can surface it.
        global_state_manager.update_analytics_state(
            project_stats=error_stats,
            recent_interactions=[],
            dashboard_html=error_html,
            error_state=str(e)
        )

        return error_stats, [], error_html
599
+
600
+
601
def export_metrics_json_persistent():
    """Export current analytics metrics to a timestamped JSON file.

    Refreshes (or reuses cached) analytics data, writes it to
    mimir_metrics_<timestamp>.json in the working directory, and records
    the export in the global state manager. Errors are logged, recorded
    as a failed export, and surfaced via gr.Warning — never raised.
    """
    try:
        project_stats, recent_data, _ = refresh_analytics_data_persistent()

        export_data = {
            "project": "Mimir",
            "export_timestamp": datetime.now().isoformat(),
            "statistics": project_stats,
            "recent_interactions": recent_data
        }

        filename = f"mimir_metrics_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

        # utf-8 keeps the export portable across platforms; default=str
        # stringifies anything json can't serialize natively (datetimes etc.).
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(export_data, f, indent=2, default=str)

        global_state_manager.add_export_record("JSON", filename, success=True)

        # Bug fix: these messages previously logged the literal text
        # "(unknown)" instead of the actual export filename.
        logger.info(f"Metrics exported to {filename}")
        gr.Info(f"Metrics exported successfully to {filename}")

    except Exception as e:
        global_state_manager.add_export_record("JSON", "failed", success=False)
        logger.error(f"Export failed: {e}")
        gr.Warning(f"Export failed: {str(e)}")
627
+
628
+
629
def export_metrics_csv_persistent():
    """Export recent interactions to a timestamped CSV file.

    Writes mimir_metrics_<timestamp>.csv with a fixed header row followed
    by the recent-interaction rows from the analytics refresh. Errors are
    logged, recorded as a failed export, and surfaced via gr.Warning.
    """
    try:
        import csv

        _, recent_data, _ = refresh_analytics_data_persistent()

        filename = f"mimir_metrics_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"

        # newline='' is required by the csv module; utf-8 keeps the file portable.
        with open(filename, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(["Timestamp", "Response Time", "Mode", "Tools Used", "Quality Score", "Adapter"])
            writer.writerows(recent_data)

        global_state_manager.add_export_record("CSV", filename, success=True)

        # Bug fix: these messages previously logged the literal text
        # "(unknown)" instead of the actual export filename.
        logger.info(f"Metrics exported to {filename}")
        gr.Info(f"Metrics exported successfully to {filename}")

    except Exception as e:
        global_state_manager.add_export_record("CSV", "failed", success=False)
        logger.error(f"Export failed: {e}")
        gr.Warning(f"Export failed: {str(e)}")
654
+
655
+
656
def load_analytics_state():
    """Return (project_stats, recent_interactions, dashboard_html) from the
    globally persisted analytics state.

    When no dashboard has been rendered yet, a static placeholder card is
    substituted so the UI always has something to show.
    """
    state = global_state_manager.get_analytics_state()

    stats = state['project_stats']
    interactions = state['recent_interactions']
    html = state['dashboard_html']

    if html is None:
        html = """
        <div style="text-align: center; padding: 40px; border: 2px dashed #ccc; border-radius: 8px; background: #f8f9fa;">
            <h3>Analytics Dashboard</h3>
            <p>Click "Refresh Data" to load analytics.</p>
        </div>
        """

    return stats, interactions, html
673
+
674
+
675
def get_global_state_debug_info():
    """Snapshot the global state manager for debugging.

    Returns a dict with the cache status, the snapshot timestamp, and all
    known sessions.
    """
    return {
        "cache_status": global_state_manager.get_cache_status(),
        "timestamp": datetime.now().isoformat(),
        "sessions": global_state_manager.get_all_sessions(),
    }
686
+
687
+
688
+ # ============================================================================
689
+ # POST-PROCESSING
690
+ # ============================================================================
691
+
692
class ResponsePostProcessor:
    """Cleans up, truncates, and formats model output before display.

    Pipeline: token cleanup -> intelligent truncation -> readability
    formatting. If the result fails a minimal quality gate, a safe canned
    reply is returned instead.
    """

    def __init__(self, max_length: int = 1800, min_length: int = 10):
        # Responses longer than max_length are truncated at sentence
        # boundaries; shorter than min_length are replaced by the fallback.
        self.max_length = max_length
        self.min_length = min_length

        # Educational endpoints where a response can be cut cleanly.
        self.logical_stop_patterns = [
            r'\n\n---\n',
            r'\n\n## Summary\b',
            r'\n\nIn conclusion\b',
            r'\n\nTo summarize\b',
        ]

    def process_response(self, raw_response: str, user_query: str = "") -> str:
        """Run the full cleanup pipeline; on any error, return the raw text."""
        try:
            text = self._enhanced_token_cleanup(raw_response)
            text = self._truncate_intelligently(text)
            text = self._enhance_readability(text)

            if self._passes_quality_check(text):
                return text.strip()
            return self._generate_fallback_response(user_query)

        except Exception as e:
            logger.error(f"Post-processing error: {e}")
            return raw_response

    def _enhanced_token_cleanup(self, text: str) -> str:
        """Strip chat-template artifacts and collapse excess blank lines."""
        for junk in (
            r'<\|.*?\|>',
            r'###\s*$',
            r'User:\s*$',
            r'Assistant:\s*$',
            r'\n\s*\n\s*\n+',
        ):
            text = re.sub(junk, '', text, flags=re.MULTILINE)

        return text

    def _truncate_intelligently(self, text: str) -> str:
        """Cut at a logical endpoint, else at a sentence boundary near max_length."""
        for marker in self.logical_stop_patterns:
            hit = re.search(marker, text, re.IGNORECASE)
            if hit:
                return text[:hit.start()].strip()

        if len(text) <= self.max_length:
            return text

        # Accumulate whole sentences until the next one would overflow.
        kept = ""
        for sentence in re.split(r'[.!?]+\s+', text):
            candidate = kept + sentence + ". "
            if len(candidate) > self.max_length:
                break
            kept = candidate

        return kept.strip()

    def _enhance_readability(self, text: str) -> str:
        """Normalize spacing after punctuation, runs of whitespace, and bullets."""
        text = re.sub(r'([.!?])([A-Z])', r'\1 \2', text)
        text = re.sub(r'\s{2,}', ' ', text)
        return re.sub(r'\n\s*[-*]\s*', '\n- ', text)

    def _passes_quality_check(self, text: str) -> bool:
        """Require minimum length plus at least one substantive sentence."""
        if len(text.strip()) < self.min_length:
            return False

        return any(len(part.strip()) > 5 for part in re.split(r'[.!?]+', text))

    def _generate_fallback_response(self, user_query: str) -> str:
        """Safe canned reply used when processing yields unusable text."""
        return "I'd be happy to help you understand this better. Could you clarify what specific aspect you'd like me to focus on?"

    def process_and_stream_response(self, raw_response: str, user_query: str = ""):
        """Yield the processed response progressively, one word at a time."""
        try:
            final_text = self.process_response(raw_response, user_query)
            words = final_text.split()

            for count in range(1, len(words) + 1):
                partial = " ".join(words[:count])
                # Intermediate chunks carry a trailing space, matching the
                # incremental accumulation downstream consumers expect.
                if count < len(words):
                    partial += " "

                yield partial
                time.sleep(0.015)

        except Exception as e:
            logger.error(f"Stream processing error: {e}")
            yield "I encountered an error processing the response."
800
+
801
+
802
# Module-level singleton used by the orchestration pipeline below (Step 10).
post_processor = ResponsePostProcessor()
803
+
804
+
805
+ # ============================================================================
806
+ # TOOL FUNCTIONS
807
+ # ============================================================================
808
+
809
@tool(return_direct=False)
def Create_Graph_Tool(
    data: dict,
    plot_type: str,
    title: str = "Generated Plot",
    x_label: str = "",
    y_label: str = "",
    educational_context: str = ""
) -> str:
    """Generate educational graphs.

    Delegates plotting to generate_plot() and wraps the resulting
    base64-encoded PNG in an HTML <img> block, optionally preceded by an
    educational-context callout. Returns an HTML error paragraph (never
    raises) when plotting fails.
    """
    # log_step called once at entry and again with the start time at every
    # exit path — presumably it records elapsed time; verify against helper.
    tool_start = log_step("Create_Graph_Tool")

    try:
        content, artifact = generate_plot(
            data=data,
            plot_type=plot_type,
            title=title,
            x_label=x_label,
            y_label=y_label
        )

        # generate_plot signals failure via an "error" key in the artifact.
        if "error" in artifact:
            log_step("Create_Graph_Tool", tool_start)
            return f'<p style="color:red;">Graph generation failed: {artifact["error"]}</p>'

        base64_image = artifact["base64_image"]

        # Optional callout shown above the image.
        context_html = ""
        if educational_context:
            context_html = f'<div style="margin: 10px 0; padding: 10px; background: #f8f9fa; border-left: 4px solid #007bff;">{educational_context}</div>'

        result = f"""{context_html}
<div style="text-align: center; margin: 20px 0;">
    <img src="data:image/png;base64,{base64_image}"
         style="max-width: 100%; height: auto; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1);"
         alt="{title}" />
</div>"""

        log_step("Create_Graph_Tool", tool_start)
        return result

    except Exception as e:
        logger.error(f"Graph tool error: {e}")
        log_step("Create_Graph_Tool", tool_start)
        return f'<p style="color:red;">Error: {str(e)}</p>'
854
+
855
+
856
+ # ============================================================================
857
+ # MAIN ORCHESTRATION WORKFLOW
858
+ # ============================================================================
859
+
860
def orchestrate_turn(user_input: str, session_id: str = "default") -> str:
    """
    Main orchestration function implementing the redesign workflow.

    Runs one conversational turn end-to-end: resets per-turn prompt state,
    gathers history, routes through the tool/agent pipeline, assembles the
    final prompt, generates and post-processes the response, and logs
    metrics. Never raises — errors are returned as an apology string.

    Steps:
    1. Reset prompt state
    2. Process user input (history)
    3. Tool decision
    4. Regex checks
    5. Agent execution
    6. Thinking agents
    7. Prompt assembly
    8. Response generation
    9. Metrics tracking
    """
    turn_start = log_step("orchestrate_turn")
    run_id = str(uuid.uuid4())  # correlates all metrics rows for this turn

    try:
        # ====================================================================
        # STEP 1: RESET PROMPT STATE
        # ====================================================================
        # Prompt state is per-turn: every flag starts False each call.
        step_start = log_step("Step 1: Reset prompt state")
        global_state_manager.reset_prompt_state()
        prompt_state = global_state_manager.get_prompt_state_manager()
        log_step("Step 1: Reset prompt state", step_start)

        # ====================================================================
        # STEP 2: USER INPUT PROCESSING
        # ====================================================================
        step_start = log_step("Step 2: Process user input")

        # Get conversation history (last 8 messages only, to bound prompt size).
        conversation_state = global_state_manager.get_conversation_state(session_id)
        recent_history = conversation_state['conversation_state'][-8:] if conversation_state['conversation_state'] else []

        # Format history for agents; each message truncated to 100 chars.
        recent_history_formatted = "\n".join([
            f"{msg['role']}: {msg['content'][:100]}"
            for msg in recent_history
        ]) if recent_history else "No previous conversation"

        log_step("Step 2: Process user input", step_start)

        # ====================================================================
        # STEP 3: TOOL DECISION ENGINE
        # ====================================================================
        step_start = log_step("Step 3: Tool decision")
        tool_decision_result = tool_agent.should_use_visualization(user_input)

        # NOTE(review): these stay empty for the whole turn — no tool is
        # actually invoked here; only the prompt flag is set. Confirm whether
        # tool execution is intended to happen elsewhere.
        tool_img_output = ""
        tool_context = ""

        if tool_decision_result:
            logger.info("Tool decision: YES - visualization needed")
            prompt_state.update("TOOL_USE_ENHANCEMENT", True)
        else:
            logger.info("Tool decision: NO - no visualization needed")

        log_step("Step 3: Tool decision", step_start)

        # ====================================================================
        # STEP 4: REGEX LOGICAL EXPRESSIONS
        # ====================================================================
        # Cheap pattern checks set additional prompt flags before the agents run.
        step_start = log_step("Step 4: Regex checks")
        logical_expressions.apply_all_checks(user_input, prompt_state)
        log_step("Step 4: Regex checks", step_start)

        # ====================================================================
        # STEP 5: SEQUENTIAL AGENT EXECUTION
        # ====================================================================
        step_start = log_step("Step 5: Routing agents")

        # Agent 1: Practice questions
        agent1_start = log_step("Agent 1: Practice questions")
        agent_1_result = routing_agents.agent_1_practice_questions(
            user_input,
            recent_history
        )
        if agent_1_result:
            prompt_state.update("STRUCTURE_PRACTICE_QUESTIONS", True)
        log_step("Agent 1: Practice questions", agent1_start)

        # Agent 2: Discovery mode — returns the flag NAME to set, unlike the others.
        agent2_start = log_step("Agent 2: Discovery mode")
        agent_2_result = routing_agents.agent_2_discovery_mode(user_input)
        if agent_2_result:
            prompt_state.update(agent_2_result, True)
        log_step("Agent 2: Discovery mode", agent2_start)

        # Agent 3: Follow-up assessment
        agent3_start = log_step("Agent 3: Follow-up assessment")
        agent_3_result = routing_agents.agent_3_followup_assessment(
            user_input,
            recent_history
        )
        if agent_3_result:
            prompt_state.update("PRACTICE_QUESTION_FOLLOWUP", True)
        log_step("Agent 3: Follow-up assessment", agent3_start)

        # Agent 4: Teaching mode — returns a mapping of flags applied in bulk.
        agent4_start = log_step("Agent 4: Teaching mode")
        agent_4_results = routing_agents.agent_4_teaching_mode(
            user_input,
            recent_history
        )
        prompt_state.update_multiple(agent_4_results)
        log_step("Agent 4: Teaching mode", agent4_start)

        log_step("Step 5: Routing agents", step_start)

        # ====================================================================
        # STEP 6: THINKING AGENT PROCESSING
        # ====================================================================
        step_start = log_step("Step 6: Thinking agents")

        # Determine which thinking agents to activate based on flags set above.
        thinking_prompts_list = []

        # Math thinking (if math detected)
        if prompt_state.is_active("LATEX_FORMATTING"):
            thinking_prompts_list.append("MATH_THINKING")
            prompt_state.update("MATH_THINKING", True)

        # Question design (if practice questions needed)
        if prompt_state.is_active("STRUCTURE_PRACTICE_QUESTIONS"):
            thinking_prompts_list.append("QUESTION_ANSWER_DESIGN")
            prompt_state.update("QUESTION_ANSWER_DESIGN", True)

        # Reasoning thinking (for teaching/tools/followup)
        if (prompt_state.is_active("TOOL_USE_ENHANCEMENT") or
            prompt_state.is_active("PRACTICE_QUESTION_FOLLOWUP") or
            prompt_state.is_active("GUIDING_TEACHING")):
            thinking_prompts_list.append("REASONING_THINKING")
            prompt_state.update("REASONING_THINKING", True)

        # Execute thinking agents if any are active
        thinking_context = ""
        if thinking_prompts_list:
            thinking_prompts_string = '\n'.join(thinking_prompts_list)
            logger.info(f"Active thinking agents: {thinking_prompts_list}")

            think_start = log_step("Thinking agents execution")
            thinking_context = thinking_agents.process(
                user_input=user_input,
                conversation_history=recent_history_formatted,
                thinking_prompts=thinking_prompts_string,
                tool_img_output=tool_img_output,
                tool_context=tool_context
            )
            log_step("Thinking agents execution", think_start)

        log_step("Step 6: Thinking agents", step_start)

        # ====================================================================
        # STEP 7: RESPONSE PROMPT ASSEMBLY
        # ====================================================================
        step_start = log_step("Step 7: Prompt assembly")

        # Get active response prompts
        response_prompt_names = prompt_state.get_active_response_prompts()

        # Build prompt segments — CORE_IDENTITY is always first.
        prompt_segments = [CORE_IDENTITY]

        # Flag name -> prompt-text constant. "VAUGE_INPUT" spelling matches
        # the module constant elsewhere in the project; do not "fix" it here.
        prompt_map = {
            "VAUGE_INPUT": VAUGE_INPUT,
            "USER_UNDERSTANDING": USER_UNDERSTANDING,
            "GENERAL_FORMATTING": GENERAL_FORMATTING,
            "LATEX_FORMATTING": LATEX_FORMATTING,
            "GUIDING_TEACHING": GUIDING_TEACHING,
            "STRUCTURE_PRACTICE_QUESTIONS": STRUCTURE_PRACTICE_QUESTIONS,
            "PRACTICE_QUESTION_FOLLOWUP": PRACTICE_QUESTION_FOLLOWUP,
            "TOOL_USE_ENHANCEMENT": TOOL_USE_ENHANCEMENT,
        }

        # Unknown flag names are silently skipped.
        for prompt_name in response_prompt_names:
            if prompt_name in prompt_map:
                prompt_segments.append(prompt_map[prompt_name])

        prompt_segments_text = "\n\n".join(prompt_segments)

        logger.info(f"Active prompts: {response_prompt_names}")
        log_step("Step 7: Prompt assembly", step_start)

        # ====================================================================
        # STEP 8: FINAL PROMPT CONSTRUCTION
        # ====================================================================
        step_start = log_step("Step 8: Final prompt construction")

        # Knowledge cutoff
        knowledge_cutoff = f"""

The current year is {CURRENT_YEAR}. Your knowledge cutoff date is October 2023. If the user asks about recent events or dynamic facts, inform them you may not have the most up-to-date information and suggest referencing direct sources."""

        complete_prompt = f"""
{prompt_segments_text}

If tools were used, context and output will be here. Ignore if empty:
Image output: {tool_img_output}
Image context: {tool_context}

Conversation history, if available:
{recent_history_formatted}

Consider any context available to you:
{thinking_context}

Here is the user's current query:
{user_input}

{knowledge_cutoff}
"""

        log_step("Step 8: Final prompt construction", step_start)

        # ====================================================================
        # STEP 9: RESPONSE GENERATION
        # ====================================================================
        step_start = log_step("Step 9: Response generation")
        raw_response = response_agent.invoke(complete_prompt)
        log_step("Step 9: Response generation", step_start)

        # ====================================================================
        # STEP 10: POST-PROCESSING
        # ====================================================================
        # Cleanup/truncation pipeline; falls back to the raw text on error.
        step_start = log_step("Step 10: Post-processing")
        processed_response = post_processor.process_response(raw_response, user_input)
        log_step("Step 10: Post-processing", step_start)

        # ====================================================================
        # STEP 11: METRICS TRACKING
        # ====================================================================
        # Best-effort: a metrics failure must never fail the user's turn.
        step_start = log_step("Step 11: Metrics tracking")

        try:
            # Track educational quality
            quality_metrics = evaluate_educational_quality_with_tracking(
                user_query=user_input,
                response=processed_response,
                thread_id=run_id,
                session_id=session_id
            )

            # Log metrics to database
            # NOTE(review): assumes log_step(name) returns a numeric start
            # time compatible with time.time() — confirm against helper.
            metrics_to_log = {
                "conversation_start": datetime.now().isoformat(),
                "response_time": time.time() - turn_start,
                "quality_score": calculate_response_quality(processed_response),
                "educational_score": quality_metrics['educational_score'],
                "prompt_mode": ",".join(response_prompt_names),
                "tools_used": 1 if prompt_state.is_active("TOOL_USE_ENHANCEMENT") else 0,
                "thinking_agents": ",".join(thinking_prompts_list) if thinking_prompts_list else "none",
                "active_adapter": response_agent.model_type if response_agent.model_loaded else "not_loaded"
            }

            log_metrics_to_database("Mimir", run_id, metrics_to_log)

        except Exception as metrics_error:
            logger.warning(f"Metrics tracking failed: {metrics_error}")

        log_step("Step 11: Metrics tracking", step_start)

        log_step("orchestrate_turn", turn_start)
        return processed_response

    except Exception as e:
        # Any unhandled failure is reported to the user as text, not raised.
        logger.error(f"Orchestration error: {e}")
        import traceback
        logger.error(traceback.format_exc())
        log_step("orchestrate_turn", turn_start)
        return f"I encountered an error: {str(e)}"
1132
+
1133
+
1134
+ # ============================================================================
1135
+ # GRADIO CALLBACK FUNCTIONS
1136
+ # ============================================================================
1137
+
1138
def get_loading_animation_base64():
    """Read loading_animation.gif and return it as a data-URI string.

    Returns:
        The GIF encoded as ``data:image/gif;base64,...``, or None when the
        file is missing so callers can fall back to a plain placeholder.
    """
    try:
        with open("loading_animation.gif", "rb") as handle:
            encoded = base64.b64encode(handle.read()).decode('utf-8')
    except FileNotFoundError:
        logger.warning("loading_animation.gif not found")
        return None
    return f"data:image/gif;base64,{encoded}"
1148
+
1149
+
1150
def remove_loading_animations(chat_history):
    """Return chat_history with any loading-animation placeholder bubbles dropped."""
    def _is_placeholder(msg):
        # Placeholders are assistant messages whose HTML carries the
        # "loading-animation" marker class; user messages are never removed.
        if msg.get("role") != "assistant":
            return False
        return "loading-animation" in str(msg.get("content", ""))

    return [msg for msg in chat_history if not _is_placeholder(msg)]
1156
+
1157
+
1158
def add_user_message(message, chat_history, conversation_state):
    """Append the user's message to both chat display and conversation state.

    Whitespace-only input is ignored. Always returns ("", chat_history,
    conversation_state) so the Gradio textbox is cleared either way.
    """
    timer = log_step("add_user_message")

    if message.strip():
        # Work from the authoritative global state rather than the component
        # values Gradio passed in, which may be stale.
        state = global_state_manager.get_conversation_state()
        chat_history = state['chat_history']
        conversation_state = state['conversation_state']

        # Append two *separate* dicts: chat_history entries are mutated in
        # place during streaming and must not alias conversation_state.
        conversation_state.append({"role": "user", "content": message})
        chat_history.append({"role": "user", "content": message})

        global_state_manager.update_conversation_state(chat_history, conversation_state)

    log_step("add_user_message", timer)
    return "", chat_history, conversation_state
1180
+
1181
+
1182
def add_loading_animation(chat_history, conversation_state):
    """Show a 'thinking' placeholder bubble while a response is generated."""
    timer = log_step("add_loading_animation")

    state = global_state_manager.get_conversation_state()
    chat_history = state['chat_history']
    conversation_state = state['conversation_state']

    if conversation_state:
        # Never stack two placeholders: clear any stale one first.
        chat_history = remove_loading_animations(chat_history)

        gif_src = get_loading_animation_base64()
        if gif_src:
            placeholder = (
                f'<div class="loading-animation" style="display: flex; align-items: center; '
                f'justify-content: center; padding: 0.5px;"><img src="{gif_src}" alt="Thinking..." '
                f'style="height: 64px; width: auto; max-width: 80px;" /></div>'
            )
        else:
            # GIF missing: reserve the same space with an empty box.
            placeholder = (
                '<div class="loading-animation" style="display: flex; align-items: center; '
                'justify-content: center; padding: 0.5px;"><div style="width: 64px; height: 64px;">'
                '</div></div>'
            )

        chat_history.append({"role": "assistant", "content": placeholder})
        global_state_manager.update_conversation_state(chat_history, conversation_state)

    log_step("add_loading_animation", timer)
    return chat_history, conversation_state
1208
+
1209
+
1210
def generate_response(chat_history, conversation_state):
    """Generate and stream the assistant's reply via the orchestration pipeline.

    Generator callback for Gradio: yields (chat_history, conversation_state)
    after each streamed chunk. On failure it yields an error bubble instead.
    """
    callback_start = log_step("generate_response")

    # Always read the authoritative global state; the component values
    # Gradio passed in may be stale.
    current_state = global_state_manager.get_conversation_state()
    chat_history = current_state['chat_history']
    conversation_state = current_state['conversation_state']

    # NOTE(review): inside a generator these `return value` statements end
    # iteration WITHOUT emitting anything — Gradio receives no update on
    # these paths. Confirm that is intended.
    if not conversation_state:
        log_step("generate_response", callback_start)
        return chat_history, conversation_state

    # Get last user message (search backwards for the most recent user turn)
    last_user_message = ""
    for msg in reversed(conversation_state):
        if msg["role"] == "user":
            last_user_message = msg["content"]
            break

    if not last_user_message:
        log_step("generate_response", callback_start)
        return chat_history, conversation_state

    try:
        # Remove loading animation before streaming real content into the chat
        chat_history = remove_loading_animations(chat_history)
        yield chat_history, conversation_state

        # Full (blocking) orchestration produces the raw response first;
        # streaming below is re-chunked presentation, not token streaming.
        orch_start = log_step("orchestrate_turn call")
        raw_response = orchestrate_turn(last_user_message)
        log_step("orchestrate_turn call", orch_start)

        # Stream the processed response, mutating the last assistant bubble
        # in place so the UI shows progressive text.
        for chunk in post_processor.process_and_stream_response(raw_response, last_user_message):
            if chat_history and chat_history[-1]["role"] == "assistant":
                chat_history[-1]["content"] = chunk
            else:
                chat_history.append({"role": "assistant", "content": chunk})

            yield chat_history, conversation_state

        # Add to conversation state. The locals() check guards against a
        # NameError when the stream yielded zero chunks; in that case the
        # unprocessed raw_response is recorded instead.
        final_response = chunk if 'chunk' in locals() else raw_response
        conversation_state.append({"role": "assistant", "content": final_response})

        # Update global state so later callbacks see the finished turn
        global_state_manager.update_conversation_state(chat_history, conversation_state)
        yield chat_history, conversation_state

    except Exception as e:
        logger.error(f"Response generation error: {e}")
        error_msg = f"I encountered an error: {str(e)}"

        # Surface the failure as a normal assistant message in both stores
        chat_history = remove_loading_animations(chat_history)
        chat_history.append({"role": "assistant", "content": error_msg})
        conversation_state.append({"role": "assistant", "content": error_msg})

        global_state_manager.update_conversation_state(chat_history, conversation_state)
        yield chat_history, conversation_state

    log_step("generate_response", callback_start)
1272
+
1273
+
1274
def reset_conversation():
    """Clear the global conversation state and return empty chat/state lists."""
    timer = log_step("reset_conversation")
    global_state_manager.reset_conversation_state()
    log_step("reset_conversation", timer)
    return [], []
1280
+
1281
+
1282
def load_conversation_state():
    """Return the current (chat_history, conversation_state) pair from global state."""
    timer = log_step("load_conversation_state")
    state = global_state_manager.get_conversation_state()
    log_step("load_conversation_state", timer)
    return state['chat_history'], state['conversation_state']
1288
+
1289
+
1290
+ # ============================================================================
1291
+ # MULTI-PAGE INTERFACE
1292
+ # ============================================================================
1293
+
1294
def create_interface():
    """Create the multi-page Gradio interface (chatbot main page + analytics).

    Returns:
        The assembled gr.Blocks demo, ready for .launch().
    """
    logger.info("Creating Gradio interface...")

    # Import page modules lazily so their module-level gr.Blocks definitions
    # are only built when the interface is actually created.
    import gradio_chatbot
    import gradio_analytics

    with gr.Blocks(title="Mimir - Educational AI Assistant") as demo:
        # NOTE(review): `navbar` is assigned here and again inside the
        # Analytics route below; the first binding is shadowed and neither
        # is referenced afterwards — presumably only the side effect of
        # constructing gr.Navbar matters. Confirm against the gradio
        # multipage docs.
        navbar = gr.Navbar(
            visible=True,
            main_page_name="Mimir Chatbot",
            value=[("Case Study", "https://github.com/Jdesiree112/Technical_Portfolio/tree/main/CaseStudy_Mimir")]
        )
        gradio_chatbot.demo.render()

        # Secondary page registered via Blocks.route (gradio multipage API)
        with demo.route("Analytics"):
            navbar = gr.Navbar(
                visible=True,
                main_page_name="Mimir Chatbot",
                value=[("Case Study", "https://github.com/Jdesiree112/Technical_Portfolio/tree/main/CaseStudy_Mimir")]
            )
            gradio_analytics.demo.render()

    logger.info("Interface created successfully")
    return demo
1320
+
1321
+
1322
+ # ============================================================================
1323
+ # MAIN EXECUTION
1324
+ # ============================================================================
1325
+
1326
+ if __name__ == "__main__":
1327
+ try:
1328
+ # Pre-download models if needed
1329
+ logger.info("Checking for model downloads...")
1330
+ try:
1331
+ subprocess.run([sys.executable, "pre_download.py"], check=True)
1332
+ except Exception as e:
1333
+ logger.warning(f"Pre-download failed: {e}")
1334
+
1335
+ logger.info("="*60)
1336
+ logger.info("MIMIR APPLICATION READY")
1337
+ logger.info("="*60)
1338
+ logger.info(f"LightEval available: {LIGHTEVAL_AVAILABLE}")
1339
+ logger.info(f"Current year: {CURRENT_YEAR}")
1340
+ logger.info("="*60)
1341
+
1342
+ # Create and launch interface
1343
+ interface = create_interface()
1344
+
1345
+ interface.launch(
1346
+ server_name="0.0.0.0",
1347
+ share=False,
1348
+ debug=False,
1349
+ favicon_path="favicon.ico" if os.path.exists("favicon.ico") else None,
1350
+ show_error=True,
1351
+ quiet=False,
1352
+ prevent_thread_lock=False,
1353
+ max_threads=40
1354
+ )
1355
+
1356
+ except Exception as e:
1357
+ logger.error(f"Failed to launch Mimir: {e}")
1358
+ import traceback
1359
+ logger.error(traceback.format_exc())
1360
+ raise
app_V1.0.py ADDED
The diff for this file is too large to render. See raw diff
 
compile_model.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # compile_model.py
2
+ """
3
+ Compile and cache all models for Mimir Educational AI Assistant:
4
+ - Phi-3 (fine-tuned + base) for ResponseAgent
5
+ - Mistral-Small-24B for ToolDecisionAgent, PromptRoutingAgents, ThinkingAgents
6
+ - GGUF Mistral for math thinking
7
+ - RAG embeddings (if used)
8
+ """
9
+ import torch
10
+ import os
11
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
12
+ from accelerate import Accelerator, set_seed
13
+ from sentence_transformers import SentenceTransformer
14
+ from huggingface_hub import hf_hub_download
15
+ from huggingface_hub import scan_cache_dir
16
+
17
+ # Try to import llama-cpp for GGUF
18
+ try:
19
+ from llama_cpp import Llama
20
+ LLAMA_CPP_AVAILABLE = True
21
+ except ImportError:
22
+ LLAMA_CPP_AVAILABLE = False
23
+ print("⚠️ llama-cpp-python not available - GGUF model will not be cached")
24
+
25
+ HF_TOKEN = os.getenv("HF_TOKEN")
26
+
27
+ # Model paths (matching agents.py)
28
+ FINE_TUNED_PHI3 = "jdesiree/Mimir-Phi-3.5"
29
+ BASE_PHI3 = "microsoft/Phi-3-mini-4k-instruct"
30
+ MISTRAL_REASONING = "yentinglin/Mistral-Small-24B-Instruct-2501-reasoning"
31
+ MISTRAL_MATH_GGUF = "brittlewis12/Mistral-Small-24B-Instruct-2501-reasoning-GGUF"
32
+ EMBEDDINGS_MODEL = "thenlper/gte-small"
33
+
34
+ CACHE_DIR = "/data/compiled_models"
35
+
36
+
37
def _load_and_warm_phi3(model_id, quantization_config):
    """Load one Phi-3 checkpoint in 4-bit, pair it with the base tokenizer,
    and run a tiny generation to warm kernels and populate HF caches."""
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=quantization_config,
        torch_dtype=torch.float16,
        trust_remote_code=True,
        token=HF_TOKEN,
        device_map="auto",
    )

    # The fine-tune ships no tokenizer of its own; both paths use the base one.
    tokenizer = AutoTokenizer.from_pretrained(
        BASE_PHI3,
        trust_remote_code=True,
        token=HF_TOKEN
    )

    print("→ Running warmup inference...")
    test_prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": "Hello"}],
        tokenize=False,
        add_generation_prompt=True
    )
    inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
    _ = model.generate(**inputs, max_new_tokens=10)
    return model, tokenizer


def compile_phi3():
    """Compile the Phi-3 ResponseAgent model (fine-tuned, base fallback).

    Tries the fine-tuned checkpoint first; on any failure falls back to the
    base model. Writes a PHI3_READY marker into CACHE_DIR when done. The
    previous version duplicated the whole load+warmup sequence in both
    branches; that now lives in _load_and_warm_phi3.
    """
    print(f"\n{'='*60}")
    print("COMPILING PHI-3 RESPONSE AGENT")
    print(f"{'='*60}")

    accelerator = Accelerator(mixed_precision="fp16")
    set_seed(42)

    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    # Try fine-tuned model first; fall back to the base checkpoint on failure
    try:
        print(f"→ Loading fine-tuned model: {FINE_TUNED_PHI3}")
        _load_and_warm_phi3(FINE_TUNED_PHI3, quantization_config)
        print("✓ Fine-tuned Phi-3 compiled and cached")
    except Exception as e:
        print(f"⚠️ Fine-tuned model failed: {e}")
        print(f"→ Loading base model: {BASE_PHI3}")
        _load_and_warm_phi3(BASE_PHI3, quantization_config)
        print("✓ Base Phi-3 compiled and cached")

    # Save readiness marker consumed at app startup
    with open(f"{CACHE_DIR}/PHI3_READY", "w") as f:
        f.write("Phi-3 model loaded\n")
117
+
118
+
119
def compile_mistral_reasoning():
    """Compile Mistral-Small-24B for agents (tool, routing, thinking).

    Loads the model in 4-bit NF4 quantization, runs a short warmup
    generation, and writes a MISTRAL_REASONING_READY marker into CACHE_DIR.
    """
    print(f"\n{'='*60}")
    print("COMPILING MISTRAL-SMALL-24B REASONING MODEL")
    print(f"{'='*60}")

    print(f"→ Loading model: {MISTRAL_REASONING}")

    # Best-effort cache probe — purely informational, must never abort the
    # compile. Was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt; narrowed to Exception.
    try:
        cache_info = scan_cache_dir()
        model_cached = any(MISTRAL_REASONING in str(repo.repo_id) for repo in cache_info.repos)
        if model_cached:
            print(f"✓ Model already in HF cache: {MISTRAL_REASONING}")
        else:
            print(f"→ Model not cached, will download: {MISTRAL_REASONING}")
    except Exception:
        print(f"→ Loading model: {MISTRAL_REASONING}")

    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(
        MISTRAL_REASONING,
        trust_remote_code=True,
        token=HF_TOKEN
    )

    model = AutoModelForCausalLM.from_pretrained(
        MISTRAL_REASONING,
        quantization_config=quantization_config,
        torch_dtype=torch.float16,
        trust_remote_code=True,
        token=HF_TOKEN,
        device_map="auto",
    )

    # Warmup: one tiny generation compiles kernels and fills caches
    print("→ Running warmup inference...")
    test_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello"}
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        test_messages,
        tokenize=False,
        add_generation_prompt=True
    )
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    _ = model.generate(**inputs, max_new_tokens=10)

    print("✓ Mistral reasoning model compiled and cached")

    # Save readiness marker consumed at app startup
    with open(f"{CACHE_DIR}/MISTRAL_REASONING_READY", "w") as f:
        f.write("Mistral reasoning model loaded\n")
178
+
179
+
180
def compile_mistral_math_gguf():
    """Download, test-load, and warm the GGUF math-thinking model.

    Skips silently (with a notice) when llama-cpp-python is unavailable;
    failures are reported but never raised.
    """
    print(f"\n{'='*60}")
    print("COMPILING MISTRAL MATH GGUF MODEL")
    print(f"{'='*60}")

    if not LLAMA_CPP_AVAILABLE:
        print("⚠️ Skipping GGUF model - llama-cpp-python not available")
        return

    print(f"→ Downloading GGUF model: {MISTRAL_MATH_GGUF}")

    try:
        # Fetch the quantized weights file into the local HF cache
        gguf_path = hf_hub_download(
            repo_id=MISTRAL_MATH_GGUF,
            filename="mistral-small-24b-instruct-2501-reasoning-Q4_K_M.gguf",
            token=HF_TOKEN
        )
        print(f"→ GGUF downloaded to: {gguf_path}")

        # Verify the file actually loads before declaring it ready
        print("→ Testing GGUF model load...")
        llm = Llama(
            model_path=gguf_path,
            n_ctx=4096,
            n_threads=4,
            n_gpu_layers=35,
        )

        print("→ Running warmup inference...")
        _ = llm("Test prompt", max_tokens=10)

        print("✓ GGUF math model cached")

        # Marker records the resolved weights path for the app to reuse
        with open(f"{CACHE_DIR}/MISTRAL_MATH_GGUF_READY", "w") as marker:
            marker.write(f"GGUF model path: {gguf_path}\n")

    except Exception as e:
        print(f"⚠️ GGUF model caching failed: {e}")
223
+
224
+
225
def compile_rag_embeddings():
    """Pre-load and warm the sentence-transformers embedding model for RAG."""
    print(f"\n{'='*60}")
    print("COMPILING RAG EMBEDDINGS")
    print(f"{'='*60}")

    print(f"→ Loading embeddings model: {EMBEDDINGS_MODEL}")
    encoder = SentenceTransformer(EMBEDDINGS_MODEL)

    # One small encode pass downloads weights and warms the model
    print("→ Running warmup for embeddings model...")
    _ = encoder.encode(["What is calculus?", "Explain physics"])

    print("✓ RAG embeddings model cached")

    # Save readiness marker consumed at app startup
    with open(f"{CACHE_DIR}/RAG_EMBEDDINGS_READY", "w") as marker:
        marker.write(f"Embeddings model loaded: {EMBEDDINGS_MODEL}\n")
246
+
247
+
248
def compile_all():
    """Compile all models for Mimir, continuing past individual failures.

    Runs each compile step in isolation (one failure never blocks the rest),
    writes the COMPILED_READY marker, and prints an honest summary — the
    previous version printed "COMPILATION COMPLETE" even when every step
    had failed.
    """
    os.makedirs(CACHE_DIR, exist_ok=True)

    print("\n" + "="*60)
    print("MIMIR MODEL COMPILATION")
    print("="*60)
    print("\nThis will compile and cache:")
    print(" 1. Phi-3 ResponseAgent (fine-tuned + base)")
    print(" 2. Mistral-Small-24B (tool, routing, thinking agents)")
    print(" 3. GGUF Mistral Math (if llama-cpp available)")
    print(" 4. RAG Embeddings (if needed)")
    print("="*60)

    # Each step is independent; record failures instead of aborting.
    steps = [
        ("Phi-3", compile_phi3),
        ("Mistral reasoning", compile_mistral_reasoning),
        ("GGUF", compile_mistral_math_gguf),
        ("RAG embeddings", compile_rag_embeddings),
    ]
    failures = []
    for name, step in steps:
        try:
            step()
        except Exception as e:
            print(f"❌ {name} compilation failed: {e}")
            failures.append(name)

    # Final marker — startup checks for this file's existence, so it is
    # written even on partial failure to preserve existing behavior.
    with open(f"{CACHE_DIR}/COMPILED_READY", "w") as f:
        f.write("All models compiled successfully\n")

    print("\n" + "="*60)
    if failures:
        print(f"⚠️ COMPILATION FINISHED WITH FAILURES: {', '.join(failures)}")
    else:
        print("✓ COMPILATION COMPLETE")
    print("="*60)
    print(f"Cache directory: {CACHE_DIR}")
    print("Models are ready for Mimir startup!")
292
+
293
+
294
+ if __name__ == "__main__":
295
+ compile_all()
configuration_phi3.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ """ Phi-3 model configuration"""
17
+
18
+
19
+ from transformers.configuration_utils import PretrainedConfig
20
+ from transformers.utils import logging
21
+
22
+
23
+ logger = logging.get_logger(__name__)
24
+
25
# Released Phi-3 checkpoints mapped to their hosted config.json files.
PHI3_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "microsoft/Phi-3-mini-4k-instruct": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/config.json",
    "microsoft/Phi-3-mini-128k-instruct": "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/config.json",
}
29
+
30
+
31
# Vendored from HF Transformers' modeling_phi3 — keep byte-compatible with
# upstream so remote checkpoints load identically.
class Phi3Config(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`Phi3Model`]. It is used to instantiate a Phi-3
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the
    [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct).

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 32064):
            Vocabulary size of the Phi-3 model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`Phi3Model`].
        hidden_size (`int`, *optional*, defaults to 3072):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 8192):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 32):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the Transformer decoder.
        num_key_value_heads (`int`, *optional*):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
            by meanpooling all the original heads within that group. For more details checkout [this
            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
            `num_attention_heads`.
        resid_pdrop (`float`, *optional*, defaults to 0.0):
            Dropout probability for mlp outputs.
        embd_pdrop (`int`, *optional*, defaults to 0.0):
            The dropout ratio for the embeddings.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio after computing the attention scores.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
        max_position_embeddings (`int`, *optional*, defaults to 4096):
            The maximum sequence length that this model might ever be used with.
        original_max_position_embeddings (`int`, *optional*, defaults to 4096):
            The maximum sequence length that this model was trained with. This is used to determine the size of the
            original RoPE embeddings when using long scaling.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-05):
            The epsilon value used for the RMSNorm.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`. Whether to tie weight embeddings or not.
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings
        rope_theta (`float`, *optional*, defaults to 10000.0):
            The base period of the RoPE embeddings.
        rope_scaling (`dict`, *optional*):
            The scaling strategy for the RoPE embeddings. If `None`, no scaling is applied. If a dictionary, it must
            contain the following keys: `type`, `short_factor` and `long_factor`. The `type` must be `longrope` and
            the `short_factor` and `long_factor` must be lists of numbers with the same length as the hidden size
            divided by the number of attention heads divided by 2.
        bos_token_id (`int`, *optional*, defaults to 1):
            The id of the "beginning-of-sequence" token.
        eos_token_id (`int`, *optional*, defaults to 32000):
            The id of the "end-of-sequence" token.
        pad_token_id (`int`, *optional*, defaults to 32000):
            The id of the padding token.
        sliding_window (`int`, *optional*):
            Sliding window attention window size. If `None`, no sliding window is applied.

    Example:

    ```python
    >>> from transformers import Phi3Model, Phi3Config

    >>> # Initializing a Phi-3 style configuration
    >>> configuration = Phi3Config.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

    >>> # Initializing a model from the configuration
    >>> model = Phi3Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "phi3"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=32064,
        hidden_size=3072,
        intermediate_size=8192,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=None,
        resid_pdrop=0.0,
        embd_pdrop=0.0,
        attention_dropout=0.0,
        hidden_act="silu",
        max_position_embeddings=4096,
        original_max_position_embeddings=4096,
        initializer_range=0.02,
        rms_norm_eps=1e-5,
        use_cache=True,
        tie_word_embeddings=False,
        rope_theta=10000.0,
        rope_scaling=None,
        bos_token_id=1,
        eos_token_id=32000,
        pad_token_id=32000,
        sliding_window=None,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # GQA default: unspecified key/value heads collapse to plain MHA
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        self.num_key_value_heads = num_key_value_heads
        self.resid_pdrop = resid_pdrop
        self.embd_pdrop = embd_pdrop
        self.attention_dropout = attention_dropout
        self.hidden_act = hidden_act
        self.max_position_embeddings = max_position_embeddings
        self.original_max_position_embeddings = original_max_position_embeddings
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        # Legacy-type rewrite must run before validation, which only
        # accepts "longrope"
        self._rope_scaling_adjustment()
        self._rope_scaling_validation()
        self.sliding_window = sliding_window

        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            pad_token_id=pad_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )

    def _rope_scaling_adjustment(self):
        """
        Adjust the `type` of the `rope_scaling` configuration for backward compatibility.
        """
        if self.rope_scaling is None:
            return

        rope_scaling_type = self.rope_scaling.get("type", None)

        # For backward compatibility if previous version used "su" or "yarn"
        if rope_scaling_type is not None and rope_scaling_type in ["su", "yarn"]:
            self.rope_scaling["type"] = "longrope"

    def _rope_scaling_validation(self):
        """
        Validate the `rope_scaling` configuration.

        Raises ValueError unless rope_scaling is None or a 3-key dict with
        type == "longrope" and numeric factor lists of length
        hidden_size // num_attention_heads // 2 (i.e. head_dim / 2).
        """
        if self.rope_scaling is None:
            return

        if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 3:
            raise ValueError(
                "`rope_scaling` must be a dictionary with three fields, `type`, `short_factor` and `long_factor`, "
                f"got {self.rope_scaling}"
            )
        rope_scaling_type = self.rope_scaling.get("type", None)
        rope_scaling_short_factor = self.rope_scaling.get("short_factor", None)
        rope_scaling_long_factor = self.rope_scaling.get("long_factor", None)
        if rope_scaling_type is None or rope_scaling_type not in ["longrope"]:
            raise ValueError(f"`rope_scaling`'s type field must be one of ['longrope'], got {rope_scaling_type}")
        if not (
            isinstance(rope_scaling_short_factor, list)
            and all(isinstance(x, (int, float)) for x in rope_scaling_short_factor)
        ):
            raise ValueError(
                f"`rope_scaling`'s short_factor field must be a list of numbers, got {rope_scaling_short_factor}"
            )
        if not len(rope_scaling_short_factor) == self.hidden_size // self.num_attention_heads // 2:
            raise ValueError(
                f"`rope_scaling`'s short_factor field must have length {self.hidden_size // self.num_attention_heads // 2}, got {len(rope_scaling_short_factor)}"
            )
        if not (
            isinstance(rope_scaling_long_factor, list)
            and all(isinstance(x, (int, float)) for x in rope_scaling_long_factor)
        ):
            raise ValueError(
                f"`rope_scaling`'s long_factor field must be a list of numbers, got {rope_scaling_long_factor}"
            )
        if not len(rope_scaling_long_factor) == self.hidden_size // self.num_attention_heads // 2:
            raise ValueError(
                f"`rope_scaling`'s long_factor field must have length {self.hidden_size // self.num_attention_heads // 2}, got {len(rope_scaling_long_factor)}"
            )
+ )
favicon.ico ADDED
gradio_analytics.py ADDED
@@ -0,0 +1,491 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gradio_analytics.py
2
+ import gradio as gr
3
+ import logging
4
+ import json
5
+ import sqlite3
6
+ import os
7
+ from datetime import datetime
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
# Import the real analytics helpers from the main app module; if app.py (or
# one of its heavy dependencies) cannot be imported, fall back to no-op mocks
# so this page can still be imported and rendered standalone.
try:
    from app import (
        get_trackio_database_path,
        get_project_statistics_with_nulls,
        get_recent_interactions_with_nulls,
        create_dashboard_html_with_nulls,
        calculate_response_quality,
        refresh_analytics_data_persistent as refresh_analytics_data,
        export_metrics_json_persistent as export_metrics_json,
        export_metrics_csv_persistent as export_metrics_csv,
        load_analytics_state,
        get_global_state_debug_info,
        sync_trackio_with_global_state,
        global_state_manager,
        evaluate_educational_quality_with_tracking,
    )
except ImportError:
    # Mock implementations mirroring the real helpers' signatures and return
    # shapes, so the UI wiring below works unchanged in standalone mode.
    def get_trackio_database_path(project_name):
        # No database available in mock mode.
        return None

    def get_project_statistics_with_nulls(cursor, project_name):
        # None values signal "no data" to the dashboard renderers.
        return {
            "total_conversations": None,
            "avg_session_length": None,
            "success_rate": None
        }

    def get_recent_interactions_with_nulls(cursor, project_name, limit=10):
        return []

    def create_dashboard_html_with_nulls(project_name, project_stats):
        return f"<div>Mock dashboard for {project_name}</div>"

    def calculate_response_quality(response):
        # Fixed mid-scale placeholder score.
        return 3.0

    def refresh_analytics_data():
        # Matches the (stats, interactions, html) triple of the real helper.
        return {}, [], "<div>Mock analytics</div>"

    def export_metrics_json():
        gr.Info("Mock JSON export")

    def export_metrics_csv():
        gr.Info("Mock CSV export")

    def load_analytics_state():
        return {}, [], "<div>Mock analytics state</div>"

    def get_global_state_debug_info():
        return {"status": "mock"}

    def sync_trackio_with_global_state():
        pass

    def evaluate_educational_quality_with_tracking(*args, **kwargs):
        return {"educational_score": 0.5}

    class MockStateManager:
        """Stand-in for app.py's persistent global state manager."""
        def get_cache_status(self):
            return {"status": "mock"}
        def get_evaluation_summary(self, include_history=False):
            return {"aggregate_metrics": {}, "total_evaluations": {}}
        def clear_all_states(self):
            pass
        def _backup_to_hf_dataset(self):
            pass

    global_state_manager = MockStateManager()
79
+
80
def load_custom_css():
    """Return the contents of styles.css, or an empty string if it cannot be read."""
    log = logging.getLogger(__name__)
    try:
        with open("styles.css", "r", encoding="utf-8") as css_fh:
            css_text = css_fh.read()
    except FileNotFoundError:
        log.warning("styles.css file not found for analytics page")
        return ""
    except Exception as exc:
        log.warning(f"Error reading styles.css: {exc}")
        return ""
    log.info("CSS loaded successfully for analytics page")
    return css_text
92
+
93
def launch_external_trackio():
    """Launch the external trackio dashboard for the 'Mimir' project via its CLI.

    Blocks until the CLI returns; reports success/failure through Gradio toasts.
    """
    try:
        import subprocess
        result = subprocess.run(
            ["trackio", "show", "--project", "Mimir"],
            capture_output=False,  # let output flow straight to the console
            text=True  # NOTE(review): no effect while capture_output=False — confirm intent
        )

        if result.returncode == 0:
            gr.Info("Trackio dashboard launched in browser")
        else:
            gr.Warning("Could not launch trackio dashboard")

    except Exception as e:
        # Covers FileNotFoundError when the `trackio` binary is missing.
        logger.error(f"Failed to launch trackio: {e}")
        gr.Warning(f"Failed to launch trackio dashboard: {str(e)}")
110
+
111
def show_cache_status():
    """Build a markdown summary of the global-state cache for the Status panel.

    Returns the markdown string on success, or an error description on failure
    (either way the return value is rendered into the status Markdown widget).
    """
    try:
        debug_info = get_global_state_debug_info()
        # Missing key degrades gracefully to an all-defaults report.
        cache_status = debug_info.get("cache_status", {})

        status_text = f"""
**Global State Cache Status:**
- Session ID: {cache_status.get('session_id', 'Unknown')}
- Analytics Cached: {'Yes' if cache_status.get('analytics_cached') else 'No'}
- Conversation Cached: {'Yes' if cache_status.get('conversation_cached') else 'No'}
- Analytics Last Refresh: {cache_status.get('analytics_last_refresh', 'Never')}
- Total Analytics Sessions: {cache_status.get('total_analytics_sessions', 0)}
- Total Conversation Sessions: {cache_status.get('total_conversation_sessions', 0)}

**Analytics Data Status:**
- Has Analytics Data: {'Yes' if cache_status.get('analytics_has_data') else 'No'}
- Conversation Length: {cache_status.get('conversation_length', 0)} messages
- Chat History Length: {cache_status.get('chat_history_length', 0)} messages

*Last Updated: {datetime.now().strftime('%H:%M:%S')}*
"""

        gr.Info("Cache status updated - check the Status panel")
        return status_text

    except Exception as e:
        error_text = f"Error getting cache status: {str(e)}"
        gr.Warning(error_text)
        return error_text
140
+
141
def manual_backup_to_hf():
    """Trigger an immediate backup of global state to the HF dataset.

    Returns a human-readable status string for the backup_status textbox.
    """
    try:
        # NOTE(review): reaches into a private method of the state manager —
        # consider exposing a public backup() API on the manager instead.
        global_state_manager._backup_to_hf_dataset()
        gr.Info("Manual backup to HF dataset completed successfully")
        return f"Backup completed at {datetime.now().strftime('%H:%M:%S')}"
    except Exception as e:
        gr.Warning(f"Backup failed: {str(e)}")
        return f"Backup failed: {str(e)}"
149
+
150
def get_persistence_status():
    """Collect persistence-layer details (SQLite + HF dataset) for the JSON panel.

    Returns a dict of status fields, or {"error": ...} if any lookup fails.
    """
    try:
        status_info = {
            "SQLite DB": "Active" if os.path.exists(global_state_manager._db_path) else "Not Found",
            "HF Dataset": global_state_manager.dataset_repo,
            # NOTE(review): raises AttributeError if _last_hf_backup is None
            # (e.g. before any backup has run); the broad except below turns
            # that into the {"error": ...} payload — confirm this is intended.
            "Last HF Backup": global_state_manager._last_hf_backup.strftime('%Y-%m-%d %H:%M:%S'),
            "DB Path": global_state_manager._db_path,
            "Backup Interval": f"{global_state_manager._hf_backup_interval}s"
        }
        return status_info
    except Exception as e:
        return {"error": str(e)}
162
+
163
def clear_all_global_states():
    """Clear all persisted global state and return placeholder UI values.

    Returns (stats_dict, interactions_list, dashboard_html) matching the
    outputs wired to [project_info, recent_metrics, trackio_iframe]. On
    failure, falls back to re-loading the current analytics state.
    """
    try:
        global_state_manager.clear_all_states()
        gr.Info("All global states cleared successfully")

        # Null metrics plus an explicit "Cleared" marker for the JSON panel.
        empty_stats = {
            "total_conversations": None,
            "avg_session_length": None,
            "success_rate": None,
            "model_type": "Cleared",
            "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        empty_html = """
        <div style="text-align: center; padding: 40px; border: 2px dashed #ccc; border-radius: 8px; background: #f8f9fa;">
            <h3>States Cleared</h3>
            <p>All global states have been cleared.</p>
            <p>Click "Refresh Data" to reload analytics.</p>
        </div>
        """

        return empty_stats, [], empty_html

    except Exception as e:
        gr.Warning(f"Failed to clear states: {str(e)}")
        # Same 3-tuple shape as the success path.
        return load_analytics_state()
189
+
190
def show_evaluation_metrics():
    """Fetch the evaluation summary and format it for the ML Performance tab.

    Returns (summary_dict, metrics_rows, recent_evaluation_rows); empty
    containers on any failure (e.g. the aggregate keys are missing — the
    direct [...] lookups below raise KeyError, which the except absorbs).
    """
    try:
        eval_summary = global_state_manager.get_evaluation_summary(include_history=True)

        metrics_data = [
            ["Educational Quality", f"{eval_summary['aggregate_metrics']['avg_educational_quality']:.3f}"],
            ["User Satisfaction", f"{eval_summary['aggregate_metrics']['user_satisfaction_rate']:.3f}"]
        ]

        # Show at most the five most recent scored responses.
        recent_evaluations = []
        if 'history' in eval_summary:
            for eval_item in eval_summary['history']['recent_educational_scores'][-5:]:
                recent_evaluations.append([
                    eval_item['timestamp'][:16],  # trim to YYYY-MM-DDTHH:MM
                    f"{eval_item['educational_score']:.3f}",
                    f"{eval_item['semantic_quality']:.3f}",
                    f"{eval_item['response_time']:.3f}s"
                ])

        return eval_summary, metrics_data, recent_evaluations

    except Exception as e:
        logger.error(f"Error getting evaluation metrics: {e}")
        return {}, [], []
214
+
215
def sync_and_refresh_all():
    """Sync trackio with global state, then refresh analytics and evaluation views.

    Returns a 6-tuple matching [project_info, recent_metrics, trackio_iframe,
    eval_summary_display, eval_metrics_table, recent_evaluations_table].
    """
    try:
        sync_trackio_with_global_state()
        project_stats, recent_interactions, dashboard_html = refresh_analytics_data()
        eval_summary, metrics_data, recent_evaluations = show_evaluation_metrics()

        gr.Info("All data synced and refreshed successfully")

        return project_stats, recent_interactions, dashboard_html, eval_summary, metrics_data, recent_evaluations

    except Exception as e:
        logger.error(f"Sync and refresh failed: {e}")
        gr.Warning(f"Sync failed: {str(e)}")
        # load_analytics_state() yields a 3-tuple; concatenating ({}, [], [])
        # pads it back to the 6 outputs this handler must produce.
        return load_analytics_state() + ({}, [], [])
229
+
230
# Analytics dashboard UI: three tabs (Traditional Analytics, ML Performance,
# System Status) plus the event wiring that connects the buttons above to the
# handler functions defined earlier in this module.
with gr.Blocks() as demo:
    # Inject project CSS inline (Spaces pages cannot always serve static files).
    custom_css = load_custom_css()
    if custom_css:
        gr.HTML(f'<style>{custom_css}</style>')

    gr.HTML('<div class="analytics-title"><h2>Mimir Analytics Dashboard</h2></div>')

    gr.Markdown("Monitor educational AI performance and effectiveness metrics with persistent state management.")

    with gr.Tabs():
        # --- Tab 1: conversation-level analytics and exports ---
        with gr.TabItem("Traditional Analytics"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("## Controls")
                    refresh_btn = gr.Button("Refresh Data", variant="primary")
                    sync_all_btn = gr.Button("Sync & Refresh All", variant="primary")

                    with gr.Row():
                        export_json_btn = gr.Button("Export JSON", variant="secondary", size="sm")
                        export_csv_btn = gr.Button("Export CSV", variant="secondary", size="sm")

                    launch_trackio_btn = gr.Button("Launch Trackio Dashboard", variant="secondary")

                    gr.Markdown("### State Management")
                    with gr.Row():
                        cache_status_btn = gr.Button("Cache Status", size="sm")
                        clear_states_btn = gr.Button("Clear All States", size="sm", variant="stop")

                    with gr.Group():
                        gr.Markdown("### Project Information")
                        # None values render as empty until the first refresh.
                        project_info = gr.JSON(
                            value={
                                "total_conversations": None,
                                "avg_session_length": None,
                                "success_rate": None,
                                "model_type": None
                            },
                            label="Project Stats"
                        )

                    with gr.Group():
                        gr.Markdown("### System Status")
                        status_panel = gr.Markdown(
                            "Click 'Cache Status' to view global state information.",
                            label="Status Information"
                        )

                with gr.Column(scale=2):
                    gr.Markdown("## Key Metrics Dashboard")
                    # Placeholder until refresh_analytics_data supplies HTML.
                    trackio_iframe = gr.HTML(
                        value="""
                        <div style="text-align: center; padding: 40px; border: 2px dashed #ccc; border-radius: 8px; background: #f8f9fa;">
                            <h3>Trackio Dashboard</h3>
                            <p>Analytics data will appear here after conversations.</p>
                            <p>Data is automatically cached and persists across page navigation.</p>
                            <p>To launch trackio dashboard separately, run:</p>
                            <code style="background: #e9ecef; padding: 4px 8px; border-radius: 4px;">trackio show --project "Mimir"</code>
                        </div>
                        """,
                        label="Dashboard"
                    )

            with gr.Row():
                with gr.Column():
                    gr.Markdown("## Recent Interactions")
                    gr.Markdown("*Data persists when switching between Chatbot and Analytics pages*")
                    recent_metrics = gr.Dataframe(
                        headers=["Timestamp", "Response Time", "Prompt Mode", "Tools Used", "Quality Score", "Adapter"],
                        datatype=["str", "number", "str", "bool", "number", "str"],
                        row_count=10,
                        col_count=6,
                        interactive=False,
                        label="Latest Sessions",
                        value=[],
                        show_label=True
                    )

        # --- Tab 2: model evaluation metrics ---
        with gr.TabItem("ML Performance"):
            gr.Markdown("## Agent-Based Performance & Global State Metrics")

            with gr.Row():
                with gr.Column(scale=1):
                    eval_metrics_btn = gr.Button("Get Evaluation Metrics", variant="primary")

                    with gr.Group():
                        gr.Markdown("### Model Cache Status")
                        cache_status_display = gr.JSON(
                            value={},
                            label="Cache Information"
                        )

                with gr.Column(scale=2):
                    gr.Markdown("### Aggregate Performance Metrics")
                    eval_metrics_table = gr.Dataframe(
                        headers=["Metric", "Score"],
                        datatype=["str", "str"],
                        label="Model Performance",
                        value=[]
                    )

                    eval_summary_display = gr.JSON(
                        value={},
                        label="Detailed Evaluation Summary"
                    )

            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Recent Quality Evaluations")
                    recent_evaluations_table = gr.Dataframe(
                        headers=["Timestamp", "Educational Score", "Semantic Quality", "Response Time"],
                        datatype=["str", "str", "str", "str"],
                        label="Recent Evaluations",
                        value=[]
                    )

        # --- Tab 3: persistence / diagnostics controls ---
        with gr.TabItem("System Status"):
            gr.Markdown("## Global State Manager & System Diagnostics")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Global State Cache")
                    cache_details = gr.Markdown("Click 'Show Cache Status' to view detailed information.")

                    show_cache_btn = gr.Button("Show Cache Status", variant="primary")
                    refresh_cache_btn = gr.Button("Refresh Cache Info", variant="secondary")

                    gr.Markdown("### Persistence Controls")
                    backup_btn = gr.Button("Manual Backup to HF Dataset", variant="primary")
                    backup_status = gr.Textbox(label="Backup Status", value="No recent backup", interactive=False)

                with gr.Column():
                    gr.Markdown("### System Actions")
                    sync_trackio_btn = gr.Button("Sync to Database", variant="secondary")
                    clear_all_btn = gr.Button("Clear All Global States", variant="stop")

                    gr.Markdown("### Persistence Status")
                    persistence_info = gr.JSON(
                        value={},
                        label="Persistence Information"
                    )

                    gr.Markdown("### Performance Monitor")
                    perf_info = gr.JSON(
                        value={},
                        label="Performance Information"
                    )

    # --- Page-load hooks: restore cached state as soon as the page renders ---
    demo.load(
        load_analytics_state,
        inputs=None,
        outputs=[project_info, recent_metrics, trackio_iframe],
        show_progress="hidden"
    )

    demo.load(
        fn=lambda: global_state_manager.get_cache_status(),
        inputs=None,
        outputs=[cache_status_display],
        show_progress="hidden"
    )

    demo.load(
        fn=get_persistence_status,
        inputs=None,
        outputs=[persistence_info],
        show_progress="hidden"
    )

    # --- Button wiring: one handler per control, declared near its tab above ---
    refresh_btn.click(
        fn=refresh_analytics_data,
        inputs=[],
        outputs=[project_info, recent_metrics, trackio_iframe],
        show_progress="full"
    )

    sync_all_btn.click(
        fn=sync_and_refresh_all,
        inputs=[],
        outputs=[project_info, recent_metrics, trackio_iframe, eval_summary_display, eval_metrics_table, recent_evaluations_table],
        show_progress="full"
    )

    export_json_btn.click(
        fn=export_metrics_json,
        inputs=[],
        outputs=[],
        show_progress="full"
    )

    export_csv_btn.click(
        fn=export_metrics_csv,
        inputs=[],
        outputs=[],
        show_progress="full"
    )

    launch_trackio_btn.click(
        fn=launch_external_trackio,
        inputs=[],
        outputs=[],
        show_progress="full"
    )

    cache_status_btn.click(
        fn=show_cache_status,
        inputs=[],
        outputs=[status_panel],
        show_progress="full"
    )

    clear_states_btn.click(
        fn=clear_all_global_states,
        inputs=[],
        outputs=[project_info, recent_metrics, trackio_iframe],
        show_progress="full"
    )

    eval_metrics_btn.click(
        fn=show_evaluation_metrics,
        inputs=[],
        outputs=[eval_summary_display, eval_metrics_table, recent_evaluations_table],
        show_progress="full"
    )

    show_cache_btn.click(
        fn=show_cache_status,
        inputs=[],
        outputs=[cache_details],
        show_progress="full"
    )

    # NOTE(review): this writes cache status into the "Performance Information"
    # panel (perf_info) — confirm that target is intentional.
    refresh_cache_btn.click(
        fn=lambda: global_state_manager.get_cache_status(),
        inputs=[],
        outputs=[perf_info],
        show_progress="full"
    )

    backup_btn.click(
        fn=manual_backup_to_hf,
        inputs=[],
        outputs=[backup_status],
        show_progress="full"
    )

    sync_trackio_btn.click(
        fn=sync_trackio_with_global_state,
        inputs=[],
        outputs=[],
        show_progress="full"
    )

    clear_all_btn.click(
        fn=clear_all_global_states,
        inputs=[],
        outputs=[project_info, recent_metrics, trackio_iframe],
        show_progress="full"
    )
488
+
489
if __name__ == "__main__":
    # Standalone entry point: the analytics page runs on port 7861 so it can
    # coexist with the chatbot UI (port 7860) during local development.
    logger.info("Running analytics dashboard standalone with global state management")
    demo.launch(server_name="0.0.0.0", server_port=7861)
gradio_chatbot.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gradio_chatbot.py
2
+ import gradio as gr
3
+ import logging
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+ from app import (
8
+ add_user_message,
9
+ add_loading_animation,
10
+ generate_response,
11
+ reset_conversation,
12
+ load_conversation_state,
13
+ remove_loading_animations,
14
+ global_state_manager, # Import the instance from app.py
15
+ )
16
+
17
+
18
def load_custom_css():
    """Return the contents of styles.css, or an empty string if it cannot be read."""
    log = logging.getLogger(__name__)
    try:
        with open("styles.css", "r", encoding="utf-8") as css_fh:
            css_text = css_fh.read()
    except FileNotFoundError:
        log.warning("styles.css file not found, using default styling")
        return ""
    except Exception as exc:
        log.warning(f"Error reading styles.css: {exc}")
        return ""
    log.info(f"CSS loaded successfully, length: {len(css_text)} characters")
    return css_text
30
+
31
+
32
def restore_state_on_page_access():
    """Restore conversation state when page loads.

    Pulls the cached chat history and conversation state from the global
    state manager so navigation between pages does not lose the session.
    Returns (chat_history, conversation_state); empty lists on failure.
    """
    try:
        current_state = global_state_manager.get_conversation_state()
        # Missing keys degrade to empty lists rather than raising.
        chat_history = current_state.get('chat_history', [])
        conversation_state_data = current_state.get('conversation_state', [])
        logger.info(f"Restored state: {len(chat_history)} messages in chat history, {len(conversation_state_data)} in conversation state")
        return chat_history, conversation_state_data
    except Exception as e:
        logger.error(f"Failed to restore state: {e}")
        return [], []
43
+
44
+
45
# Chatbot UI: a single chat column with send/clear controls. Both the Enter
# key (msg.submit) and the Send button run the same four-step pipeline:
# append the user message -> show a loading placeholder -> strip the
# placeholder -> stream the model response.
with gr.Blocks() as demo:
    # Inject project CSS inline (Spaces pages cannot always serve static files).
    custom_css = load_custom_css()
    if custom_css:
        gr.HTML(f'<style>{custom_css}</style>')

    # Seed the UI with any previously persisted conversation. Was a bare
    # `except:` — narrowed so SystemExit/KeyboardInterrupt are not swallowed,
    # and the failure is logged instead of silently discarded.
    try:
        initial_chat_history, initial_conversation_state = load_conversation_state()
    except Exception as e:
        logger.warning(f"Could not load conversation state, starting empty: {e}")
        initial_chat_history, initial_conversation_state = [], []

    conversation_state = gr.State(initial_conversation_state)

    gr.HTML('<div class="title-header"><h1>Mimir</h1></div>')

    with gr.Row():
        chatbot = gr.Chatbot(
            type="messages",
            show_copy_button=True,
            show_share_button=False,
            layout="bubble",
            autoscroll=True,
            avatar_images=None,
            elem_id="main-chatbot",
            scale=1,
            height="65vh",
            value=initial_chat_history,
            # Render $$...$$ as display math and $...$ inline.
            latex_delimiters=[
                {"left": "$$", "right": "$$", "display": True},
                {"left": "$", "right": "$", "display": False},
            ]
        )

    with gr.Row(elem_classes=["input-controls"]):
        msg = gr.Textbox(
            placeholder="Ask me about math, research, study strategies, or any educational topic...",
            show_label=False,
            lines=6,
            max_lines=8,
            elem_classes=["input-textbox"],
            container=False,
            scale=4
        )
        with gr.Column(elem_classes=["button-column"], scale=1):
            send = gr.Button("Send", elem_classes=["send-button"], size="sm")
            clear = gr.Button("Clear", elem_classes=["clear-button"], size="sm")

    # Re-hydrate state on every page load (queue=False: run immediately).
    demo.load(
        fn=restore_state_on_page_access,
        outputs=[chatbot, conversation_state],
        queue=False
    )

    # Enter-key pipeline.
    msg.submit(
        add_user_message,
        inputs=[msg, chatbot, conversation_state],
        outputs=[msg, chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        add_loading_animation,
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        # Drop the loading placeholder before the real response is appended.
        lambda chat_history, conv_state: (remove_loading_animations(chat_history), conv_state),
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        generate_response,
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    )

    # Send-button pipeline (identical to msg.submit).
    send.click(
        add_user_message,
        inputs=[msg, chatbot, conversation_state],
        outputs=[msg, chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        add_loading_animation,
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        lambda chat_history, conv_state: (remove_loading_animations(chat_history), conv_state),
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        generate_response,
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    )

    clear.click(
        reset_conversation,
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    )
146
+
147
+
148
if __name__ == "__main__":
    # Standalone entry point: the chatbot runs on port 7860 (the analytics
    # page uses 7861) so both can run side by side during development.
    logger.info("Running chatbot interface standalone")
    demo.launch(server_name="0.0.0.0", server_port=7860)
graph_tool.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#graph_tool.py

import base64
import io
import json
from typing import Dict, List, Literal, Optional, Tuple

import matplotlib.pyplot as plt
from langchain_core.tools import tool
10
+
11
# Use the @tool decorator and specify the "content_and_artifact" response format.
@tool(response_format="content_and_artifact")
def generate_plot(
    data: Dict[str, float],
    plot_type: Literal["bar", "line", "pie"],
    title: str = "Generated Plot",
    labels: Optional[List[str]] = None,  # was `List[str] = None` — mistyped optional
    x_label: str = "",
    y_label: str = ""
) -> Tuple[str, Dict]:
    """
    Generates a plot (bar, line, or pie) from a dictionary of data and returns it
    as a base64 encoded PNG image artifact.

    Args:
        data (Dict[str, float]): A dictionary where keys are labels and values are the numeric data to plot.
        plot_type (Literal["bar", "line", "pie"]): The type of plot to generate.
        title (str): The title for the plot.
        labels (Optional[List[str]]): Optional list of labels to use for the x-axis or pie slices. If not provided, data keys are used.
        x_label (str): The label for the x-axis (for bar and line charts).
        y_label (str): The label for the y-axis (for bar and line charts).

    Returns:
        A tuple containing:
        - A string message confirming the plot was generated (or describing the error).
        - A dictionary artifact with the base64 encoded image string and its format,
          or an {"error": ...} payload on failure.
    """
    # --- Input Validation ---
    if not isinstance(data, dict) or not data:
        content = "Error: Data must be a non-empty dictionary."
        artifact = {"error": content}
        return content, artifact

    try:
        y_data = [float(val) for val in data.values()]
    except (ValueError, TypeError):
        content = "Error: All data values must be numeric."
        artifact = {"error": content}
        return content, artifact

    # Validate plot_type BEFORE allocating a figure. The original checked it
    # after plt.subplots() and returned without closing the figure, leaking an
    # open matplotlib figure on every invalid call.
    if plot_type not in ("bar", "line", "pie"):
        content = f"Error: Invalid plot_type '{plot_type}'. Choose 'bar', 'line', or 'pie'."
        artifact = {"error": content}
        return content, artifact

    x_data = list(data.keys())

    # --- Plot Generation ---
    try:
        fig, ax = plt.subplots(figsize=(10, 6))

        if plot_type == 'bar':
            # Use provided labels if they match the data length, otherwise use data keys
            bar_labels = labels if labels and len(labels) == len(x_data) else x_data
            bars = ax.bar(bar_labels, y_data)
            ax.set_xlabel(x_label)
            ax.set_ylabel(y_label)
            ax.set_ylim(bottom=0)
            # Annotate each bar with its value.
            for bar, value in zip(bars, y_data):
                height = bar.get_height()
                ax.text(bar.get_x() + bar.get_width()/2., height, f'{value}', ha='center', va='bottom')

        elif plot_type == 'line':
            line_labels = labels if labels and len(labels) == len(x_data) else x_data
            ax.plot(line_labels, y_data, marker='o')
            ax.set_xlabel(x_label)
            ax.set_ylabel(y_label)
            ax.set_ylim(bottom=0)
            ax.grid(True, alpha=0.3)

        else:  # plot_type == 'pie' (validated above)
            pie_labels = labels if labels and len(labels) == len(y_data) else list(data.keys())
            ax.pie(y_data, labels=pie_labels, autopct='%1.1f%%', startangle=90)
            ax.axis('equal')  # keep the pie circular

        ax.set_title(title, fontsize=14, fontweight='bold')
        plt.tight_layout()

        # --- In-Memory Image Conversion ---
        buf = io.BytesIO()
        plt.savefig(buf, format='png', dpi=150)
        plt.close(fig)  # always release the figure once rendered
        buf.seek(0)

        img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')

        # --- Return Content and Artifact ---
        content = f"Successfully generated a {plot_type} plot titled '{title}'."
        artifact = {
            "base64_image": img_base64,
            "format": "png"
        }
        return content, artifact

    except Exception as e:
        # Close any figure left open by a mid-render failure.
        plt.close('all')
        content = f"An unexpected error occurred while generating the plot: {str(e)}"
        artifact = {"error": str(e)}
        return content, artifact
loading_animation.gif ADDED
mimir_classifier.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b8d5d6247cdf48c288bf3690fe1bc05df7bbff550f968ebfd093d5738a1a9b7
3
+ size 7037
performance_metrics.log ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:13:00
2
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:13:00
3
+ Init and LangGraph workflow setup time: 14.1047 seconds. Timestamp: 2025-09-16 03:12:47 | Logged: 2025-09-16 03:13:01
4
+ Tool decision time (excluded): 0.0002 seconds. Timestamp: 2025-09-16 03:13:02 | Logged: 2025-09-16 03:13:02
5
+ Tool decision workflow time: 0.0005 seconds. Decision: False. Timestamp: 2025-09-16 03:13:02 | Logged: 2025-09-16 03:13:02
6
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:13:02 | Logged: 2025-09-16 03:13:02
7
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:13:02 | Logged: 2025-09-16 03:13:02
8
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0007s | Input: 'Hello...' | Timestamp: 2025-09-16 03:13:02 | Logged: 2025-09-16 03:13:02
9
+ Call model time (error): 1.5796 seconds. Timestamp: 2025-09-16 03:13:02 | Logged: 2025-09-16 03:13:03
10
+ Total query processing time: 2.0022 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:13:01 | Logged: 2025-09-16 03:13:03
11
+ Agent warmup completed in 2.00 seconds | Logged: 2025-09-16 03:13:03
12
+ Create interface time: 0.2044 seconds. Timestamp: 2025-09-16 03:13:03 | Logged: 2025-09-16 03:13:03
13
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:13:59
14
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:13:59
15
+ Init and LangGraph workflow setup time: 1.8577 seconds. Timestamp: 2025-09-16 03:13:57 | Logged: 2025-09-16 03:13:59
16
+ Tool decision time (excluded): 0.0001 seconds. Timestamp: 2025-09-16 03:13:59 | Logged: 2025-09-16 03:13:59
17
+ Tool decision workflow time: 0.0003 seconds. Decision: False. Timestamp: 2025-09-16 03:13:59 | Logged: 2025-09-16 03:13:59
18
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:13:59 | Logged: 2025-09-16 03:13:59
19
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:13:59 | Logged: 2025-09-16 03:13:59
20
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0007s | Input: 'Hello...' | Timestamp: 2025-09-16 03:13:59 | Logged: 2025-09-16 03:13:59
21
+ Call model time (error): 0.6432 seconds. Timestamp: 2025-09-16 03:13:59 | Logged: 2025-09-16 03:14:00
22
+ Total query processing time: 0.6579 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:13:59 | Logged: 2025-09-16 03:14:00
23
+ Agent warmup completed in 0.66 seconds | Logged: 2025-09-16 03:14:00
24
+ Create interface time: 0.5570 seconds. Timestamp: 2025-09-16 03:14:00 | Logged: 2025-09-16 03:14:00
25
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:14:22
26
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:14:22
27
+ Init and LangGraph workflow setup time: 1.8653 seconds. Timestamp: 2025-09-16 03:14:20 | Logged: 2025-09-16 03:14:22
28
+ Tool decision time (excluded): 0.0002 seconds. Timestamp: 2025-09-16 03:14:22 | Logged: 2025-09-16 03:14:22
29
+ Tool decision workflow time: 0.0004 seconds. Decision: False. Timestamp: 2025-09-16 03:14:22 | Logged: 2025-09-16 03:14:22
30
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:14:22 | Logged: 2025-09-16 03:14:22
31
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:14:22 | Logged: 2025-09-16 03:14:22
32
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0008s | Input: 'Hello...' | Timestamp: 2025-09-16 03:14:22 | Logged: 2025-09-16 03:14:22
33
+ Call model time (error): 0.8109 seconds. Timestamp: 2025-09-16 03:14:22 | Logged: 2025-09-16 03:14:23
34
+ Total query processing time: 0.8284 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:14:22 | Logged: 2025-09-16 03:14:23
35
+ Agent warmup completed in 0.83 seconds | Logged: 2025-09-16 03:14:23
36
+ Create interface time: 0.5381 seconds. Timestamp: 2025-09-16 03:14:23 | Logged: 2025-09-16 03:14:23
37
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:14:43
38
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:14:43
39
+ Init and LangGraph workflow setup time: 1.7132 seconds. Timestamp: 2025-09-16 03:14:42 | Logged: 2025-09-16 03:14:43
40
+ Tool decision time (excluded): 0.0002 seconds. Timestamp: 2025-09-16 03:14:43 | Logged: 2025-09-16 03:14:43
41
+ Tool decision workflow time: 0.0004 seconds. Decision: False. Timestamp: 2025-09-16 03:14:43 | Logged: 2025-09-16 03:14:43
42
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:14:43 | Logged: 2025-09-16 03:14:43
43
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:14:43 | Logged: 2025-09-16 03:14:43
44
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0009s | Input: 'Hello...' | Timestamp: 2025-09-16 03:14:43 | Logged: 2025-09-16 03:14:43
45
+ Call model time (error): 0.6961 seconds. Timestamp: 2025-09-16 03:14:43 | Logged: 2025-09-16 03:14:44
46
+ Total query processing time: 0.7111 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:14:43 | Logged: 2025-09-16 03:14:44
47
+ Agent warmup completed in 0.71 seconds | Logged: 2025-09-16 03:14:44
48
+ Create interface time: 0.5078 seconds. Timestamp: 2025-09-16 03:14:44 | Logged: 2025-09-16 03:14:45
49
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:15:43
50
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:15:43
51
+ Init and LangGraph workflow setup time: 1.9391 seconds. Timestamp: 2025-09-16 03:15:41 | Logged: 2025-09-16 03:15:43
52
+ Tool decision time (excluded): 0.0001 seconds. Timestamp: 2025-09-16 03:15:43 | Logged: 2025-09-16 03:15:43
53
+ Tool decision workflow time: 0.0003 seconds. Decision: False. Timestamp: 2025-09-16 03:15:43 | Logged: 2025-09-16 03:15:43
54
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:15:43 | Logged: 2025-09-16 03:15:43
55
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:15:43 | Logged: 2025-09-16 03:15:43
56
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0008s | Input: 'Hello...' | Timestamp: 2025-09-16 03:15:43 | Logged: 2025-09-16 03:15:43
57
+ Call model time (error): 0.6238 seconds. Timestamp: 2025-09-16 03:15:43 | Logged: 2025-09-16 03:15:44
58
+ Total query processing time: 0.6380 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:15:43 | Logged: 2025-09-16 03:15:44
59
+ Agent warmup completed in 0.64 seconds | Logged: 2025-09-16 03:15:44
60
+ Create interface time: 0.5507 seconds. Timestamp: 2025-09-16 03:15:44 | Logged: 2025-09-16 03:15:44
61
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:17:18
62
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:17:18
63
+ Init and LangGraph workflow setup time: 1.9692 seconds. Timestamp: 2025-09-16 03:17:16 | Logged: 2025-09-16 03:17:18
64
+ Tool decision time (excluded): 0.0004 seconds. Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:18
65
+ Tool decision workflow time: 0.0012 seconds. Decision: False. Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:18
66
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:18
67
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:18
68
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0008s | Input: 'Hello...' | Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:18
69
+ Call model time (error): 0.6223 seconds. Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:18
70
+ Total query processing time: 0.6606 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:18
71
+ Agent warmup completed in 0.66 seconds | Logged: 2025-09-16 03:17:18
72
+ Create interface time: 0.6001 seconds. Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:19
73
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:20:45
74
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:20:45
75
+ Init and LangGraph workflow setup time: 2.2781 seconds. Timestamp: 2025-09-16 03:20:43 | Logged: 2025-09-16 03:20:45
76
+ Tool decision time (excluded): 0.0002 seconds. Timestamp: 2025-09-16 03:20:45 | Logged: 2025-09-16 03:20:45
77
+ Tool decision workflow time: 0.0004 seconds. Decision: False. Timestamp: 2025-09-16 03:20:45 | Logged: 2025-09-16 03:20:45
78
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:20:45 | Logged: 2025-09-16 03:20:45
79
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:20:45 | Logged: 2025-09-16 03:20:45
80
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0007s | Input: 'Hello...' | Timestamp: 2025-09-16 03:20:45 | Logged: 2025-09-16 03:20:45
81
+ Call model time (error): 0.9643 seconds. Timestamp: 2025-09-16 03:20:45 | Logged: 2025-09-16 03:20:46
82
+ Total query processing time: 0.9942 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:20:45 | Logged: 2025-09-16 03:20:46
83
+ Agent warmup completed in 0.99 seconds | Logged: 2025-09-16 03:20:46
84
+ Create interface time: 0.5993 seconds. Timestamp: 2025-09-16 03:20:46 | Logged: 2025-09-16 03:20:46
85
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:23:25
86
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:23:25
87
+ Init and LangGraph workflow setup time: 1.7249 seconds. Timestamp: 2025-09-16 03:23:23 | Logged: 2025-09-16 03:23:25
88
+ Tool decision time (excluded): 0.0002 seconds. Timestamp: 2025-09-16 03:23:25 | Logged: 2025-09-16 03:23:25
89
+ Tool decision workflow time: 0.0004 seconds. Decision: False. Timestamp: 2025-09-16 03:23:25 | Logged: 2025-09-16 03:23:25
90
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:23:25 | Logged: 2025-09-16 03:23:25
91
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:23:25 | Logged: 2025-09-16 03:23:25
92
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0011s | Input: 'Hello...' | Timestamp: 2025-09-16 03:23:25 | Logged: 2025-09-16 03:23:25
93
+ Call model time (error): 0.8647 seconds. Timestamp: 2025-09-16 03:23:25 | Logged: 2025-09-16 03:23:26
94
+ Total query processing time: 0.9050 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:23:25 | Logged: 2025-09-16 03:23:26
95
+ Agent warmup completed in 0.91 seconds | Logged: 2025-09-16 03:23:26
96
+ Create interface time: 0.5289 seconds. Timestamp: 2025-09-16 03:23:26 | Logged: 2025-09-16 03:23:26
97
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:40:35
98
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:40:35
99
+ Init and LangGraph workflow setup time: 2.0252 seconds. Timestamp: 2025-09-16 03:40:33 | Logged: 2025-09-16 03:40:35
100
+ Tool decision time (excluded): 0.0001 seconds. Timestamp: 2025-09-16 03:40:35 | Logged: 2025-09-16 03:40:35
101
+ Tool decision workflow time: 0.0003 seconds. Decision: False. Timestamp: 2025-09-16 03:40:35 | Logged: 2025-09-16 03:40:35
102
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:40:35 | Logged: 2025-09-16 03:40:35
103
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:40:35 | Logged: 2025-09-16 03:40:35
104
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0008s | Input: 'Hello...' | Timestamp: 2025-09-16 03:40:35 | Logged: 2025-09-16 03:40:35
105
+ Call model time (error): 3.0598 seconds. Timestamp: 2025-09-16 03:40:35 | Logged: 2025-09-16 03:40:38
106
+ Total query processing time: 3.0750 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:40:35 | Logged: 2025-09-16 03:40:38
107
+ Agent warmup completed in 3.08 seconds | Logged: 2025-09-16 03:40:38
108
+ Create interface time: 0.5787 seconds. Timestamp: 2025-09-16 03:40:38 | Logged: 2025-09-16 03:40:39
pre_download.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pre_download.py
2
+ """
3
+ Pre-download all models for Mimir to avoid cold start delays.
4
+ Downloads models to HF cache without loading them fully.
5
+ """
6
+ import os
7
+ from huggingface_hub import snapshot_download, hf_hub_download
8
+
9
# Hugging Face auth token for gated/private repos; None when the env var is
# unset (public repos still download fine without it).
HF_TOKEN = os.getenv("HF_TOKEN")

# All models used in Mimir, keyed by a short human-readable label.
# NOTE: dict insertion order is the download order main() iterates in.
MODELS = {
    "phi3_finetuned": "jdesiree/Mimir-Phi-3.5",
    "phi3_base": "microsoft/Phi-3-mini-4k-instruct",
    "mistral_reasoning": "yentinglin/Mistral-Small-24B-Instruct-2501-reasoning",
    "mistral_math_gguf": "brittlewis12/Mistral-Small-24B-Instruct-2501-reasoning-GGUF",
    "embeddings": "thenlper/gte-small",
}
19
+
20
def download_model(repo_id: str, model_name: str) -> bool:
    """Download one model into the local Hugging Face cache.

    Args:
        repo_id: Hugging Face repository id to fetch.
        model_name: Short human-readable label, used only in log output.

    Returns:
        True on success, False on failure. Errors are printed rather than
        raised so that one failed repo does not abort the whole pre-download
        run (main() iterates over all models regardless).
    """
    try:
        print(f"→ Downloading {model_name}: {repo_id}")

        # GGUF repos ship many quantization variants; fetch only the single
        # quantization file the app actually loads.
        if "GGUF" in repo_id:
            hf_hub_download(
                repo_id=repo_id,
                filename="mistral-small-24b-instruct-2501-reasoning-Q4_K_M.gguf",
                token=HF_TOKEN
            )
        else:
            # Standard snapshot download; skip alternative weight formats to
            # save bandwidth and disk.
            # NOTE(review): ignoring "*.safetensors" assumes every repo also
            # publishes *.bin weights — confirm, otherwise a safetensors-only
            # repo would be cached without any weights at all.
            snapshot_download(
                repo_id=repo_id,
                token=HF_TOKEN,
                ignore_patterns=["*.msgpack", "*.h5", "*.ot", "*.safetensors"]
            )

        print(f"✓ {model_name} downloaded")
        return True

    except Exception as e:
        # "✗" marker added for consistency with the "✓" success marker above.
        print(f"✗ {model_name} download failed: {e}")
        return False
44
+
45
def main():
    """Pre-fetch every model in MODELS, printing progress banners."""
    banner = "=" * 60

    print(banner)
    print("PRE-DOWNLOADING MIMIR MODELS")
    print(banner)

    # Download in declaration order; download_model handles its own errors.
    for label, repo in MODELS.items():
        download_model(repo, label)

    print("\n" + banner)
    print("✓ ALL MODELS DOWNLOADED")
    print(banner)


if __name__ == "__main__":
    main()
prompt_classifier.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mimir Prompt Classifier - ML-based decision engine for prompt segment selection
3
+ Receives pre-calculated features from app.py for clean separation of concerns
4
+ """
5
+
6
+ import pickle
7
+ import numpy as np
8
+ import pandas as pd
9
+ import os
10
+ import re
11
+ import logging
12
+ from typing import Dict, List, Optional, Any
13
+ from dataclasses import dataclass, field
14
+ from sklearn.tree import DecisionTreeClassifier
15
+ from sklearn.multioutput import MultiOutputClassifier
16
+ from sklearn.model_selection import train_test_split
17
+ from sklearn.metrics import classification_report, accuracy_score
18
+ from datasets import load_dataset
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
@dataclass
class ConversationInput:
    """Input data structure for the classifier - all values pre-calculated by app.py.

    The classifier performs no text analysis of its own; every flag below is
    computed upstream (regex helpers, the tool decision engine, and app.py's
    per-turn bookkeeping).
    NOTE: field order is load-bearing — _prepare_training_data constructs
    instances positionally, so do not reorder fields.
    """
    user_input: str  # Raw user prompt text (kept for debugging; not a model feature)
    conversation_length: int  # Total user prompts sent (tracked in app.py)
    is_first_turn: bool  # Tracked in app.py (starts True, becomes False after first response)
    input_character_count: int  # Length of user prompt
    is_short_input: bool  # True if user prompt ≤6 chars, False if >6 chars
    recent_discovery_count: int  # Count tracked in app.py
    contains_greeting: bool  # From regex logic
    contains_educational_keywords: bool  # From regex logic
    requires_visualization: bool  # Yes/No from tool decision engine
    topic_change_detected: bool  # From regex logic
36
@dataclass
class ClassificationResult:
    """Output data structure from the classifier.

    One boolean per prompt segment; True means the corresponding prompt
    section should be included when assembling the final system prompt.
    """
    use_discovery_mode: bool
    use_conversational: bool
    use_guiding_teaching: bool
    use_tool_enhancement: bool
    # Per-target confidence values. NOTE(review): predict() never populates
    # this — it always stays the empty default; confirm before relying on it.
    confidence_scores: Dict[str, float] = field(default_factory=dict)
    # Seconds spent deciding. NOTE(review): also never set by predict();
    # callers appear to time the call externally.
    decision_time: float = 0.0
45
+
46
class RegexPatterns:
    """Compiled regex patterns shared with app.py for feature extraction.

    Patterns are compiled once at class-definition time; the accessor
    methods below simply hand back the shared compiled objects.
    """

    # Matches when the ENTIRE input is a greeting (anchored ^...$),
    # optionally followed by '!' or '.'.
    _GREETING = re.compile(
        r'^(hello|hi|hey|good\s+(morning|afternoon|evening)|greetings?|howdy|what\'s\s+up|sup)\s*[!.]*$',
        re.IGNORECASE,
    )

    # Matches education-related keywords anywhere in the input.
    _EDUCATIONAL = re.compile(
        r'\b(study|learn|homework|test|exam|practice|explain|teach|understand|help|math|science|'
        r'essay|research|assignment|question|problem|calculus|algebra|chemistry|physics|biology|history|ACT|LSAT|SAT)\b',
        re.IGNORECASE,
    )

    # Matches phrases that usually signal the user is switching topics.
    _TOPIC_CHANGE = re.compile(
        r'(actually|instead|now|let\'s|what\s+about|can\s+we|switch|move\s+on|'
        r'let\'s\s+do|let\'s\s+try|change\s+to|talk\s+about)',
        re.IGNORECASE,
    )

    @staticmethod
    def get_greeting_pattern():
        """Return the compiled standalone-greeting pattern."""
        return RegexPatterns._GREETING

    @staticmethod
    def get_educational_pattern():
        """Return the compiled educational-keyword pattern."""
        return RegexPatterns._EDUCATIONAL

    @staticmethod
    def get_topic_change_pattern():
        """Return the compiled topic-change pattern."""
        return RegexPatterns._TOPIC_CHANGE
71
+
72
class ConversationFeatureExtractor:
    """Minimal feature extractor that uses pre-calculated values."""

    def extract_features(self, conversation_input: "ConversationInput") -> Dict[str, float]:
        """Convert a ConversationInput into the classifier's numeric feature dict."""
        ci = conversation_input

        # Pre-cast the signals that are reused by derived features below.
        first_turn = float(ci.is_first_turn)
        greeting = float(ci.contains_greeting)
        educational = float(ci.contains_educational_keywords)
        topic_change = float(ci.topic_change_detected)
        early = float(ci.conversation_length <= 3)

        # NOTE: insertion order is load-bearing — training and prediction
        # vectors are built from dict order, so keys must stay in exactly
        # this sequence.
        return {
            # Base features: straight float casts of the pre-computed values.
            'conversation_length': float(ci.conversation_length),
            'is_first_turn': first_turn,
            'input_character_count': float(ci.input_character_count),
            'is_short_input': float(ci.is_short_input),
            'recent_discovery_count': float(ci.recent_discovery_count),
            'contains_greeting': greeting,
            'contains_educational_keywords': educational,
            'requires_visualization': float(ci.requires_visualization),
            'topic_change_detected': topic_change,
            # Derived features.
            'is_early_conversation': early,
            'has_sufficient_discovery': float(ci.recent_discovery_count >= 2),
            # Interaction features that help the decision tree.
            'greeting_and_first_turn': greeting * first_turn,
            'educational_and_early': educational * early,
            'topic_change_and_not_first': topic_change * (1.0 - first_turn),
        }
101
+
102
class MimirPromptClassifier:
    """Main classifier for prompt segment decision making.

    Wraps a MultiOutputClassifier of decision trees that maps the feature
    dict produced by ConversationFeatureExtractor onto four independent
    boolean prompt-segment decisions (see ``target_names``).
    """

    def __init__(self, model_path: Optional[str] = None):
        """Optionally load a previously pickled model from ``model_path``."""
        self.feature_extractor = ConversationFeatureExtractor()
        self.model = None
        self.feature_names = None
        # Output order here must match the column order produced by
        # _prepare_training_data and consumed by predict().
        self.target_names = ['use_discovery_mode', 'use_conversational', 'use_guiding_teaching', 'use_tool_enhancement']

        if model_path and os.path.exists(model_path):
            self.load_model(model_path)

    def train_from_huggingface(self, dataset_name: str = "jdesiree/Mimir_DecisionClassifier", test_size: float = 0.2):
        """Train the classifier using data from Hugging Face.

        Downloads ``dataset_name`` (requires network access), extracts
        features/targets, fits the multi-output decision tree, and logs
        per-target accuracy plus a classification report.

        Raises:
            Whatever ``load_dataset`` raises when the dataset is unavailable.
        """
        logger.info(f"Loading dataset: {dataset_name}")

        try:
            dataset = load_dataset(dataset_name)
            df = pd.DataFrame(dataset['train'])
        except Exception as e:
            logger.error(f"Failed to load dataset: {e}")
            raise

        # Prepare features and targets
        X, y = self._prepare_training_data(df)

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

        # Train model with parameters optimized for interpretability
        # (shallow entropy trees; fixed seed for reproducibility).
        self.model = MultiOutputClassifier(
            DecisionTreeClassifier(
                criterion='entropy',
                max_depth=8,
                min_samples_split=5,
                min_samples_leaf=2,
                random_state=42
            )
        )

        logger.info("Training classifier...")
        self.model.fit(X_train, y_train)

        # Evaluate
        y_pred = self.model.predict(X_test)

        # Calculate accuracy for each target
        accuracies = []
        for i, target in enumerate(self.target_names):
            accuracy = accuracy_score(y_test[:, i], y_pred[:, i])
            accuracies.append(accuracy)
            logger.info(f"{target} accuracy: {accuracy:.3f}")

        overall_accuracy = np.mean(accuracies)
        logger.info(f"Overall accuracy: {overall_accuracy:.3f}")

        # Print detailed classification report for each target
        for i, target in enumerate(self.target_names):
            print(f"\n{target}:")
            print(classification_report(y_test[:, i], y_pred[:, i]))

    def _prepare_training_data(self, df: pd.DataFrame) -> tuple[np.ndarray, np.ndarray]:
        """Convert DataFrame rows into (features, targets) numpy arrays.

        Also records ``self.feature_names`` (the feature-dict key order) so
        that get_feature_importance() can label columns later.
        """
        features_list = []
        targets_list = []

        for _, row in df.iterrows():
            # Create ConversationInput object from dataset row
            conv_input = ConversationInput(
                user_input=row['user_input'],
                conversation_length=int(row['conversation_length']),
                is_first_turn=bool(row['is_first_turn']),
                input_character_count=int(row['input_character_count']),
                is_short_input=bool(row['is_short_input']),
                recent_discovery_count=int(row['recent_discovery_count']),
                contains_greeting=bool(row['contains_greeting']),
                contains_educational_keywords=bool(row['contains_educational_keywords']),
                requires_visualization=bool(row['requires_visualization']),
                topic_change_detected=bool(row['topic_change_detected'])
            )

            # Extract features (column order = dict insertion order).
            features = self.feature_extractor.extract_features(conv_input)
            features_list.append(list(features.values()))

            # Extract targets (order must match self.target_names).
            targets = [
                bool(row['use_discovery_mode']),
                bool(row['use_conversational']),
                bool(row['use_guiding_teaching']),
                bool(row['use_tool_enhancement'])
            ]
            targets_list.append(targets)

        # Store feature names for later use (extracted from a dummy input so
        # they mirror the exact key order used above).
        if features_list:
            sample_features = self.feature_extractor.extract_features(
                ConversationInput("", 0, False, 0, False, 0, False, False, False, False)
            )
            self.feature_names = list(sample_features.keys())

        return np.array(features_list), np.array(targets_list, dtype=int)

    def predict(self, conversation_input: ConversationInput) -> ClassificationResult:
        """Make prediction for prompt segments.

        Raises:
            ValueError: if no model has been trained or loaded yet.

        NOTE(review): confidence_scores and decision_time on the result are
        left at their defaults — this method does not fill them in.
        """
        if self.model is None:
            raise ValueError("Model not trained or loaded. Call train_from_huggingface() first.")

        # Extract features (same key order as training — see extractor note).
        features = self.feature_extractor.extract_features(conversation_input)
        feature_vector = np.array([list(features.values())])

        # Make prediction (one row in, one row of 4 booleans out).
        predictions = self.model.predict(feature_vector)[0]

        return ClassificationResult(
            use_discovery_mode=bool(predictions[0]),
            use_conversational=bool(predictions[1]),
            use_guiding_teaching=bool(predictions[2]),
            use_tool_enhancement=bool(predictions[3])
        )

    def save_model(self, model_path: str):
        """Pickle the trained model plus feature/target name metadata."""
        model_data = {
            'model': self.model,
            'feature_names': self.feature_names,
            'target_names': self.target_names
        }
        with open(model_path, 'wb') as f:
            pickle.dump(model_data, f)
        logger.info(f"Model saved to {model_path}")

    def load_model(self, model_path: str):
        """Load a pre-trained model from a pickle file.

        SECURITY NOTE: pickle.load executes arbitrary code from the file —
        only load model files from trusted sources.
        """
        try:
            with open(model_path, 'rb') as f:
                model_data = pickle.load(f)

            self.model = model_data['model']
            self.feature_names = model_data['feature_names']
            # Older saves may lack target_names; keep the constructor default.
            self.target_names = model_data.get('target_names', self.target_names)
            logger.info(f"Model loaded from {model_path}")
        except Exception as e:
            logger.error(f"Failed to load model: {e}")
            raise

    def get_feature_importance(self) -> Dict[str, float]:
        """Return feature importances averaged over the four per-target trees,
        sorted descending. Empty dict when the model is not ready."""
        if self.model is None or self.feature_names is None:
            return {}

        # Average feature importance across all outputs
        importance_scores = {}
        for i, feature_name in enumerate(self.feature_names):
            avg_importance = np.mean([estimator.feature_importances_[i] for estimator in self.model.estimators_])
            importance_scores[feature_name] = avg_importance

        return dict(sorted(importance_scores.items(), key=lambda x: x[1], reverse=True))

    def debug_prediction(self, conversation_input: ConversationInput) -> Dict:
        """Get detailed prediction information for debugging: the raw input,
        extracted features, prediction, and feature importances."""
        features = self.feature_extractor.extract_features(conversation_input)
        result = self.predict(conversation_input)

        return {
            'input': conversation_input,
            'features': features,
            'prediction': result,
            'feature_importance': self.get_feature_importance()
        }
273
+
274
+
275
+ # Convenience function for easy integration
276
def create_classifier(dataset_name: str = "jdesiree/Mimir_DecisionClassifier",
                      model_path: Optional[str] = None) -> "MimirPromptClassifier":
    """Create a classifier, preferring a saved model over retraining.

    Loads the pickled model at ``model_path`` when it exists; otherwise
    trains from the Hugging Face dataset and, if a path was supplied,
    persists the freshly trained model there.
    """
    clf = MimirPromptClassifier()

    if model_path and os.path.exists(model_path):
        clf.load_model(model_path)
        return clf

    clf.train_from_huggingface(dataset_name)
    if model_path:
        clf.save_model(model_path)

    return clf
289
+
290
+
291
+ # Helper functions for app.py integration
292
def check_contains_greeting(user_input: str) -> bool:
    """Return True when the whole input is a standalone greeting."""
    normalized = user_input.lower().strip()
    return RegexPatterns.get_greeting_pattern().match(normalized) is not None
296
+
297
def check_contains_educational_keywords(user_input: str) -> bool:
    """Return True when the input mentions any educational keyword."""
    found = RegexPatterns.get_educational_pattern().search(user_input.lower())
    return found is not None
301
+
302
def check_topic_change_detected(user_input: str) -> bool:
    """Return True when the input contains a topic-change phrase."""
    found = RegexPatterns.get_topic_change_pattern().search(user_input.lower())
    return found is not None
306
+
307
def determine_is_short_input(user_input: str) -> bool:
    """Return True when the trimmed input is at most 6 characters long."""
    trimmed = user_input.strip()
    return not len(trimmed) > 6
310
+
311
+
312
if __name__ == "__main__":
    # Smoke-test the classifier end to end (trains from HF if no saved model).
    logging.basicConfig(level=logging.INFO)

    classifier = create_classifier()

    # One representative educational first-turn input.
    sample = ConversationInput(
        user_input="Can you help me with calculus?",
        conversation_length=1,
        is_first_turn=True,
        input_character_count=26,
        is_short_input=False,
        recent_discovery_count=0,
        contains_greeting=False,
        contains_educational_keywords=True,
        requires_visualization=False,
        topic_change_detected=False,
    )

    result = classifier.predict(sample)
    print(f"Prediction: {result}")

    # Dump the extracted features and learned importances for inspection.
    debug_info = classifier.debug_prediction(sample)
    print(f"Features: {debug_info['features']}")
    print(f"Feature importance: {classifier.get_feature_importance()}")
prompt_library.py ADDED
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # prompt_library.py
2
+ '''This file is to be the dedicated prompt library repository. Rather than keeping the full library in the app.py, the prompts will be centralized here for ease of editing.'''
3
+
4
+ '''
5
+ Prompts for Response Generation Input Templating
6
+ '''
7
# --- Always Included ---

# Core Identity (Universal Base)
# Prepended to every system prompt regardless of classifier decisions.
CORE_IDENTITY = """

You are Mimir, an expert multi-concept tutor designed to facilitate genuine learning and understanding. Your primary mission is to guide students through the learning process concisely, without excessive filler language.

## Communication Standards
- Use an approachable, friendly tone with professional language choice suitable for an educational environment.
- You may not, under any circumstances, use vulgar language, even if asked to do so.
- Write at a reading level that is accessible to young adults.
- Be supportive and encouraging without being condescending.
- You may use conversational language if the user input does so, but in moderation to reciprocate briefly before proceeding with the task.
- You present critiques as educational opportunities when needed.

## Follow-up Responses
- If you have conversation history, you must consider it in your new response.
- If the previous turn included practice questions and the current user input is the user answering the practice questions, you must grade the user's response for accuracy and give them feedback.
- If this is the first turn, address the user input as is appropriate per the full instructions.
"""
27
+
28
# --- Formatting ---

# General Formatting
# Fix: final bullet said "full black line" — a typo for "blank line" that
# would instruct the model to produce nonsense.
GENERAL_FORMATTING = '''

## General Formatting Guidelines
- Headings must be on their own line, not included inside a sentence or body text.
- Use ## and ### headings when needed. If only one heading level is needed, use ##.
- Separate paragraphs with a blank line.
- Organize content logically using headers and subheadings for complex answers.
- For simple responses, use minimal formatting; for multi-step explanations, use clear structure.
- Separate sections and paragraphs with a full blank line.
- Do not use emojis.
'''
42
+
43
# LaTeX Formatting
# Fix: made this a raw string — the body is full of backslash sequences
# (\sum, \$, \( ...) that are invalid escape sequences in a normal string
# literal and raise SyntaxWarning on modern Python. Content is unchanged.
LATEX_FORMATTING = r'''

You have access to LaTeX and markdown rendering.
- For inline math, use $ ... $, e.g. $\sum_{i=0}^n i^2$
- For centered display math, use $$ ... $$ on its own line.
- To show a literal dollar sign, use `\$` (e.g., \$5.00).
- To show literal parentheses in LaTeX, use `\(` and `\)` (e.g., \(a+b\)).
'''
52
+
53
# --- Discovery Prompts ---

# Vague Input Discovery
# NOTE: the constant name keeps the original "VAUGE" misspelling because
# other modules import it by this name; only the prompt text is corrected
# ("discover tactics" -> "discovery tactics").
VAUGE_INPUT = """

Use discovery tactics to understand the user's goals. Consider any context given in the user's input or chat history. Ask the user how you may help them, suggesting you can create practice questions to study for a test or delve into a topic."""

# User's Understanding
# Fixes: "discover tactics" -> "discovery tactics", "Do no include" ->
# "Do not include", "instructing or inform" -> "instructing or informing".
USER_UNDERSTANDING = '''

Use discovery tactics to understand the user's goals. Consider the topic(s) currently being discussed in the user input as well as the recent chat history. As an educator, consider how you may uncover the user's current knowledge of the topic, as well as how you may approach instructing or informing the user to facilitate learning. Do not include your thinking in the final response; instead condense your thinking into targeted questions that prompt the user to consider these concepts and present to you their objective.
'''
65
+
66
# --- Instructional Prompts ---

# Guiding/Teaching Mode
# Fix: the opening sentence had no main verb ("As a skilled educator,
# considering ..., aiming to guide ..."); rewritten as an instruction.
GUIDING_TEACHING = """

As a skilled educator, consider the conversation history and current user input, aiming to guide the user toward a deeper understanding of the topic being discussed. You adhere to academic integrity guidelines and tailor your approach based on subject. You must consider any conversation history.

## Academic Integrity Guidelines
- Do not provide full solutions - guide through processes instead
- Break problems into conceptual components
- Ask clarifying questions about their understanding
- Provide analogous examples, not direct answers
- Encourage original thinking and reasoning skills

## Subject-Specific Approaches
- **Math problems**: Explain concepts and guide through steps without computing final answers
- **Multiple-choice**: Discuss underlying concepts, not correct choices
- **Essays**: Focus on research strategies and organization techniques
- **Factual questions**: Provide educational context and encourage synthesis
"""
86
+
87
# Practice Question formatting, table integration, and tool output integration
# Fixes: "rather that be to given" -> "whether that be to give",
# "understanding on the image" -> "understanding of the image",
# "put no tool output is provided" -> "but no tool output is provided".
STRUCTURE_PRACTICE_QUESTIONS = '''

You must include one to two practice questions for the user. Included here are formatting and usage instruction guidelines for how to integrate practice questions into your response to the user.

### Question Formatting
Write a practice question relevant to the user's learning objective, testing their knowledge on recently discussed topics. Keep the questions direct and concise. End all questions with directions to the user as to how to reply, whether that be to give a written response or to select from a bank of answers you will provide below.

If tool output is included in this prompt, tailor the question to require an understanding of the image to be able to correctly answer the question or questions. Evaluate all included context relating to the tool output to gain an understanding of what the output represents to appropriately interpret how to integrate the image into your response.

If the topic being discussed could benefit from one or more practice questions requiring the analysis of data, but no tool output is provided, produce a markdown table per the below formatting guidelines, and tailor your questions to require interpretation of the data.

### Question Data Reference Formatting

1. 1 to 4 sentence question
This is the format you must use to integrate the image output of the graphing tool:
![Chart, Graph](my_image.png "Scenic View")


| Example C1 | Example C2 |...
| :---------------: | :----------------: |...
| Content...... | Content....... |...

### Practice Question Answer Options Formatting

**Single Option Multiple Choice**
Provide the user with four options, placed under the question and any relevant reference data if included.

A. Option
B. Option
C. Option
D. Option


**All That Apply**
Use this format to indicate the user is to reply to one or more of the options, as this is a multi-selection multiple-choice question format.

- [ ] A. Option
- [ ] B. Option
- [ ] C. Option
- [ ] D. Option

---

**Written Response**

Prompt the user, in one sentence, to write their response when you are posing a written-response question.

'''
136
+
137
# Practice Question follow-up.
# Injected when the previous assistant turn contained one or more practice questions:
# instructs the model to grade the user's reply and return only its feedback.
PRACTICE_QUESTION_FOLLOWUP = '''

In the previous turn, you sent the user one or more practice questions. You must assess the question(s), identify the correct answers, and grade the user's response.

In your final response to the user, only include your feedback identifying if the user was correct.
If the user answered incorrectly, provide constructive feedback, the correct answer, and a rationale explaining the answer.
If the user answered correctly, congratulate them and offer to either move forward in exploring the topic further or continue with more practice questions.
If the user did not answer, assess the user input for this turn. Ask the user if they would like to try to answer the questions or if they need further help.
'''
147
+
148
# --- Tool Use ---

# Tool Use Enhancement.
# System-prompt segment describing the Create_Graph_Tool signature, when to invoke it,
# and two fully worked example tool calls. Appended when the routing agent enables tools.
TOOL_USE_ENHANCEMENT = """

## Tool Usage for Educational Enhancement

Apply when teaching concepts that benefit from visual representation or when practice questions require charts/graphs.
You are equipped with a sophisticated data visualization tool, `Create_Graph_Tool`, designed to create precise, publication-quality charts. Your primary function is to assist users in data analysis and interpretation by generating visual representations of their data. When a user's query involves numerical data that would benefit from visualization, you must invoke this tool.

## Tool Decision Criteria

- Teaching mathematical functions, trends, or relationships
- Demonstrating statistical concepts or data analysis
- Creating practice questions that test chart interpretation skills
- Illustrating proportional relationships or comparisons

**Tool Signature:**

`Create_Graph_Tool(data: Dict[str, float], plot_type: Literal["bar", "line", "pie"], title: str, x_label: str, y_label: str, educational_context: str)`

**Parameter Guide:**

* `data` **(Required)**: A dictionary where keys are string labels and values are the corresponding numeric data points.
    * *Example:* `{"Experiment A": 88.5, "Experiment B": 92.1}`
* `plot_type` **(Required)**: The specific type of chart to generate. This **must** be one of `"bar"`, `"line"`, or `"pie"`.
* `title` (Optional): A formal title for the plot.
* `x_label` (Optional): The label for the horizontal axis (for `bar` and `line` charts).
* `y_label` (Optional): The label for the vertical axis (for `bar` and `line` charts).
* `educational_context` (Optional): Explanation of why this visualization helps learning.

**Example Scenarios:**

* **User Query:** "I need help practicing the interpretation of trends in line graphs. To analyze the efficacy of a new fertilizer, I have recorded crop yield in kilograms over five weeks. Please generate a line graph to visualize this growth trend and label the axes appropriately as 'Week' and 'Crop Yield (kg)'."
    * **Your Tool Call:**
        * `data`: `{"Week 1": 120, "Week 2": 155, "Week 3": 190, "Week 4": 210, "Week 5": 245}`
        * `plot_type`: `"line"`
        * `title`: `"Efficacy of New Fertilizer on Crop Yield"`
        * `x_label`: `"Week"`
        * `y_label`: `"Crop Yield (kg)"`
        * `educational_context`: `"This line graph helps visualize the consistent upward trend in crop yield, making it easier to identify growth patterns and analyze the fertilizer's effectiveness over time."`

* **User Query:** "I am studying for my ACT, and I am at a loss in interpreting the charts. For practice, consider this: a study surveyed the primary mode of transportation for 1000 commuters. The results were: 450 drive, 300 use public transit, 150 cycle, and 100 walk. Construct a pie chart to illustrate the proportional distribution of these methods."
    * **Your Tool Call:**
        * `data`: `{"Driving": 450, "Public Transit": 300, "Cycling": 150, "Walking": 100}`
        * `plot_type`: `"pie"`
        * `title`: `"Proportional Distribution of Commuter Transportation Methods"`
        * `educational_context`: `"This pie chart clearly shows the relative proportions of each transportation method, making it easy to see that driving is the most common method (45%) while walking is the least common (10%)."`
NOTE: If specific data to use is not supplied by the user, create reasonable example data that illustrates the concept being taught."""
197
+
198
+
199
'''
The prompt used by the routing agent; determines whether tools are enabled.
'''

# --- Tool Decision Engine Prompt ---
# Binary-classifier prompt: formatted with the user's query ({query}) and expected to
# yield exactly "YES" or "NO" to gate graph-tool invocation.
TOOL_DECISION = """

Analyze this educational query and determine if creating a graph, chart, or visual representation would significantly enhance learning and understanding.

Query: "{query}"

EXCLUDE if query is:
- Greetings or casual conversation (hello, hi, hey)
- Simple definitions without data
- General explanations that don't involve data

INCLUDE if query involves:
- Mathematical functions or relationships
- Data analysis or statistics
- Comparisons that benefit from charts
- Trends or patterns over time
- Creating practice questions with data

Answer with exactly: YES or NO

Decision:"""
225
+
226
'''
System Instructions for the four classification agents
'''
# --- Classification Prompts ---

# Agent 1: decides whether the current turn warrants generating practice questions.
# Must return exactly "STRUCTURE_PRACTICE_QUESTIONS" or "No Practice questions are needed."
# Fixes: "aid" -> "aide", "Your goal,is" -> "Your goal is", "question's" -> "questions",
# "hte" -> "the", "quadradic equatin" -> "quadratic equation", "generations" -> "generation".
agent_1_system = '''
As a teacher's aide, considering the current user prompt/input and recent conversation history, determine if practice questions are needed. Your goal is to determine dynamically if the user's current understanding and the conversation as a whole would benefit from the model offering practice questions to the user.

Cases where practice questions are beneficial:
- The user requested practice questions.
    Examples:
    1. Can you make some ACT math section practice questions?
- The user expressed that they would like to gauge their understanding.
    Examples:
    1. I want to figure out where I am in prep for my history exam, it is on the American Civil War.
- The previous turns include model instruction on a topic and the user has expressed some level of understanding.
    Examples:
    1. The chat history is an exchange between the user and model on a specific topic, and the current turn is the user responding to model instruction. The user appears to be grasping the concept, so a practice question would be helpful to gauge the user's grasp of the discussed topic.

When strictly inappropriate to include practice questions:
- The current user prompt/input is conversational, or nonsense:
    Examples:
    1. Hello/Hi/Thank You...
    2. grey, blue colored stuff
    3. fnsjdfnbiwe
- The user's question is straightforward, requiring a general answer or tutoring rather than user knowledge testing.
    Examples:
    1. Can you tell me when WW2 started?
    2. Who are the key players in the civil rights movement?
    3. What do the variables mean in a quadratic equation?

Before determining your final response, consider if issuing a practice question would be beneficial or inappropriate. Ask yourself if the user has received instruction on a topic, or requested practice questions prior to returning your final response.

If the current turn qualifies for practice question generation, return exactly "STRUCTURE_PRACTICE_QUESTIONS"
Otherwise, return "No Practice questions are needed."

Do not return any other values outside of the provided options.
'''
264
+
265
# Agent 2: intention analysis — flags vague input and/or unclear needs.
# NOTE: the misspelled output tokens "VAUGE_INPUT" (and "USER_UNDERSTANDING") are matched
# literally by downstream routing and are deliberately left unchanged; only the prose is
# corrected ("intension", "Appply", "uniterpretable", "rpomptbeing", "yeilds", "coherant", etc.).
agent_2_system = '''
As an expert in intention analysis, determine if one, both, or neither of the following cases is true considering the current user prompt/input.

**Vague Prompt**
Apply this option if the user prompt/input is overly vague and uninterpretable. It has no indication that it is a followup message, possibly being a simple greeting. This selection results in the user's prompt being handled lightly with a simple request for a task and suggestions for the user to pick from.

**Unclear Needs**
Apply this if the user's current message is just a greeting or conversational. Also apply this option if the current message includes a comment like or similar to "lets change subjects." Consider that if you return the positive value for this option, which is USER_UNDERSTANDING, then the user's prompt will be handled with discovery tactics to uncover the user's goals. Of the two options, this option yields a more detailed course of action in uncovering user needs.

**Neither**
Apply neither if the user appears to be responding to a previous message, makes a direct request, or is otherwise a coherent message.
Example:
1. I think the answer is A (responding)
2. Can you explain why the sky is blue? (direct request)
3. To my understanding

Your final response must be one of the following:
"VAUGE_INPUT USER_UNDERSTANDING"
"USER_UNDERSTANDING"
"VAUGE_INPUT"
"Neither is applicable."

Do not return any other values outside of the provided options.
'''
289
+
290
# Agent 3: detects whether the current turn is a follow-up to a previously issued
# practice question. The {STRUCTURE_PRACTICE_QUESTIONS} placeholder is filled via
# str.format at call time. Fixes subject-verb agreement ("turns contains" -> "contain").
agent_3_system = '''
Given a current user prompt/input and recent conversation history, you determine if the current turn is a followup from a practice question.

For context, consider the instructions given to generate practice questions:
{STRUCTURE_PRACTICE_QUESTIONS}

The user prompt/input is a followup if the previous turns contain a practice question per the previous guidelines.
The user prompt may or may not answer the question(s).

If the current turn is a followup reply from the user regarding a practice question, return "PRACTICE_QUESTION_FOLLOWUP True"
Otherwise return "Not a followup"

Do not return any other values outside of the provided options.
'''
304
+
305
# Agent 4: chooses between guided teaching and practice-question generation (or both,
# or neither). Output tokens are matched literally downstream and are unchanged.
# Fixes: "proffession whom is" -> "professional who is", "catagory" -> "category",
# "positivel" -> "positively".
agent_4_system = '''
As an educational professional who is assessing a student's current needs, provided the current user prompt/input and recent conversation history, determine if the user is in need of instruction or teaching on a topic, and/or a practice question to enhance their learning.

"GUIDING_TEACHING"
Guiding and teaching is a curated approach to instructing the user on a given topic. This category should be applied if the user is requesting information, seems confused on previous instruction, or continuing a discussion on a topic.

"STRUCTURE_PRACTICE_QUESTIONS"
This category is applicable if the user responded positively to previous instruction by the model on a set topic, or has requested practice questions directly.

Neither apply if no topics are specifically stated in the current or past prompts.

You may return the following outputs based on your assessment:
"GUIDING_TEACHING"
"STRUCTURE_PRACTICE_QUESTIONS"
"GUIDING_TEACHING STRUCTURE_PRACTICE_QUESTIONS"
"Neither Apply"

Do not return any other values outside of the provided options.
'''
324
+
325
+ '''
326
+ Thinking prompts for use by the agent constructing reasoning invisible to the user, outputs to be supplied to the response model for context and examples.
327
+ '''
328
+ # --- Thinking Prompts ---
329
+
330
+ # Thinking process for math-based teaching and problem solving. Tree-of-Thought Prompting
331
+ MATH_THINKING = '''
332
+ Math based thinking process instructions:
333
+
334
+ Given a user input and recent chat history, you execute a thinking process to determine your goal. Below is provided the decision tree you will utilize, logically proceeding question by question until you reach an end point. You will then process the user prompt per the instructions outlined in the endpoint. Your final output is to be cleanly structured as context for answering the user prompt.
335
+
336
+ **General Final Response Output Rules**
337
+
338
+ When formatting context, apply LaTeX formatting per these guidelines:
339
+ You have access to LaTeX and markdown rendering.
340
+ - For inline math, use $ ... $, e.g. $\sum_{i=0}^n i^2$
341
+ - For centered display math, use $$ ... $$ on its own line.
342
+ - To show a literal dollar sign, use `\$` (e.g., \$5.00).
343
+ - To show literal parentheses in LaTeX, use `\(` and `\)` (e.g., \(a+b\)).
344
+
345
+ Content must be ordered logically, building from foundational knowledge to final solutions. Follow proper order of operation. The level of detail is dictated by the output of the decision tree below.
346
+
347
+
348
+ **Decision Tree**
349
+ Each question has two possible outcomes, narrowing the options. Consider each against the supplied user input and conversation history, proceeding in order. You must apply the general output rules and the final endpoint rules to your reasoning and process in producing the final output for context, to be utilized by another model in producing the final response.
350
+
351
+ Is the math based question or request complex?
352
+ 1A. The question is a low-level math question or request not requiring more than five steps for completion. Examples: basic arithmetic or definitions.
353
+ 1B. The question or request is complex or multifaceted. Examples: tasks that require more than five steps to address. May pertain to advanced mathematical domains such as engineering or physics
354
+
355
+
356
+ **End Points**
357
+ 1A. Evaluate the topic being discussed, considering the newest user and conversation input. Define key terms at the beginning of your context generation, such as the operators and their use in the problem and any principles that apply. Step by step solve the problem presented in the current user query, if one is presented. All math must be formatted per the LaTeX formatting guidelines, with each step on its own line with a description over top expressing why the step is being done and what principles are being applied. Maintain a minimal level of detail, focusing on large topics rather than granular details.
358
+ EXAMPLE:
359
+ [INPUT]
360
+ user: "Can you explain the Pythagorean theorem?"
361
+ chat_history: None
362
+
363
+ [OUTPUT]
364
+ **Key Terms**
365
+ - **Right Triangle:** A triangle with one angle measuring exactly 90 degrees.
366
+ - **Hypotenuse:** The longest side of a right triangle, opposite the right angle.
367
+ - **Legs:** The two shorter sides of a right triangle that form the right angle.
368
+
369
+ **Principle: The Pythagorean Theorem**
370
+ The theorem states that in a right triangle, the square of the length of the hypotenuse (c) is equal to the sum of the squares of the lengths of the other two sides (a and b).
371
+
372
+ **Formula**
373
+ The relationship is expressed with the formula:
374
+ $$a^2 + b^2 = c^2$$
375
+
376
+ 1B. Evaluate the topic being discussed, considering the newest user and conversation input. Define key terms at the beginning of your context generation, such as the operators and their use in the problem and any principles that apply. Identify the domain or school of knowledge. Step by step solve the problem presented in the current user query, if one is presented. List steps in a numbered list. All math must be formatted per the LaTeX formatting guidelines, with each step on its own line with a description over top expressing why the step is being done, and the relevant principles being applied. Include a summary of steps taken and the final answer below the full steps list, in a bulleted list.
377
+ EXAMPLE:
378
+ [INPUT]
379
+ user: "Okay, can you solve the definite integral of f(x) = 3x^2 from x=1 to x=3?"
380
+ chat_history: "user: \"What is an integral?\"\nassistant: \"An integral is a mathematical object that can be interpreted as an area or a generalization of area. The process of finding an integral is called integration.\""
381
+
382
+ [OUTPUT]
383
+ **Domain:** Integral Calculus
384
+
385
+ **Key Terms**
386
+ - **Definite Integral:** Represents the net area under a curve between two points, known as the limits of integration.
387
+ - **Antiderivative:** A function whose derivative is the original function. The process relies on the Fundamental Theorem of Calculus.
388
+ - **Limits of Integration:** The start (lower) and end (upper) points of the interval over which the integral is calculated. In this case, 1 and 3.
389
+
390
+ **Problem**
391
+ Solve the definite integral:
392
+ $$\int_{1}^{3} 3x^2 \,dx$$
393
+
394
+ **Step-by-Step Solution**
395
+ 1. **Find the antiderivative of the function.**
396
+ We apply the power rule for integration, $\int x^n \,dx = \frac{x^{n+1}}{n+1}$.
397
+ $$ \int 3x^2 \,dx = 3 \cdot \frac{x^{2+1}}{2+1} = 3 \cdot \frac{x^3}{3} = x^3 $$
398
+ 2. **Apply the Fundamental Theorem of Calculus.**
399
+ We will evaluate the antiderivative at the upper and lower limits of integration, $F(b) - F(a)$.
400
+ $$ [x^3]_1^3 $$
401
+ 3. **Evaluate the antiderivative at the upper limit (x=3).**
402
+ $$ (3)^3 = 27 $$
403
+ 4. **Evaluate the antiderivative at the lower limit (x=1).**
404
+ $$ (1)^3 = 1 $$
405
+ 5. **Subtract the lower limit result from the upper limit result.**
406
+ This gives the final value of the definite integral.
407
+ $$ 27 - 1 = 26 $$
408
+
409
+ **Summary**
410
+ - The antiderivative of $3x^2$ is $x^3$.
411
+ - Evaluating the antiderivative from $x=1$ to $x=3$ yields $(3)^3 - (1)^3$.
412
+ - The final answer is $26$.
413
+
414
+ '''
415
+
416
# Chain-of-thought prompting: guides the model in processing tool output for questions,
# designing tables for contextual data, and designing practice questions plus an answer
# bank. Placeholders ({tool_img_output}, {tool_context}, {STRUCTURE_PRACTICE_QUESTIONS},
# {LATEX_FORMATTING}) are filled via str.format at call time.
# Fixes: "seasoning" -> "a seasoned", "avialble" -> "available", "summaried at teh" ->
# "summarized at the", "pract question" -> "practice question", missing space after
# "answer.", "answerable with based on previosu" -> "answerable based on previous".
QUESTION_ANSWER_DESIGN = '''
As a seasoned test question writing specialist, your task is to produce context to create a practice question for the user.

Tool Outputs (if provided)
If tool call outputs are available, the practice question must use and require understanding of the data presented.
Image output: {tool_img_output}
Image context to consider: {tool_context}

You must construct practice questions per the formatting guidelines included here:
{STRUCTURE_PRACTICE_QUESTIONS}

Math LaTeX Formatting Guidelines:
{LATEX_FORMATTING}

Follow this logical process:
1. Assess the current round's user input and the conversation history, if there is one. What specific topics or concepts are discussed? What instruction has the model previously given? Also identify the subject domain. Return this context summarized at the top of your context output.
2. Produce a practice question for the user on the identified topic or concept. Return the practice question with the heading "Practice Question"
    - If Math or requiring scientific calculations: The question must not be an example given by the model or user in the conversation history. It may be inspired by the conversation history, but it must require the user to try to solve the problem based on what they learned. If no tool output is given to base the question on, then you must create your own data for the user to interpret, solve, or otherwise manipulate to come to an answer. You may provide data by means of the tool image output, with the question constructed using the tool context output. If no tool output is included, you may provide data as a markdown table or integrated into the question. Math must be formatted using LaTeX as outlined in the LaTeX guidelines given above.
    - If History/social studies/art or otherwise static fact related: The question must be answerable based on previous model teaching or instruction from the conversation history.

3. Produce an answer bank under the question with the correct answer or answers labeled. If it is a written response question, you must write examples of possible correct answers for the new model to utilize in grading the user's answer.
'''
439
+
440
+ # This prompt is reserved for high complexity user queries, aiming to generate context in support of the response agent.
441
+ REASONING_THINKING = '''
442
+ Considering the provided current user prompt/input and recent conversation history, as an educational professional skilled in breaking down concepts, return context that would be beneficial in producing a response to the user.
443
+
444
+ 1. Begin by thinking about what the user is asking about, such as the topic or domain of knowledge. Summarize the user's request as well as what has been said relating to the topic or goal in the conversation history. Give this section the heading "User Knowledge Summary."
445
+ 2. Evaluate the user's previous statements for accuracy. Ask yourself if the user appears to be grasping the concept or struggling with some part of it. Produce a brief analysis section that defines the user's established understanding, or if this is unknown. Propose potential concepts to cover to aid the user. Return this section with the heading "User Understanding."
446
+ 3. Identify steps taken by the model in previous turns to aid the user, as well as the apparent effectiveness of said steps, if conversation history is available. Produce this section with the heading "Previous Actions."
447
+ 4. Identify relevant facts that would aid the user in understanding the concept, following a logical order in listing these items. Present these items in a nested list, with a title for each nested block at the higher level and atomic facts nested underneath. Produce this section with the heading "Reference Fact Sheet"
448
+
449
+ Review your response prior to returning it as output. Review for accuracy and relevance, producing only facts that support further learning rather than information the user has already shown understanding of.
450
+
451
+ Examples:
452
+ [INPUT]
453
+ user: "I know principal is the starting money and the rate is the percentage. But I don't get what 'compounding frequency' means. Does it matter if it's daily vs yearly?"
454
+ chat_history: "user: \"How do I calculate compound interest?\"\nassistant: \"## Calculating Compound Interest\n\nThat's a great question! Compound interest is essentially interest earned on the initial amount of money (the principal) as well as on the accumulated interest from previous periods.\n\nTo give you the most helpful explanation, it would be useful to know what you're familiar with already. Have you encountered terms like 'principal', 'annual interest rate', or 'compounding frequency' before?\""
455
+
456
+ [OUTPUT]
457
+ ### User Knowledge Summary
458
+ The user's goal is to learn how to calculate compound interest. The conversation began with the user asking for the calculation method. The model responded by defining the term and asking discovery questions to gauge the user's prior knowledge of key variables. The user has now confirmed they understand 'principal' and 'interest rate' but are specifically asking for a definition of 'compounding frequency' and an explanation of its importance.
459
+
460
+ ### User Understanding
461
+ The user has a foundational grasp of the core components of interest calculations (principal, rate). Their point of confusion is isolated to the concept of compounding frequency. They have correctly intuited that the frequency (e.g., daily vs. yearly) matters but do not understand why. To aid them, the next steps should be to formally define compounding frequency and then use a comparative example to illustrate its impact on the final amount.
462
+
463
+ ### Previous Actions
464
+ In the previous turn, the model successfully employed a guided discovery tactic. Instead of providing the formula outright, it defined the topic and asked targeted questions. This action was highly effective, as it allowed the user to articulate their specific knowledge gap, enabling a more focused and efficient teaching approach in the next turn.
465
+
466
+ ### Reference Fact Sheet
467
+ - **The Compound Interest Formula**
468
+ - The standard formula is $A = P(1 + r/n)^{nt}$
469
+ - **A**: The future value of the investment/loan, including interest.
470
+ - **P**: The principal amount (the initial amount of money).
471
+ - **r**: The annual interest rate (in decimal form).
472
+ - **n**: The number of times that interest is compounded per year.
473
+ - **t**: The number of years the money is invested or borrowed for.
474
+ - **The Role of Compounding Frequency (n)**
475
+ - This variable represents how often the accumulated interest is calculated and added to the principal.
476
+ - A higher frequency means interest starts earning its own interest sooner and more often within the same year.
477
+ - This directly leads to a higher future value (A), even if all other variables (P, r, t) are the same.
478
+ - **Common Values for 'n'**
479
+ - **Annually**: n = 1
480
+ - **Semiannually**: n = 2
481
+ - **Quarterly**: n = 4
482
+ - **Monthly**: n = 12
483
+ - **Daily**: n = 365
484
+
485
+ [INPUT]
486
+ user: "I think the radius is the line that goes all the way across the middle."
487
+ chat_history: "user: \"How do you find the area of a circle?\"\nassistant: \"## Finding the Area of a Circle\n\nThat's a great question! To calculate the area of a circle, we use a specific formula that involves a special number called Pi.\n\nBefore we dive into the formula, what do you already know about the different parts of a circle, like the radius or the diameter?\""
488
+
489
+ [OUTPUT]
490
+ ### User Knowledge Summary
491
+ The user wants to learn how to calculate the area of a circle. In the previous turn, the model asked the user about their knowledge of a circle's parts to establish a baseline. The user has now responded, incorrectly identifying the radius as the diameter.
492
+
493
+ ### User Understanding
494
+ The user has a misunderstanding of core terminology essential for this calculation. They have confused the radius with the diameter. This foundational knowledge gap must be corrected before introducing the area formula. Potential concepts to cover are the definitions of radius and diameter and their mathematical relationship.
495
+
496
+ ### Previous Actions
497
+ In the previous turn, the model employed a discovery tactic by asking about the user's prior knowledge of circle components. This was an effective step, as it successfully revealed a critical misconception in the user's understanding that can now be corrected.
498
+
499
+ ### Reference Fact Sheet
500
+ - Core Components of a Circle
501
+ - **Radius (r):** The distance from the center of the circle to any point on its edge.
502
+ - **Diameter (d):** The distance from one edge of the circle to the other, passing through the center.
503
+ - **Relationship:** The diameter is always exactly twice the length of the radius ($d = 2r$). Conversely, the radius is half the diameter ($r = d/2$).
504
+ - The Area Formula
505
+ - **Pi ($\pi$):** A special mathematical constant, approximately equal to 3.14159, that represents the ratio of a circle's circumference to its diameter.
506
+ - **Formula:** The area ($A$) of a circle is calculated using the formula $A = \pi r^2$.
507
+ - **Crucial Detail:** The formula uses the **radius**, not the diameter. If given the diameter, it must first be converted to the radius before calculating the area.
508
+
509
+ '''
requirements.txt ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt
2
+ # Mimir Educational AI Assistant Dependencies
3
+
4
+ # =============================================================================
5
+ # ZeroGPU COMPATIBILITY
6
+ # =============================================================================
7
+ # DO NOT specify torch versions - provided by ZeroGPU environment
8
+ spaces
9
+
10
+ # =============================================================================
11
+ # CORE ML/AI PACKAGES
12
+ # =============================================================================
13
+ transformers>=4.41.0
14
+ huggingface_hub>=0.20.0
15
+ safetensors
16
+ accelerate>=0.31.0
17
+ bitsandbytes
18
+ sentencepiece
19
+ peft>=0.10.0
20
+
21
+ # GGUF model support for Math Thinking Agent
22
+ # llama-cpp-python>=0.2.0
23
+
24
+ # =============================================================================
25
+ # LANGCHAIN ECOSYSTEM
26
+ # =============================================================================
27
+ langgraph>=0.2.0
28
+ langchain-core>=0.3.0
29
+ langchain-community>=0.3.0
30
+ langchain-huggingface>=0.1.0
31
+
32
+ # =============================================================================
33
+ # UI FRAMEWORK
34
+ # =============================================================================
35
+ gradio>=5.46.1
36
+
37
+ # =============================================================================
38
+ # DATA & STATE MANAGEMENT
39
+ # =============================================================================
40
+ datasets>=2.14.0 # For HF dataset backup in state manager
41
+ python-dotenv>=1.0.0 # Environment variable management
42
+
43
+ # =============================================================================
44
+ # VISUALIZATION & TOOLS
45
+ # =============================================================================
46
+ matplotlib>=3.7.0 # For graph_tool.py
47
+ plotly>=5.15.0 # For advanced visualizations
48
+ pandas>=2.0.0 # Data handling
49
+ numpy>=1.24.0 # Numerical operations
50
+
51
+ # =============================================================================
52
+ # METRICS & EVALUATION
53
+ # =============================================================================
54
+ lighteval # For educational quality metrics and LightEval integration
55
+ trackio
56
+ # =============================================================================
57
+ # UTILITIES
58
+ # =============================================================================
59
+ tqdm>=4.65.0 # Progress bars
60
+
61
+ # =============================================================================
62
+ # NOTES
63
+ # =============================================================================
64
+ # Removed dependencies:
65
+
66
+ # - scikit-learn: ML classifier replaced by agent-based routing
67
+ # - sentence-transformers, faiss-cpu: RAG not used
68
+ # - pyspellchecker: Spell checking removed
69
+ # - scipy: Not used in current implementation
70
+ # - langsmith: Not used
71
+ # - emoji: Not used
72
+ # - tiktoken, langchain-text-splitters: RAG components not used
state_manager.py ADDED
@@ -0,0 +1,801 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # state_manager.py
2
+ """
3
+ Global state management and logical expression system for Mimir.
4
+
5
+ Components:
6
+ - GlobalStateManager: Thread-safe state persistence with SQLite + HF dataset backup
7
+ - PromptStateManager: Per-turn prompt segment activation tracking
8
+ - LogicalExpressions: Regex-based prompt triggers
9
+ """
10
+
11
+ import os
12
+ import re
13
+ import sqlite3
14
+ import json
15
+ import logging
16
+ import threading
17
+ from datetime import datetime, timedelta
18
+ from typing import Dict, List, Optional, Any
19
+ from datasets import load_dataset, Dataset
20
+ from huggingface_hub import HfApi
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ # ============================================================================
26
+ # PROMPT STATE MANAGER
27
+ # ============================================================================
28
+
29
class PromptStateManager:
    """
    Tracks prompt-segment activation for a single conversational turn.

    Every flag starts out False and the whole set is wiped back to False
    at the start of each turn via reset().
    """

    # Every prompt segment the system knows about; doubles as the set of
    # names accepted by update().
    _SEGMENT_NAMES = (
        "MATH_THINKING",
        "QUESTION_ANSWER_DESIGN",
        "REASONING_THINKING",
        "VAUGE_INPUT",
        "USER_UNDERSTANDING",
        "GENERAL_FORMATTING",
        "LATEX_FORMATTING",
        "GUIDING_TEACHING",
        "STRUCTURE_PRACTICE_QUESTIONS",
        "PRACTICE_QUESTION_FOLLOWUP",
        "TOOL_USE_ENHANCEMENT",
    )

    # Segments consumed by the response agent (thinking segments excluded).
    _RESPONSE_SEGMENTS = (
        "VAUGE_INPUT",
        "USER_UNDERSTANDING",
        "GENERAL_FORMATTING",
        "LATEX_FORMATTING",
        "GUIDING_TEACHING",
        "STRUCTURE_PRACTICE_QUESTIONS",
        "PRACTICE_QUESTION_FOLLOWUP",
        "TOOL_USE_ENHANCEMENT",
    )

    # Segments consumed by the thinking agent.
    _THINKING_SEGMENTS = (
        "MATH_THINKING",
        "QUESTION_ANSWER_DESIGN",
        "REASONING_THINKING",
    )

    def __init__(self):
        self._default_state = dict.fromkeys(self._SEGMENT_NAMES, False)
        self._current_state = dict(self._default_state)
        logger.info("PromptStateManager initialized")

    def reset(self):
        """Reset all prompt states to False for new turn"""
        self._current_state = dict(self._default_state)
        logger.debug("Prompt state reset for new turn")

    def get_state(self) -> Dict[str, bool]:
        """Get current prompt state dictionary (a defensive copy)."""
        return dict(self._current_state)

    def update(self, prompt_name: str, value: bool):
        """
        Set a single prompt segment's activation flag.

        Args:
            prompt_name: Name of prompt segment (must be a known segment)
            value: True to activate, False to deactivate
        """
        if prompt_name in self._default_state:
            self._current_state[prompt_name] = value
            logger.debug(f"Prompt state updated: {prompt_name} = {value}")
        else:
            # Unknown names are ignored (logged) rather than raised.
            logger.warning(f"Unknown prompt name: {prompt_name}")

    def update_multiple(self, updates: Dict[str, bool]):
        """
        Apply several flag updates at once.

        Args:
            updates: Dictionary of {prompt_name: bool} updates
        """
        for name, flag in updates.items():
            self.update(name, flag)

    def is_active(self, prompt_name: str) -> bool:
        """Check if a prompt segment is active"""
        return self._current_state.get(prompt_name, False)

    def get_active_prompts(self) -> List[str]:
        """Get list of all currently active prompt names"""
        return [name for name, flag in self._current_state.items() if flag]

    def get_active_response_prompts(self) -> List[str]:
        """Active response-agent prompts only (thinking prompts excluded)."""
        return [name for name in self._RESPONSE_SEGMENTS if self.is_active(name)]

    def get_active_thinking_prompts(self) -> List[str]:
        """Active thinking-agent prompts only."""
        return [name for name in self._THINKING_SEGMENTS if self.is_active(name)]
112
+
113
+
114
+ # ============================================================================
115
+ # LOGICAL EXPRESSIONS
116
+ # ============================================================================
117
+
118
class LogicalExpressions:
    """
    Regex-driven trigger detection for prompt segments.

    Scans raw user input and switches on the matching prompt segments of
    a PromptStateManager.
    """

    def __init__(self):
        # Keywords that mark a message as math-related.
        self.math_regex = (
            r'\b(math|calculus|algebra|geometry|equation|formula|solve|calculate'
            r'|derivative|integral|trigonometry|statistics|probability)\b'
        )

        # Additional regex patterns can be added here
        logger.info("LogicalExpressions initialized")

    def check_math_keywords(self, user_input: str) -> bool:
        """
        Return True when the input contains mathematical keywords.
        A hit triggers LATEX_FORMATTING downstream.

        Args:
            user_input: User's message

        Returns:
            True if math keywords detected
        """
        if re.search(self.math_regex, user_input, re.IGNORECASE) is None:
            return False
        logger.debug(f"Math keywords detected in: '{user_input[:50]}...'")
        return True

    def apply_all_checks(self, user_input: str, prompt_state: PromptStateManager):
        """
        Run every trigger check against the input and update prompt_state.

        Args:
            user_input: User's message
            prompt_state: PromptStateManager instance to update
        """
        # GENERAL_FORMATTING is always applied
        prompt_state.update("GENERAL_FORMATTING", True)

        # Math content forces LaTeX-aware formatting.
        if self.check_math_keywords(user_input):
            prompt_state.update("LATEX_FORMATTING", True)

        # Additional checks can be added here as needed
        logger.debug(f"Logical expressions applied. Active prompts: {prompt_state.get_active_prompts()}")
164
+
165
+
166
+ # ============================================================================
167
+ # GLOBAL STATE MANAGER
168
+ # ============================================================================
169
+
170
class GlobalStateManager:
    """
    Thread-safe global state manager with SQLite persistence and HF dataset backup.

    Holds per-session conversation, analytics, ML-model and evaluation caches,
    plus a PromptStateManager for per-turn prompt segment tracking. All cache
    access is guarded by a single re-entrant lock.
    """

    def __init__(self, db_path="mimir_analytics.db", dataset_repo="jdesiree/mimir_analytics"):
        self._db_path = db_path
        self.dataset_repo = dataset_repo
        self.hf_token = os.getenv("HF_TOKEN")

        # Per-session caches
        self._states = {}              # conversation state per session
        self._analytics_cache = {}     # analytics per session
        self._ml_models_cache = {}     # cached trained ML models
        self._evaluation_cache = {}    # evaluation metrics per session

        # Thread safety. RLock, NOT Lock: several public methods (e.g.
        # add_export_record, add_user_feedback) call get_analytics_state /
        # get_evaluation_state while already holding the lock, which would
        # deadlock on a non-reentrant Lock.
        self._lock = threading.RLock()

        # Cleanup / backup scheduling (all intervals in seconds)
        self._cleanup_interval = 3600
        self._max_age = 24 * 3600
        self._last_cleanup = datetime.now()
        self._last_hf_backup = datetime.now()
        self._hf_backup_interval = 3600

        # Per-turn prompt segment tracking
        self._prompt_state_manager = PromptStateManager()

        # Load persisted data: SQLite first, then overlay the HF dataset backup
        # (HF rows overwrite SQLite rows with the same session_id).
        self._init_database()
        self._load_from_database()
        self._load_from_hf_dataset()

        logger.info("GlobalStateManager initialized with PromptStateManager")

    # ========================================================================
    # PROMPT STATE MANAGEMENT
    # ========================================================================

    def get_prompt_state_manager(self) -> "PromptStateManager":
        """Get the prompt state manager for current turn"""
        return self._prompt_state_manager

    def reset_prompt_state(self):
        """Reset prompt state for new turn"""
        self._prompt_state_manager.reset()
        logger.debug("Prompt state reset for new turn")

    def get_prompt_state(self) -> Dict[str, bool]:
        """Get current prompt state dictionary"""
        return self._prompt_state_manager.get_state()

    def update_prompt_state(self, prompt_name: str, value: bool):
        """Update specific prompt state"""
        self._prompt_state_manager.update(prompt_name, value)

    def update_prompt_states(self, updates: Dict[str, bool]):
        """Update multiple prompt states"""
        self._prompt_state_manager.update_multiple(updates)

    # ========================================================================
    # DATABASE METHODS
    # ========================================================================

    def _init_database(self):
        """Initialize SQLite database for persistent storage (idempotent)."""
        conn = sqlite3.connect(self._db_path)
        cursor = conn.cursor()

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS conversations (
                session_id TEXT PRIMARY KEY,
                chat_history TEXT,
                conversation_state TEXT,
                last_accessed TEXT,
                created TEXT
            )
        """)

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS analytics (
                session_id TEXT PRIMARY KEY,
                project_stats TEXT,
                recent_interactions TEXT,
                dashboard_html TEXT,
                last_refresh TEXT,
                export_history TEXT
            )
        """)

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS evaluations (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                session_id TEXT,
                timestamp TEXT,
                metric_type TEXT,
                metric_data TEXT
            )
        """)

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS classifications (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                session_id TEXT,
                timestamp TEXT,
                user_input TEXT,
                prediction_data TEXT,
                features TEXT
            )
        """)

        conn.commit()
        conn.close()

    def _load_from_database(self):
        """Load all conversations and analytics from SQLite on startup."""
        try:
            conn = sqlite3.connect(self._db_path)
            cursor = conn.cursor()

            cursor.execute("SELECT * FROM conversations")
            for row in cursor.fetchall():
                session_id = row[0]
                self._states[session_id] = {
                    'chat_history': json.loads(row[1]),
                    'conversation_state': json.loads(row[2]),
                    'last_accessed': datetime.fromisoformat(row[3]),
                    'created': datetime.fromisoformat(row[4])
                }

            cursor.execute("SELECT * FROM analytics")
            for row in cursor.fetchall():
                session_id = row[0]
                self._analytics_cache[session_id] = {
                    'project_stats': json.loads(row[1]),
                    'recent_interactions': json.loads(row[2]),
                    'dashboard_html': row[3],
                    'last_refresh': datetime.fromisoformat(row[4]) if row[4] else None,
                    'export_history': json.loads(row[5]),
                    'last_accessed': datetime.now()
                }

            conn.close()
            logger.info(f"Loaded {len(self._states)} conversations and {len(self._analytics_cache)} analytics from database")
        except Exception as e:
            # Best-effort: a missing/corrupt DB must not prevent startup.
            logger.error(f"Error loading from database: {e}")

    def _load_from_hf_dataset(self):
        """Load data from HF dataset on startup (best-effort; overwrites SQLite rows)."""
        try:
            ds = load_dataset(self.dataset_repo, split="train", token=self.hf_token)

            for item in ds:
                if item['data_type'] == 'conversation':
                    session_id = item['session_id']
                    data = json.loads(item['data'])
                    self._states[session_id] = data
                elif item['data_type'] == 'analytics':
                    session_id = item['session_id']
                    data = json.loads(item['data'])
                    self._analytics_cache[session_id] = data

            logger.info(f"Loaded data from HF dataset {self.dataset_repo}")
        except Exception as e:
            # Dataset may not exist yet (first run) or token may be missing.
            logger.warning(f"Could not load from HF dataset: {e}")

    def _save_to_database_conversations(self, session_id):
        """Persist one session's conversation to SQLite (upsert)."""
        if session_id not in self._states:
            return

        state = self._states[session_id]
        conn = sqlite3.connect(self._db_path)
        cursor = conn.cursor()

        cursor.execute("""
            INSERT OR REPLACE INTO conversations
            (session_id, chat_history, conversation_state, last_accessed, created)
            VALUES (?, ?, ?, ?, ?)
        """, (
            session_id,
            json.dumps(state['chat_history']),
            json.dumps(state['conversation_state']),
            state['last_accessed'].isoformat(),
            state.get('created', datetime.now()).isoformat()
        ))

        conn.commit()
        conn.close()

    def _save_to_database_analytics(self, session_id):
        """Persist one session's analytics to SQLite (upsert)."""
        if session_id not in self._analytics_cache:
            return

        analytics = self._analytics_cache[session_id]
        conn = sqlite3.connect(self._db_path)
        cursor = conn.cursor()

        cursor.execute("""
            INSERT OR REPLACE INTO analytics
            (session_id, project_stats, recent_interactions, dashboard_html, last_refresh, export_history)
            VALUES (?, ?, ?, ?, ?, ?)
        """, (
            session_id,
            json.dumps(analytics.get('project_stats', {})),
            json.dumps(analytics.get('recent_interactions', [])),
            analytics.get('dashboard_html', ''),
            analytics.get('last_refresh').isoformat() if analytics.get('last_refresh') else None,
            json.dumps(analytics.get('export_history', []))
        ))

        conn.commit()
        conn.close()

    def _backup_to_hf_dataset(self):
        """Backup all data to HF dataset, throttled to one push per interval."""
        # FIX: timedelta.seconds is only the 0-86399s component of the delta;
        # use total_seconds() so the throttle doesn't wrap around every 24h.
        if (datetime.now() - self._last_hf_backup).total_seconds() < self._hf_backup_interval:
            return

        try:
            data_items = []

            for session_id, state in self._states.items():
                data_items.append({
                    'session_id': session_id,
                    'data_type': 'conversation',
                    'data': json.dumps(state, default=str),
                    'timestamp': datetime.now().isoformat()
                })

            for session_id, analytics in self._analytics_cache.items():
                data_items.append({
                    'session_id': session_id,
                    'data_type': 'analytics',
                    'data': json.dumps(analytics, default=str),
                    'timestamp': datetime.now().isoformat()
                })

            if data_items:
                ds = Dataset.from_list(data_items)
                ds.push_to_hub(self.dataset_repo, token=self.hf_token)
                self._last_hf_backup = datetime.now()
                logger.info(f"Backed up {len(data_items)} items to HF dataset")
        except Exception as e:
            logger.error(f"Error backing up to HF dataset: {e}")

    def _cleanup_old_states(self):
        """Remove old unused states to prevent memory leaks."""
        now = datetime.now()
        # FIX: both comparisons previously used timedelta.seconds, which caps
        # at 86399 - the `> 24*3600` expiry test below could never be true and
        # stale sessions were never evicted. total_seconds() is the full span.
        if (now - self._last_cleanup).total_seconds() < self._cleanup_interval:
            return

        with self._lock:
            expired_keys = []
            for session_id, state_data in self._states.items():
                if (now - state_data.get('last_accessed', now)).total_seconds() > self._max_age:
                    expired_keys.append(session_id)

            for key in expired_keys:
                del self._states[key]
                logger.info(f"Cleaned up expired state: {key}")

            self._last_cleanup = now

    # ========================================================================
    # CONVERSATION STATE METHODS
    # ========================================================================

    def get_session_id(self, request=None):
        """Generate or retrieve session ID (single-session placeholder)."""
        return "default_session"

    def get_conversation_state(self, session_id=None):
        """Get conversation state for a session, creating it on first access."""
        if session_id is None:
            session_id = self.get_session_id()

        self._cleanup_old_states()

        with self._lock:
            if session_id not in self._states:
                self._states[session_id] = {
                    'chat_history': [],
                    'conversation_state': [],
                    'last_accessed': datetime.now(),
                    'created': datetime.now()
                }
            else:
                self._states[session_id]['last_accessed'] = datetime.now()

            # Shallow copy: callers must not rely on mutating nested lists.
            return self._states[session_id].copy()

    def update_conversation_state(self, chat_history, conversation_state, session_id=None):
        """Update conversation state for a session and persist it."""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id not in self._states:
                self._states[session_id] = {}

            self._states[session_id].update({
                'chat_history': chat_history.copy() if chat_history else [],
                'conversation_state': conversation_state.copy() if conversation_state else [],
                'last_accessed': datetime.now()
            })

        self._save_to_database_conversations(session_id)
        self._backup_to_hf_dataset()

    def reset_conversation_state(self, session_id=None):
        """Reset conversation state for a session"""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id in self._states:
                self._states[session_id].update({
                    'chat_history': [],
                    'conversation_state': [],
                    'last_accessed': datetime.now()
                })
                self._save_to_database_conversations(session_id)

    def get_all_sessions(self):
        """Get all active sessions (for analytics)"""
        self._cleanup_old_states()
        with self._lock:
            return list(self._states.keys())

    # ========================================================================
    # ANALYTICS STATE METHODS
    # ========================================================================

    def get_analytics_state(self, session_id=None):
        """Get analytics state for a session, creating defaults on first access."""
        if session_id is None:
            session_id = self.get_session_id()

        self._cleanup_old_states()

        with self._lock:
            if session_id not in self._analytics_cache:
                self._analytics_cache[session_id] = {
                    'project_stats': {
                        "total_conversations": None,
                        "avg_session_length": None,
                        "success_rate": None,
                        "model_type": "Phi-3-mini (Fine-tuned)",
                        "last_updated": None
                    },
                    'recent_interactions': [],
                    'dashboard_html': None,
                    'last_refresh': None,
                    'export_history': [],
                    'database_status': 'unknown',
                    'error_state': None,
                    'last_accessed': datetime.now()
                }
            else:
                self._analytics_cache[session_id]['last_accessed'] = datetime.now()

            return self._analytics_cache[session_id].copy()

    def update_analytics_state(self, project_stats=None, recent_interactions=None,
                               dashboard_html=None, error_state=None, session_id=None):
        """Update analytics state for a session; only non-None fields are touched."""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id not in self._analytics_cache:
                self._analytics_cache[session_id] = {}

            current_time = datetime.now()

            if project_stats is not None:
                self._analytics_cache[session_id]['project_stats'] = project_stats.copy()
                self._analytics_cache[session_id]['last_refresh'] = current_time

            if recent_interactions is not None:
                self._analytics_cache[session_id]['recent_interactions'] = recent_interactions.copy()

            if dashboard_html is not None:
                self._analytics_cache[session_id]['dashboard_html'] = dashboard_html

            if error_state is not None:
                self._analytics_cache[session_id]['error_state'] = error_state

            self._analytics_cache[session_id]['last_accessed'] = current_time

            self._save_to_database_analytics(session_id)
            self._backup_to_hf_dataset()

    def add_export_record(self, export_type, filename, success=True, session_id=None):
        """Add export record to analytics state (history capped at 20 entries)."""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id not in self._analytics_cache:
                # Re-entrant call: get_analytics_state re-acquires self._lock,
                # which is why the lock must be an RLock.
                self.get_analytics_state(session_id)

            export_record = {
                'timestamp': datetime.now().isoformat(),
                'type': export_type,
                'filename': filename,
                'success': success
            }

            if 'export_history' not in self._analytics_cache[session_id]:
                self._analytics_cache[session_id]['export_history'] = []

            self._analytics_cache[session_id]['export_history'].append(export_record)

            # Keep only the 20 most recent exports.
            if len(self._analytics_cache[session_id]['export_history']) > 20:
                self._analytics_cache[session_id]['export_history'] = \
                    self._analytics_cache[session_id]['export_history'][-20:]

            self._save_to_database_analytics(session_id)

    # ========================================================================
    # ML MODEL CACHE METHODS
    # ========================================================================

    def get_ml_model_cache(self, model_type: str = "prompt_classifier"):
        """Get cached ML model entry, or None if not cached."""
        with self._lock:
            return self._ml_models_cache.get(model_type, None)

    def cache_ml_model(self, model, model_type: str = "prompt_classifier", metadata: dict = None):
        """Cache a trained ML model"""
        with self._lock:
            self._ml_models_cache[model_type] = {
                'model': model,
                'cached_at': datetime.now(),
                'metadata': metadata or {},
                'access_count': 0
            }
            logger.info(f"ML model '{model_type}' cached successfully")

    # ========================================================================
    # EVALUATION STATE METHODS
    # ========================================================================

    def get_evaluation_state(self, session_id=None):
        """Get evaluation state for a session, creating defaults on first access."""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id not in self._evaluation_cache:
                self._evaluation_cache[session_id] = {
                    'educational_quality_scores': [],
                    'rag_performance_metrics': [],
                    'prompt_classification_accuracy': [],
                    'user_feedback_history': [],
                    'aggregate_metrics': {
                        'avg_educational_quality': 0.0,
                        'avg_rag_relevance': 0.0,
                        'classifier_accuracy_rate': 0.0,
                        'user_satisfaction_rate': 0.0
                    },
                    'evaluation_session_count': 0,
                    'last_updated': datetime.now()
                }

            return self._evaluation_cache[session_id].copy()

    def add_educational_quality_score(self, user_query: str, response: str, metrics: dict, session_id=None):
        """Add educational quality evaluation result and refresh aggregates."""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id not in self._evaluation_cache:
                # Re-entrant lock acquisition (see __init__ for why RLock).
                self.get_evaluation_state(session_id)

            quality_record = {
                'timestamp': datetime.now().isoformat(),
                'user_query': user_query[:100],
                'response_length': len(response),
                'semantic_quality': metrics.get('semantic_quality', 0.0),
                'educational_score': metrics.get('educational_score', 0.0),
                'response_time': metrics.get('response_time', 0.0),
                'overall_score': (metrics.get('semantic_quality', 0.0) + metrics.get('educational_score', 0.0)) / 2
            }

            self._evaluation_cache[session_id]['educational_quality_scores'].append(quality_record)
            self._update_aggregate_metrics(session_id)

    def add_prompt_classification_result(self, predicted_mode: str, was_successful: bool, metadata: dict = None, session_id=None):
        """Add prompt classification accuracy result and refresh aggregates."""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id not in self._evaluation_cache:
                self.get_evaluation_state(session_id)

            classification_record = {
                'timestamp': datetime.now().isoformat(),
                'predicted_mode': predicted_mode,
                'was_successful': was_successful,
                'accuracy_score': 1.0 if was_successful else 0.0,
                'metadata': metadata or {}
            }

            self._evaluation_cache[session_id]['prompt_classification_accuracy'].append(classification_record)
            self._update_aggregate_metrics(session_id)

    def add_user_feedback(self, response_id: str, feedback_type: str, conversation_context: dict = None, session_id=None):
        """Add user feedback result and refresh aggregates."""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id not in self._evaluation_cache:
                self.get_evaluation_state(session_id)

            feedback_record = {
                'timestamp': datetime.now().isoformat(),
                'response_id': response_id,
                'feedback_type': feedback_type,
                # Anything other than an explicit thumbs_up counts as 0.
                'satisfaction_score': 1.0 if feedback_type == 'thumbs_up' else 0.0,
                'conversation_context': conversation_context or {}
            }

            self._evaluation_cache[session_id]['user_feedback_history'].append(feedback_record)
            self._update_aggregate_metrics(session_id)

    def _update_aggregate_metrics(self, session_id: str):
        """Recompute aggregate metrics for a session. Caller must hold the lock."""
        eval_state = self._evaluation_cache[session_id]

        if eval_state['educational_quality_scores']:
            avg_educational = sum(score['overall_score'] for score in eval_state['educational_quality_scores']) / len(eval_state['educational_quality_scores'])
            eval_state['aggregate_metrics']['avg_educational_quality'] = avg_educational

        if eval_state['prompt_classification_accuracy']:
            accuracy_rate = sum(result['accuracy_score'] for result in eval_state['prompt_classification_accuracy']) / len(eval_state['prompt_classification_accuracy'])
            eval_state['aggregate_metrics']['classifier_accuracy_rate'] = accuracy_rate

        if eval_state['user_feedback_history']:
            satisfaction_rate = sum(feedback['satisfaction_score'] for feedback in eval_state['user_feedback_history']) / len(eval_state['user_feedback_history'])
            eval_state['aggregate_metrics']['user_satisfaction_rate'] = satisfaction_rate

        eval_state['last_updated'] = datetime.now()
        eval_state['evaluation_session_count'] += 1

    def get_evaluation_summary(self, session_id=None, include_history: bool = False):
        """Get evaluation summary for analytics; optionally last-10 histories."""
        if session_id is None:
            session_id = self.get_session_id()

        eval_state = self.get_evaluation_state(session_id)

        summary = {
            'aggregate_metrics': eval_state['aggregate_metrics'],
            'total_evaluations': {
                'educational_quality': len(eval_state['educational_quality_scores']),
                'classification_accuracy': len(eval_state['prompt_classification_accuracy']),
                'user_feedback': len(eval_state['user_feedback_history'])
            },
            'last_updated': eval_state['last_updated'],
            'session_evaluation_count': eval_state['evaluation_session_count']
        }

        if include_history:
            summary['history'] = {
                'recent_educational_scores': eval_state['educational_quality_scores'][-10:],
                'recent_classification_results': eval_state['prompt_classification_accuracy'][-10:],
                'recent_user_feedback': eval_state['user_feedback_history'][-10:]
            }

        return summary

    # ========================================================================
    # UTILITY METHODS
    # ========================================================================

    def get_cache_status(self, session_id=None):
        """Get cache status for debugging"""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            analytics_cached = session_id in self._analytics_cache
            conversation_cached = session_id in self._states

            cache_info = {
                'session_id': session_id,
                'analytics_cached': analytics_cached,
                'conversation_cached': conversation_cached,
                'total_analytics_sessions': len(self._analytics_cache),
                'total_conversation_sessions': len(self._states),
                'prompt_state_active_count': len(self._prompt_state_manager.get_active_prompts())
            }

            if analytics_cached:
                analytics_state = self._analytics_cache[session_id]
                cache_info['analytics_last_refresh'] = analytics_state.get('last_refresh')
                cache_info['analytics_has_data'] = bool(analytics_state.get('project_stats', {}).get('total_conversations'))

            if conversation_cached:
                conversation_state = self._states[session_id]
                cache_info['conversation_length'] = len(conversation_state.get('conversation_state', []))
                cache_info['chat_history_length'] = len(conversation_state.get('chat_history', []))

            return cache_info

    def reset_analytics_state(self, session_id=None):
        """Reset analytics state for a session"""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id in self._analytics_cache:
                del self._analytics_cache[session_id]

    def clear_all_states(self):
        """Clear all states - use with caution"""
        with self._lock:
            self._states.clear()
            self._analytics_cache.clear()
            self._ml_models_cache.clear()
            self._evaluation_cache.clear()
            self._prompt_state_manager.reset()
            logger.info("All global states cleared")
styles.css ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/* ============================
   FONT IMPORT
   ============================ */
/* Moved to the top of the stylesheet: per the CSS spec, @import rules are
   only valid before all other statements (except @charset). Placed after
   the :root/@media rules, conforming browsers ignore the import and the
   Oswald font never loads. */
@import url('https://fonts.googleapis.com/css2?family=Oswald:wght@200..700&display=swap');

/* ============================
   GLOBAL THEME & VARIABLES
   ============================ */
:root {
    /* Text Colors */
    --primarytext-color: #1a1a1a;
    --secondarytext-color: #555;

    /* Primary Colors */
    --primary-dark: #345da8;
    --primary-light: #a8b5c9;

    /* Secondary Colors */
    --secondary-dark: #063d80;
    --secondary-light: #6ea1fa;

    /* Chat & Container Colors */
    --chathistory_area: #f0f1f4;
    --container-color: #f5f6f8;
    --Send: #6ea1fa;
    --Send-hover: #87d0d5;
    --clear: #b2b8c2;
    --clear-hover: #2c5be0;
    --text_areabackground: #fafafa;

    /* Chat Bubble Colors */
    --bot-bubble-color: #b9c8e3;
    --user-bubble-color: #e3eaf6;

    /* Scrollbar Colors */
    --scrollbar-bg: #d0d3d8;
    --scrollbar-thumb: #a2a6ad;
    --scrollbar-thumb-hover: #888d94;

    /* Border & Radius */
    --border-thin: 1px;
    --border-medium: 2px;
    --border-default: 1px;
    --border-focus: 2px;
    --border-hover: 3px;
    --button-border: 2px;
    --radius-sm: 4px;
    --radius-md: 6px;
}

/* ============================
   DARK MODE THEME (SOFTER)
   ============================ */
@media (prefers-color-scheme: dark) {
    :root {
        --primarytext-color: #f8f8f8;
        --secondarytext-color: #d0d3d8;

        --primary-dark: #27477d;
        --primary-light: #7d8da9;

        --secondary-dark: #042a59;
        --secondary-light: #5e88d6;

        --chathistory_area: #202327;
        --container-color: #1b1d20;
        --Send: #5e88d6;
        --Send-hover: #7ac4c9;
        --clear: #7a7f88;
        --clear-hover: #5e88d6;
        --text_areabackground: #25282c;

        --bot-bubble-color: #425575;
        --user-bubble-color: #566583;

        --scrollbar-bg: #2b2e33;
        --scrollbar-thumb: #4b4f56;
        --scrollbar-thumb-hover: #5e636b;
    }
}

/* ============================
   BASE STYLING
   ============================ */
body {
    background: var(--text_areabackground);
    color: var(--primarytext-color);
    font-family: "Oswald", sans-serif;
    margin: 0;
}

/* NOTE(review): forcing color/font on every element with !important makes
   later per-element overrides hard — confirm this blanket rule is intended. */
* {
    color: var(--primarytext-color) !important;
    font-family: "Oswald", sans-serif !important;
    box-sizing: border-box;
}
94
+
95
/* ----------------------------------------------------------------
   Custom scrollbar (WebKit-only ::-webkit-scrollbar pseudo-elements)
   ---------------------------------------------------------------- */
::-webkit-scrollbar { width: 12px; }

::-webkit-scrollbar-track { background: var(--scrollbar-bg); }

::-webkit-scrollbar-thumb {
    border: 2px solid var(--scrollbar-bg);
    border-radius: 6px;
    background-color: var(--scrollbar-thumb);
}

::-webkit-scrollbar-thumb:hover { background-color: var(--scrollbar-thumb-hover); }
115
+
116
/* ----------------------------------------------------------------
   Gradio container & layout: full-viewport flex column, page scroll
   suppressed so only the chat area scrolls
   ---------------------------------------------------------------- */
.gradio-container,
[data-testid="block-container"],
.contain {
    display: flex !important;
    flex-direction: column !important;
    height: 100vh !important;
    max-height: 100vh !important;
    overflow: hidden !important;
    background-color: var(--container-color) !important;
    font-family: "Oswald", sans-serif !important;
}

/* Header bar with a primary-colour underline */
.title-header {
    display: flex;
    align-items: center;
    height: 60px !important;
    padding: 10px;
    background-color: transparent;
    border-bottom: var(--border-focus) solid var(--primary-dark);
}

.title-header h1 {
    margin: 0;
    font-size: 3.5rem;
    font-weight: 700;
    color: var(--primarytext-color);
}
149
+
150
/* ----------------------------------------------------------------
   Chat history: the scrollable flex child between header and input
   ---------------------------------------------------------------- */
#main-chatbot,
[data-testid="chatbot"],
.gradio-chatbot,
[role="log"] {
    flex: 1 !important;
    overflow-y: auto !important;
    margin: 15px 20px !important;
    padding: 15px !important;
    border: var(--border-default) solid var(--primary-dark) !important;
    border-radius: var(--radius-md) !important;
    background-color: var(--chathistory_area) !important;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1) !important;
}

/* Message input box */
textarea,
.gradio-textbox textarea {
    padding: 10px !important;
    resize: none !important;
    color: var(--primarytext-color) !important;
    background-color: var(--text_areabackground) !important;
    border: var(--border-default) solid var(--secondary-dark) !important;
    border-radius: var(--radius-md) !important;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
}

/* Focus ring in the secondary accent colour */
textarea:focus {
    border-color: var(--secondary-light) !important;
    box-shadow: 0 0 0 var(--border-focus) rgba(96, 165, 250, 0.2) !important;
}
185
+
186
/* ----------------------------------------------------------------
   Action buttons — shared chrome, per-button background colours
   (grouped selector keeps specificity identical to the originals)
   ---------------------------------------------------------------- */
button.send-button,
button.clear-button {
    width: 100%;
    padding: 8px 16px !important;
    font-weight: 600 !important;
    color: var(--primarytext-color) !important;
    border: var(--button-border) solid var(--secondary-dark) !important;
    border-radius: var(--radius-md) !important;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

button.send-button { background-color: var(--Send) !important; }
button.send-button:hover { background-color: var(--Send-hover) !important; }

button.clear-button { background-color: var(--clear) !important; }
button.clear-button:hover { background-color: var(--clear-hover) !important; }
218
+
219
/* ----------------------------------------------------------------
   Chat bubbles: strip Gradio's default message chrome, then paint
   rounded bubbles with CSS-triangle "tails" per sender
   ---------------------------------------------------------------- */
.message.user,
.message.bot {
    margin: 0 !important;
    padding: 0 !important;
    background: none !important;
    border: none !important;
    box-shadow: none !important;
}

.message-row {
    display: flex;
    margin: 8px 12px;
}

.message.panel-full-width {
    position: relative;
    min-width: 240px;
    max-width: 80%;
    padding: 14px 20px !important;
    border-radius: 18px !important;
    box-shadow: none !important;
    line-height: 1.5;
    word-wrap: break-word;
}

/* Bot: left-aligned bubble, tail pointing left */
.message-row.bot-row .message.panel-full-width {
    margin-left: 0;
    margin-right: auto;
    background-color: var(--bot-bubble-color) !important;
    color: var(--primarytext-color) !important;
}

.message-row.bot-row .message.panel-full-width::before {
    content: "";
    position: absolute;
    top: 12px;
    left: -10px;
    width: 0;
    height: 0;
    /* transparent top/bottom + coloured right border = left-pointing triangle */
    border-top: 10px solid transparent;
    border-right: 10px solid var(--bot-bubble-color);
    border-bottom: 10px solid transparent;
}

/* User: right-aligned bubble, tail pointing right */
.message-row.user-row .message.panel-full-width {
    margin-left: auto;
    margin-right: 0;
    background-color: var(--user-bubble-color) !important;
    color: var(--primarytext-color) !important;
}

.message-row.user-row .message.panel-full-width::before {
    content: "";
    position: absolute;
    top: 12px;
    right: -10px;
    width: 0;
    height: 0;
    border-top: 10px solid transparent;
    border-left: 10px solid var(--user-bubble-color);
    border-bottom: 10px solid transparent;
}

/* ----------------------------------------------------------------
   Responsive: wider bubbles on narrow screens
   ---------------------------------------------------------------- */
@media (max-width: 768px) {
    .message.panel-full-width {
        max-width: 85%;
    }
}
295
+
296
/* ----------------------------------------------------------------
   Footer: pin Gradio's built-in footer (settings / API links) to the
   bottom of the viewport and restore its visibility.
   NOTE(review): ".svelte-czcr5b" is a build-generated Svelte class hash
   and will break on a Gradio upgrade — confirm no stabler hook exists.
   ---------------------------------------------------------------- */
footer.svelte-czcr5b {
    position: fixed !important;
    bottom: 0 !important;
    left: 0 !important;
    right: 0 !important;
    z-index: 1000 !important;
    display: flex !important;
    align-items: center !important;
    justify-content: center !important;
    gap: 12px !important;
    min-height: 36px !important;
    padding: 8px 16px !important;
    visibility: visible !important;
    background-color: var(--container-color) !important;
    backdrop-filter: blur(5px) !important;
    border-top: var(--border-default) solid rgba(0, 0, 0, 0.12) !important;
}

/* Footer links/buttons: quiet until hovered */
footer.svelte-czcr5b a,
footer.svelte-czcr5b button,
footer.svelte-czcr5b span {
    cursor: pointer !important;
    color: var(--secondarytext-color) !important;
    font-size: 12px !important;
    font-family: "Oswald", sans-serif !important;
    text-decoration: none !important;
    background: none !important;
    border: none !important;
    opacity: 0.8;
    transition: opacity 0.15s ease;
}

footer.svelte-czcr5b a:hover,
footer.svelte-czcr5b button:hover,
footer.svelte-czcr5b span:hover {
    opacity: 1;
    color: var(--primarytext-color) !important;
}

/* Divider between footer links */
footer.svelte-czcr5b .divider {
    margin: 0 6px !important;
    color: var(--secondarytext-color) !important;
    opacity: 0.5;
}

/* Keep footer items from collapsing */
footer.svelte-czcr5b > * {
    display: inline-flex !important;
    align-items: center !important;
}