jdesiree commited on
Commit
d8b2b50
·
0 Parent(s):

Update requirements.txt

Browse files
.gitattributes ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ loading_animation.gif.gif filter=lfs diff=lfs merge=lfs -text
37
+ loading_animation.gif filter=lfs diff=lfs merge=lfs -text
.huggingface/config.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # .huggingface/config.yaml
2
+ # Pre-download models during build
3
+ models:
4
+ - jdesiree/Mimir-Phi-3.5
5
+ - microsoft/Phi-3-mini-4k-instruct
6
+ - microsoft/Phi-3-mini-128k-instruct
7
+ - thenlper/gte-small
8
+
9
+ # Linked models
LightEval_Mimir.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LightEval_Mimir.py
2
+ '''This module outlines the LightEval setup for tracking performance metrics of Mimir, to be sent to the trackio page for visualization.'''
3
+
4
+ # Imports
5
+ from lighteval.metrics.metrics_sample import BertScore, ROUGE
6
+ from lighteval.tasks.requests import Doc
7
+
8
async def evaluate_educational_quality(user_query, response, thread_id, start_time=None):
    """Evaluate one conversational turn with LightEval-style metrics.

    Args:
        user_query: The student's question for this turn.
        response: The model's generated answer text.
        thread_id: Conversation thread identifier, used to name the ephemeral task.
        start_time: Optional ``time.time()`` captured when generation began.
            Backward-compatible addition: the original body referenced an
            undefined ``start_time`` (a guaranteed NameError); callers that
            want latency reporting now pass it explicitly.

    Returns:
        dict with 'semantic_quality', 'educational_score', and 'response_time'
        (None when no start_time was supplied).
    """
    import time  # fix: `time` is not imported at module level

    # Ephemeral single-turn task; gold_index=-1 because there is no ground truth yet.
    doc = Doc(
        task_name=f"turn_{thread_id}",
        query=user_query,
        choices=[response],
        gold_index=-1,
        specific_output=response,
    )

    # Semantic quality via BERTScore over the single candidate response.
    bert_score = BertScore().compute(doc)

    # Lightweight heuristic signals for pedagogical quality.
    lowered = response.lower()
    educational_indicators = {
        'has_examples': 'example' in lowered,
        'structured_explanation': '##' in response or '1.' in response,
        'appropriate_length': 100 < len(response) < 1500,
        'encourages_learning': any(phrase in lowered
                                   for phrase in ['practice', 'try', 'consider', 'think about']),
    }

    return {
        'semantic_quality': bert_score,
        'educational_score': sum(educational_indicators.values()) / len(educational_indicators),
        # None when the caller did not supply a generation start timestamp.
        'response_time': (time.time() - start_time) if start_time is not None else None,
    }
36
+
37
def track_rag_performance(query, retrieved_docs, used_in_response):
    """Evaluate RAG retrieval quality and ship the result to trackio.

    Args:
        query: The user query that triggered retrieval.
        retrieved_docs: Documents returned by the retriever; each is assumed
            to expose a ``.metadata`` mapping with a 'source' key — TODO confirm.
        used_in_response: Subset of retrieved docs actually used in the answer.
    """
    import uuid  # fix: uuid was referenced below but never imported (NameError)

    # Retrieval-to-response alignment.
    retrieval_relevance = calculate_relevance(query, retrieved_docs)
    # Guard against empty retrieval to avoid ZeroDivisionError.
    retrieval_usage = len(used_in_response) / len(retrieved_docs) if retrieved_docs else 0

    # LightEval-shaped payload for the trackio dashboard.
    metric_payload = {
        "evaluation_id": str(uuid.uuid4()),
        "task": "rag_retrieval",
        "metrics": {
            "retrieval_relevance": retrieval_relevance,
            "retrieval_usage_rate": retrieval_usage,
            "num_docs_retrieved": len(retrieved_docs),
        },
        "metadata": {
            "query": query[:100],  # truncated to keep the payload small
            "sources": [doc.metadata.get('source') for doc in retrieved_docs],
        },
    }

    send_evaluation_to_trackio(metric_payload)
61
+
62
def evaluate_prompt_classification(predicted_mode, actual_conversation_outcome, thread_id):
    """Track prompt-classifier accuracy in production.

    Checks whether the mode the classifier predicted led to a successful
    interaction, judged by per-mode heuristics over the outcome dict.

    Args:
        predicted_mode: Mode the classifier chose ('discovery_mode',
            'teaching_mode', 'conversational', or anything else).
        actual_conversation_outcome: Outcome dict for the conversation;
            keys read: 'clarified_topic', 'quality_score', 'user_satisfied'.
        thread_id: Conversation thread identifier, echoed in the result.

    Returns:
        dict with 'prompt_classifier_accuracy' (1.0 or 0.0),
        'predicted_mode', and 'thread_id'.
    """
    # Per-mode success criteria.
    success_indicators = {
        'discovery_mode': lambda outcome: 'clarified_topic' in outcome,
        'teaching_mode': lambda outcome: outcome.get('quality_score', 0) > 3.5,
        'conversational': lambda outcome: outcome.get('user_satisfied', False),
    }

    # Unknown modes default to "correct" so new modes are not penalized.
    mode_was_correct = success_indicators.get(
        predicted_mode,
        lambda outcome: True,
    )(actual_conversation_outcome)

    # Fix: the original mixed a `Metrics.ACCURACY` enum object with the
    # integer 0; use a plain numeric accuracy instead.
    accuracy_metric = 1.0 if mode_was_correct else 0.0

    return {
        "prompt_classifier_accuracy": accuracy_metric,
        "predicted_mode": predicted_mode,
        # Fix: the original read undefined `conversation_state` (NameError on
        # every call); report the thread id instead.
        "thread_id": thread_id,
    }
86
+
87
def process_user_feedback(response_id, feedback_type, conversation_state):
    """Convert explicit user feedback into LightEval-style ground truth.

    Assumes the last two entries of conversation_state are the user's
    question and the agent's reply, in that order.
    """
    from lighteval.tasks.requests import Doc

    # Pull the most recent question/answer pair out of the transcript.
    query_text = conversation_state[-2]["content"]
    response_text = conversation_state[-1]["content"]
    gold = 0 if feedback_type == "thumbs_up" else -1

    # Retrospective evaluation document, now that we have a label.
    doc = Doc(
        task_name="user_feedback_eval",
        query=query_text,
        choices=[response_text],
        gold_index=gold,
    )

    # A thumbs-up counts as a correct response for accuracy purposes.
    accuracy = 1.0 if feedback_type == "thumbs_up" else 0.0

    return {"user_feedback_accuracy": accuracy, "response_id": response_id}
109
+
README.md ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Mimir
3
+ emoji: 📚
4
+ colorFrom: indigo
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 5.47.0
8
+ app_file: app.py
9
+ pinned: true
10
+ python_version: '3.10'
11
+ short_description: Advanced prompt engineering for educational AI systems.
12
+ thumbnail: >-
13
+ https://cdn-uploads.huggingface.co/production/uploads/68700e7552b74a1dcbb2a87e/Z7P8DJ57rc5P1ozA5gwp3.png
14
+ hardware: zero-gpu-dynamic
15
+ hf_oauth: true
16
+ hf_oauth_expiration_minutes: 120
17
+ ---
18
+
19
+ # Mimir: Educational AI Assistant
20
+ ## Advanced Prompt Engineering Portfolio Project
21
+
22
+ ### Project Overview
23
+ Mimir demonstrates sophisticated prompt engineering techniques applied to educational technology, showcasing the implementation of context-aware AI systems that prioritize pedagogical effectiveness over simple answer generation. A key feature is its ability to **dynamically generate custom data visualizations**, determined by an intelligent decision engine that assesses whether a visual aid will enhance the pedagogical explanation. This project exemplifies professional-grade prompt design for educational applications, embodying the role of an educational partner that guides students to discover answers for themselves.
24
+
25
+ ***
26
+
27
+ ### Technical Architecture
28
+ **Core Technologies:**
29
+
30
+ * **LangChain**: Prompt template management and conversation chain orchestration.
31
+ * **LangGraph**: Orchestrates the application's flow as a state machine (**StateGraph**). It manages the conditional logic for the tool-use decision engine, routing user queries between the LLM, a pre-built **ToolNode** for graph generation, and the final response node.
32
+ * **Gradio**: Full-stack web interface with custom CSS styling.
33
+ * **Hugging Face Inference API**: Model deployment and response generation.
34
+ * **Python**: Backend logic and integration layer.
35
+ * **Matplotlib**: Powers the dynamic, in-memory generation of educational graphs and charts.
36
+
37
+ **Key Frameworks:**
38
+
39
+ * `langchain.prompts.ChatPromptTemplate` for dynamic prompt construction.
40
+ * `langchain_huggingface.HuggingFaceEndpoint` for model interface.
41
+ * `langchain.schema` message objects (HumanMessage, AIMessage, SystemMessage).
42
+ * `langgraph.graph.StateGraph` & `langgraph.prebuilt.ToolNode` for building and executing the conditional logic graph.
43
+ * `langgraph.checkpoint.memory.MemorySaver` for persistent conversation state.
44
+
45
+ ***
46
+
47
+ ### Prompt Engineering Techniques Demonstrated
48
+ #### 1. Unified System Prompt Architecture
49
+ Employs a single, comprehensive system prompt that establishes the AI's core persona as **Mimir, an expert multi-concept tutor**. This foundational prompt meticulously defines the AI's behavior, tone, and pedagogical mission. It integrates:
50
+
51
+ * **Core Educational Principles**: A directive to prioritize teaching methodology, foster critical thinking, and provide comprehensive explanations over direct answers.
52
+ * **Defined Persona & Tone**: Specific instructions to maintain an engaging, supportive, and intellectually appropriate tone for high school students, while avoiding fluff and emojis.
53
+ * **Specific Response Guidelines**: Contextual rules for handling different academic tasks, such as explaining concepts in math problems instead of solving them, or discussing research strategies for essays rather than writing them.
54
+
55
+ #### 2. Instructional Design Integration
56
+ The core prompt incorporates evidence-based instructional design principles:
57
+
58
+ * **Scaffolding**: Breaking complex concepts into manageable components.
59
+ * **Socratic Method**: Guiding discovery rather than providing direct answers.
60
+ * **Metacognitive Strategies**: Teaching learning-how-to-learn approaches.
61
+
62
+ #### 3. Academic Integrity Constraints
63
+ Implemented ethical AI guidelines directly into the system prompt:
64
+
65
+ * Explicit instructions to avoid homework completion.
66
+ * Focus on **process over product delivery**.
67
+ * Critical thinking skill development emphasis.
68
+
69
+ #### 4. Two-Stage Tool-Use Prompting
70
+ A sophisticated two-stage prompting strategy governs the use of the `Create_Graph_Tool`:
71
+
72
+ * **Tool-Use Decision Prompt**: A highly-constrained template is used by the `Tool_Decision_Engine` to determine if a tool should be used. This prompt forces a **YES** or **NO** response based on whether a visual aid would significantly enhance learning, using explicit **INCLUDE** and **EXCLUDE** criteria.
73
+ * **Tool-Execution Guidance**: The main system prompt contains separate, explicit instructions on how to use the tool once the decision has been made. It provides the exact **JSON structure** the model must output, including fields like `data`, `plot_type`, and `educational_context`, ensuring the generated graphs are pedagogically sound.
74
+
75
+ ***
76
+
77
+ ### Advanced Implementation Features
78
+ #### Intelligent Graphing Tool Integration
79
+ A custom, dynamic visualization system was developed to provide multi-modal educational responses.
80
+
81
+ * **LLM-Powered Analysis**: For relevant queries, a targeted LLM call is made using the specialized YES/NO decision prompt.
82
+ * **Dynamic Visualization Tool (`Create_Graph_Tool`)**: Designed and implemented a custom visualization tool using **matplotlib**. The tool receives a JSON configuration from the LLM and generates high-quality bar, line, or pie charts. The entire process occurs in-memory:
83
+ * The plot is rendered into a `BytesIO` buffer.
84
+ * The image is encoded into a **base64 string**.
85
+ * The final output is an HTML `<img>` tag with the embedded base64 data, which is displayed directly in the chat interface, eliminating the need for file I/O.
86
+ * The tool's docstring provides a clear schema and usage instructions for the LLM, ensuring reliable and pedagogically sound visualizations.
87
+
88
+ #### Stateful Conversation Management with LangGraph
89
+ Implements persistent, multi-turn conversations using LangGraph's **MemorySaver**. This allows the application's state, including the full message history (`add_messages`), to be saved and resumed, ensuring robust context management even when tool use is involved.
90
+
91
+ #### Response Streaming & Truncation
92
+ * Smart text truncation preserving sentence integrity.
93
+ * Real-time response streaming for improved UX.
94
+ * Error handling and fallback mechanisms.
95
+
96
+ #### Template Chaining Architecture
97
+ The core logic utilizes **LangChain Expression Language (LCEL)** to pipe inputs through templates, models, and tools.
98
+
99
+ ***
100
+
101
+ ### User Interface Engineering
102
+ * **Gradio Layout & Custom Styling**: The interface is built with `gr.Blocks`, using `gr.Column` and `gr.Row` to structure the main components. A custom `styles.css` file is loaded to apply specific theming, responsive design, and layout rules, moving beyond default Gradio styling for a tailored user experience.
103
+ * **Component Architecture**: Modular Gradio component structure with custom CSS class integration and accessibility-compliant patterns.
104
+
105
+ ***
106
+
107
+ ### Prompt Engineering Methodologies Applied
108
+ * **Template Parameterization**: Dynamic variable injection for contextual responses.
109
+ * **Persona-Driven Response Generation**: Crafting a detailed persona within the system prompt to guide the AI's tone, style, and pedagogical approach consistently.
110
+ * **Domain-Specific Language Modeling**: Educational vocabulary and pedagogical terminology integration.
111
+ * **Multi-Modal Response Formatting**: Structured output generation with educational formatting.
112
+ * **Agentic Tool Routing**: Designing prompts and logic that enable an AI system to intelligently decide which tool is appropriate for a given task, simulating agent-like behavior.
113
+
114
+ ***
115
+
116
+ ### Professional Applications
117
+ This project demonstrates competency in:
118
+
119
+ * **Enterprise-Grade Prompt Design**: Scalable template and tool-use architecture.
120
+ * **Educational Technology Integration**: Designing AI tutors with robust pedagogical frameworks and dynamic, multi-modal response capabilities.
121
+ * **Ethical AI Implementation**: Academic integrity safeguards and responsible AI practices.
122
+ * **Full-Stack AI Application Development**: End-to-end system implementation.
123
+ * **Intelligent Agent & Tool Development**: Building AI agents that can utilize custom tools to solve complex problems.
124
+
125
+ ***
126
+
127
+ ### Technical Specifications
128
+ **Dependencies:**
129
+
130
+ * **Core ML/AI**: `transformers`, `torch`, `accelerate`
131
+ * **LangChain & LangGraph**: `langgraph`, `langchain-core`, `langchain-community`, `langchain-huggingface`
132
+ * **UI Framework**: `gradio`
133
+ * **Visualization**: `matplotlib`, `plotly`, `pandas`, `numpy`, `scipy`
134
+ * **Utilities**: `python-dotenv`
135
+ * **Monitoring**: `langsmith` (Optional)
136
+
137
+ **Deployment:**
138
+
139
+ * Hugging Face Spaces compatible.
140
+ * Environment variable configuration for API keys.
141
+ * Production-ready error handling and logging.
142
+
143
+ ***
144
+
145
+ ### Results & Impact
146
+ Mimir represents a synthesis of prompt engineering best practices with educational technology requirements. The integration of an intelligent, conditional graphing tool demonstrates the ability to create AI systems that augment and enhance human learning processes, embodying the role of an educational partner who empowers students to succeed through genuine understanding.
147
+
148
+ > **Portfolio Demonstration**: This project evidences advanced prompt engineering capabilities, full-stack AI application development, and domain-specific AI system design suitable for enterprise educational technology environments.
agents.py ADDED
@@ -0,0 +1,1021 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # agents.py
2
+ """
3
+ Unified agent architecture for Mimir Educational AI Assistant.
4
+
5
+ Components:
6
+ - ToolDecisionAgent: Determines visualization tool necessity
7
+ - PromptRoutingAgents: 4 decision agents for library_state management
8
+ - ThinkingAgents: Preprocessing agents for complex reasoning
9
+ - ResponseAgent: Main educational response generation (Phi3)
10
+
11
+ All agents use proper LangChain SystemMessage/HumanMessage architecture.
12
+ """
13
+
14
import logging
import os
import re
import time

import torch
18
+
19
+ # Setup main logger first
20
+ logging.basicConfig(level=logging.INFO)
21
+ logger = logging.getLogger(__name__)
22
+
23
def log_step(step_name, start_time=None):
    """Log the start or completion of a pipeline step.

    Returns the current time so callers can pass it back as ``start_time``
    on the matching completion call to get a duration logged.
    NOTE(review): relies on module-level ``time``/``datetime`` imports being
    present — confirm `time` is imported at the top of this file.
    """
    now = time.time()
    stamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]

    if not start_time:
        logger.info(f"[{stamp}] → {step_name} starting...")
    else:
        logger.info(f"[{stamp}] ✓ {step_name} completed in {now - start_time:.2f}s")

    return now
35
+
36
+ agent = None
37
+
38
+ from typing import Dict, List, Optional, Tuple, Type
39
+ from datetime import datetime
40
+ import warnings
41
+
42
+ # Transformers for standard models
43
+ from transformers import (
44
+ AutoTokenizer,
45
+ AutoModelForCausalLM,
46
+ BitsAndBytesConfig,
47
+ )
48
+
49
# For GGUF model loading: llama-cpp-python is optional; flag its presence.
LLAMA_CPP_AVAILABLE = True
try:
    from llama_cpp import Llama
except ImportError:
    LLAMA_CPP_AVAILABLE = False
    logging.warning("llama-cpp-python not available - GGUF models will not load")
56
+
57
# ZeroGPU support: on HF Spaces the real `spaces` package provides the GPU
# decorator; elsewhere we install a no-op stand-in so decorated code still runs.
try:
    import spaces
    HF_SPACES_AVAILABLE = True
except ImportError:
    HF_SPACES_AVAILABLE = False

    class DummySpaces:
        """No-op replacement so `@spaces.GPU(duration=...)` decorations work off-Spaces."""

        @staticmethod
        def GPU(duration=90):
            # Return the function unchanged — there is no GPU to reserve.
            def decorator(func):
                return func
            return decorator

    spaces = DummySpaces()
70
+
71
+ # Accelerate
72
+ from accelerate import Accelerator
73
+ from accelerate.utils import set_seed
74
+
75
+ # LangChain Core for proper message handling
76
+ from langchain_core.runnables import Runnable
77
+ from langchain_core.runnables.utils import Input, Output
78
+ from langchain_core.messages import SystemMessage, HumanMessage
79
+
80
+ # Import ALL prompts from prompt library
81
+ from prompt_library import (
82
+ # System prompts
83
+ CORE_IDENTITY,
84
+ TOOL_DECISION,
85
+ agent_1_system,
86
+ agent_2_system,
87
+ agent_3_system,
88
+ agent_4_system,
89
+
90
+ # Thinking agent system prompts
91
+ MATH_THINKING,
92
+ QUESTION_ANSWER_DESIGN,
93
+ REASONING_THINKING,
94
+
95
+ # Response agent prompts (dynamically applied)
96
+ VAUGE_INPUT,
97
+ USER_UNDERSTANDING,
98
+ GENERAL_FORMATTING,
99
+ LATEX_FORMATTING,
100
+ GUIDING_TEACHING,
101
+ STRUCTURE_PRACTICE_QUESTIONS,
102
+ PRACTICE_QUESTION_FOLLOWUP,
103
+ TOOL_USE_ENHANCEMENT,
104
+ )
105
+
106
+ CACHE_DIR = "/data/compiled_models"
107
+
108
+ from huggingface_hub import hf_hub_download
109
+
110
def check_model_cache() -> Dict[str, bool]:
    """Report which model artifacts already have pre-compiled cache markers."""
    # Marker filenames written by the build step, keyed by logical model name.
    markers = {
        "phi3": "PHI3_READY",
        "mistral_reasoning": "MISTRAL_REASONING_READY",
        "mistral_math_gguf": "MISTRAL_MATH_GGUF_READY",
        "rag_embeddings": "RAG_EMBEDDINGS_READY",
        "all_compiled": "COMPILED_READY",
    }
    cache_status = {
        name: os.path.exists(f"{CACHE_DIR}/{marker}")
        for name, marker in markers.items()
    }

    if cache_status["all_compiled"]:
        logger.info("✓ All models pre-compiled and cached")
    else:
        logger.warning("⚠️ Some models not pre-compiled - first load will be slower")

    return cache_status
126
+
127
+ # Call at module load:
128
+ _cache_status = check_model_cache()
129
+
130
+ logger = logging.getLogger(__name__)
131
+
132
+ # Suppress warnings
133
+ warnings.filterwarnings("ignore", category=UserWarning)
134
+ warnings.filterwarnings("ignore", category=FutureWarning)
135
+
136
+ # Model paths
137
+ MISTRAL_REASONING = "yentinglin/Mistral-Small-24B-Instruct-2501-reasoning"
138
+ MISTRAL_MATH_GGUF = "brittlewis12/Mistral-Small-24B-Instruct-2501-reasoning-GGUF"
139
+ FINE_TUNED_PHI3 = "jdesiree/Mimir-Phi-3.5"
140
+ BASE_PHI3 = "microsoft/Phi-3-mini-4k-instruct"
141
+
142
+ HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
143
+
144
def get_cached_gguf_path() -> Optional[str]:
    """Get GGUF model path from cache marker if available.

    NOTE(review): an almost identical function is re-defined later in this
    module and overrides this one at import time — delete one of the copies.
    """
    marker_file = f"{CACHE_DIR}/MISTRAL_MATH_GGUF_READY"
    if os.path.exists(marker_file):
        try:
            content = open(marker_file, 'r').read()
            # The marker stores a line like "GGUF model path: /path/to/model.gguf".
            if "GGUF model path:" in content:
                candidate = content.split("GGUF model path:")[-1].strip()
                if os.path.exists(candidate):
                    logger.info(f"Found cached GGUF model: {candidate}")
                    return candidate
        except Exception as e:
            logger.warning(f"Could not read GGUF cache marker: {e}")
    return None
160
+
161
def get_cached_gguf_path() -> Optional[str]:
    """Return the cached GGUF model path recorded in the cache marker, or None.

    The build step writes ``{CACHE_DIR}/MISTRAL_MATH_GGUF_READY`` containing a
    line of the form "GGUF model path: /path/to/model.gguf"; this parses that
    line and verifies the file still exists.

    Fix: this is a duplicate of a function defined earlier in the module; this
    copy (the one that wins at import time) is kept as the canonical version,
    and the broad ``except Exception`` is narrowed to ``OSError`` since only
    file I/O can fail here.
    """
    marker_file = f"{CACHE_DIR}/MISTRAL_MATH_GGUF_READY"
    if os.path.exists(marker_file):
        try:
            with open(marker_file, 'r') as f:
                content = f.read()
            if "GGUF model path:" in content:
                path = content.split("GGUF model path:")[-1].strip()
                if os.path.exists(path):
                    logger.info(f"✓ Found cached GGUF model at: {path}")
                    return path
        except OSError as e:
            logger.warning(f"Could not read GGUF cache marker: {e}")
    return None
176
+
177
+ # ============================================================================
178
+ # TOOL DECISION AGENT
179
+ # ============================================================================
180
+
181
class ToolDecisionAgent:
    """
    Determines if visualization tools are needed for a given query.

    Uses Mistral-Small-24B (4-bit NF4 quantized) with the TOOL_DECISION
    system prompt. The model is loaded lazily on first use.
    """

    def __init__(self):
        # Populated by _load_model() on first call.
        self.model = None
        self.tokenizer = None
        self.model_loaded = False
        logger.info("ToolDecisionAgent initialized (lazy loading)")

    @spaces.GPU(duration=50)
    def _load_model(self):
        """Load the Mistral decision model (4-bit NF4 quantized) on first use."""
        if self.model_loaded:
            return

        logger.info(f"Loading tool decision model: {MISTRAL_REASONING}")

        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )

        self.tokenizer = AutoTokenizer.from_pretrained(
            MISTRAL_REASONING,
            trust_remote_code=True,
            token=HF_TOKEN
        )

        self.model = AutoModelForCausalLM.from_pretrained(
            MISTRAL_REASONING,
            quantization_config=quantization_config,
            torch_dtype=torch.float16,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            token=HF_TOKEN,
            device_map="auto",
        )

        self.model_loaded = True
        logger.info("Tool decision model loaded successfully")

    @spaces.GPU(duration=50)
    def should_use_visualization(self, query: str) -> bool:
        """
        Determine if query requires visualization tools.
        Uses TOOL_DECISION as system prompt.

        Args:
            query: User's question/prompt

        Returns:
            bool: True if visualization needed, False otherwise
        """
        self._load_model()

        try:
            # Fix: removed a dead `messages` list (SystemMessage/HumanMessage
            # objects that were built but never used); the chat template below
            # is what the model actually consumes.
            formatted_prompt = self.tokenizer.apply_chat_template(
                [{"role": "system", "content": TOOL_DECISION},
                 {"role": "user", "content": f"Query: {query}"}],
                tokenize=False,
                add_generation_prompt=True
            )

            inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.model.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=10,
                    temperature=0.1,  # NOTE: ignored when do_sample=False (greedy decode)
                    do_sample=False,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            decision_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Keep only the text after the final "Decision:" marker, if present.
            decision_text = decision_text.split("Decision:")[-1].strip().upper()

            # Require an unambiguous YES; a stray "NO" anywhere vetoes it.
            result = "YES" in decision_text and "NO" not in decision_text
            logger.info(f"Tool decision for '{query[:50]}...': {'YES' if result else 'NO'}")

            return result

        except Exception as e:
            logger.error(f"Tool decision error: {e}")
            # Fallback to keyword check
            return any(kw in query.lower() for kw in ['graph', 'chart', 'plot', 'visualize'])
280
+
281
+
282
+ # ============================================================================
283
+ # PROMPT ROUTING AGENTS
284
+ # ============================================================================
285
+
286
+ class PromptRoutingAgents:
287
+ """
288
+ Four specialized agents for library_state decision-making.
289
+ All share a single Mistral-Small-24B model for efficiency.
290
+ Each uses its corresponding agent_X_system prompt as SystemMessage.
291
+ """
292
+
293
+ def __init__(self):
294
+ self.model = None
295
+ self.tokenizer = None
296
+ self.model_loaded = False
297
+ logger.info("PromptRoutingAgents initialized (lazy loading)")
298
+
299
+ @spaces.GPU(duration=50)
300
+ def _load_model(self):
301
+ """Load shared Mistral model on first use"""
302
+ if self.model_loaded:
303
+ return
304
+
305
+ logger.info(f"Loading routing agents model: {MISTRAL_REASONING}")
306
+
307
+ quantization_config = BitsAndBytesConfig(
308
+ load_in_4bit=True,
309
+ bnb_4bit_compute_dtype=torch.float16,
310
+ bnb_4bit_quant_type="nf4",
311
+ bnb_4bit_use_double_quant=True,
312
+ )
313
+
314
+ self.tokenizer = AutoTokenizer.from_pretrained(
315
+ MISTRAL_REASONING,
316
+ trust_remote_code=True,
317
+ token=HF_TOKEN
318
+ )
319
+
320
+ self.model = AutoModelForCausalLM.from_pretrained(
321
+ MISTRAL_REASONING,
322
+ quantization_config=quantization_config,
323
+ torch_dtype=torch.float16,
324
+ trust_remote_code=True,
325
+ low_cpu_mem_usage=True,
326
+ token=HF_TOKEN,
327
+ device_map="auto",
328
+ )
329
+
330
+ self.model_loaded = True
331
+ logger.info("Routing agents model loaded successfully")
332
+
333
+ def _run_agent(self, system_prompt: str, user_message: str, max_tokens: int = 50) -> str:
334
+ """Execute agent with system prompt and user message using LangChain format"""
335
+ self._load_model()
336
+
337
+ # Format using tokenizer's chat template
338
+ formatted_prompt = self.tokenizer.apply_chat_template(
339
+ [{"role": "system", "content": system_prompt},
340
+ {"role": "user", "content": user_message}],
341
+ tokenize=False,
342
+ add_generation_prompt=True
343
+ )
344
+
345
+ inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.model.device)
346
+
347
+ with torch.no_grad():
348
+ outputs = self.model.generate(
349
+ **inputs,
350
+ max_new_tokens=max_tokens,
351
+ temperature=0.1,
352
+ do_sample=True,
353
+ pad_token_id=self.tokenizer.eos_token_id
354
+ )
355
+
356
+ response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
357
+ # Extract assistant response (after the prompt)
358
+ if "<|assistant|>" in response:
359
+ response = response.split("<|assistant|>")[-1].strip()
360
+
361
+ return response
362
+
363
+ @spaces.GPU(duration=50)
364
+ def agent_1_practice_questions(self, user_input: str, recent_history: List) -> bool:
365
+ """
366
+ Agent 1: Determine if practice questions are needed.
367
+ Uses agent_1_system as SystemMessage.
368
+ """
369
+ # Format history
370
+ history_text = "\n".join([f"{msg.get('role', 'unknown')}: {msg.get('content', '')[:100]}"
371
+ for msg in recent_history[-4:]]) if recent_history else "No history"
372
+
373
+ # User message per redesign document format
374
+ user_message = f"""Current user input: {user_input}
375
+
376
+ Recent conversation:
377
+ {history_text}
378
+
379
+ Determine if practice questions should be provided:"""
380
+
381
+ # Use agent_1_system as system prompt
382
+ result = self._run_agent(agent_1_system, user_message, max_tokens=30)
383
+ decision = "STRUCTURE_PRACTICE_QUESTIONS" in result
384
+
385
+ logger.info(f"Agent 1 (practice questions): {decision}")
386
+ return decision
387
+
388
+ @spaces.GPU(duration=50)
389
+ def agent_2_discovery_mode(self, user_input: str) -> Optional[str]:
390
+ """
391
+ Agent 2: Detect if vague input or understanding check needed.
392
+ Uses agent_2_system as SystemMessage.
393
+
394
+ Returns:
395
+ - "VAUGE_INPUT" if input is unclear/ambiguous
396
+ - "USER_UNDERSTANDING" if checking student's knowledge level
397
+ - None if neither applies
398
+ """
399
+ # User message per redesign document format
400
+ user_message = f"""Student query: {user_input}
401
+
402
+ Classification:"""
403
+
404
+ # Use agent_2_system as system prompt
405
+ result = self._run_agent(agent_2_system, user_message, max_tokens=30)
406
+ result_upper = result.upper()
407
+
408
+ # Parse result per agent_2_system expected outputs
409
+ if "VAUGE_INPUT" in result_upper or "VAGUE" in result_upper:
410
+ if "USER_UNDERSTANDING" not in result_upper:
411
+ logger.info("Agent 2: VAUGE_INPUT detected")
412
+ return "VAUGE_INPUT"
413
+ else:
414
+ # Both detected - agent should return both
415
+ logger.info("Agent 2: Both detected (should not happen per prompt)")
416
+ return "VAUGE_INPUT" # Prioritize first
417
+ elif "USER_UNDERSTANDING" in result_upper:
418
+ logger.info("Agent 2: USER_UNDERSTANDING detected")
419
+ return "USER_UNDERSTANDING"
420
+ else:
421
+ logger.info("Agent 2: Neither condition detected")
422
+ return None
423
+
424
+ @spaces.GPU(duration=50)
425
+ def agent_3_followup_assessment(self, user_input: str, recent_history: List) -> bool:
426
+ """
427
+ Agent 3: Determine if practice question follow-up is needed.
428
+ Uses agent_3_system (formatted) as SystemMessage.
429
+ """
430
+ # Format history
431
+ history_text = "\n".join([f"{msg.get('role', 'unknown')}: {msg.get('content', '')[:100]}"
432
+ for msg in recent_history[-4:]]) if recent_history else "No history"
433
+
434
+ # User message per redesign document format
435
+ user_message = f"""Current user response: {user_input}
436
+
437
+ Recent conversation:
438
+ {history_text}
439
+
440
+ Is this a follow-up to a practice question?"""
441
+
442
+ # Format agent_3_system with STRUCTURE_PRACTICE_QUESTIONS placeholder
443
+ formatted_system = agent_3_system.format(
444
+ STRUCTURE_PRACTICE_QUESTIONS=STRUCTURE_PRACTICE_QUESTIONS
445
+ )
446
+
447
+ # Use formatted agent_3_system as system prompt
448
+ result = self._run_agent(formatted_system, user_message, max_tokens=20)
449
+ decision = "PRACTICE_QUESTION_FOLLOWUP" in result or "TRUE" in result.upper()
450
+
451
+ logger.info(f"Agent 3 (followup assessment): {decision}")
452
+ return decision
453
+
454
+ @spaces.GPU(duration=50)
455
+ def agent_4_teaching_mode(self, user_input: str, recent_history: List) -> Dict[str, bool]:
456
+ """
457
+ Agent 4: Assess teaching mode and practice structure needs.
458
+ Uses agent_4_system as SystemMessage.
459
+
460
+ Returns dict with:
461
+ - "GUIDING_TEACHING": True if direct pedagogical guidance needed
462
+ - "STRUCTURE_PRACTICE_QUESTIONS": True if structured practice needed
463
+ """
464
+ # Format history
465
+ history_text = "\n".join([f"{msg.get('role', 'unknown')}: {msg.get('content', '')[:100]}"
466
+ for msg in recent_history[-4:]]) if recent_history else "No history"
467
+
468
+ # User message per redesign document format
469
+ user_message = f"""Current query: {user_input}
470
+
471
+ Recent conversation:
472
+ {history_text}
473
+
474
+ Teaching mode assessment:"""
475
+
476
+ # Use agent_4_system as system prompt
477
+ result = self._run_agent(agent_4_system, user_message, max_tokens=50)
478
+ result_upper = result.upper()
479
+
480
+ # Parse result per agent_4_system expected outputs
481
+ decisions = {
482
+ "GUIDING_TEACHING": "GUIDING_TEACHING" in result_upper,
483
+ "STRUCTURE_PRACTICE_QUESTIONS": "STRUCTURE_PRACTICE_QUESTIONS" in result_upper
484
+ }
485
+
486
+ logger.info(f"Agent 4 decisions: {decisions}")
487
+ return decisions
488
+
489
+
490
+ # ============================================================================
491
+ # THINKING AGENTS
492
+ # ============================================================================
493
+
494
class ThinkingAgents:
    """
    Preprocessing agents for complex reasoning.
    Each uses its corresponding thinking prompt as SystemMessage:
    - Math Thinking: MATH_THINKING (GGUF Mistral)
    - Question/Answer Design: QUESTION_ANSWER_DESIGN (Standard Mistral)
    - Reasoning: REASONING_THINKING (Standard Mistral)

    All models load lazily, so construction is cheap; GPU time is only
    claimed inside @spaces.GPU-decorated methods. Every thinking method
    degrades to an empty string on failure rather than raising.
    """

    def __init__(self):
        # Handles populated on demand by _load_math_model / _load_reasoning_model.
        self.math_model = None  # llama_cpp.Llama wrapping the Mistral GGUF
        self.reasoning_model = None  # Standard Mistral (transformers, 4-bit)
        self.reasoning_tokenizer = None
        self.math_model_loaded = False
        self.reasoning_model_loaded = False
        logger.info("ThinkingAgents initialized (lazy loading)")

    def _load_math_model(self):
        """Load GGUF math thinking model - cache-aware.

        No-op when already loaded or when llama-cpp-python is missing; on
        any failure math_model stays None and math_thinking returns "".
        """
        if self.math_model_loaded:
            return

        if not LLAMA_CPP_AVAILABLE:
            logger.error("llama-cpp-python not available - math thinking disabled")
            return

        logger.info(f"Loading GGUF math model: {MISTRAL_MATH_GGUF}")

        try:
            from huggingface_hub import hf_hub_download

            # Check for cached model first (avoids the multi-GB download)
            cached_path = get_cached_gguf_path()

            if cached_path:
                logger.info("Using pre-cached GGUF model (fast path)")
                model_path = cached_path
            else:
                logger.info("Downloading GGUF model from HuggingFace...")
                model_path = hf_hub_download(
                    repo_id=MISTRAL_MATH_GGUF,
                    filename="mistral-small-24b-instruct-2501-reasoning-Q4_K_M.gguf",
                    token=HF_TOKEN
                )
                logger.info(f"Downloaded GGUF to: {model_path}")

            self.math_model = Llama(
                model_path=model_path,
                n_ctx=4096,       # context window in tokens
                n_threads=4,      # CPU threads for layers not on GPU
                n_gpu_layers=35,  # layers offloaded to GPU
            )

            self.math_model_loaded = True
            logger.info("✓ GGUF math model ready")

        except Exception as e:
            # Best-effort: leave math_model None; callers handle the gap.
            logger.error(f"Failed to load GGUF math model: {e}")

    @spaces.GPU(duration=60)
    def _load_reasoning_model(self):
        """Load standard Mistral for reasoning/QA design.

        4-bit NF4 quantization keeps the model within Space memory limits.
        Raises on failure (no try/except here) — callers' try blocks catch it.
        """
        if self.reasoning_model_loaded:
            return

        logger.info(f"Loading reasoning model: {MISTRAL_REASONING}")

        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )

        self.reasoning_tokenizer = AutoTokenizer.from_pretrained(
            MISTRAL_REASONING,
            trust_remote_code=True,
            token=HF_TOKEN
        )

        self.reasoning_model = AutoModelForCausalLM.from_pretrained(
            MISTRAL_REASONING,
            quantization_config=quantization_config,
            torch_dtype=torch.float16,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            token=HF_TOKEN,
            device_map="auto",
        )

        self.reasoning_model_loaded = True
        logger.info("Reasoning model loaded successfully")

    @spaces.GPU(duration=60)
    def math_thinking(self, user_input: str, conversation_history: str) -> str:
        """
        Math-specific Tree-of-Thought reasoning preprocessing.
        Uses MATH_THINKING as system prompt with GGUF Mistral model.

        Returns the generated thinking text, or "" if the model is
        unavailable or generation fails.
        """
        self._load_math_model()

        if self.math_model is None:
            logger.warning("Math model not available, returning empty context")
            return ""

        try:
            # User message per redesign document format
            user_message = f"""Conversation History:
{conversation_history}

Current User Query:
{user_input}

Provide mathematical thinking context:"""

            # Combine system (MATH_THINKING) + user message
            # For GGUF/llama-cpp, we format manually
            # NOTE(review): <|system|>/<|user|> markers are Phi-3-style, not
            # Mistral's [INST] template — confirm the GGUF model expects these.
            full_prompt = f"""<|system|>
{MATH_THINKING}
<|end|>
<|user|>
{user_message}
<|end|>
<|assistant|>
"""

            response = self.math_model(
                full_prompt,
                max_tokens=512,
                temperature=0.7,
                # Stop markers terminate generation early on any of these.
                stop=["</thinking>", "\n\n---", "<|end|>"],
            )

            thinking_output = response['choices'][0]['text'].strip()
            logger.info(f"Math thinking generated: {len(thinking_output)} chars")

            return thinking_output

        except Exception as e:
            logger.error(f"Math thinking error: {e}")
            return ""

    @spaces.GPU(duration=60)
    def question_answer_design(self, user_input: str, conversation_history: str,
                               tool_img_output: str = "", tool_context: str = "") -> str:
        """
        Chain-of-Thought for question formulation and response design.
        Uses QUESTION_ANSWER_DESIGN (formatted) as system prompt.

        Args:
            user_input: Current user query.
            conversation_history: Pre-formatted recent conversation text.
            tool_img_output: Tool image output if available.
            tool_context: Tool context if available.

        Returns:
            Generated design context, or "" on any failure.
        """
        self._load_reasoning_model()

        try:
            # Format QUESTION_ANSWER_DESIGN with required variables
            formatted_qa_system = QUESTION_ANSWER_DESIGN.format(
                tool_img_output=tool_img_output if tool_img_output else "No tool output provided",
                tool_context=tool_context if tool_context else "No tool context available",
                STRUCTURE_PRACTICE_QUESTIONS=STRUCTURE_PRACTICE_QUESTIONS,
                LATEX_FORMATTING=LATEX_FORMATTING
            )

            # User message per redesign document format
            user_message = f"""Conversation History:
{conversation_history}

Current Query: {user_input}

Design question/answer approach:"""

            # Format using tokenizer's chat template
            formatted_prompt = self.reasoning_tokenizer.apply_chat_template(
                [{"role": "system", "content": formatted_qa_system},
                 {"role": "user", "content": user_message}],
                tokenize=False,
                add_generation_prompt=True
            )

            inputs = self.reasoning_tokenizer(formatted_prompt, return_tensors="pt").to(self.reasoning_model.device)

            with torch.no_grad():
                outputs = self.reasoning_model.generate(
                    **inputs,
                    max_new_tokens=512,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.reasoning_tokenizer.eos_token_id
                )

            # outputs[0] contains prompt + completion tokens.
            thinking_output = self.reasoning_tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Extract only new tokens
            # NOTE(review): Mistral chat templates use [INST] delimiters, not
            # <|assistant|> — this split may never match, leaving the prompt
            # text in the output. Consider slicing by input token length.
            if "<|assistant|>" in thinking_output:
                thinking_output = thinking_output.split("<|assistant|>")[-1].strip()

            logger.info(f"QA design thinking generated: {len(thinking_output)} chars")
            return thinking_output

        except Exception as e:
            logger.error(f"QA design thinking error: {e}")
            return ""

    @spaces.GPU(duration=60)
    def reasoning_thinking(self, user_input: str, conversation_history: str) -> str:
        """
        General Chain-of-Thought reasoning preprocessing.
        Uses REASONING_THINKING as system prompt.

        Returns the generated reasoning context, or "" on any failure.
        """
        self._load_reasoning_model()

        try:
            # User message per redesign document format
            user_message = f"""Conversation History:
{conversation_history}

Current Query: {user_input}

Provide reasoning context:"""

            # Format using tokenizer's chat template
            formatted_prompt = self.reasoning_tokenizer.apply_chat_template(
                [{"role": "system", "content": REASONING_THINKING},
                 {"role": "user", "content": user_message}],
                tokenize=False,
                add_generation_prompt=True
            )

            inputs = self.reasoning_tokenizer(formatted_prompt, return_tensors="pt").to(self.reasoning_model.device)

            with torch.no_grad():
                outputs = self.reasoning_model.generate(
                    **inputs,
                    max_new_tokens=512,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.reasoning_tokenizer.eos_token_id
                )

            # outputs[0] contains prompt + completion tokens.
            thinking_output = self.reasoning_tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Extract only new tokens
            # NOTE(review): same <|assistant|> caveat as question_answer_design.
            if "<|assistant|>" in thinking_output:
                thinking_output = thinking_output.split("<|assistant|>")[-1].strip()

            logger.info(f"Reasoning thinking generated: {len(thinking_output)} chars")
            return thinking_output

        except Exception as e:
            logger.error(f"Reasoning thinking error: {e}")
            return ""

    @spaces.GPU(duration=90)
    def process(self, user_input: str, conversation_history: str,
                thinking_prompts: str, tool_img_output: str = "",
                tool_context: str = "") -> str:
        """
        Execute appropriate thinking agents based on active prompts.
        Per redesign document orchestration.

        Args:
            user_input: Current user query
            conversation_history: Formatted recent conversation
            thinking_prompts: Newline-joined string of active thinking prompt names
            tool_img_output: Tool image output if available
            tool_context: Tool context if available

        Returns:
            Combined thinking context from all active agents
        """
        thinking_outputs = []

        # Execute thinking agents based on which prompts are active.
        # Membership is a substring test on the joined prompt-name string.
        if "MATH_THINKING" in thinking_prompts:
            math_output = self.math_thinking(user_input, conversation_history)
            if math_output:
                thinking_outputs.append(f"=== Mathematical Thinking Context ===\n{math_output}")

        if "QUESTION_ANSWER_DESIGN" in thinking_prompts:
            qa_output = self.question_answer_design(
                user_input,
                conversation_history,
                tool_img_output,
                tool_context
            )
            if qa_output:
                thinking_outputs.append(f"=== Question Design Context ===\n{qa_output}")

        if "REASONING_THINKING" in thinking_prompts:
            reasoning_output = self.reasoning_thinking(user_input, conversation_history)
            if reasoning_output:
                thinking_outputs.append(f"=== Reasoning Context ===\n{reasoning_output}")

        combined_context = "\n\n".join(thinking_outputs)
        logger.info(f"Total thinking context: {len(combined_context)} chars from {len(thinking_outputs)} agents")

        return combined_context
786
+
787
+
788
+ # ============================================================================
789
+ # RESPONSE AGENT (Phi3 with Fine-tuned + Fallback)
790
+ # ============================================================================
791
+
792
class ResponseAgent(Runnable):
    """
    PEFT-enabled Phi3 LLM for educational response generation.
    Uses CORE_IDENTITY as base system prompt.
    Additional prompts dynamically added to user message based on library_state.

    Features:
    - Fine-tuned model: jdesiree/Mimir-Phi-3.5
    - Fallback to base: microsoft/Phi-3-mini-4k-instruct
    - 4-bit quantization for memory efficiency
    - ZeroGPU decorators for on-demand GPU allocation
    """

    def __init__(self, model_path: str = FINE_TUNED_PHI3, base_model: str = BASE_PHI3):
        super().__init__()
        logger.info(f"Initializing ResponseAgent (Phi3)...")

        # Created lazily in _load_and_prepare_model (needs GPU context).
        self.accelerator = None

        self.model_path = model_path            # fine-tuned checkpoint id
        self.base_model_path = base_model       # fallback checkpoint id
        self.tokenizer = None
        self.base_model = None                  # whichever model actually loaded
        self.model_loaded = False
        self.model_type = None                  # "fine-tuned" | "base-fallback"

        # Tokenizer is CPU-only, so it is safe to load eagerly here.
        self._initialize_tokenizer()
        logger.info("ResponseAgent initialized (model will load on first GPU call)")

    def _initialize_tokenizer(self):
        """Initialize tokenizer (CPU operation, safe to do at init).

        Always loads from the base model path so the vocabulary matches the
        fallback path too. Raises on failure — the agent is unusable without it.
        """
        try:
            logger.info(f"Loading tokenizer from base model: {self.base_model_path}")
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.base_model_path,
                trust_remote_code=True,
                token=HF_TOKEN,
                use_fast=False
            )
            self._configure_special_tokens()
        except Exception as e:
            logger.error(f"Failed to initialize tokenizer: {e}")
            raise

    def _configure_special_tokens(self):
        """Configure special tokens for Phi-3.

        Ensures the <|end|> turn delimiter is registered and that a pad
        token exists (Phi-3 ships without one; eos is the usual stand-in).
        """
        special_tokens_dict = {}
        if "<|end|>" not in self.tokenizer.all_special_tokens:
            if hasattr(self.tokenizer, 'additional_special_tokens'):
                additional_tokens = self.tokenizer.additional_special_tokens or []
                if "<|end|>" not in additional_tokens:
                    additional_tokens.append("<|end|>")
                    special_tokens_dict["additional_special_tokens"] = additional_tokens
            else:
                special_tokens_dict["additional_special_tokens"] = ["<|end|>"]

        if special_tokens_dict:
            self.tokenizer.add_special_tokens(special_tokens_dict)

        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    @spaces.GPU(duration=120)
    def _load_and_prepare_model(self):
        """Load model with ZeroGPU + Accelerate integration.

        Tries the fine-tuned checkpoint first, then falls back to the base
        model; both are 4-bit NF4 quantized. Idempotent: returns immediately
        once a model has been loaded.
        """
        if self.model_loaded:
            return

        logger.info("Loading ResponseAgent model with ZeroGPU + Accelerate...")

        self.accelerator = Accelerator(
            mixed_precision="fp16",
            gradient_accumulation_steps=1,
            log_with=None,
            project_dir=None
        )
        # Fixed seed for reproducible sampling across restarts.
        set_seed(42)

        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )

        model = None
        try:
            logger.info(f"Attempting to load fine-tuned model: {self.model_path}")
            model = AutoModelForCausalLM.from_pretrained(
                self.model_path,
                quantization_config=quantization_config,
                torch_dtype=torch.float16,
                trust_remote_code=True,
                low_cpu_mem_usage=True,
                token=HF_TOKEN,
                attn_implementation="eager",
                device_map="auto",
            )
            self.model_type = "fine-tuned"
            logger.info("✓ Fine-tuned model loaded")
        except Exception as e:
            # Any load failure (missing repo, auth, OOM) falls back to base.
            logger.warning(f"Fine-tuned model failed: {e}, using base model")
            model = AutoModelForCausalLM.from_pretrained(
                self.base_model_path,
                quantization_config=quantization_config,
                torch_dtype=torch.float16,
                trust_remote_code=True,
                low_cpu_mem_usage=True,
                token=HF_TOKEN,
                attn_implementation="eager",
                device_map="auto",
            )
            self.model_type = "base-fallback"
            logger.info("✓ Base model loaded")

        self.base_model = self.accelerator.prepare(model)
        self.model_loaded = True

        logger.info(f"ResponseAgent ready: {self.model_type} on {self.accelerator.device}")

    def _format_chat_template(self, complete_prompt: str) -> str:
        """Format prompt using Phi-3's chat template.

        Falls back to a hand-written <|user|>/<|assistant|> frame if the
        tokenizer's template application fails for any reason.
        """
        try:
            messages = [{"role": "user", "content": complete_prompt}]
            formatted_text = self.tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True
            )
            return formatted_text
        except Exception as e:
            logger.warning(f"Chat template failed, using fallback: {e}")
            return f"<|user|>\n{complete_prompt}<|end|>\n<|assistant|>\n"

    @spaces.GPU(duration=180)
    def invoke(self, input: Input, config=None) -> Output:
        """
        Main inference method.
        Expects input formatted per redesign document:
        - CORE_IDENTITY (always included)
        - prompt_segments (from library_state)
        - tool outputs
        - conversation history
        - thinking context
        - user query

        Args:
            input: Complete formatted prompt string (or dict with 'input' key;
                   parameter name mandated by the Runnable interface)

        Returns:
            Generated response string; a fallback sentence or error message
            on failure (never raises to the caller)
        """
        # Accept both plain strings and LangChain-style {'input': ...} dicts.
        if isinstance(input, dict):
            complete_prompt = input.get('input', str(input))
        else:
            complete_prompt = str(input)

        try:
            self._load_and_prepare_model()

            text = self._format_chat_template(complete_prompt)

            # Truncate long prompts to leave headroom for 350 new tokens
            # within Phi-3-mini's 4k context.
            max_input_length = 3500
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=max_input_length
            )

            # Place tensors on whatever device the (device_map="auto") model
            # actually landed on.
            model_device = next(self.base_model.parameters()).device
            inputs = {k: v.to(model_device) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = self.base_model.generate(
                    input_ids=inputs['input_ids'],
                    attention_mask=inputs.get('attention_mask', None),
                    max_new_tokens=350,
                    do_sample=True,
                    temperature=0.7,
                    repetition_penalty=1.15,
                    pad_token_id=self.tokenizer.eos_token_id,
                    use_cache=False,
                    num_beams=1,
                )

            # Slice off the prompt so only newly generated tokens are decoded.
            new_tokens = outputs[0][len(inputs['input_ids'][0]):].cpu()

            if len(new_tokens) == 0:
                logger.error("Model generated zero tokens!")
                return "I'm still learning how to respond properly."

            result = self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

            # Trim at the first stop marker, if the model ran past its turn.
            for stop_word in ["User:", "<|end|>", "<|assistant|>"]:
                if stop_word in result:
                    result = result.split(stop_word)[0].strip()
                    break

            if not result:
                logger.error("Empty result after processing!")
                return "I'm still learning how to respond properly."

            logger.info(f"ResponseAgent completed: {len(result)} chars using {self.model_type}")
            return result

        except Exception as e:
            logger.error(f"ResponseAgent error: {e}")
            import traceback
            logger.error(f"Traceback: {traceback.format_exc()}")
            return f"I encountered an error: {str(e)}"

    def get_model_info(self):
        """Get model information for diagnostics.

        Returns a dict describing load status, which checkpoint is active,
        and whether Accelerate/ZeroGPU wiring is in place.
        """
        return {
            "status": "loaded" if self.model_loaded else "not_loaded",
            "model_type": self.model_type,
            "using_fallback": self.model_type == "base-fallback" if self.model_type else False,
            "zerogpu_ready": True,
            "accelerate_ready": self.accelerator is not None,
        }

    @property
    def InputType(self) -> Type[Input]:
        # Runnable contract: this agent consumes plain strings.
        return str

    @property
    def OutputType(self) -> Type[Output]:
        # Runnable contract: this agent produces plain strings.
        return str
app.py ADDED
@@ -0,0 +1,1360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ """
3
+ Mimir Educational AI Assistant - Main Application
4
+
5
+ Architecture:
6
+ - Multi-page Gradio interface (Chatbot + Analytics with link to Mimir case study)
7
+ - Agent-based orchestration (Tool, Routing, Thinking, Response)
8
+ - Global state management with SQLite + HF dataset backup
9
+ - Prompt state tracking per turn
10
+ - LightEval for metrics tracking
11
+ - Logger for timing functions
12
+ """
13
+
14
+ import os
15
+ import re
16
+ import sys
17
+ import time
18
+ import json
19
+ import base64
20
+ import logging
21
+ import sqlite3
22
+ import subprocess
23
+ import threading
24
+ import warnings
25
+ import uuid
26
+ from datetime import datetime
27
+ from typing import Dict, List, Optional, Tuple, Any
28
+
29
+ # Core dependencies
30
+ import torch
31
+ import gradio as gr
32
+ from dotenv import load_dotenv
33
+
34
+ # Agent architecture
35
+ from agents import (
36
+ ToolDecisionAgent,
37
+ PromptRoutingAgents,
38
+ ThinkingAgents,
39
+ ResponseAgent,
40
+ )
41
+
42
+ # State management
43
+ from state_manager import (
44
+ GlobalStateManager,
45
+ LogicalExpressions,
46
+ )
47
+
48
+ # Prompt library
49
+ from prompt_library import (
50
+ CORE_IDENTITY,
51
+ VAUGE_INPUT,
52
+ USER_UNDERSTANDING,
53
+ GENERAL_FORMATTING,
54
+ LATEX_FORMATTING,
55
+ GUIDING_TEACHING,
56
+ STRUCTURE_PRACTICE_QUESTIONS,
57
+ PRACTICE_QUESTION_FOLLOWUP,
58
+ TOOL_USE_ENHANCEMENT,
59
+ )
60
+
61
+ # LangGraph imports
62
+ from langgraph.graph import StateGraph, START, END
63
+ from langgraph.graph.message import add_messages
64
+ from langgraph.checkpoint.memory import MemorySaver
65
+ from langgraph.prebuilt import ToolNode
66
+
67
+ # LangChain Core
68
+ from langchain_core.tools import tool
69
+ from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, ToolMessage, BaseMessage
70
+
71
+ # LightEval for metrics
72
+ try:
73
+ from lighteval.logging.evaluation_tracker import EvaluationTracker
74
+ from lighteval.models.transformers.transformers_model import TransformersModel
75
+ from lighteval.metrics.metrics_sample import BertScore, ROUGE
76
+ from lighteval.tasks.requests import Doc
77
+ LIGHTEVAL_AVAILABLE = True
78
+ except ImportError:
79
+ LIGHTEVAL_AVAILABLE = False
80
+ logging.warning("LightEval not available - metrics tracking limited")
81
+
82
+ # Tool for graphing
83
+ from graph_tool import generate_plot
84
+
85
+ # Suppress warnings
86
+ warnings.filterwarnings("ignore", category=UserWarning)
87
+ warnings.filterwarnings("ignore", category=FutureWarning)
88
+
89
+ # Load environment
90
+ load_dotenv(".env")
91
+ HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
92
+
93
+ # Configuration
94
+ DEBUG_STATE = os.getenv("DEBUG_STATE", "false").lower() == "true"
95
+ CURRENT_YEAR = datetime.now().year
96
+
97
+
98
+ # ============================================================================
99
+ # LOGGING SETUP
100
+ # ============================================================================
101
+
102
# Root logging configuration for the whole app: INFO level with
# timestamped, module-tagged records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used throughout app.py.
logger = logging.getLogger(__name__)
107
+
108
+
109
def log_step(step_name: str, start_time: Optional[float] = None) -> float:
    """
    Log a pipeline step with timestamp and duration.

    Call once with no start_time to mark a step as starting; call again
    passing the returned value to mark it completed with its duration.

    Args:
        step_name: Name of the step
        start_time: Start time from previous call (if completing a step)

    Returns:
        Current time for next call
    """
    now = time.time()
    timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]

    # Explicit None check (not truthiness): a start_time of 0.0 is falsy
    # and would otherwise be misreported as the start of a new step.
    if start_time is not None:
        duration = now - start_time
        logger.info(f"[{timestamp}] COMPLETED: {step_name} ({duration:.2f}s)")
    else:
        logger.info(f"[{timestamp}] STARTING: {step_name}")

    return now
130
+
131
+
132
+ # ============================================================================
133
+ # GLOBAL INITIALIZATION
134
+ # ============================================================================
135
+
136
+ logger.info("="*60)
137
+ logger.info("INITIALIZING MIMIR APPLICATION")
138
+ logger.info("="*60)
139
+
140
+ init_start = log_step("Global Initialization")
141
+
142
+ # Initialize state management
143
+ global_state_manager = GlobalStateManager()
144
+ logical_expressions = LogicalExpressions()
145
+ logger.info("State management initialized")
146
+
147
+ # Initialize agents (lazy loading - models load on first use)
148
+ tool_agent = ToolDecisionAgent()
149
+ routing_agents = PromptRoutingAgents()
150
+ thinking_agents = ThinkingAgents()
151
+ response_agent = ResponseAgent()
152
+ logger.info("Agents initialized (lazy loading)")
153
+
154
+ log_step("Global Initialization", init_start)
155
+
156
+
157
+ # ============================================================================
158
+ # ANALYTICS & DATABASE FUNCTIONS
159
+ # ============================================================================
160
+
161
def get_trackio_database_path(project_name: str) -> Optional[str]:
    """
    Return the path of the first existing metrics SQLite database.

    Probes the project-named locations first, then the shared
    ./mimir_metrics.db fallback. Returns None when nothing exists.
    """
    candidate_paths = (
        f"./{project_name}.db",
        f"./trackio_data/{project_name}.db",
        f"./.trackio/{project_name}.db",
        "./mimir_metrics.db",
    )
    # First existing candidate wins; None when none exist.
    return next((path for path in candidate_paths if os.path.exists(path)), None)
175
+
176
+
177
def get_project_statistics_with_nulls(cursor, project_name: str) -> Dict:
    """
    Query the metrics database for project-level statistics.

    Each statistic is computed independently; a failure in one query
    degrades only that value to None instead of aborting the whole call.

    Args:
        cursor: sqlite3 cursor whose rows support access by column name.
        project_name: Project whose metrics should be aggregated.

    Returns:
        Dict with keys total_conversations, avg_session_length and
        success_rate; each value is a number or None when no data exists.
    """
    def _one_row(sql: str):
        # Run a single-row aggregate query scoped to this project.
        cursor.execute(sql, (project_name,))
        return cursor.fetchone()

    try:
        summary = {}

        # Total conversations = distinct run ids recorded for this project.
        try:
            row = _one_row("""
                SELECT COUNT(DISTINCT run_id) as total_runs
                FROM metrics
                WHERE project_name = ?
            """)
            summary["total_conversations"] = row["total_runs"] if row and row["total_runs"] > 0 else None
        except sqlite3.Error:
            summary["total_conversations"] = None

        # Average response time, stored under metric_name 'response_time'.
        try:
            row = _one_row("""
                SELECT AVG(CAST(value AS FLOAT)) as avg_response_time
                FROM metrics
                WHERE project_name = ? AND metric_name = 'response_time'
            """)
            if row and row["avg_response_time"] is not None:
                summary["avg_session_length"] = round(row["avg_response_time"], 2)
            else:
                summary["avg_session_length"] = None
        except sqlite3.Error:
            summary["avg_session_length"] = None

        # Success rate = percentage of quality_score entries above 3.5.
        try:
            row = _one_row("""
                SELECT
                    COUNT(*) as total_responses,
                    SUM(CASE WHEN CAST(value AS FLOAT) > 3.5 THEN 1 ELSE 0 END) as successful_responses
                FROM metrics
                WHERE project_name = ? AND metric_name = 'quality_score'
            """)
            if row and row["total_responses"] > 0:
                pct = (row["successful_responses"] / row["total_responses"]) * 100
                summary["success_rate"] = round(pct, 1)
            else:
                summary["success_rate"] = None
        except sqlite3.Error:
            summary["success_rate"] = None

        return summary

    except sqlite3.Error as e:
        logger.error(f"Database error: {e}")
        return {"total_conversations": None, "avg_session_length": None, "success_rate": None}
232
+
233
+
234
def get_recent_interactions_with_nulls(cursor, project_name: str, limit: int = 10) -> List:
    """
    Query for the most recent interactions of a project.

    One row per conversation (anchored on the 'conversation_start' metric),
    left-joined against per-run metrics so that any missing metric surfaces
    as None in the returned row.

    Returns:
        List of [timestamp, response_time, prompt_mode, tools_used,
        quality_score, adapter_used] rows, newest first; [] on DB error.
    """
    def _normalize(row):
        # Convert one joined DB row to the dashboard row shape; every
        # missing or empty metric becomes None.
        ts = row["timestamp"]
        rt = row["response_time"]
        tools = row["tools_used"]
        quality = row["quality_score"]
        return [
            ts[:16] if ts else None,  # trim timestamp to minute resolution
            float(rt) if rt is not None else None,
            row["prompt_mode"] if row["prompt_mode"] else None,
            bool(int(tools)) if tools is not None else None,
            float(quality) if quality is not None else None,
            row["adapter_used"] if row["adapter_used"] else None,
        ]

    try:
        cursor.execute("""
            SELECT
                m1.timestamp,
                m2.value as response_time,
                m3.value as prompt_mode,
                m4.value as tools_used,
                m5.value as quality_score,
                m6.value as adapter_used,
                m1.run_id
            FROM metrics m1
            LEFT JOIN metrics m2 ON m1.run_id = m2.run_id AND m2.metric_name = 'response_time'
            LEFT JOIN metrics m3 ON m1.run_id = m3.run_id AND m3.metric_name = 'prompt_mode'
            LEFT JOIN metrics m4 ON m1.run_id = m4.run_id AND m4.metric_name = 'tools_used'
            LEFT JOIN metrics m5 ON m1.run_id = m5.run_id AND m5.metric_name = 'quality_score'
            LEFT JOIN metrics m6 ON m1.run_id = m6.run_id AND m6.metric_name = 'active_adapter'
            WHERE m1.project_name = ? AND m1.metric_name = 'conversation_start'
            ORDER BY m1.timestamp DESC
            LIMIT ?
        """, (project_name, limit))

        return [_normalize(row) for row in cursor.fetchall()]

    except sqlite3.Error as e:
        logger.error(f"Database error: {e}")
        return []
275
+
276
+
277
def create_dashboard_html_with_nulls(project_name: str, project_stats: Dict) -> str:
    """Create dashboard HTML with enhanced metrics.

    Renders the analytics card for *project_name* from *project_stats*,
    showing explicit "No data"/"--" placeholders wherever a stat is None.

    NOTE: mutates *project_stats* in place by adding the
    ml_educational_quality, ml_classifier_accuracy and active_sessions keys
    read from the global state manager.
    """
    # Small formatter: italic grey "No data" span for missing values.
    def format_stat(value, suffix="", no_data_text="No data"):
        if value is None:
            return f'<span style="color: #999; font-style: italic;">{no_data_text}</span>'
        return f"{value}{suffix}"

    # Variant for the large stat tiles: lighter "--" placeholder.
    def format_large_stat(value, suffix="", no_data_text="--"):
        if value is None:
            return f'<span style="color: #ccc;">{no_data_text}</span>'
        return f"{value}{suffix}"

    # Get evaluation metrics from global state
    # NOTE(review): assumes get_evaluation_summary() exposes
    # 'aggregate_metrics' and get_cache_status() exposes
    # 'total_conversation_sessions' — confirm against the state manager.
    try:
        eval_summary = global_state_manager.get_evaluation_summary()
        cache_status = global_state_manager.get_cache_status()

        project_stats["ml_educational_quality"] = eval_summary['aggregate_metrics']['avg_educational_quality']
        project_stats["ml_classifier_accuracy"] = eval_summary['aggregate_metrics']['classifier_accuracy_rate']
        project_stats["active_sessions"] = cache_status['total_conversation_sessions']

    except Exception as e:
        # Missing global state degrades gracefully to "N/A" placeholders.
        logger.warning(f"Could not get global state metrics: {e}")
        project_stats["ml_educational_quality"] = None
        project_stats["ml_classifier_accuracy"] = None
        project_stats["active_sessions"] = None

    # Status determination: colour-code the success-rate tile.
    success_rate = project_stats.get("success_rate")
    if success_rate is not None:
        if success_rate >= 80:
            status_color = "#4CAF50"
            status_text = "Excellent"
        elif success_rate >= 60:
            status_color = "#FF9800"
            status_text = "Good"
        else:
            status_color = "#F44336"
            status_text = "Needs Improvement"
    else:
        status_color = "#999"
        status_text = "No data"

    # ML metrics section
    ml_metrics_section = f"""
    <div style="margin: 15px 0; padding: 10px; background: #f0f8ff; border-radius: 4px; border-left: 4px solid #007bff;">
        <strong>ML Performance:</strong>
        Educational Quality: {format_stat(project_stats.get('ml_educational_quality'), '', 'N/A')} |
        Classifier Accuracy: {format_stat(project_stats.get('ml_classifier_accuracy'), '%' if project_stats.get('ml_classifier_accuracy') else '', 'N/A')} |
        Active Sessions: {format_stat(project_stats.get('active_sessions'), '', 'N/A')}
    </div>
    """

    dashboard_html = f'''
    <div style="text-align: center; padding: 20px; border: 1px solid #ddd; border-radius: 8px; background: #f9f9f9;">
        <h3>{project_name} Analytics</h3>

        <div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 15px; margin: 20px 0;">
            <div style="padding: 15px; background: white; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <div style="font-size: 24px; font-weight: bold; color: #2196F3;">{format_large_stat(project_stats.get('total_conversations'))}</div>
                <div style="color: #666; font-size: 12px;">Total Sessions</div>
            </div>
            <div style="padding: 15px; background: white; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <div style="font-size: 24px; font-weight: bold; color: #FF9800;">{format_large_stat(project_stats.get('avg_session_length'), 's' if project_stats.get('avg_session_length') else '')}</div>
                <div style="color: #666; font-size: 12px;">Avg Response Time</div>
            </div>
            <div style="padding: 15px; background: white; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <div style="font-size: 24px; font-weight: bold; color: {status_color};">{format_large_stat(success_rate, '%' if success_rate else '')}</div>
                <div style="color: #666; font-size: 12px;">Success Rate ({status_text})</div>
            </div>
        </div>

        {ml_metrics_section}

        <div style="margin: 15px 0; padding: 10px; background: #fff3cd; border-radius: 4px; font-size: 14px;">
            <strong>Model:</strong> {format_stat(project_stats.get('model_type'), no_data_text='Unknown')} |
            <strong>Last Updated:</strong> {project_stats.get('last_updated', 'Unknown')}
        </div>
    </div>
    '''

    return dashboard_html
359
+
360
+
361
def calculate_response_quality(response: str) -> float:
    """Heuristically score a response's quality on a 1.0–5.0 scale.

    Scoring:
      * under 20 chars  -> flat 2.0 (too short to be useful)
      * over 2000 chars -> flat 3.5 (likely rambling)
      * otherwise: 2.5 base + up to 1.5 for length (saturates at 200 chars)
        + up to 1.0 for educational-keyword coverage, clamped to [1.0, 5.0].

    Returns 3.0 (neutral) if scoring fails for any reason.
    """
    try:
        # Degenerate lengths get flat scores; doing these checks first also
        # avoids the keyword scan the original wasted on them.
        if len(response) < 20:
            return 2.0
        if len(response) > 2000:
            return 3.5

        length_score = min(len(response) / 200, 1.0)
        educational_keywords = ['learn', 'understand', 'concept', 'example', 'practice']
        lowered = response.lower()  # lowercase once, not per keyword
        keyword_score = sum(1 for keyword in educational_keywords if keyword in lowered) / len(educational_keywords)

        base_score = 2.5 + (length_score * 1.5) + (keyword_score * 1.0)
        return min(max(base_score, 1.0), 5.0)
    except Exception:  # was a bare except: must not swallow KeyboardInterrupt/SystemExit
        return 3.0
377
+
378
+
379
def evaluate_educational_quality_with_tracking(user_query: str, response: str, thread_id: str = None, session_id: str = None):
    """Educational quality evaluation with state tracking using LightEval.

    Scores *response* with cheap structural heuristics, optionally refines
    the semantic score via LightEval's BertScore, records the result in the
    global state manager, and returns the metrics dict
    {'semantic_quality', 'educational_score', 'response_time', 'indicators'}.
    On any failure a neutral fallback dict is returned instead.
    """
    # NOTE: this clock measures the evaluation itself, not model latency.
    start_time = time.time()

    try:
        # Educational indicators — six boolean heuristics, each weighted equally.
        educational_indicators = {
            'has_examples': 'example' in response.lower(),
            'structured_explanation': '##' in response or '1.' in response,
            'appropriate_length': 100 < len(response) < 1500,
            'encourages_learning': any(phrase in response.lower()
                                       for phrase in ['practice', 'try', 'consider', 'think about']),
            'uses_latex': '$' in response,
            'has_clear_sections': response.count('\n\n') >= 2
        }

        # Fraction of indicators satisfied, in [0, 1].
        educational_score = sum(educational_indicators.values()) / len(educational_indicators)
        # Length-based fallback for semantic quality; saturates at 500 chars.
        semantic_quality = min(len(response) / 500, 1.0)
        response_time = time.time() - start_time

        # Use LightEval if available; failure keeps the length-based fallback.
        if LIGHTEVAL_AVAILABLE:
            try:
                doc = Doc(
                    task_name=f"turn_{thread_id or session_id}",
                    query=user_query,
                    choices=[response],
                    gold_index=-1,  # no gold answer — scoring free-form output
                    specific_output=response
                )

                bert_score = BertScore().compute(doc)
                # A falsy score (None/0) also falls back — presumably intentional;
                # TODO confirm 0.0 should not override the heuristic.
                semantic_quality = bert_score if bert_score else semantic_quality

            except Exception as lighteval_error:
                logger.warning(f"LightEval computation failed: {lighteval_error}")

        metrics = {
            'semantic_quality': semantic_quality,
            'educational_score': educational_score,
            'response_time': response_time,
            'indicators': educational_indicators
        }

        # Track in global state so the dashboard aggregates can see this turn.
        global_state_manager.add_educational_quality_score(
            user_query=user_query,
            response=response,
            metrics=metrics,
            session_id=session_id
        )

        logger.info(f"Educational quality evaluated: {educational_score:.3f}")
        return metrics

    except Exception as e:
        logger.error(f"Educational quality evaluation failed: {e}")
        # Neutral fallback keeps callers (metrics logging) working.
        return {'educational_score': 0.5, 'semantic_quality': 0.5, 'response_time': 0.0}
437
+
438
+
439
def make_classification_with_tracking(user_input: str, conversation_length: int, is_first_turn: bool,
                                      input_character_count: int, is_short_input: bool,
                                      recent_discovery_count: int, contains_greeting: bool,
                                      contains_educational_keywords: bool, requires_visualization: bool,
                                      topic_change_detected: bool, session_id: str = None):
    """Deprecated shim kept only for call-site compatibility.

    Classification is now handled by the agent-based routing pipeline;
    this stub merely records that a legacy caller still invokes it and
    returns None unconditionally.
    """
    logger.info("ML classifier tracking called (legacy - now using agent-based routing)")
    return None
447
+
448
+
449
def log_metrics_to_database(project_name: str, run_id: str, metrics: Dict):
    """Persist a batch of metrics for one run into the SQLite dashboard DB.

    Creates the metrics table on demand, stringifies every value, and stamps
    all rows of the batch with one shared ISO timestamp so they group as a
    single run. Failures are logged and swallowed so metrics logging can
    never take down the main request path.
    """
    try:
        db_path = get_trackio_database_path(project_name)

        if db_path is None:
            db_path = "./mimir_metrics.db"  # local fallback when no tracked DB exists

        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()

            # Create metrics table if not exists
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS metrics (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    project_name TEXT,
                    run_id TEXT,
                    metric_name TEXT,
                    value TEXT,
                    timestamp TEXT
                )
            """)

            # One shared timestamp so all metrics of this run group together.
            timestamp = datetime.now().isoformat()
            for metric_name, metric_value in metrics.items():
                cursor.execute("""
                    INSERT INTO metrics (project_name, run_id, metric_name, value, timestamp)
                    VALUES (?, ?, ?, ?, ?)
                """, (project_name, run_id, metric_name, str(metric_value), timestamp))

            conn.commit()
        finally:
            # Bug fix: previously the connection leaked whenever an execute
            # raised before conn.close() was reached.
            conn.close()

        logger.info(f"Logged {len(metrics)} metrics to database")

    except Exception as e:
        logger.error(f"Failed to log metrics to database: {e}")
487
+
488
+
489
def sync_trackio_with_global_state():
    """Push aggregate evaluation metrics from the global state manager
    into the metrics database so the dashboard reflects them.

    Errors are logged and swallowed; syncing is best-effort.
    """
    try:
        summary = global_state_manager.get_evaluation_summary()
        aggregates = summary['aggregate_metrics']

        # Snapshot the aggregates under the names the dashboard queries.
        snapshot = {
            "educational_quality_avg": aggregates['avg_educational_quality'],
            "classifier_accuracy": aggregates['classifier_accuracy_rate'],
            "user_satisfaction": aggregates['user_satisfaction_rate'],
            "total_evaluations": sum(summary['total_evaluations'].values()),
        }

        # Each sync is logged as its own run.
        log_metrics_to_database("Mimir", str(uuid.uuid4()), snapshot)

        logger.info("Synced global state metrics to database")

    except Exception as e:
        logger.error(f"Failed to sync metrics to database: {e}")
508
+
509
+
510
def refresh_analytics_data_persistent():
    """Refresh analytics data with global state persistence.

    Returns a (project_stats, recent_interactions, dashboard_html) triple.
    Results are cached in the global state manager; a refresh from less
    than 30 seconds ago is returned from cache. On any error an error-stats
    triple is returned and the error is persisted to the analytics state.
    """
    project_name = "Mimir"

    try:
        analytics_state = global_state_manager.get_analytics_state()
        last_refresh = analytics_state.get('last_refresh')

        # If refreshed within last 30 seconds, return cached.
        # Bug fix: timedelta.seconds wraps every 24h (a refresh 1 day + 5s
        # ago reported 5), so use total_seconds() for true elapsed time.
        if last_refresh and (datetime.now() - last_refresh).total_seconds() < 30:
            logger.info("Using cached analytics data (recent refresh)")
            return (
                analytics_state['project_stats'],
                analytics_state['recent_interactions'],
                analytics_state['dashboard_html']
            )

        db_path = get_trackio_database_path(project_name)

        if db_path is None:
            # No database yet: publish a stats shell so the dashboard still renders.
            logger.warning("No metrics database found")
            project_stats = {
                "total_conversations": None,
                "avg_session_length": None,
                "success_rate": None,
                "model_type": "Phi-3-mini (Fine-tuned)",
                "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }

            dashboard_html = create_dashboard_html_with_nulls(project_name, project_stats)
            recent_interactions = []

            global_state_manager.update_analytics_state(
                project_stats=project_stats,
                recent_interactions=recent_interactions,
                dashboard_html=dashboard_html
            )

            return project_stats, recent_interactions, dashboard_html

        conn = sqlite3.connect(db_path)
        try:
            conn.row_factory = sqlite3.Row  # name-based column access for the query helpers
            cursor = conn.cursor()

            project_stats = get_project_statistics_with_nulls(cursor, project_name)
            project_stats["model_type"] = "Phi-3-mini (Fine-tuned)"
            project_stats["last_updated"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            recent_data = get_recent_interactions_with_nulls(cursor, project_name, limit=10)
            dashboard_html = create_dashboard_html_with_nulls(project_name, project_stats)
        finally:
            # Bug fix: close the connection even when a query raises.
            conn.close()

        global_state_manager.update_analytics_state(
            project_stats=project_stats,
            recent_interactions=recent_data,
            dashboard_html=dashboard_html
        )

        logger.info("Analytics data refreshed and cached successfully")
        return project_stats, recent_data, dashboard_html

    except Exception as e:
        logger.error(f"Error refreshing analytics: {e}")

        error_stats = {
            "error": str(e),
            "total_conversations": None,
            "avg_session_length": None,
            "success_rate": None,
            "model_type": "Error",
            "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        error_html = f"""
        <div style="text-align: center; padding: 40px; border: 2px dashed #f44336; border-radius: 8px; background: #ffebee;">
            <h3 style="color: #f44336;">Analytics Error</h3>
            <p>Could not load analytics data: {str(e)[:100]}</p>
        </div>
        """

        # Persist the error so subsequent loads can surface it.
        global_state_manager.update_analytics_state(
            project_stats=error_stats,
            recent_interactions=[],
            dashboard_html=error_html,
            error_state=str(e)
        )

        return error_stats, [], error_html
599
+
600
+
601
def export_metrics_json_persistent():
    """Export current analytics metrics to a timestamped JSON file.

    Refreshes (or reuses cached) analytics data, writes it to
    mimir_metrics_<timestamp>.json in the working directory, and records
    the export in the global state manager. Errors are logged, recorded
    as a failed export, and surfaced via gr.Warning — never raised.
    """
    try:
        project_stats, recent_data, _ = refresh_analytics_data_persistent()

        export_data = {
            "project": "Mimir",
            "export_timestamp": datetime.now().isoformat(),
            "statistics": project_stats,
            "recent_interactions": recent_data
        }

        filename = f"mimir_metrics_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

        # utf-8 keeps the export portable across platforms; default=str
        # stringifies anything json can't serialize natively (datetimes etc.).
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(export_data, f, indent=2, default=str)

        global_state_manager.add_export_record("JSON", filename, success=True)

        # Bug fix: these messages previously logged the literal text
        # "(unknown)" instead of the actual export filename.
        logger.info(f"Metrics exported to {filename}")
        gr.Info(f"Metrics exported successfully to {filename}")

    except Exception as e:
        global_state_manager.add_export_record("JSON", "failed", success=False)
        logger.error(f"Export failed: {e}")
        gr.Warning(f"Export failed: {str(e)}")
627
+
628
+
629
def export_metrics_csv_persistent():
    """Export recent interactions to a timestamped CSV file.

    Writes mimir_metrics_<timestamp>.csv with a fixed header row followed
    by the recent-interaction rows from the analytics refresh. Errors are
    logged, recorded as a failed export, and surfaced via gr.Warning.
    """
    try:
        import csv

        _, recent_data, _ = refresh_analytics_data_persistent()

        filename = f"mimir_metrics_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"

        # newline='' is required by the csv module; utf-8 keeps the file portable.
        with open(filename, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(["Timestamp", "Response Time", "Mode", "Tools Used", "Quality Score", "Adapter"])
            writer.writerows(recent_data)

        global_state_manager.add_export_record("CSV", filename, success=True)

        # Bug fix: these messages previously logged the literal text
        # "(unknown)" instead of the actual export filename.
        logger.info(f"Metrics exported to {filename}")
        gr.Info(f"Metrics exported successfully to {filename}")

    except Exception as e:
        global_state_manager.add_export_record("CSV", "failed", success=False)
        logger.error(f"Export failed: {e}")
        gr.Warning(f"Export failed: {str(e)}")
654
+
655
+
656
def load_analytics_state():
    """Return (project_stats, recent_interactions, dashboard_html) from the
    globally persisted analytics state.

    When no dashboard has been rendered yet, a static placeholder card is
    substituted so the UI always has something to show.
    """
    state = global_state_manager.get_analytics_state()

    stats = state['project_stats']
    interactions = state['recent_interactions']
    html = state['dashboard_html']

    if html is None:
        html = """
        <div style="text-align: center; padding: 40px; border: 2px dashed #ccc; border-radius: 8px; background: #f8f9fa;">
            <h3>Analytics Dashboard</h3>
            <p>Click "Refresh Data" to load analytics.</p>
        </div>
        """

    return stats, interactions, html
673
+
674
+
675
def get_global_state_debug_info():
    """Snapshot the global state manager for debugging.

    Returns a dict with the cache status, the snapshot timestamp, and all
    known sessions.
    """
    return {
        "cache_status": global_state_manager.get_cache_status(),
        "timestamp": datetime.now().isoformat(),
        "sessions": global_state_manager.get_all_sessions(),
    }
686
+
687
+
688
+ # ============================================================================
689
+ # POST-PROCESSING
690
+ # ============================================================================
691
+
692
class ResponsePostProcessor:
    """Cleans up, truncates, and formats model output before display.

    Pipeline: token cleanup -> intelligent truncation -> readability
    formatting. If the result fails a minimal quality gate, a safe canned
    reply is returned instead.
    """

    def __init__(self, max_length: int = 1800, min_length: int = 10):
        # Responses longer than max_length are truncated at sentence
        # boundaries; shorter than min_length are replaced by the fallback.
        self.max_length = max_length
        self.min_length = min_length

        # Educational endpoints where a response can be cut cleanly.
        self.logical_stop_patterns = [
            r'\n\n---\n',
            r'\n\n## Summary\b',
            r'\n\nIn conclusion\b',
            r'\n\nTo summarize\b',
        ]

    def process_response(self, raw_response: str, user_query: str = "") -> str:
        """Run the full cleanup pipeline; on any error, return the raw text."""
        try:
            text = self._enhanced_token_cleanup(raw_response)
            text = self._truncate_intelligently(text)
            text = self._enhance_readability(text)

            if self._passes_quality_check(text):
                return text.strip()
            return self._generate_fallback_response(user_query)

        except Exception as e:
            logger.error(f"Post-processing error: {e}")
            return raw_response

    def _enhanced_token_cleanup(self, text: str) -> str:
        """Strip chat-template artifacts and collapse excess blank lines."""
        for junk in (
            r'<\|.*?\|>',
            r'###\s*$',
            r'User:\s*$',
            r'Assistant:\s*$',
            r'\n\s*\n\s*\n+',
        ):
            text = re.sub(junk, '', text, flags=re.MULTILINE)

        return text

    def _truncate_intelligently(self, text: str) -> str:
        """Cut at a logical endpoint, else at a sentence boundary near max_length."""
        for marker in self.logical_stop_patterns:
            hit = re.search(marker, text, re.IGNORECASE)
            if hit:
                return text[:hit.start()].strip()

        if len(text) <= self.max_length:
            return text

        # Accumulate whole sentences until the next one would overflow.
        kept = ""
        for sentence in re.split(r'[.!?]+\s+', text):
            candidate = kept + sentence + ". "
            if len(candidate) > self.max_length:
                break
            kept = candidate

        return kept.strip()

    def _enhance_readability(self, text: str) -> str:
        """Normalize spacing after punctuation, runs of whitespace, and bullets."""
        text = re.sub(r'([.!?])([A-Z])', r'\1 \2', text)
        text = re.sub(r'\s{2,}', ' ', text)
        return re.sub(r'\n\s*[-*]\s*', '\n- ', text)

    def _passes_quality_check(self, text: str) -> bool:
        """Require minimum length plus at least one substantive sentence."""
        if len(text.strip()) < self.min_length:
            return False

        return any(len(part.strip()) > 5 for part in re.split(r'[.!?]+', text))

    def _generate_fallback_response(self, user_query: str) -> str:
        """Safe canned reply used when processing yields unusable text."""
        return "I'd be happy to help you understand this better. Could you clarify what specific aspect you'd like me to focus on?"

    def process_and_stream_response(self, raw_response: str, user_query: str = ""):
        """Yield the processed response progressively, one word at a time."""
        try:
            final_text = self.process_response(raw_response, user_query)
            words = final_text.split()

            for count in range(1, len(words) + 1):
                partial = " ".join(words[:count])
                # Intermediate chunks carry a trailing space, matching the
                # incremental accumulation downstream consumers expect.
                if count < len(words):
                    partial += " "

                yield partial
                time.sleep(0.015)

        except Exception as e:
            logger.error(f"Stream processing error: {e}")
            yield "I encountered an error processing the response."
800
+
801
+
802
# Module-level singleton used by the orchestration pipeline below (Step 10).
post_processor = ResponsePostProcessor()
803
+
804
+
805
+ # ============================================================================
806
+ # TOOL FUNCTIONS
807
+ # ============================================================================
808
+
809
@tool(return_direct=False)
def Create_Graph_Tool(
    data: dict,
    plot_type: str,
    title: str = "Generated Plot",
    x_label: str = "",
    y_label: str = "",
    educational_context: str = ""
) -> str:
    """Generate educational graphs.

    Delegates plotting to generate_plot() and wraps the resulting
    base64-encoded PNG in an HTML <img> block, optionally preceded by an
    educational-context callout. Returns an HTML error paragraph (never
    raises) when plotting fails.
    """
    # log_step called once at entry and again with the start time at every
    # exit path — presumably it records elapsed time; verify against helper.
    tool_start = log_step("Create_Graph_Tool")

    try:
        content, artifact = generate_plot(
            data=data,
            plot_type=plot_type,
            title=title,
            x_label=x_label,
            y_label=y_label
        )

        # generate_plot signals failure via an "error" key in the artifact.
        if "error" in artifact:
            log_step("Create_Graph_Tool", tool_start)
            return f'<p style="color:red;">Graph generation failed: {artifact["error"]}</p>'

        base64_image = artifact["base64_image"]

        # Optional callout shown above the image.
        context_html = ""
        if educational_context:
            context_html = f'<div style="margin: 10px 0; padding: 10px; background: #f8f9fa; border-left: 4px solid #007bff;">{educational_context}</div>'

        result = f"""{context_html}
<div style="text-align: center; margin: 20px 0;">
    <img src="data:image/png;base64,{base64_image}"
         style="max-width: 100%; height: auto; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1);"
         alt="{title}" />
</div>"""

        log_step("Create_Graph_Tool", tool_start)
        return result

    except Exception as e:
        logger.error(f"Graph tool error: {e}")
        log_step("Create_Graph_Tool", tool_start)
        return f'<p style="color:red;">Error: {str(e)}</p>'
854
+
855
+
856
+ # ============================================================================
857
+ # MAIN ORCHESTRATION WORKFLOW
858
+ # ============================================================================
859
+
860
def orchestrate_turn(user_input: str, session_id: str = "default") -> str:
    """
    Main orchestration function implementing the redesign workflow.

    Runs one conversational turn end-to-end: resets per-turn prompt state,
    gathers history, routes through the tool/agent pipeline, assembles the
    final prompt, generates and post-processes the response, and logs
    metrics. Never raises — errors are returned as an apology string.

    Steps:
    1. Reset prompt state
    2. Process user input (history)
    3. Tool decision
    4. Regex checks
    5. Agent execution
    6. Thinking agents
    7. Prompt assembly
    8. Response generation
    9. Metrics tracking
    """
    turn_start = log_step("orchestrate_turn")
    run_id = str(uuid.uuid4())  # correlates all metrics rows for this turn

    try:
        # ====================================================================
        # STEP 1: RESET PROMPT STATE
        # ====================================================================
        # Prompt state is per-turn: every flag starts False each call.
        step_start = log_step("Step 1: Reset prompt state")
        global_state_manager.reset_prompt_state()
        prompt_state = global_state_manager.get_prompt_state_manager()
        log_step("Step 1: Reset prompt state", step_start)

        # ====================================================================
        # STEP 2: USER INPUT PROCESSING
        # ====================================================================
        step_start = log_step("Step 2: Process user input")

        # Get conversation history (last 8 messages only, to bound prompt size).
        conversation_state = global_state_manager.get_conversation_state(session_id)
        recent_history = conversation_state['conversation_state'][-8:] if conversation_state['conversation_state'] else []

        # Format history for agents; each message truncated to 100 chars.
        recent_history_formatted = "\n".join([
            f"{msg['role']}: {msg['content'][:100]}"
            for msg in recent_history
        ]) if recent_history else "No previous conversation"

        log_step("Step 2: Process user input", step_start)

        # ====================================================================
        # STEP 3: TOOL DECISION ENGINE
        # ====================================================================
        step_start = log_step("Step 3: Tool decision")
        tool_decision_result = tool_agent.should_use_visualization(user_input)

        # NOTE(review): these stay empty for the whole turn — no tool is
        # actually invoked here; only the prompt flag is set. Confirm whether
        # tool execution is intended to happen elsewhere.
        tool_img_output = ""
        tool_context = ""

        if tool_decision_result:
            logger.info("Tool decision: YES - visualization needed")
            prompt_state.update("TOOL_USE_ENHANCEMENT", True)
        else:
            logger.info("Tool decision: NO - no visualization needed")

        log_step("Step 3: Tool decision", step_start)

        # ====================================================================
        # STEP 4: REGEX LOGICAL EXPRESSIONS
        # ====================================================================
        # Cheap pattern checks set additional prompt flags before the agents run.
        step_start = log_step("Step 4: Regex checks")
        logical_expressions.apply_all_checks(user_input, prompt_state)
        log_step("Step 4: Regex checks", step_start)

        # ====================================================================
        # STEP 5: SEQUENTIAL AGENT EXECUTION
        # ====================================================================
        step_start = log_step("Step 5: Routing agents")

        # Agent 1: Practice questions
        agent1_start = log_step("Agent 1: Practice questions")
        agent_1_result = routing_agents.agent_1_practice_questions(
            user_input,
            recent_history
        )
        if agent_1_result:
            prompt_state.update("STRUCTURE_PRACTICE_QUESTIONS", True)
        log_step("Agent 1: Practice questions", agent1_start)

        # Agent 2: Discovery mode — returns the flag NAME to set, unlike the others.
        agent2_start = log_step("Agent 2: Discovery mode")
        agent_2_result = routing_agents.agent_2_discovery_mode(user_input)
        if agent_2_result:
            prompt_state.update(agent_2_result, True)
        log_step("Agent 2: Discovery mode", agent2_start)

        # Agent 3: Follow-up assessment
        agent3_start = log_step("Agent 3: Follow-up assessment")
        agent_3_result = routing_agents.agent_3_followup_assessment(
            user_input,
            recent_history
        )
        if agent_3_result:
            prompt_state.update("PRACTICE_QUESTION_FOLLOWUP", True)
        log_step("Agent 3: Follow-up assessment", agent3_start)

        # Agent 4: Teaching mode — returns a mapping of flags applied in bulk.
        agent4_start = log_step("Agent 4: Teaching mode")
        agent_4_results = routing_agents.agent_4_teaching_mode(
            user_input,
            recent_history
        )
        prompt_state.update_multiple(agent_4_results)
        log_step("Agent 4: Teaching mode", agent4_start)

        log_step("Step 5: Routing agents", step_start)

        # ====================================================================
        # STEP 6: THINKING AGENT PROCESSING
        # ====================================================================
        step_start = log_step("Step 6: Thinking agents")

        # Determine which thinking agents to activate based on flags set above.
        thinking_prompts_list = []

        # Math thinking (if math detected)
        if prompt_state.is_active("LATEX_FORMATTING"):
            thinking_prompts_list.append("MATH_THINKING")
            prompt_state.update("MATH_THINKING", True)

        # Question design (if practice questions needed)
        if prompt_state.is_active("STRUCTURE_PRACTICE_QUESTIONS"):
            thinking_prompts_list.append("QUESTION_ANSWER_DESIGN")
            prompt_state.update("QUESTION_ANSWER_DESIGN", True)

        # Reasoning thinking (for teaching/tools/followup)
        if (prompt_state.is_active("TOOL_USE_ENHANCEMENT") or
            prompt_state.is_active("PRACTICE_QUESTION_FOLLOWUP") or
            prompt_state.is_active("GUIDING_TEACHING")):
            thinking_prompts_list.append("REASONING_THINKING")
            prompt_state.update("REASONING_THINKING", True)

        # Execute thinking agents if any are active
        thinking_context = ""
        if thinking_prompts_list:
            thinking_prompts_string = '\n'.join(thinking_prompts_list)
            logger.info(f"Active thinking agents: {thinking_prompts_list}")

            think_start = log_step("Thinking agents execution")
            thinking_context = thinking_agents.process(
                user_input=user_input,
                conversation_history=recent_history_formatted,
                thinking_prompts=thinking_prompts_string,
                tool_img_output=tool_img_output,
                tool_context=tool_context
            )
            log_step("Thinking agents execution", think_start)

        log_step("Step 6: Thinking agents", step_start)

        # ====================================================================
        # STEP 7: RESPONSE PROMPT ASSEMBLY
        # ====================================================================
        step_start = log_step("Step 7: Prompt assembly")

        # Get active response prompts
        response_prompt_names = prompt_state.get_active_response_prompts()

        # Build prompt segments — CORE_IDENTITY is always first.
        prompt_segments = [CORE_IDENTITY]

        # Flag name -> prompt-text constant. "VAUGE_INPUT" spelling matches
        # the module constant elsewhere in the project; do not "fix" it here.
        prompt_map = {
            "VAUGE_INPUT": VAUGE_INPUT,
            "USER_UNDERSTANDING": USER_UNDERSTANDING,
            "GENERAL_FORMATTING": GENERAL_FORMATTING,
            "LATEX_FORMATTING": LATEX_FORMATTING,
            "GUIDING_TEACHING": GUIDING_TEACHING,
            "STRUCTURE_PRACTICE_QUESTIONS": STRUCTURE_PRACTICE_QUESTIONS,
            "PRACTICE_QUESTION_FOLLOWUP": PRACTICE_QUESTION_FOLLOWUP,
            "TOOL_USE_ENHANCEMENT": TOOL_USE_ENHANCEMENT,
        }

        # Unknown flag names are silently skipped.
        for prompt_name in response_prompt_names:
            if prompt_name in prompt_map:
                prompt_segments.append(prompt_map[prompt_name])

        prompt_segments_text = "\n\n".join(prompt_segments)

        logger.info(f"Active prompts: {response_prompt_names}")
        log_step("Step 7: Prompt assembly", step_start)

        # ====================================================================
        # STEP 8: FINAL PROMPT CONSTRUCTION
        # ====================================================================
        step_start = log_step("Step 8: Final prompt construction")

        # Knowledge cutoff
        knowledge_cutoff = f"""

The current year is {CURRENT_YEAR}. Your knowledge cutoff date is October 2023. If the user asks about recent events or dynamic facts, inform them you may not have the most up-to-date information and suggest referencing direct sources."""

        complete_prompt = f"""
{prompt_segments_text}

If tools were used, context and output will be here. Ignore if empty:
Image output: {tool_img_output}
Image context: {tool_context}

Conversation history, if available:
{recent_history_formatted}

Consider any context available to you:
{thinking_context}

Here is the user's current query:
{user_input}

{knowledge_cutoff}
"""

        log_step("Step 8: Final prompt construction", step_start)

        # ====================================================================
        # STEP 9: RESPONSE GENERATION
        # ====================================================================
        step_start = log_step("Step 9: Response generation")
        raw_response = response_agent.invoke(complete_prompt)
        log_step("Step 9: Response generation", step_start)

        # ====================================================================
        # STEP 10: POST-PROCESSING
        # ====================================================================
        # Cleanup/truncation pipeline; falls back to the raw text on error.
        step_start = log_step("Step 10: Post-processing")
        processed_response = post_processor.process_response(raw_response, user_input)
        log_step("Step 10: Post-processing", step_start)

        # ====================================================================
        # STEP 11: METRICS TRACKING
        # ====================================================================
        # Best-effort: a metrics failure must never fail the user's turn.
        step_start = log_step("Step 11: Metrics tracking")

        try:
            # Track educational quality
            quality_metrics = evaluate_educational_quality_with_tracking(
                user_query=user_input,
                response=processed_response,
                thread_id=run_id,
                session_id=session_id
            )

            # Log metrics to database
            # NOTE(review): assumes log_step(name) returns a numeric start
            # time compatible with time.time() — confirm against helper.
            metrics_to_log = {
                "conversation_start": datetime.now().isoformat(),
                "response_time": time.time() - turn_start,
                "quality_score": calculate_response_quality(processed_response),
                "educational_score": quality_metrics['educational_score'],
                "prompt_mode": ",".join(response_prompt_names),
                "tools_used": 1 if prompt_state.is_active("TOOL_USE_ENHANCEMENT") else 0,
                "thinking_agents": ",".join(thinking_prompts_list) if thinking_prompts_list else "none",
                "active_adapter": response_agent.model_type if response_agent.model_loaded else "not_loaded"
            }

            log_metrics_to_database("Mimir", run_id, metrics_to_log)

        except Exception as metrics_error:
            logger.warning(f"Metrics tracking failed: {metrics_error}")

        log_step("Step 11: Metrics tracking", step_start)

        log_step("orchestrate_turn", turn_start)
        return processed_response

    except Exception as e:
        # Any unhandled failure is reported to the user as text, not raised.
        logger.error(f"Orchestration error: {e}")
        import traceback
        logger.error(traceback.format_exc())
        log_step("orchestrate_turn", turn_start)
        return f"I encountered an error: {str(e)}"
1132
+
1133
+
1134
+ # ============================================================================
1135
+ # GRADIO CALLBACK FUNCTIONS
1136
+ # ============================================================================
1137
+
1138
def get_loading_animation_base64():
    """Read loading_animation.gif and return it as a data-URI string.

    Returns:
        The GIF encoded as ``data:image/gif;base64,...``, or None when the
        file is missing so callers can fall back to a plain placeholder.
    """
    try:
        with open("loading_animation.gif", "rb") as handle:
            encoded = base64.b64encode(handle.read()).decode('utf-8')
    except FileNotFoundError:
        logger.warning("loading_animation.gif not found")
        return None
    return f"data:image/gif;base64,{encoded}"
1148
+
1149
+
1150
def remove_loading_animations(chat_history):
    """Return chat_history with any loading-animation placeholder bubbles dropped."""
    def _is_placeholder(msg):
        # Placeholders are assistant messages whose HTML carries the
        # "loading-animation" marker class; user messages are never removed.
        if msg.get("role") != "assistant":
            return False
        return "loading-animation" in str(msg.get("content", ""))

    return [msg for msg in chat_history if not _is_placeholder(msg)]
1156
+
1157
+
1158
def add_user_message(message, chat_history, conversation_state):
    """Append the user's message to both chat display and conversation state.

    Whitespace-only input is ignored. Always returns ("", chat_history,
    conversation_state) so the Gradio textbox is cleared either way.
    """
    timer = log_step("add_user_message")

    if message.strip():
        # Work from the authoritative global state rather than the component
        # values Gradio passed in, which may be stale.
        state = global_state_manager.get_conversation_state()
        chat_history = state['chat_history']
        conversation_state = state['conversation_state']

        # Append two *separate* dicts: chat_history entries are mutated in
        # place during streaming and must not alias conversation_state.
        conversation_state.append({"role": "user", "content": message})
        chat_history.append({"role": "user", "content": message})

        global_state_manager.update_conversation_state(chat_history, conversation_state)

    log_step("add_user_message", timer)
    return "", chat_history, conversation_state
1180
+
1181
+
1182
def add_loading_animation(chat_history, conversation_state):
    """Show a 'thinking' placeholder bubble while a response is generated."""
    timer = log_step("add_loading_animation")

    state = global_state_manager.get_conversation_state()
    chat_history = state['chat_history']
    conversation_state = state['conversation_state']

    if conversation_state:
        # Never stack two placeholders: clear any stale one first.
        chat_history = remove_loading_animations(chat_history)

        gif_src = get_loading_animation_base64()
        if gif_src:
            placeholder = (
                f'<div class="loading-animation" style="display: flex; align-items: center; '
                f'justify-content: center; padding: 0.5px;"><img src="{gif_src}" alt="Thinking..." '
                f'style="height: 64px; width: auto; max-width: 80px;" /></div>'
            )
        else:
            # GIF missing: reserve the same space with an empty box.
            placeholder = (
                '<div class="loading-animation" style="display: flex; align-items: center; '
                'justify-content: center; padding: 0.5px;"><div style="width: 64px; height: 64px;">'
                '</div></div>'
            )

        chat_history.append({"role": "assistant", "content": placeholder})
        global_state_manager.update_conversation_state(chat_history, conversation_state)

    log_step("add_loading_animation", timer)
    return chat_history, conversation_state
1208
+
1209
+
1210
def generate_response(chat_history, conversation_state):
    """Generate and stream the assistant's reply via the orchestration pipeline.

    Generator callback for Gradio: yields (chat_history, conversation_state)
    after each streamed chunk. On failure it yields an error bubble instead.
    """
    callback_start = log_step("generate_response")

    # Always read the authoritative global state; the component values
    # Gradio passed in may be stale.
    current_state = global_state_manager.get_conversation_state()
    chat_history = current_state['chat_history']
    conversation_state = current_state['conversation_state']

    # NOTE(review): inside a generator these `return value` statements end
    # iteration WITHOUT emitting anything — Gradio receives no update on
    # these paths. Confirm that is intended.
    if not conversation_state:
        log_step("generate_response", callback_start)
        return chat_history, conversation_state

    # Get last user message (search backwards for the most recent user turn)
    last_user_message = ""
    for msg in reversed(conversation_state):
        if msg["role"] == "user":
            last_user_message = msg["content"]
            break

    if not last_user_message:
        log_step("generate_response", callback_start)
        return chat_history, conversation_state

    try:
        # Remove loading animation before streaming real content into the chat
        chat_history = remove_loading_animations(chat_history)
        yield chat_history, conversation_state

        # Full (blocking) orchestration produces the raw response first;
        # streaming below is re-chunked presentation, not token streaming.
        orch_start = log_step("orchestrate_turn call")
        raw_response = orchestrate_turn(last_user_message)
        log_step("orchestrate_turn call", orch_start)

        # Stream the processed response, mutating the last assistant bubble
        # in place so the UI shows progressive text.
        for chunk in post_processor.process_and_stream_response(raw_response, last_user_message):
            if chat_history and chat_history[-1]["role"] == "assistant":
                chat_history[-1]["content"] = chunk
            else:
                chat_history.append({"role": "assistant", "content": chunk})

            yield chat_history, conversation_state

        # Add to conversation state. The locals() check guards against a
        # NameError when the stream yielded zero chunks; in that case the
        # unprocessed raw_response is recorded instead.
        final_response = chunk if 'chunk' in locals() else raw_response
        conversation_state.append({"role": "assistant", "content": final_response})

        # Update global state so later callbacks see the finished turn
        global_state_manager.update_conversation_state(chat_history, conversation_state)
        yield chat_history, conversation_state

    except Exception as e:
        logger.error(f"Response generation error: {e}")
        error_msg = f"I encountered an error: {str(e)}"

        # Surface the failure as a normal assistant message in both stores
        chat_history = remove_loading_animations(chat_history)
        chat_history.append({"role": "assistant", "content": error_msg})
        conversation_state.append({"role": "assistant", "content": error_msg})

        global_state_manager.update_conversation_state(chat_history, conversation_state)
        yield chat_history, conversation_state

    log_step("generate_response", callback_start)
1272
+
1273
+
1274
def reset_conversation():
    """Clear the global conversation state and return empty chat/state lists."""
    timer = log_step("reset_conversation")
    global_state_manager.reset_conversation_state()
    log_step("reset_conversation", timer)
    return [], []
1280
+
1281
+
1282
def load_conversation_state():
    """Return the current (chat_history, conversation_state) pair from global state."""
    timer = log_step("load_conversation_state")
    state = global_state_manager.get_conversation_state()
    log_step("load_conversation_state", timer)
    return state['chat_history'], state['conversation_state']
1288
+
1289
+
1290
+ # ============================================================================
1291
+ # MULTI-PAGE INTERFACE
1292
+ # ============================================================================
1293
+
1294
def create_interface():
    """Create the multi-page Gradio interface (chatbot main page + analytics).

    Returns:
        The assembled gr.Blocks demo, ready for .launch().
    """
    logger.info("Creating Gradio interface...")

    # Import page modules lazily so their module-level gr.Blocks definitions
    # are only built when the interface is actually created.
    import gradio_chatbot
    import gradio_analytics

    with gr.Blocks(title="Mimir - Educational AI Assistant") as demo:
        # NOTE(review): `navbar` is assigned here and again inside the
        # Analytics route below; the first binding is shadowed and neither
        # is referenced afterwards — presumably only the side effect of
        # constructing gr.Navbar matters. Confirm against the gradio
        # multipage docs.
        navbar = gr.Navbar(
            visible=True,
            main_page_name="Mimir Chatbot",
            value=[("Case Study", "https://github.com/Jdesiree112/Technical_Portfolio/tree/main/CaseStudy_Mimir")]
        )
        gradio_chatbot.demo.render()

        # Secondary page registered via Blocks.route (gradio multipage API)
        with demo.route("Analytics"):
            navbar = gr.Navbar(
                visible=True,
                main_page_name="Mimir Chatbot",
                value=[("Case Study", "https://github.com/Jdesiree112/Technical_Portfolio/tree/main/CaseStudy_Mimir")]
            )
            gradio_analytics.demo.render()

    logger.info("Interface created successfully")
    return demo
1320
+
1321
+
1322
+ # ============================================================================
1323
+ # MAIN EXECUTION
1324
+ # ============================================================================
1325
+
1326
+ if __name__ == "__main__":
1327
+ try:
1328
+ # Pre-download models if needed
1329
+ logger.info("Checking for model downloads...")
1330
+ try:
1331
+ subprocess.run([sys.executable, "pre_download.py"], check=True)
1332
+ except Exception as e:
1333
+ logger.warning(f"Pre-download failed: {e}")
1334
+
1335
+ logger.info("="*60)
1336
+ logger.info("MIMIR APPLICATION READY")
1337
+ logger.info("="*60)
1338
+ logger.info(f"LightEval available: {LIGHTEVAL_AVAILABLE}")
1339
+ logger.info(f"Current year: {CURRENT_YEAR}")
1340
+ logger.info("="*60)
1341
+
1342
+ # Create and launch interface
1343
+ interface = create_interface()
1344
+
1345
+ interface.launch(
1346
+ server_name="0.0.0.0",
1347
+ share=False,
1348
+ debug=False,
1349
+ favicon_path="favicon.ico" if os.path.exists("favicon.ico") else None,
1350
+ show_error=True,
1351
+ quiet=False,
1352
+ prevent_thread_lock=False,
1353
+ max_threads=40
1354
+ )
1355
+
1356
+ except Exception as e:
1357
+ logger.error(f"Failed to launch Mimir: {e}")
1358
+ import traceback
1359
+ logger.error(traceback.format_exc())
1360
+ raise
app_V1.0.py ADDED
The diff for this file is too large to render. See raw diff
 
compile_model.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # compile_model.py
2
+ """
3
+ Compile and cache all models for Mimir Educational AI Assistant:
4
+ - Phi-3 (fine-tuned + base) for ResponseAgent
5
+ - Mistral-Small-24B for ToolDecisionAgent, PromptRoutingAgents, ThinkingAgents
6
+ - GGUF Mistral for math thinking
7
+ - RAG embeddings (if used)
8
+ """
9
+ import torch
10
+ import os
11
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
12
+ from accelerate import Accelerator, set_seed
13
+ from sentence_transformers import SentenceTransformer
14
+ from huggingface_hub import hf_hub_download
15
+ from huggingface_hub import scan_cache_dir
16
+
17
+ # Try to import llama-cpp for GGUF
18
+ try:
19
+ from llama_cpp import Llama
20
+ LLAMA_CPP_AVAILABLE = True
21
+ except ImportError:
22
+ LLAMA_CPP_AVAILABLE = False
23
+ print("⚠️ llama-cpp-python not available - GGUF model will not be cached")
24
+
25
+ HF_TOKEN = os.getenv("HF_TOKEN")
26
+
27
+ # Model paths (matching agents.py)
28
+ FINE_TUNED_PHI3 = "jdesiree/Mimir-Phi-3.5"
29
+ BASE_PHI3 = "microsoft/Phi-3-mini-4k-instruct"
30
+ MISTRAL_REASONING = "yentinglin/Mistral-Small-24B-Instruct-2501-reasoning"
31
+ MISTRAL_MATH_GGUF = "brittlewis12/Mistral-Small-24B-Instruct-2501-reasoning-GGUF"
32
+ EMBEDDINGS_MODEL = "thenlper/gte-small"
33
+
34
+ CACHE_DIR = "/data/compiled_models"
35
+
36
+
37
def _load_and_warm_phi3(model_id, quantization_config):
    """Load one Phi-3 checkpoint in 4-bit, pair it with the base tokenizer,
    and run a tiny generation to warm kernels and populate HF caches."""
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=quantization_config,
        torch_dtype=torch.float16,
        trust_remote_code=True,
        token=HF_TOKEN,
        device_map="auto",
    )

    # The fine-tune ships no tokenizer of its own; both paths use the base one.
    tokenizer = AutoTokenizer.from_pretrained(
        BASE_PHI3,
        trust_remote_code=True,
        token=HF_TOKEN
    )

    print("→ Running warmup inference...")
    test_prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": "Hello"}],
        tokenize=False,
        add_generation_prompt=True
    )
    inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
    _ = model.generate(**inputs, max_new_tokens=10)
    return model, tokenizer


def compile_phi3():
    """Compile the Phi-3 ResponseAgent model (fine-tuned, base fallback).

    Tries the fine-tuned checkpoint first; on any failure falls back to the
    base model. Writes a PHI3_READY marker into CACHE_DIR when done. The
    previous version duplicated the whole load+warmup sequence in both
    branches; that now lives in _load_and_warm_phi3.
    """
    print(f"\n{'='*60}")
    print("COMPILING PHI-3 RESPONSE AGENT")
    print(f"{'='*60}")

    accelerator = Accelerator(mixed_precision="fp16")
    set_seed(42)

    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    # Try fine-tuned model first; fall back to the base checkpoint on failure
    try:
        print(f"→ Loading fine-tuned model: {FINE_TUNED_PHI3}")
        _load_and_warm_phi3(FINE_TUNED_PHI3, quantization_config)
        print("✓ Fine-tuned Phi-3 compiled and cached")
    except Exception as e:
        print(f"⚠️ Fine-tuned model failed: {e}")
        print(f"→ Loading base model: {BASE_PHI3}")
        _load_and_warm_phi3(BASE_PHI3, quantization_config)
        print("✓ Base Phi-3 compiled and cached")

    # Save readiness marker consumed at app startup
    with open(f"{CACHE_DIR}/PHI3_READY", "w") as f:
        f.write("Phi-3 model loaded\n")
117
+
118
+
119
def compile_mistral_reasoning():
    """Compile Mistral-Small-24B for agents (tool, routing, thinking).

    Loads the model in 4-bit NF4 quantization, runs a short warmup
    generation, and writes a MISTRAL_REASONING_READY marker into CACHE_DIR.
    """
    print(f"\n{'='*60}")
    print("COMPILING MISTRAL-SMALL-24B REASONING MODEL")
    print(f"{'='*60}")

    print(f"→ Loading model: {MISTRAL_REASONING}")

    # Best-effort cache probe — purely informational, must never abort the
    # compile. Was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt; narrowed to Exception.
    try:
        cache_info = scan_cache_dir()
        model_cached = any(MISTRAL_REASONING in str(repo.repo_id) for repo in cache_info.repos)
        if model_cached:
            print(f"✓ Model already in HF cache: {MISTRAL_REASONING}")
        else:
            print(f"→ Model not cached, will download: {MISTRAL_REASONING}")
    except Exception:
        print(f"→ Loading model: {MISTRAL_REASONING}")

    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(
        MISTRAL_REASONING,
        trust_remote_code=True,
        token=HF_TOKEN
    )

    model = AutoModelForCausalLM.from_pretrained(
        MISTRAL_REASONING,
        quantization_config=quantization_config,
        torch_dtype=torch.float16,
        trust_remote_code=True,
        token=HF_TOKEN,
        device_map="auto",
    )

    # Warmup: one tiny generation compiles kernels and fills caches
    print("→ Running warmup inference...")
    test_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello"}
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        test_messages,
        tokenize=False,
        add_generation_prompt=True
    )
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    _ = model.generate(**inputs, max_new_tokens=10)

    print("✓ Mistral reasoning model compiled and cached")

    # Save readiness marker consumed at app startup
    with open(f"{CACHE_DIR}/MISTRAL_REASONING_READY", "w") as f:
        f.write("Mistral reasoning model loaded\n")
178
+
179
+
180
def compile_mistral_math_gguf():
    """Download, test-load, and warm the GGUF math-thinking model.

    Skips silently (with a notice) when llama-cpp-python is unavailable;
    failures are reported but never raised.
    """
    print(f"\n{'='*60}")
    print("COMPILING MISTRAL MATH GGUF MODEL")
    print(f"{'='*60}")

    if not LLAMA_CPP_AVAILABLE:
        print("⚠️ Skipping GGUF model - llama-cpp-python not available")
        return

    print(f"→ Downloading GGUF model: {MISTRAL_MATH_GGUF}")

    try:
        # Fetch the quantized weights file into the local HF cache
        gguf_path = hf_hub_download(
            repo_id=MISTRAL_MATH_GGUF,
            filename="mistral-small-24b-instruct-2501-reasoning-Q4_K_M.gguf",
            token=HF_TOKEN
        )
        print(f"→ GGUF downloaded to: {gguf_path}")

        # Verify the file actually loads before declaring it ready
        print("→ Testing GGUF model load...")
        llm = Llama(
            model_path=gguf_path,
            n_ctx=4096,
            n_threads=4,
            n_gpu_layers=35,
        )

        print("→ Running warmup inference...")
        _ = llm("Test prompt", max_tokens=10)

        print("✓ GGUF math model cached")

        # Marker records the resolved weights path for the app to reuse
        with open(f"{CACHE_DIR}/MISTRAL_MATH_GGUF_READY", "w") as marker:
            marker.write(f"GGUF model path: {gguf_path}\n")

    except Exception as e:
        print(f"⚠️ GGUF model caching failed: {e}")
223
+
224
+
225
def compile_rag_embeddings():
    """Pre-load and warm the sentence-transformers embedding model for RAG."""
    print(f"\n{'='*60}")
    print("COMPILING RAG EMBEDDINGS")
    print(f"{'='*60}")

    print(f"→ Loading embeddings model: {EMBEDDINGS_MODEL}")
    encoder = SentenceTransformer(EMBEDDINGS_MODEL)

    # One small encode pass downloads weights and warms the model
    print("→ Running warmup for embeddings model...")
    _ = encoder.encode(["What is calculus?", "Explain physics"])

    print("✓ RAG embeddings model cached")

    # Save readiness marker consumed at app startup
    with open(f"{CACHE_DIR}/RAG_EMBEDDINGS_READY", "w") as marker:
        marker.write(f"Embeddings model loaded: {EMBEDDINGS_MODEL}\n")
246
+
247
+
248
def compile_all():
    """Compile all models for Mimir, continuing past individual failures.

    Runs each compile step in isolation (one failure never blocks the rest),
    writes the COMPILED_READY marker, and prints an honest summary — the
    previous version printed "COMPILATION COMPLETE" even when every step
    had failed.
    """
    os.makedirs(CACHE_DIR, exist_ok=True)

    print("\n" + "="*60)
    print("MIMIR MODEL COMPILATION")
    print("="*60)
    print("\nThis will compile and cache:")
    print(" 1. Phi-3 ResponseAgent (fine-tuned + base)")
    print(" 2. Mistral-Small-24B (tool, routing, thinking agents)")
    print(" 3. GGUF Mistral Math (if llama-cpp available)")
    print(" 4. RAG Embeddings (if needed)")
    print("="*60)

    # Each step is independent; record failures instead of aborting.
    steps = [
        ("Phi-3", compile_phi3),
        ("Mistral reasoning", compile_mistral_reasoning),
        ("GGUF", compile_mistral_math_gguf),
        ("RAG embeddings", compile_rag_embeddings),
    ]
    failures = []
    for name, step in steps:
        try:
            step()
        except Exception as e:
            print(f"❌ {name} compilation failed: {e}")
            failures.append(name)

    # Final marker — startup checks for this file's existence, so it is
    # written even on partial failure to preserve existing behavior.
    with open(f"{CACHE_DIR}/COMPILED_READY", "w") as f:
        f.write("All models compiled successfully\n")

    print("\n" + "="*60)
    if failures:
        print(f"⚠️ COMPILATION FINISHED WITH FAILURES: {', '.join(failures)}")
    else:
        print("✓ COMPILATION COMPLETE")
    print("="*60)
    print(f"Cache directory: {CACHE_DIR}")
    print("Models are ready for Mimir startup!")
292
+
293
+
294
+ if __name__ == "__main__":
295
+ compile_all()
configuration_phi3.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ """ Phi-3 model configuration"""
17
+
18
+
19
+ from transformers.configuration_utils import PretrainedConfig
20
+ from transformers.utils import logging
21
+
22
+
23
+ logger = logging.get_logger(__name__)
24
+
25
# Released Phi-3 checkpoints mapped to their hosted config.json files.
PHI3_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "microsoft/Phi-3-mini-4k-instruct": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/config.json",
    "microsoft/Phi-3-mini-128k-instruct": "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/config.json",
}
29
+
30
+
31
# Vendored from HF Transformers' modeling_phi3 — keep byte-compatible with
# upstream so remote checkpoints load identically.
class Phi3Config(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`Phi3Model`]. It is used to instantiate a Phi-3
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the
    [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct).

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 32064):
            Vocabulary size of the Phi-3 model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`Phi3Model`].
        hidden_size (`int`, *optional*, defaults to 3072):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 8192):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 32):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the Transformer decoder.
        num_key_value_heads (`int`, *optional*):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
            by meanpooling all the original heads within that group. For more details checkout [this
            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
            `num_attention_heads`.
        resid_pdrop (`float`, *optional*, defaults to 0.0):
            Dropout probability for mlp outputs.
        embd_pdrop (`int`, *optional*, defaults to 0.0):
            The dropout ratio for the embeddings.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio after computing the attention scores.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
        max_position_embeddings (`int`, *optional*, defaults to 4096):
            The maximum sequence length that this model might ever be used with.
        original_max_position_embeddings (`int`, *optional*, defaults to 4096):
            The maximum sequence length that this model was trained with. This is used to determine the size of the
            original RoPE embeddings when using long scaling.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-05):
            The epsilon value used for the RMSNorm.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`. Whether to tie weight embeddings or not.
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings
        rope_theta (`float`, *optional*, defaults to 10000.0):
            The base period of the RoPE embeddings.
        rope_scaling (`dict`, *optional*):
            The scaling strategy for the RoPE embeddings. If `None`, no scaling is applied. If a dictionary, it must
            contain the following keys: `type`, `short_factor` and `long_factor`. The `type` must be `longrope` and
            the `short_factor` and `long_factor` must be lists of numbers with the same length as the hidden size
            divided by the number of attention heads divided by 2.
        bos_token_id (`int`, *optional*, defaults to 1):
            The id of the "beginning-of-sequence" token.
        eos_token_id (`int`, *optional*, defaults to 32000):
            The id of the "end-of-sequence" token.
        pad_token_id (`int`, *optional*, defaults to 32000):
            The id of the padding token.
        sliding_window (`int`, *optional*):
            Sliding window attention window size. If `None`, no sliding window is applied.

    Example:

    ```python
    >>> from transformers import Phi3Model, Phi3Config

    >>> # Initializing a Phi-3 style configuration
    >>> configuration = Phi3Config.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

    >>> # Initializing a model from the configuration
    >>> model = Phi3Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "phi3"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=32064,
        hidden_size=3072,
        intermediate_size=8192,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=None,
        resid_pdrop=0.0,
        embd_pdrop=0.0,
        attention_dropout=0.0,
        hidden_act="silu",
        max_position_embeddings=4096,
        original_max_position_embeddings=4096,
        initializer_range=0.02,
        rms_norm_eps=1e-5,
        use_cache=True,
        tie_word_embeddings=False,
        rope_theta=10000.0,
        rope_scaling=None,
        bos_token_id=1,
        eos_token_id=32000,
        pad_token_id=32000,
        sliding_window=None,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # GQA default: unspecified key/value heads collapse to plain MHA
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        self.num_key_value_heads = num_key_value_heads
        self.resid_pdrop = resid_pdrop
        self.embd_pdrop = embd_pdrop
        self.attention_dropout = attention_dropout
        self.hidden_act = hidden_act
        self.max_position_embeddings = max_position_embeddings
        self.original_max_position_embeddings = original_max_position_embeddings
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        # Legacy-type rewrite must run before validation, which only
        # accepts "longrope"
        self._rope_scaling_adjustment()
        self._rope_scaling_validation()
        self.sliding_window = sliding_window

        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            pad_token_id=pad_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )

    def _rope_scaling_adjustment(self):
        """
        Adjust the `type` of the `rope_scaling` configuration for backward compatibility.
        """
        if self.rope_scaling is None:
            return

        rope_scaling_type = self.rope_scaling.get("type", None)

        # For backward compatibility if previous version used "su" or "yarn"
        if rope_scaling_type is not None and rope_scaling_type in ["su", "yarn"]:
            self.rope_scaling["type"] = "longrope"

    def _rope_scaling_validation(self):
        """
        Validate the `rope_scaling` configuration.

        Raises ValueError unless rope_scaling is None or a 3-key dict with
        type == "longrope" and numeric factor lists of length
        hidden_size // num_attention_heads // 2 (i.e. head_dim / 2).
        """
        if self.rope_scaling is None:
            return

        if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 3:
            raise ValueError(
                "`rope_scaling` must be a dictionary with three fields, `type`, `short_factor` and `long_factor`, "
                f"got {self.rope_scaling}"
            )
        rope_scaling_type = self.rope_scaling.get("type", None)
        rope_scaling_short_factor = self.rope_scaling.get("short_factor", None)
        rope_scaling_long_factor = self.rope_scaling.get("long_factor", None)
        if rope_scaling_type is None or rope_scaling_type not in ["longrope"]:
            raise ValueError(f"`rope_scaling`'s type field must be one of ['longrope'], got {rope_scaling_type}")
        if not (
            isinstance(rope_scaling_short_factor, list)
            and all(isinstance(x, (int, float)) for x in rope_scaling_short_factor)
        ):
            raise ValueError(
                f"`rope_scaling`'s short_factor field must be a list of numbers, got {rope_scaling_short_factor}"
            )
        if not len(rope_scaling_short_factor) == self.hidden_size // self.num_attention_heads // 2:
            raise ValueError(
                f"`rope_scaling`'s short_factor field must have length {self.hidden_size // self.num_attention_heads // 2}, got {len(rope_scaling_short_factor)}"
            )
        if not (
            isinstance(rope_scaling_long_factor, list)
            and all(isinstance(x, (int, float)) for x in rope_scaling_long_factor)
        ):
            raise ValueError(
                f"`rope_scaling`'s long_factor field must be a list of numbers, got {rope_scaling_long_factor}"
            )
        if not len(rope_scaling_long_factor) == self.hidden_size // self.num_attention_heads // 2:
            raise ValueError(
                f"`rope_scaling`'s long_factor field must have length {self.hidden_size // self.num_attention_heads // 2}, got {len(rope_scaling_long_factor)}"
            )
+ )
favicon.ico ADDED
gradio_analytics.py ADDED
@@ -0,0 +1,491 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gradio_analytics.py
2
+ import gradio as gr
3
+ import logging
4
+ import json
5
+ import sqlite3
6
+ import os
7
+ from datetime import datetime
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
# Import the real analytics helpers from the main app module; if app.py (or
# one of its heavy dependencies) cannot be imported, fall back to no-op mocks
# so this page can still be imported and rendered standalone.
try:
    from app import (
        get_trackio_database_path,
        get_project_statistics_with_nulls,
        get_recent_interactions_with_nulls,
        create_dashboard_html_with_nulls,
        calculate_response_quality,
        refresh_analytics_data_persistent as refresh_analytics_data,
        export_metrics_json_persistent as export_metrics_json,
        export_metrics_csv_persistent as export_metrics_csv,
        load_analytics_state,
        get_global_state_debug_info,
        sync_trackio_with_global_state,
        global_state_manager,
        evaluate_educational_quality_with_tracking,
    )
except ImportError:
    # Mock implementations mirroring the real helpers' signatures and return
    # shapes, so the UI wiring below works unchanged in standalone mode.
    def get_trackio_database_path(project_name):
        # No database available in mock mode.
        return None

    def get_project_statistics_with_nulls(cursor, project_name):
        # None values signal "no data" to the dashboard renderers.
        return {
            "total_conversations": None,
            "avg_session_length": None,
            "success_rate": None
        }

    def get_recent_interactions_with_nulls(cursor, project_name, limit=10):
        return []

    def create_dashboard_html_with_nulls(project_name, project_stats):
        return f"<div>Mock dashboard for {project_name}</div>"

    def calculate_response_quality(response):
        # Fixed mid-scale placeholder score.
        return 3.0

    def refresh_analytics_data():
        # Matches the (stats, interactions, html) triple of the real helper.
        return {}, [], "<div>Mock analytics</div>"

    def export_metrics_json():
        gr.Info("Mock JSON export")

    def export_metrics_csv():
        gr.Info("Mock CSV export")

    def load_analytics_state():
        return {}, [], "<div>Mock analytics state</div>"

    def get_global_state_debug_info():
        return {"status": "mock"}

    def sync_trackio_with_global_state():
        pass

    def evaluate_educational_quality_with_tracking(*args, **kwargs):
        return {"educational_score": 0.5}

    class MockStateManager:
        """Stand-in for app.py's persistent global state manager."""
        def get_cache_status(self):
            return {"status": "mock"}
        def get_evaluation_summary(self, include_history=False):
            return {"aggregate_metrics": {}, "total_evaluations": {}}
        def clear_all_states(self):
            pass
        def _backup_to_hf_dataset(self):
            pass

    global_state_manager = MockStateManager()
79
+
80
def load_custom_css():
    """Return the contents of styles.css, or an empty string if it cannot be read."""
    log = logging.getLogger(__name__)
    try:
        with open("styles.css", "r", encoding="utf-8") as css_fh:
            css_text = css_fh.read()
    except FileNotFoundError:
        log.warning("styles.css file not found for analytics page")
        return ""
    except Exception as exc:
        log.warning(f"Error reading styles.css: {exc}")
        return ""
    log.info("CSS loaded successfully for analytics page")
    return css_text
92
+
93
def launch_external_trackio():
    """Launch the external trackio dashboard for the 'Mimir' project via its CLI.

    Blocks until the CLI returns; reports success/failure through Gradio toasts.
    """
    try:
        import subprocess
        result = subprocess.run(
            ["trackio", "show", "--project", "Mimir"],
            capture_output=False,  # let output flow straight to the console
            text=True  # NOTE(review): no effect while capture_output=False — confirm intent
        )

        if result.returncode == 0:
            gr.Info("Trackio dashboard launched in browser")
        else:
            gr.Warning("Could not launch trackio dashboard")

    except Exception as e:
        # Covers FileNotFoundError when the `trackio` binary is missing.
        logger.error(f"Failed to launch trackio: {e}")
        gr.Warning(f"Failed to launch trackio dashboard: {str(e)}")
110
+
111
def show_cache_status():
    """Build a markdown summary of the global-state cache for the Status panel.

    Returns the markdown string on success, or an error description on failure
    (either way the return value is rendered into the status Markdown widget).
    """
    try:
        debug_info = get_global_state_debug_info()
        # Missing key degrades gracefully to an all-defaults report.
        cache_status = debug_info.get("cache_status", {})

        status_text = f"""
**Global State Cache Status:**
- Session ID: {cache_status.get('session_id', 'Unknown')}
- Analytics Cached: {'Yes' if cache_status.get('analytics_cached') else 'No'}
- Conversation Cached: {'Yes' if cache_status.get('conversation_cached') else 'No'}
- Analytics Last Refresh: {cache_status.get('analytics_last_refresh', 'Never')}
- Total Analytics Sessions: {cache_status.get('total_analytics_sessions', 0)}
- Total Conversation Sessions: {cache_status.get('total_conversation_sessions', 0)}

**Analytics Data Status:**
- Has Analytics Data: {'Yes' if cache_status.get('analytics_has_data') else 'No'}
- Conversation Length: {cache_status.get('conversation_length', 0)} messages
- Chat History Length: {cache_status.get('chat_history_length', 0)} messages

*Last Updated: {datetime.now().strftime('%H:%M:%S')}*
"""

        gr.Info("Cache status updated - check the Status panel")
        return status_text

    except Exception as e:
        error_text = f"Error getting cache status: {str(e)}"
        gr.Warning(error_text)
        return error_text
140
+
141
def manual_backup_to_hf():
    """Trigger an immediate backup of global state to the HF dataset.

    Returns a human-readable status string for the backup_status textbox.
    """
    try:
        # NOTE(review): reaches into a private method of the state manager —
        # consider exposing a public backup() API on the manager instead.
        global_state_manager._backup_to_hf_dataset()
        gr.Info("Manual backup to HF dataset completed successfully")
        return f"Backup completed at {datetime.now().strftime('%H:%M:%S')}"
    except Exception as e:
        gr.Warning(f"Backup failed: {str(e)}")
        return f"Backup failed: {str(e)}"
149
+
150
def get_persistence_status():
    """Collect persistence-layer details (SQLite + HF dataset) for the JSON panel.

    Returns a dict of status fields, or {"error": ...} if any lookup fails.
    """
    try:
        status_info = {
            "SQLite DB": "Active" if os.path.exists(global_state_manager._db_path) else "Not Found",
            "HF Dataset": global_state_manager.dataset_repo,
            # NOTE(review): raises AttributeError if _last_hf_backup is None
            # (e.g. before any backup has run); the broad except below turns
            # that into the {"error": ...} payload — confirm this is intended.
            "Last HF Backup": global_state_manager._last_hf_backup.strftime('%Y-%m-%d %H:%M:%S'),
            "DB Path": global_state_manager._db_path,
            "Backup Interval": f"{global_state_manager._hf_backup_interval}s"
        }
        return status_info
    except Exception as e:
        return {"error": str(e)}
162
+
163
def clear_all_global_states():
    """Clear all persisted global state and return placeholder UI values.

    Returns (stats_dict, interactions_list, dashboard_html) matching the
    outputs wired to [project_info, recent_metrics, trackio_iframe]. On
    failure, falls back to re-loading the current analytics state.
    """
    try:
        global_state_manager.clear_all_states()
        gr.Info("All global states cleared successfully")

        # Null metrics plus an explicit "Cleared" marker for the JSON panel.
        empty_stats = {
            "total_conversations": None,
            "avg_session_length": None,
            "success_rate": None,
            "model_type": "Cleared",
            "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        empty_html = """
        <div style="text-align: center; padding: 40px; border: 2px dashed #ccc; border-radius: 8px; background: #f8f9fa;">
            <h3>States Cleared</h3>
            <p>All global states have been cleared.</p>
            <p>Click "Refresh Data" to reload analytics.</p>
        </div>
        """

        return empty_stats, [], empty_html

    except Exception as e:
        gr.Warning(f"Failed to clear states: {str(e)}")
        # Same 3-tuple shape as the success path.
        return load_analytics_state()
189
+
190
def show_evaluation_metrics():
    """Fetch the evaluation summary and format it for the ML Performance tab.

    Returns (summary_dict, metrics_rows, recent_evaluation_rows); empty
    containers on any failure (e.g. the aggregate keys are missing — the
    direct [...] lookups below raise KeyError, which the except absorbs).
    """
    try:
        eval_summary = global_state_manager.get_evaluation_summary(include_history=True)

        metrics_data = [
            ["Educational Quality", f"{eval_summary['aggregate_metrics']['avg_educational_quality']:.3f}"],
            ["User Satisfaction", f"{eval_summary['aggregate_metrics']['user_satisfaction_rate']:.3f}"]
        ]

        # Show at most the five most recent scored responses.
        recent_evaluations = []
        if 'history' in eval_summary:
            for eval_item in eval_summary['history']['recent_educational_scores'][-5:]:
                recent_evaluations.append([
                    eval_item['timestamp'][:16],  # trim to YYYY-MM-DDTHH:MM
                    f"{eval_item['educational_score']:.3f}",
                    f"{eval_item['semantic_quality']:.3f}",
                    f"{eval_item['response_time']:.3f}s"
                ])

        return eval_summary, metrics_data, recent_evaluations

    except Exception as e:
        logger.error(f"Error getting evaluation metrics: {e}")
        return {}, [], []
214
+
215
def sync_and_refresh_all():
    """Sync trackio with global state, then refresh analytics and evaluation views.

    Returns a 6-tuple matching [project_info, recent_metrics, trackio_iframe,
    eval_summary_display, eval_metrics_table, recent_evaluations_table].
    """
    try:
        sync_trackio_with_global_state()
        project_stats, recent_interactions, dashboard_html = refresh_analytics_data()
        eval_summary, metrics_data, recent_evaluations = show_evaluation_metrics()

        gr.Info("All data synced and refreshed successfully")

        return project_stats, recent_interactions, dashboard_html, eval_summary, metrics_data, recent_evaluations

    except Exception as e:
        logger.error(f"Sync and refresh failed: {e}")
        gr.Warning(f"Sync failed: {str(e)}")
        # load_analytics_state() yields a 3-tuple; concatenating ({}, [], [])
        # pads it back to the 6 outputs this handler must produce.
        return load_analytics_state() + ({}, [], [])
229
+
230
# Analytics dashboard UI: three tabs (Traditional Analytics, ML Performance,
# System Status) plus the event wiring that connects the buttons above to the
# handler functions defined earlier in this module.
with gr.Blocks() as demo:
    # Inject project CSS inline (Spaces pages cannot always serve static files).
    custom_css = load_custom_css()
    if custom_css:
        gr.HTML(f'<style>{custom_css}</style>')

    gr.HTML('<div class="analytics-title"><h2>Mimir Analytics Dashboard</h2></div>')

    gr.Markdown("Monitor educational AI performance and effectiveness metrics with persistent state management.")

    with gr.Tabs():
        # --- Tab 1: conversation-level analytics and exports ---
        with gr.TabItem("Traditional Analytics"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("## Controls")
                    refresh_btn = gr.Button("Refresh Data", variant="primary")
                    sync_all_btn = gr.Button("Sync & Refresh All", variant="primary")

                    with gr.Row():
                        export_json_btn = gr.Button("Export JSON", variant="secondary", size="sm")
                        export_csv_btn = gr.Button("Export CSV", variant="secondary", size="sm")

                    launch_trackio_btn = gr.Button("Launch Trackio Dashboard", variant="secondary")

                    gr.Markdown("### State Management")
                    with gr.Row():
                        cache_status_btn = gr.Button("Cache Status", size="sm")
                        clear_states_btn = gr.Button("Clear All States", size="sm", variant="stop")

                    with gr.Group():
                        gr.Markdown("### Project Information")
                        # None values render as empty until the first refresh.
                        project_info = gr.JSON(
                            value={
                                "total_conversations": None,
                                "avg_session_length": None,
                                "success_rate": None,
                                "model_type": None
                            },
                            label="Project Stats"
                        )

                    with gr.Group():
                        gr.Markdown("### System Status")
                        status_panel = gr.Markdown(
                            "Click 'Cache Status' to view global state information.",
                            label="Status Information"
                        )

                with gr.Column(scale=2):
                    gr.Markdown("## Key Metrics Dashboard")
                    # Placeholder until refresh_analytics_data supplies HTML.
                    trackio_iframe = gr.HTML(
                        value="""
                        <div style="text-align: center; padding: 40px; border: 2px dashed #ccc; border-radius: 8px; background: #f8f9fa;">
                            <h3>Trackio Dashboard</h3>
                            <p>Analytics data will appear here after conversations.</p>
                            <p>Data is automatically cached and persists across page navigation.</p>
                            <p>To launch trackio dashboard separately, run:</p>
                            <code style="background: #e9ecef; padding: 4px 8px; border-radius: 4px;">trackio show --project "Mimir"</code>
                        </div>
                        """,
                        label="Dashboard"
                    )

            with gr.Row():
                with gr.Column():
                    gr.Markdown("## Recent Interactions")
                    gr.Markdown("*Data persists when switching between Chatbot and Analytics pages*")
                    recent_metrics = gr.Dataframe(
                        headers=["Timestamp", "Response Time", "Prompt Mode", "Tools Used", "Quality Score", "Adapter"],
                        datatype=["str", "number", "str", "bool", "number", "str"],
                        row_count=10,
                        col_count=6,
                        interactive=False,
                        label="Latest Sessions",
                        value=[],
                        show_label=True
                    )

        # --- Tab 2: model evaluation metrics ---
        with gr.TabItem("ML Performance"):
            gr.Markdown("## Agent-Based Performance & Global State Metrics")

            with gr.Row():
                with gr.Column(scale=1):
                    eval_metrics_btn = gr.Button("Get Evaluation Metrics", variant="primary")

                    with gr.Group():
                        gr.Markdown("### Model Cache Status")
                        cache_status_display = gr.JSON(
                            value={},
                            label="Cache Information"
                        )

                with gr.Column(scale=2):
                    gr.Markdown("### Aggregate Performance Metrics")
                    eval_metrics_table = gr.Dataframe(
                        headers=["Metric", "Score"],
                        datatype=["str", "str"],
                        label="Model Performance",
                        value=[]
                    )

                    eval_summary_display = gr.JSON(
                        value={},
                        label="Detailed Evaluation Summary"
                    )

            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Recent Quality Evaluations")
                    recent_evaluations_table = gr.Dataframe(
                        headers=["Timestamp", "Educational Score", "Semantic Quality", "Response Time"],
                        datatype=["str", "str", "str", "str"],
                        label="Recent Evaluations",
                        value=[]
                    )

        # --- Tab 3: persistence / diagnostics controls ---
        with gr.TabItem("System Status"):
            gr.Markdown("## Global State Manager & System Diagnostics")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Global State Cache")
                    cache_details = gr.Markdown("Click 'Show Cache Status' to view detailed information.")

                    show_cache_btn = gr.Button("Show Cache Status", variant="primary")
                    refresh_cache_btn = gr.Button("Refresh Cache Info", variant="secondary")

                    gr.Markdown("### Persistence Controls")
                    backup_btn = gr.Button("Manual Backup to HF Dataset", variant="primary")
                    backup_status = gr.Textbox(label="Backup Status", value="No recent backup", interactive=False)

                with gr.Column():
                    gr.Markdown("### System Actions")
                    sync_trackio_btn = gr.Button("Sync to Database", variant="secondary")
                    clear_all_btn = gr.Button("Clear All Global States", variant="stop")

                    gr.Markdown("### Persistence Status")
                    persistence_info = gr.JSON(
                        value={},
                        label="Persistence Information"
                    )

                    gr.Markdown("### Performance Monitor")
                    perf_info = gr.JSON(
                        value={},
                        label="Performance Information"
                    )

    # --- Page-load hooks: restore cached state as soon as the page renders ---
    demo.load(
        load_analytics_state,
        inputs=None,
        outputs=[project_info, recent_metrics, trackio_iframe],
        show_progress="hidden"
    )

    demo.load(
        fn=lambda: global_state_manager.get_cache_status(),
        inputs=None,
        outputs=[cache_status_display],
        show_progress="hidden"
    )

    demo.load(
        fn=get_persistence_status,
        inputs=None,
        outputs=[persistence_info],
        show_progress="hidden"
    )

    # --- Button wiring: one handler per control, declared near its tab above ---
    refresh_btn.click(
        fn=refresh_analytics_data,
        inputs=[],
        outputs=[project_info, recent_metrics, trackio_iframe],
        show_progress="full"
    )

    sync_all_btn.click(
        fn=sync_and_refresh_all,
        inputs=[],
        outputs=[project_info, recent_metrics, trackio_iframe, eval_summary_display, eval_metrics_table, recent_evaluations_table],
        show_progress="full"
    )

    export_json_btn.click(
        fn=export_metrics_json,
        inputs=[],
        outputs=[],
        show_progress="full"
    )

    export_csv_btn.click(
        fn=export_metrics_csv,
        inputs=[],
        outputs=[],
        show_progress="full"
    )

    launch_trackio_btn.click(
        fn=launch_external_trackio,
        inputs=[],
        outputs=[],
        show_progress="full"
    )

    cache_status_btn.click(
        fn=show_cache_status,
        inputs=[],
        outputs=[status_panel],
        show_progress="full"
    )

    clear_states_btn.click(
        fn=clear_all_global_states,
        inputs=[],
        outputs=[project_info, recent_metrics, trackio_iframe],
        show_progress="full"
    )

    eval_metrics_btn.click(
        fn=show_evaluation_metrics,
        inputs=[],
        outputs=[eval_summary_display, eval_metrics_table, recent_evaluations_table],
        show_progress="full"
    )

    show_cache_btn.click(
        fn=show_cache_status,
        inputs=[],
        outputs=[cache_details],
        show_progress="full"
    )

    # NOTE(review): this writes cache status into the "Performance Information"
    # panel (perf_info) — confirm that target is intentional.
    refresh_cache_btn.click(
        fn=lambda: global_state_manager.get_cache_status(),
        inputs=[],
        outputs=[perf_info],
        show_progress="full"
    )

    backup_btn.click(
        fn=manual_backup_to_hf,
        inputs=[],
        outputs=[backup_status],
        show_progress="full"
    )

    sync_trackio_btn.click(
        fn=sync_trackio_with_global_state,
        inputs=[],
        outputs=[],
        show_progress="full"
    )

    clear_all_btn.click(
        fn=clear_all_global_states,
        inputs=[],
        outputs=[project_info, recent_metrics, trackio_iframe],
        show_progress="full"
    )
488
+
489
if __name__ == "__main__":
    # Standalone entry point: the analytics page runs on port 7861 so it can
    # coexist with the chatbot UI (port 7860) during local development.
    logger.info("Running analytics dashboard standalone with global state management")
    demo.launch(server_name="0.0.0.0", server_port=7861)
gradio_chatbot.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gradio_chatbot.py
2
+ import gradio as gr
3
+ import logging
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+ from app import (
8
+ add_user_message,
9
+ add_loading_animation,
10
+ generate_response,
11
+ reset_conversation,
12
+ load_conversation_state,
13
+ remove_loading_animations,
14
+ global_state_manager, # Import the instance from app.py
15
+ )
16
+
17
+
18
def load_custom_css():
    """Return the contents of styles.css, or an empty string if it cannot be read."""
    log = logging.getLogger(__name__)
    try:
        with open("styles.css", "r", encoding="utf-8") as css_fh:
            css_text = css_fh.read()
    except FileNotFoundError:
        log.warning("styles.css file not found, using default styling")
        return ""
    except Exception as exc:
        log.warning(f"Error reading styles.css: {exc}")
        return ""
    log.info(f"CSS loaded successfully, length: {len(css_text)} characters")
    return css_text
30
+
31
+
32
def restore_state_on_page_access():
    """Restore conversation state when page loads.

    Pulls the cached chat history and conversation state from the global
    state manager so navigation between pages does not lose the session.
    Returns (chat_history, conversation_state); empty lists on failure.
    """
    try:
        current_state = global_state_manager.get_conversation_state()
        # Missing keys degrade to empty lists rather than raising.
        chat_history = current_state.get('chat_history', [])
        conversation_state_data = current_state.get('conversation_state', [])
        logger.info(f"Restored state: {len(chat_history)} messages in chat history, {len(conversation_state_data)} in conversation state")
        return chat_history, conversation_state_data
    except Exception as e:
        logger.error(f"Failed to restore state: {e}")
        return [], []
43
+
44
+
45
# Chatbot UI: a single chat column with send/clear controls. Both the Enter
# key (msg.submit) and the Send button run the same four-step pipeline:
# append the user message -> show a loading placeholder -> strip the
# placeholder -> stream the model response.
with gr.Blocks() as demo:
    # Inject project CSS inline (Spaces pages cannot always serve static files).
    custom_css = load_custom_css()
    if custom_css:
        gr.HTML(f'<style>{custom_css}</style>')

    # Seed the UI with any previously persisted conversation. Was a bare
    # `except:` — narrowed so SystemExit/KeyboardInterrupt are not swallowed,
    # and the failure is logged instead of silently discarded.
    try:
        initial_chat_history, initial_conversation_state = load_conversation_state()
    except Exception as e:
        logger.warning(f"Could not load conversation state, starting empty: {e}")
        initial_chat_history, initial_conversation_state = [], []

    conversation_state = gr.State(initial_conversation_state)

    gr.HTML('<div class="title-header"><h1>Mimir</h1></div>')

    with gr.Row():
        chatbot = gr.Chatbot(
            type="messages",
            show_copy_button=True,
            show_share_button=False,
            layout="bubble",
            autoscroll=True,
            avatar_images=None,
            elem_id="main-chatbot",
            scale=1,
            height="65vh",
            value=initial_chat_history,
            # Render $$...$$ as display math and $...$ inline.
            latex_delimiters=[
                {"left": "$$", "right": "$$", "display": True},
                {"left": "$", "right": "$", "display": False},
            ]
        )

    with gr.Row(elem_classes=["input-controls"]):
        msg = gr.Textbox(
            placeholder="Ask me about math, research, study strategies, or any educational topic...",
            show_label=False,
            lines=6,
            max_lines=8,
            elem_classes=["input-textbox"],
            container=False,
            scale=4
        )
        with gr.Column(elem_classes=["button-column"], scale=1):
            send = gr.Button("Send", elem_classes=["send-button"], size="sm")
            clear = gr.Button("Clear", elem_classes=["clear-button"], size="sm")

    # Re-hydrate state on every page load (queue=False: run immediately).
    demo.load(
        fn=restore_state_on_page_access,
        outputs=[chatbot, conversation_state],
        queue=False
    )

    # Enter-key pipeline.
    msg.submit(
        add_user_message,
        inputs=[msg, chatbot, conversation_state],
        outputs=[msg, chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        add_loading_animation,
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        # Drop the loading placeholder before the real response is appended.
        lambda chat_history, conv_state: (remove_loading_animations(chat_history), conv_state),
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        generate_response,
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    )

    # Send-button pipeline (identical to msg.submit).
    send.click(
        add_user_message,
        inputs=[msg, chatbot, conversation_state],
        outputs=[msg, chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        add_loading_animation,
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        lambda chat_history, conv_state: (remove_loading_animations(chat_history), conv_state),
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    ).then(
        generate_response,
        inputs=[chatbot, conversation_state],
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    )

    clear.click(
        reset_conversation,
        outputs=[chatbot, conversation_state],
        show_progress="hidden"
    )
146
+
147
+
148
if __name__ == "__main__":
    # Standalone entry point: the chatbot runs on port 7860 (the analytics
    # page uses 7861) so both can run side by side during development.
    logger.info("Running chatbot interface standalone")
    demo.launch(server_name="0.0.0.0", server_port=7860)
graph_tool.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#graph_tool.py

import base64
import io
import json
from typing import Dict, List, Literal, Optional, Tuple

import matplotlib.pyplot as plt
from langchain_core.tools import tool
10
+
11
# Use the @tool decorator and specify the "content_and_artifact" response format.
@tool(response_format="content_and_artifact")
def generate_plot(
    data: Dict[str, float],
    plot_type: Literal["bar", "line", "pie"],
    title: str = "Generated Plot",
    labels: Optional[List[str]] = None,  # was `List[str] = None` — mistyped optional
    x_label: str = "",
    y_label: str = ""
) -> Tuple[str, Dict]:
    """
    Generates a plot (bar, line, or pie) from a dictionary of data and returns it
    as a base64 encoded PNG image artifact.

    Args:
        data (Dict[str, float]): A dictionary where keys are labels and values are the numeric data to plot.
        plot_type (Literal["bar", "line", "pie"]): The type of plot to generate.
        title (str): The title for the plot.
        labels (Optional[List[str]]): Optional list of labels to use for the x-axis or pie slices. If not provided, data keys are used.
        x_label (str): The label for the x-axis (for bar and line charts).
        y_label (str): The label for the y-axis (for bar and line charts).

    Returns:
        A tuple containing:
        - A string message confirming the plot was generated (or describing the error).
        - A dictionary artifact with the base64 encoded image string and its format,
          or an {"error": ...} payload on failure.
    """
    # --- Input Validation ---
    if not isinstance(data, dict) or not data:
        content = "Error: Data must be a non-empty dictionary."
        artifact = {"error": content}
        return content, artifact

    try:
        y_data = [float(val) for val in data.values()]
    except (ValueError, TypeError):
        content = "Error: All data values must be numeric."
        artifact = {"error": content}
        return content, artifact

    # Validate plot_type BEFORE allocating a figure. The original checked it
    # after plt.subplots() and returned without closing the figure, leaking an
    # open matplotlib figure on every invalid call.
    if plot_type not in ("bar", "line", "pie"):
        content = f"Error: Invalid plot_type '{plot_type}'. Choose 'bar', 'line', or 'pie'."
        artifact = {"error": content}
        return content, artifact

    x_data = list(data.keys())

    # --- Plot Generation ---
    try:
        fig, ax = plt.subplots(figsize=(10, 6))

        if plot_type == 'bar':
            # Use provided labels if they match the data length, otherwise use data keys
            bar_labels = labels if labels and len(labels) == len(x_data) else x_data
            bars = ax.bar(bar_labels, y_data)
            ax.set_xlabel(x_label)
            ax.set_ylabel(y_label)
            ax.set_ylim(bottom=0)
            # Annotate each bar with its value.
            for bar, value in zip(bars, y_data):
                height = bar.get_height()
                ax.text(bar.get_x() + bar.get_width()/2., height, f'{value}', ha='center', va='bottom')

        elif plot_type == 'line':
            line_labels = labels if labels and len(labels) == len(x_data) else x_data
            ax.plot(line_labels, y_data, marker='o')
            ax.set_xlabel(x_label)
            ax.set_ylabel(y_label)
            ax.set_ylim(bottom=0)
            ax.grid(True, alpha=0.3)

        else:  # plot_type == 'pie' (validated above)
            pie_labels = labels if labels and len(labels) == len(y_data) else list(data.keys())
            ax.pie(y_data, labels=pie_labels, autopct='%1.1f%%', startangle=90)
            ax.axis('equal')  # keep the pie circular

        ax.set_title(title, fontsize=14, fontweight='bold')
        plt.tight_layout()

        # --- In-Memory Image Conversion ---
        buf = io.BytesIO()
        plt.savefig(buf, format='png', dpi=150)
        plt.close(fig)  # always release the figure once rendered
        buf.seek(0)

        img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')

        # --- Return Content and Artifact ---
        content = f"Successfully generated a {plot_type} plot titled '{title}'."
        artifact = {
            "base64_image": img_base64,
            "format": "png"
        }
        return content, artifact

    except Exception as e:
        # Close any figure left open by a mid-render failure.
        plt.close('all')
        content = f"An unexpected error occurred while generating the plot: {str(e)}"
        artifact = {"error": str(e)}
        return content, artifact
loading_animation.gif ADDED
mimir_classifier.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b8d5d6247cdf48c288bf3690fe1bc05df7bbff550f968ebfd093d5738a1a9b7
3
+ size 7037
performance_metrics.log ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:13:00
2
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:13:00
3
+ Init and LangGraph workflow setup time: 14.1047 seconds. Timestamp: 2025-09-16 03:12:47 | Logged: 2025-09-16 03:13:01
4
+ Tool decision time (excluded): 0.0002 seconds. Timestamp: 2025-09-16 03:13:02 | Logged: 2025-09-16 03:13:02
5
+ Tool decision workflow time: 0.0005 seconds. Decision: False. Timestamp: 2025-09-16 03:13:02 | Logged: 2025-09-16 03:13:02
6
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:13:02 | Logged: 2025-09-16 03:13:02
7
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:13:02 | Logged: 2025-09-16 03:13:02
8
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0007s | Input: 'Hello...' | Timestamp: 2025-09-16 03:13:02 | Logged: 2025-09-16 03:13:02
9
+ Call model time (error): 1.5796 seconds. Timestamp: 2025-09-16 03:13:02 | Logged: 2025-09-16 03:13:03
10
+ Total query processing time: 2.0022 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:13:01 | Logged: 2025-09-16 03:13:03
11
+ Agent warmup completed in 2.00 seconds | Logged: 2025-09-16 03:13:03
12
+ Create interface time: 0.2044 seconds. Timestamp: 2025-09-16 03:13:03 | Logged: 2025-09-16 03:13:03
13
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:13:59
14
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:13:59
15
+ Init and LangGraph workflow setup time: 1.8577 seconds. Timestamp: 2025-09-16 03:13:57 | Logged: 2025-09-16 03:13:59
16
+ Tool decision time (excluded): 0.0001 seconds. Timestamp: 2025-09-16 03:13:59 | Logged: 2025-09-16 03:13:59
17
+ Tool decision workflow time: 0.0003 seconds. Decision: False. Timestamp: 2025-09-16 03:13:59 | Logged: 2025-09-16 03:13:59
18
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:13:59 | Logged: 2025-09-16 03:13:59
19
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:13:59 | Logged: 2025-09-16 03:13:59
20
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0007s | Input: 'Hello...' | Timestamp: 2025-09-16 03:13:59 | Logged: 2025-09-16 03:13:59
21
+ Call model time (error): 0.6432 seconds. Timestamp: 2025-09-16 03:13:59 | Logged: 2025-09-16 03:14:00
22
+ Total query processing time: 0.6579 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:13:59 | Logged: 2025-09-16 03:14:00
23
+ Agent warmup completed in 0.66 seconds | Logged: 2025-09-16 03:14:00
24
+ Create interface time: 0.5570 seconds. Timestamp: 2025-09-16 03:14:00 | Logged: 2025-09-16 03:14:00
25
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:14:22
26
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:14:22
27
+ Init and LangGraph workflow setup time: 1.8653 seconds. Timestamp: 2025-09-16 03:14:20 | Logged: 2025-09-16 03:14:22
28
+ Tool decision time (excluded): 0.0002 seconds. Timestamp: 2025-09-16 03:14:22 | Logged: 2025-09-16 03:14:22
29
+ Tool decision workflow time: 0.0004 seconds. Decision: False. Timestamp: 2025-09-16 03:14:22 | Logged: 2025-09-16 03:14:22
30
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:14:22 | Logged: 2025-09-16 03:14:22
31
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:14:22 | Logged: 2025-09-16 03:14:22
32
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0008s | Input: 'Hello...' | Timestamp: 2025-09-16 03:14:22 | Logged: 2025-09-16 03:14:22
33
+ Call model time (error): 0.8109 seconds. Timestamp: 2025-09-16 03:14:22 | Logged: 2025-09-16 03:14:23
34
+ Total query processing time: 0.8284 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:14:22 | Logged: 2025-09-16 03:14:23
35
+ Agent warmup completed in 0.83 seconds | Logged: 2025-09-16 03:14:23
36
+ Create interface time: 0.5381 seconds. Timestamp: 2025-09-16 03:14:23 | Logged: 2025-09-16 03:14:23
37
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:14:43
38
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:14:43
39
+ Init and LangGraph workflow setup time: 1.7132 seconds. Timestamp: 2025-09-16 03:14:42 | Logged: 2025-09-16 03:14:43
40
+ Tool decision time (excluded): 0.0002 seconds. Timestamp: 2025-09-16 03:14:43 | Logged: 2025-09-16 03:14:43
41
+ Tool decision workflow time: 0.0004 seconds. Decision: False. Timestamp: 2025-09-16 03:14:43 | Logged: 2025-09-16 03:14:43
42
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:14:43 | Logged: 2025-09-16 03:14:43
43
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:14:43 | Logged: 2025-09-16 03:14:43
44
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0009s | Input: 'Hello...' | Timestamp: 2025-09-16 03:14:43 | Logged: 2025-09-16 03:14:43
45
+ Call model time (error): 0.6961 seconds. Timestamp: 2025-09-16 03:14:43 | Logged: 2025-09-16 03:14:44
46
+ Total query processing time: 0.7111 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:14:43 | Logged: 2025-09-16 03:14:44
47
+ Agent warmup completed in 0.71 seconds | Logged: 2025-09-16 03:14:44
48
+ Create interface time: 0.5078 seconds. Timestamp: 2025-09-16 03:14:44 | Logged: 2025-09-16 03:14:45
49
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:15:43
50
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:15:43
51
+ Init and LangGraph workflow setup time: 1.9391 seconds. Timestamp: 2025-09-16 03:15:41 | Logged: 2025-09-16 03:15:43
52
+ Tool decision time (excluded): 0.0001 seconds. Timestamp: 2025-09-16 03:15:43 | Logged: 2025-09-16 03:15:43
53
+ Tool decision workflow time: 0.0003 seconds. Decision: False. Timestamp: 2025-09-16 03:15:43 | Logged: 2025-09-16 03:15:43
54
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:15:43 | Logged: 2025-09-16 03:15:43
55
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:15:43 | Logged: 2025-09-16 03:15:43
56
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0008s | Input: 'Hello...' | Timestamp: 2025-09-16 03:15:43 | Logged: 2025-09-16 03:15:43
57
+ Call model time (error): 0.6238 seconds. Timestamp: 2025-09-16 03:15:43 | Logged: 2025-09-16 03:15:44
58
+ Total query processing time: 0.6380 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:15:43 | Logged: 2025-09-16 03:15:44
59
+ Agent warmup completed in 0.64 seconds | Logged: 2025-09-16 03:15:44
60
+ Create interface time: 0.5507 seconds. Timestamp: 2025-09-16 03:15:44 | Logged: 2025-09-16 03:15:44
61
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:17:18
62
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:17:18
63
+ Init and LangGraph workflow setup time: 1.9692 seconds. Timestamp: 2025-09-16 03:17:16 | Logged: 2025-09-16 03:17:18
64
+ Tool decision time (excluded): 0.0004 seconds. Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:18
65
+ Tool decision workflow time: 0.0012 seconds. Decision: False. Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:18
66
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:18
67
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:18
68
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0008s | Input: 'Hello...' | Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:18
69
+ Call model time (error): 0.6223 seconds. Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:18
70
+ Total query processing time: 0.6606 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:18
71
+ Agent warmup completed in 0.66 seconds | Logged: 2025-09-16 03:17:18
72
+ Create interface time: 0.6001 seconds. Timestamp: 2025-09-16 03:17:18 | Logged: 2025-09-16 03:17:19
73
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:20:45
74
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:20:45
75
+ Init and LangGraph workflow setup time: 2.2781 seconds. Timestamp: 2025-09-16 03:20:43 | Logged: 2025-09-16 03:20:45
76
+ Tool decision time (excluded): 0.0002 seconds. Timestamp: 2025-09-16 03:20:45 | Logged: 2025-09-16 03:20:45
77
+ Tool decision workflow time: 0.0004 seconds. Decision: False. Timestamp: 2025-09-16 03:20:45 | Logged: 2025-09-16 03:20:45
78
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:20:45 | Logged: 2025-09-16 03:20:45
79
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:20:45 | Logged: 2025-09-16 03:20:45
80
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0007s | Input: 'Hello...' | Timestamp: 2025-09-16 03:20:45 | Logged: 2025-09-16 03:20:45
81
+ Call model time (error): 0.9643 seconds. Timestamp: 2025-09-16 03:20:45 | Logged: 2025-09-16 03:20:46
82
+ Total query processing time: 0.9942 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:20:45 | Logged: 2025-09-16 03:20:46
83
+ Agent warmup completed in 0.99 seconds | Logged: 2025-09-16 03:20:46
84
+ Create interface time: 0.5993 seconds. Timestamp: 2025-09-16 03:20:46 | Logged: 2025-09-16 03:20:46
85
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:23:25
86
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:23:25
87
+ Init and LangGraph workflow setup time: 1.7249 seconds. Timestamp: 2025-09-16 03:23:23 | Logged: 2025-09-16 03:23:25
88
+ Tool decision time (excluded): 0.0002 seconds. Timestamp: 2025-09-16 03:23:25 | Logged: 2025-09-16 03:23:25
89
+ Tool decision workflow time: 0.0004 seconds. Decision: False. Timestamp: 2025-09-16 03:23:25 | Logged: 2025-09-16 03:23:25
90
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:23:25 | Logged: 2025-09-16 03:23:25
91
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:23:25 | Logged: 2025-09-16 03:23:25
92
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0011s | Input: 'Hello...' | Timestamp: 2025-09-16 03:23:25 | Logged: 2025-09-16 03:23:25
93
+ Call model time (error): 0.8647 seconds. Timestamp: 2025-09-16 03:23:25 | Logged: 2025-09-16 03:23:26
94
+ Total query processing time: 0.9050 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:23:25 | Logged: 2025-09-16 03:23:26
95
+ Agent warmup completed in 0.91 seconds | Logged: 2025-09-16 03:23:26
96
+ Create interface time: 0.5289 seconds. Timestamp: 2025-09-16 03:23:26 | Logged: 2025-09-16 03:23:26
97
+ FALLBACK ACTIVATED: Using base model. Strategy: graceful | Logged: 2025-09-16 03:40:35
98
+ Agent initialized with FALLBACK model: {'status': 'loaded', 'model_type': 'base', 'is_adapter': False, 'fine_tuned_path': 'jdesiree/Mimir-Phi-3.5', 'base_model_path': 'microsoft/Phi-3-mini-4k-instruct', 'using_fallback': True} | Logged: 2025-09-16 03:40:35
99
+ Init and LangGraph workflow setup time: 2.0252 seconds. Timestamp: 2025-09-16 03:40:33 | Logged: 2025-09-16 03:40:35
100
+ Tool decision time (excluded): 0.0001 seconds. Timestamp: 2025-09-16 03:40:35 | Logged: 2025-09-16 03:40:35
101
+ Tool decision workflow time: 0.0003 seconds. Decision: False. Timestamp: 2025-09-16 03:40:35 | Logged: 2025-09-16 03:40:35
102
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:40:35 | Logged: 2025-09-16 03:40:35
103
+ Tool decision time (excluded): 0.0000 seconds. Timestamp: 2025-09-16 03:40:35 | Logged: 2025-09-16 03:40:35
104
+ Prompt decision: CONVERSATIONAL | Context: start=True, greeting=True, casual=False, edu_context=False, discovery=False, tools=False | Decision time: 0.0008s | Input: 'Hello...' | Timestamp: 2025-09-16 03:40:35 | Logged: 2025-09-16 03:40:35
105
+ Call model time (error): 3.0598 seconds. Timestamp: 2025-09-16 03:40:35 | Logged: 2025-09-16 03:40:38
106
+ Total query processing time: 3.0750 seconds. Input: 'Hello...'. Timestamp: 2025-09-16 03:40:35 | Logged: 2025-09-16 03:40:38
107
+ Agent warmup completed in 3.08 seconds | Logged: 2025-09-16 03:40:38
108
+ Create interface time: 0.5787 seconds. Timestamp: 2025-09-16 03:40:38 | Logged: 2025-09-16 03:40:39
pre_download.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pre_download.py
2
+ """
3
+ Pre-download all models for Mimir to avoid cold start delays.
4
+ Downloads models to HF cache without loading them fully.
5
+ """
6
+ import os
7
+ from huggingface_hub import snapshot_download, hf_hub_download
8
+
9
# Hugging Face auth token for gated/private repos; None when the env var is
# unset (public repos still download fine without it).
HF_TOKEN = os.getenv("HF_TOKEN")

# All models used in Mimir, keyed by a short human-readable label.
# NOTE: dict insertion order is the download order main() iterates in.
MODELS = {
    "phi3_finetuned": "jdesiree/Mimir-Phi-3.5",
    "phi3_base": "microsoft/Phi-3-mini-4k-instruct",
    "mistral_reasoning": "yentinglin/Mistral-Small-24B-Instruct-2501-reasoning",
    "mistral_math_gguf": "brittlewis12/Mistral-Small-24B-Instruct-2501-reasoning-GGUF",
    "embeddings": "thenlper/gte-small",
}
19
+
20
def download_model(repo_id: str, model_name: str) -> bool:
    """Download one model into the local Hugging Face cache.

    Args:
        repo_id: Hugging Face repository id to fetch.
        model_name: Short human-readable label, used only in log output.

    Returns:
        True on success, False on failure. Errors are printed rather than
        raised so that one failed repo does not abort the whole pre-download
        run (main() iterates over all models regardless).
    """
    try:
        print(f"→ Downloading {model_name}: {repo_id}")

        # GGUF repos ship many quantization variants; fetch only the single
        # quantization file the app actually loads.
        if "GGUF" in repo_id:
            hf_hub_download(
                repo_id=repo_id,
                filename="mistral-small-24b-instruct-2501-reasoning-Q4_K_M.gguf",
                token=HF_TOKEN
            )
        else:
            # Standard snapshot download; skip alternative weight formats to
            # save bandwidth and disk.
            # NOTE(review): ignoring "*.safetensors" assumes every repo also
            # publishes *.bin weights — confirm, otherwise a safetensors-only
            # repo would be cached without any weights at all.
            snapshot_download(
                repo_id=repo_id,
                token=HF_TOKEN,
                ignore_patterns=["*.msgpack", "*.h5", "*.ot", "*.safetensors"]
            )

        print(f"✓ {model_name} downloaded")
        return True

    except Exception as e:
        # "✗" marker added for consistency with the "✓" success marker above.
        print(f"✗ {model_name} download failed: {e}")
        return False
44
+
45
def main():
    """Pre-fetch every model in MODELS, printing progress banners."""
    banner = "=" * 60

    print(banner)
    print("PRE-DOWNLOADING MIMIR MODELS")
    print(banner)

    # Download in declaration order; download_model handles its own errors.
    for label, repo in MODELS.items():
        download_model(repo, label)

    print("\n" + banner)
    print("✓ ALL MODELS DOWNLOADED")
    print(banner)


if __name__ == "__main__":
    main()
prompt_classifier.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mimir Prompt Classifier - ML-based decision engine for prompt segment selection
3
+ Receives pre-calculated features from app.py for clean separation of concerns
4
+ """
5
+
6
+ import pickle
7
+ import numpy as np
8
+ import pandas as pd
9
+ import os
10
+ import re
11
+ import logging
12
+ from typing import Dict, List, Optional, Any
13
+ from dataclasses import dataclass, field
14
+ from sklearn.tree import DecisionTreeClassifier
15
+ from sklearn.multioutput import MultiOutputClassifier
16
+ from sklearn.model_selection import train_test_split
17
+ from sklearn.metrics import classification_report, accuracy_score
18
+ from datasets import load_dataset
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
@dataclass
class ConversationInput:
    """Input data structure for the classifier - all values pre-calculated by app.py.

    The classifier performs no text analysis of its own; every flag below is
    computed upstream (regex helpers, the tool decision engine, and app.py's
    per-turn bookkeeping).
    NOTE: field order is load-bearing — _prepare_training_data constructs
    instances positionally, so do not reorder fields.
    """
    user_input: str  # Raw user prompt text (kept for debugging; not a model feature)
    conversation_length: int  # Total user prompts sent (tracked in app.py)
    is_first_turn: bool  # Tracked in app.py (starts True, becomes False after first response)
    input_character_count: int  # Length of user prompt
    is_short_input: bool  # True if user prompt ≤6 chars, False if >6 chars
    recent_discovery_count: int  # Count tracked in app.py
    contains_greeting: bool  # From regex logic
    contains_educational_keywords: bool  # From regex logic
    requires_visualization: bool  # Yes/No from tool decision engine
    topic_change_detected: bool  # From regex logic
36
@dataclass
class ClassificationResult:
    """Output data structure from the classifier.

    One boolean per prompt segment; True means the corresponding prompt
    section should be included when assembling the final system prompt.
    """
    use_discovery_mode: bool
    use_conversational: bool
    use_guiding_teaching: bool
    use_tool_enhancement: bool
    # Per-target confidence values. NOTE(review): predict() never populates
    # this — it always stays the empty default; confirm before relying on it.
    confidence_scores: Dict[str, float] = field(default_factory=dict)
    # Seconds spent deciding. NOTE(review): also never set by predict();
    # callers appear to time the call externally.
    decision_time: float = 0.0
45
+
46
class RegexPatterns:
    """Compiled regex patterns shared with app.py for feature extraction.

    Patterns are compiled once at class-definition time; the accessor
    methods below simply hand back the shared compiled objects.
    """

    # Matches when the ENTIRE input is a greeting (anchored ^...$),
    # optionally followed by '!' or '.'.
    _GREETING = re.compile(
        r'^(hello|hi|hey|good\s+(morning|afternoon|evening)|greetings?|howdy|what\'s\s+up|sup)\s*[!.]*$',
        re.IGNORECASE,
    )

    # Matches education-related keywords anywhere in the input.
    _EDUCATIONAL = re.compile(
        r'\b(study|learn|homework|test|exam|practice|explain|teach|understand|help|math|science|'
        r'essay|research|assignment|question|problem|calculus|algebra|chemistry|physics|biology|history|ACT|LSAT|SAT)\b',
        re.IGNORECASE,
    )

    # Matches phrases that usually signal the user is switching topics.
    _TOPIC_CHANGE = re.compile(
        r'(actually|instead|now|let\'s|what\s+about|can\s+we|switch|move\s+on|'
        r'let\'s\s+do|let\'s\s+try|change\s+to|talk\s+about)',
        re.IGNORECASE,
    )

    @staticmethod
    def get_greeting_pattern():
        """Return the compiled standalone-greeting pattern."""
        return RegexPatterns._GREETING

    @staticmethod
    def get_educational_pattern():
        """Return the compiled educational-keyword pattern."""
        return RegexPatterns._EDUCATIONAL

    @staticmethod
    def get_topic_change_pattern():
        """Return the compiled topic-change pattern."""
        return RegexPatterns._TOPIC_CHANGE
71
+
72
class ConversationFeatureExtractor:
    """Minimal feature extractor that uses pre-calculated values."""

    def extract_features(self, conversation_input: "ConversationInput") -> Dict[str, float]:
        """Convert a ConversationInput into the classifier's numeric feature dict."""
        ci = conversation_input

        # Pre-cast the signals that are reused by derived features below.
        first_turn = float(ci.is_first_turn)
        greeting = float(ci.contains_greeting)
        educational = float(ci.contains_educational_keywords)
        topic_change = float(ci.topic_change_detected)
        early = float(ci.conversation_length <= 3)

        # NOTE: insertion order is load-bearing — training and prediction
        # vectors are built from dict order, so keys must stay in exactly
        # this sequence.
        return {
            # Base features: straight float casts of the pre-computed values.
            'conversation_length': float(ci.conversation_length),
            'is_first_turn': first_turn,
            'input_character_count': float(ci.input_character_count),
            'is_short_input': float(ci.is_short_input),
            'recent_discovery_count': float(ci.recent_discovery_count),
            'contains_greeting': greeting,
            'contains_educational_keywords': educational,
            'requires_visualization': float(ci.requires_visualization),
            'topic_change_detected': topic_change,
            # Derived features.
            'is_early_conversation': early,
            'has_sufficient_discovery': float(ci.recent_discovery_count >= 2),
            # Interaction features that help the decision tree.
            'greeting_and_first_turn': greeting * first_turn,
            'educational_and_early': educational * early,
            'topic_change_and_not_first': topic_change * (1.0 - first_turn),
        }
101
+
102
class MimirPromptClassifier:
    """Main classifier for prompt segment decision making.

    Wraps a MultiOutputClassifier of decision trees that maps the feature
    dict produced by ConversationFeatureExtractor onto four independent
    boolean prompt-segment decisions (see ``target_names``).
    """

    def __init__(self, model_path: Optional[str] = None):
        """Optionally load a previously pickled model from ``model_path``."""
        self.feature_extractor = ConversationFeatureExtractor()
        self.model = None
        self.feature_names = None
        # Output order here must match the column order produced by
        # _prepare_training_data and consumed by predict().
        self.target_names = ['use_discovery_mode', 'use_conversational', 'use_guiding_teaching', 'use_tool_enhancement']

        if model_path and os.path.exists(model_path):
            self.load_model(model_path)

    def train_from_huggingface(self, dataset_name: str = "jdesiree/Mimir_DecisionClassifier", test_size: float = 0.2):
        """Train the classifier using data from Hugging Face.

        Downloads ``dataset_name`` (requires network access), extracts
        features/targets, fits the multi-output decision tree, and logs
        per-target accuracy plus a classification report.

        Raises:
            Whatever ``load_dataset`` raises when the dataset is unavailable.
        """
        logger.info(f"Loading dataset: {dataset_name}")

        try:
            dataset = load_dataset(dataset_name)
            df = pd.DataFrame(dataset['train'])
        except Exception as e:
            logger.error(f"Failed to load dataset: {e}")
            raise

        # Prepare features and targets
        X, y = self._prepare_training_data(df)

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

        # Train model with parameters optimized for interpretability
        # (shallow entropy trees; fixed seed for reproducibility).
        self.model = MultiOutputClassifier(
            DecisionTreeClassifier(
                criterion='entropy',
                max_depth=8,
                min_samples_split=5,
                min_samples_leaf=2,
                random_state=42
            )
        )

        logger.info("Training classifier...")
        self.model.fit(X_train, y_train)

        # Evaluate
        y_pred = self.model.predict(X_test)

        # Calculate accuracy for each target
        accuracies = []
        for i, target in enumerate(self.target_names):
            accuracy = accuracy_score(y_test[:, i], y_pred[:, i])
            accuracies.append(accuracy)
            logger.info(f"{target} accuracy: {accuracy:.3f}")

        overall_accuracy = np.mean(accuracies)
        logger.info(f"Overall accuracy: {overall_accuracy:.3f}")

        # Print detailed classification report for each target
        for i, target in enumerate(self.target_names):
            print(f"\n{target}:")
            print(classification_report(y_test[:, i], y_pred[:, i]))

    def _prepare_training_data(self, df: pd.DataFrame) -> tuple[np.ndarray, np.ndarray]:
        """Convert DataFrame rows into (features, targets) numpy arrays.

        Also records ``self.feature_names`` (the feature-dict key order) so
        that get_feature_importance() can label columns later.
        """
        features_list = []
        targets_list = []

        for _, row in df.iterrows():
            # Create ConversationInput object from dataset row
            conv_input = ConversationInput(
                user_input=row['user_input'],
                conversation_length=int(row['conversation_length']),
                is_first_turn=bool(row['is_first_turn']),
                input_character_count=int(row['input_character_count']),
                is_short_input=bool(row['is_short_input']),
                recent_discovery_count=int(row['recent_discovery_count']),
                contains_greeting=bool(row['contains_greeting']),
                contains_educational_keywords=bool(row['contains_educational_keywords']),
                requires_visualization=bool(row['requires_visualization']),
                topic_change_detected=bool(row['topic_change_detected'])
            )

            # Extract features (column order = dict insertion order).
            features = self.feature_extractor.extract_features(conv_input)
            features_list.append(list(features.values()))

            # Extract targets (order must match self.target_names).
            targets = [
                bool(row['use_discovery_mode']),
                bool(row['use_conversational']),
                bool(row['use_guiding_teaching']),
                bool(row['use_tool_enhancement'])
            ]
            targets_list.append(targets)

        # Store feature names for later use (extracted from a dummy input so
        # they mirror the exact key order used above).
        if features_list:
            sample_features = self.feature_extractor.extract_features(
                ConversationInput("", 0, False, 0, False, 0, False, False, False, False)
            )
            self.feature_names = list(sample_features.keys())

        return np.array(features_list), np.array(targets_list, dtype=int)

    def predict(self, conversation_input: ConversationInput) -> ClassificationResult:
        """Make prediction for prompt segments.

        Raises:
            ValueError: if no model has been trained or loaded yet.

        NOTE(review): confidence_scores and decision_time on the result are
        left at their defaults — this method does not fill them in.
        """
        if self.model is None:
            raise ValueError("Model not trained or loaded. Call train_from_huggingface() first.")

        # Extract features (same key order as training — see extractor note).
        features = self.feature_extractor.extract_features(conversation_input)
        feature_vector = np.array([list(features.values())])

        # Make prediction (one row in, one row of 4 booleans out).
        predictions = self.model.predict(feature_vector)[0]

        return ClassificationResult(
            use_discovery_mode=bool(predictions[0]),
            use_conversational=bool(predictions[1]),
            use_guiding_teaching=bool(predictions[2]),
            use_tool_enhancement=bool(predictions[3])
        )

    def save_model(self, model_path: str):
        """Pickle the trained model plus feature/target name metadata."""
        model_data = {
            'model': self.model,
            'feature_names': self.feature_names,
            'target_names': self.target_names
        }
        with open(model_path, 'wb') as f:
            pickle.dump(model_data, f)
        logger.info(f"Model saved to {model_path}")

    def load_model(self, model_path: str):
        """Load a pre-trained model from a pickle file.

        SECURITY NOTE: pickle.load executes arbitrary code from the file —
        only load model files from trusted sources.
        """
        try:
            with open(model_path, 'rb') as f:
                model_data = pickle.load(f)

            self.model = model_data['model']
            self.feature_names = model_data['feature_names']
            # Older saves may lack target_names; keep the constructor default.
            self.target_names = model_data.get('target_names', self.target_names)
            logger.info(f"Model loaded from {model_path}")
        except Exception as e:
            logger.error(f"Failed to load model: {e}")
            raise

    def get_feature_importance(self) -> Dict[str, float]:
        """Return feature importances averaged over the four per-target trees,
        sorted descending. Empty dict when the model is not ready."""
        if self.model is None or self.feature_names is None:
            return {}

        # Average feature importance across all outputs
        importance_scores = {}
        for i, feature_name in enumerate(self.feature_names):
            avg_importance = np.mean([estimator.feature_importances_[i] for estimator in self.model.estimators_])
            importance_scores[feature_name] = avg_importance

        return dict(sorted(importance_scores.items(), key=lambda x: x[1], reverse=True))

    def debug_prediction(self, conversation_input: ConversationInput) -> Dict:
        """Get detailed prediction information for debugging: the raw input,
        extracted features, prediction, and feature importances."""
        features = self.feature_extractor.extract_features(conversation_input)
        result = self.predict(conversation_input)

        return {
            'input': conversation_input,
            'features': features,
            'prediction': result,
            'feature_importance': self.get_feature_importance()
        }
273
+
274
+
275
+ # Convenience function for easy integration
276
def create_classifier(dataset_name: str = "jdesiree/Mimir_DecisionClassifier",
                      model_path: Optional[str] = None) -> "MimirPromptClassifier":
    """Create a classifier, preferring a saved model over retraining.

    Loads the pickled model at ``model_path`` when it exists; otherwise
    trains from the Hugging Face dataset and, if a path was supplied,
    persists the freshly trained model there.
    """
    clf = MimirPromptClassifier()

    if model_path and os.path.exists(model_path):
        clf.load_model(model_path)
        return clf

    clf.train_from_huggingface(dataset_name)
    if model_path:
        clf.save_model(model_path)

    return clf
289
+
290
+
291
+ # Helper functions for app.py integration
292
def check_contains_greeting(user_input: str) -> bool:
    """Return True when the whole input is a standalone greeting."""
    normalized = user_input.lower().strip()
    return RegexPatterns.get_greeting_pattern().match(normalized) is not None
296
+
297
def check_contains_educational_keywords(user_input: str) -> bool:
    """Return True when the input mentions any educational keyword."""
    found = RegexPatterns.get_educational_pattern().search(user_input.lower())
    return found is not None
301
+
302
def check_topic_change_detected(user_input: str) -> bool:
    """Return True when the input contains a topic-change phrase."""
    found = RegexPatterns.get_topic_change_pattern().search(user_input.lower())
    return found is not None
306
+
307
def determine_is_short_input(user_input: str) -> bool:
    """Return True when the trimmed input is at most 6 characters long."""
    trimmed = user_input.strip()
    return not len(trimmed) > 6
310
+
311
+
312
if __name__ == "__main__":
    # Smoke-test the classifier end to end (trains from HF if no saved model).
    logging.basicConfig(level=logging.INFO)

    classifier = create_classifier()

    # One representative educational first-turn input.
    sample = ConversationInput(
        user_input="Can you help me with calculus?",
        conversation_length=1,
        is_first_turn=True,
        input_character_count=26,
        is_short_input=False,
        recent_discovery_count=0,
        contains_greeting=False,
        contains_educational_keywords=True,
        requires_visualization=False,
        topic_change_detected=False,
    )

    result = classifier.predict(sample)
    print(f"Prediction: {result}")

    # Dump the extracted features and learned importances for inspection.
    debug_info = classifier.debug_prediction(sample)
    print(f"Features: {debug_info['features']}")
    print(f"Feature importance: {classifier.get_feature_importance()}")
prompt_library.py ADDED
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # prompt_library.py
2
+ '''This file is to be the dedicated prompt library repository. Rather than keeping the full library in the app.py, the prompts will be centralized here for ease of editing.'''
3
+
4
+ '''
5
+ Prompts for Response Generation Input Templating
6
+ '''
7
# --- Always Included ---

# Core Identity (Universal Base)
# Prepended to every system prompt regardless of classifier decisions.
CORE_IDENTITY = """

You are Mimir, an expert multi-concept tutor designed to facilitate genuine learning and understanding. Your primary mission is to guide students through the learning process concisely, without excessive filler language.

## Communication Standards
- Use an approachable, friendly tone with professional language choice suitable for an educational environment.
- You may not, under any circumstances, use vulgar language, even if asked to do so.
- Write at a reading level that is accessible to young adults.
- Be supportive and encouraging without being condescending.
- You may use conversational language if the user input does so, but in moderation to reciprocate briefly before proceeding with the task.
- You present critiques as educational opportunities when needed.

## Follow-up Responses
- If you have conversation history, you must consider it in your new response.
- If the previous turn included practice questions and the current user input is the user answering the practice questions, you must grade the user's response for accuracy and give them feedback.
- If this is the first turn, address the user input as is appropriate per the full instructions.
"""
27
+
28
# --- Formatting ---

# General Formatting
# Fix: final bullet said "full black line" — a typo for "blank line" that
# would instruct the model to produce nonsense.
GENERAL_FORMATTING = '''

## General Formatting Guidelines
- Headings must be on their own line, not included inside a sentence or body text.
- Use ## and ### headings when needed. If only one heading level is needed, use ##.
- Separate paragraphs with a blank line.
- Organize content logically using headers and subheadings for complex answers.
- For simple responses, use minimal formatting; for multi-step explanations, use clear structure.
- Separate sections and paragraphs with a full blank line.
- Do not use emojis.
'''
42
+
43
# LaTeX Formatting
# Fix: made this a raw string — the body is full of backslash sequences
# (\sum, \$, \( ...) that are invalid escape sequences in a normal string
# literal and raise SyntaxWarning on modern Python. Content is unchanged.
LATEX_FORMATTING = r'''

You have access to LaTeX and markdown rendering.
- For inline math, use $ ... $, e.g. $\sum_{i=0}^n i^2$
- For centered display math, use $$ ... $$ on its own line.
- To show a literal dollar sign, use `\$` (e.g., \$5.00).
- To show literal parentheses in LaTeX, use `\(` and `\)` (e.g., \(a+b\)).
'''
52
+
53
# --- Discovery Prompts ---

# Vague Input Discovery
# NOTE: the constant name keeps the original "VAUGE" misspelling because
# other modules import it by this name; only the prompt text is corrected
# ("discover tactics" -> "discovery tactics").
VAUGE_INPUT = """

Use discovery tactics to understand the user's goals. Consider any context given in the user's input or chat history. Ask the user how you may help them, suggesting you can create practice questions to study for a test or delve into a topic."""

# User's Understanding
# Fixes: "discover tactics" -> "discovery tactics", "Do no include" ->
# "Do not include", "instructing or inform" -> "instructing or informing".
USER_UNDERSTANDING = '''

Use discovery tactics to understand the user's goals. Consider the topic(s) currently being discussed in the user input as well as the recent chat history. As an educator, consider how you may uncover the user's current knowledge of the topic, as well as how you may approach instructing or informing the user to facilitate learning. Do not include your thinking in the final response; instead condense your thinking into targeted questions that prompt the user to consider these concepts and present to you their objective.
'''
65
+
66
# --- Instructional Prompts ---

# Guiding/Teaching Mode
# Fix: the opening sentence had no main verb ("As a skilled educator,
# considering ..., aiming to guide ..."); rewritten as an instruction.
GUIDING_TEACHING = """

As a skilled educator, consider the conversation history and current user input, aiming to guide the user toward a deeper understanding of the topic being discussed. You adhere to academic integrity guidelines and tailor your approach based on subject. You must consider any conversation history.

## Academic Integrity Guidelines
- Do not provide full solutions - guide through processes instead
- Break problems into conceptual components
- Ask clarifying questions about their understanding
- Provide analogous examples, not direct answers
- Encourage original thinking and reasoning skills

## Subject-Specific Approaches
- **Math problems**: Explain concepts and guide through steps without computing final answers
- **Multiple-choice**: Discuss underlying concepts, not correct choices
- **Essays**: Focus on research strategies and organization techniques
- **Factual questions**: Provide educational context and encourage synthesis
"""
86
+
87
# Practice Question formatting, table integration, and tool output integration
# Fixes: "rather that be to given" -> "whether that be to give",
# "understanding on the image" -> "understanding of the image",
# "put no tool output is provided" -> "but no tool output is provided".
STRUCTURE_PRACTICE_QUESTIONS = '''

You must include one to two practice questions for the user. Included here are formatting and usage instruction guidelines for how to integrate practice questions into your response to the user.

### Question Formatting
Write a practice question relevant to the user's learning objective, testing their knowledge on recently discussed topics. Keep the questions direct and concise. End all questions with directions to the user as to how to reply, whether that be to give a written response or to select from a bank of answers you will provide below.

If tool output is included in this prompt, tailor the question to require an understanding of the image to be able to correctly answer the question or questions. Evaluate all included context relating to the tool output to gain an understanding of what the output represents to appropriately interpret how to integrate the image into your response.

If the topic being discussed could benefit from one or more practice questions requiring the analysis of data, but no tool output is provided, produce a markdown table per the below formatting guidelines, and tailor your questions to require interpretation of the data.

### Question Data Reference Formatting

1. 1 to 4 sentence question
This is the format you must use to integrate the image output of the graphing tool:
![Chart, Graph](my_image.png "Scenic View")


| Example C1 | Example C2 |...
| :---------------: | :----------------: |...
| Content...... | Content....... |...

### Practice Question Answer Options Formatting

**Single Option Multiple Choice**
Provide the user with four options, placed under the question and any relevant reference data if included.

A. Option
B. Option
C. Option
D. Option


**All That Apply**
Use this format to indicate the user is to reply to one or more of the options, as this is a multi-selection multiple-choice question format.

- [ ] A. Option
- [ ] B. Option
- [ ] C. Option
- [ ] D. Option

---

**Written Response**

Prompt the user, in one sentence, to write their response when you are posing a written-response question.

'''
136
+
137
# Practice Question follow-up.
# Injected when the previous assistant turn contained one or more practice questions:
# instructs the model to grade the user's reply and return only its feedback.
PRACTICE_QUESTION_FOLLOWUP = '''

In the previous turn, you sent the user one or more practice questions. You must assess the question(s), identify the correct answers, and grade the user's response.

In your final response to the user, only include your feedback identifying if the user was correct.
If the user answered incorrectly, provide constructive feedback, the correct answer, and a rationale explaining the answer.
If the user answered correctly, congratulate them and offer to either move forward in exploring the topic further or continue with more practice questions.
If the user did not answer, assess the user input for this turn. Ask the user if they would like to try to answer the questions or if they need further help.
'''
147
+
148
# --- Tool Use ---

# Tool Use Enhancement.
# System-prompt segment describing the Create_Graph_Tool signature, when to invoke it,
# and two fully worked example tool calls. Appended when the routing agent enables tools.
TOOL_USE_ENHANCEMENT = """

## Tool Usage for Educational Enhancement

Apply when teaching concepts that benefit from visual representation or when practice questions require charts/graphs.
You are equipped with a sophisticated data visualization tool, `Create_Graph_Tool`, designed to create precise, publication-quality charts. Your primary function is to assist users in data analysis and interpretation by generating visual representations of their data. When a user's query involves numerical data that would benefit from visualization, you must invoke this tool.

## Tool Decision Criteria

- Teaching mathematical functions, trends, or relationships
- Demonstrating statistical concepts or data analysis
- Creating practice questions that test chart interpretation skills
- Illustrating proportional relationships or comparisons

**Tool Signature:**

`Create_Graph_Tool(data: Dict[str, float], plot_type: Literal["bar", "line", "pie"], title: str, x_label: str, y_label: str, educational_context: str)`

**Parameter Guide:**

* `data` **(Required)**: A dictionary where keys are string labels and values are the corresponding numeric data points.
    * *Example:* `{"Experiment A": 88.5, "Experiment B": 92.1}`
* `plot_type` **(Required)**: The specific type of chart to generate. This **must** be one of `"bar"`, `"line"`, or `"pie"`.
* `title` (Optional): A formal title for the plot.
* `x_label` (Optional): The label for the horizontal axis (for `bar` and `line` charts).
* `y_label` (Optional): The label for the vertical axis (for `bar` and `line` charts).
* `educational_context` (Optional): Explanation of why this visualization helps learning.

**Example Scenarios:**

* **User Query:** "I need help practicing the interpretation of trends in line graphs. To analyze the efficacy of a new fertilizer, I have recorded crop yield in kilograms over five weeks. Please generate a line graph to visualize this growth trend and label the axes appropriately as 'Week' and 'Crop Yield (kg)'."
    * **Your Tool Call:**
        * `data`: `{"Week 1": 120, "Week 2": 155, "Week 3": 190, "Week 4": 210, "Week 5": 245}`
        * `plot_type`: `"line"`
        * `title`: `"Efficacy of New Fertilizer on Crop Yield"`
        * `x_label`: `"Week"`
        * `y_label`: `"Crop Yield (kg)"`
        * `educational_context`: `"This line graph helps visualize the consistent upward trend in crop yield, making it easier to identify growth patterns and analyze the fertilizer's effectiveness over time."`

* **User Query:** "I am studying for my ACT, and I am at a loss in interpreting the charts. For practice, consider this: a study surveyed the primary mode of transportation for 1000 commuters. The results were: 450 drive, 300 use public transit, 150 cycle, and 100 walk. Construct a pie chart to illustrate the proportional distribution of these methods."
    * **Your Tool Call:**
        * `data`: `{"Driving": 450, "Public Transit": 300, "Cycling": 150, "Walking": 100}`
        * `plot_type`: `"pie"`
        * `title`: `"Proportional Distribution of Commuter Transportation Methods"`
        * `educational_context`: `"This pie chart clearly shows the relative proportions of each transportation method, making it easy to see that driving is the most common method (45%) while walking is the least common (10%)."`
NOTE: If specific data to use is not supplied by the user, create reasonable example data that illustrates the concept being taught."""
197
+
198
+
199
'''
The prompt used by the routing agent; determines whether tools are enabled.
'''

# --- Tool Decision Engine Prompt ---
# Binary-classifier prompt: formatted with the user's query ({query}) and expected to
# yield exactly "YES" or "NO" to gate graph-tool invocation.
TOOL_DECISION = """

Analyze this educational query and determine if creating a graph, chart, or visual representation would significantly enhance learning and understanding.

Query: "{query}"

EXCLUDE if query is:
- Greetings or casual conversation (hello, hi, hey)
- Simple definitions without data
- General explanations that don't involve data

INCLUDE if query involves:
- Mathematical functions or relationships
- Data analysis or statistics
- Comparisons that benefit from charts
- Trends or patterns over time
- Creating practice questions with data

Answer with exactly: YES or NO

Decision:"""
225
+
226
'''
System Instructions for the four classification agents
'''
# --- Classification Prompts ---

# Agent 1: decides whether the current turn warrants generating practice questions.
# Must return exactly "STRUCTURE_PRACTICE_QUESTIONS" or "No Practice questions are needed."
# Fixes: "aid" -> "aide", "Your goal,is" -> "Your goal is", "question's" -> "questions",
# "hte" -> "the", "quadradic equatin" -> "quadratic equation", "generations" -> "generation".
agent_1_system = '''
As a teacher's aide, considering the current user prompt/input and recent conversation history, determine if practice questions are needed. Your goal is to determine dynamically if the user's current understanding and the conversation as a whole would benefit from the model offering practice questions to the user.

Cases where practice questions are beneficial:
- The user requested practice questions.
    Examples:
    1. Can you make some ACT math section practice questions?
- The user expressed that they would like to gauge their understanding.
    Examples:
    1. I want to figure out where I am in prep for my history exam, it is on the American Civil War.
- The previous turns include model instruction on a topic and the user has expressed some level of understanding.
    Examples:
    1. The chat history is an exchange between the user and model on a specific topic, and the current turn is the user responding to model instruction. The user appears to be grasping the concept, so a practice question would be helpful to gauge the user's grasp of the discussed topic.

When strictly inappropriate to include practice questions:
- The current user prompt/input is conversational, or nonsense:
    Examples:
    1. Hello/Hi/Thank You...
    2. grey, blue colored stuff
    3. fnsjdfnbiwe
- The user's question is straightforward, requiring a general answer or tutoring rather than user knowledge testing.
    Examples:
    1. Can you tell me when WW2 started?
    2. Who are the key players in the civil rights movement?
    3. What do the variables mean in a quadratic equation?

Before determining your final response, consider if issuing a practice question would be beneficial or inappropriate. Ask yourself if the user has received instruction on a topic, or requested practice questions prior to returning your final response.

If the current turn qualifies for practice question generation, return exactly "STRUCTURE_PRACTICE_QUESTIONS"
Otherwise, return "No Practice questions are needed."

Do not return any other values outside of the provided options.
'''
264
+
265
# Agent 2: intention analysis — flags vague input and/or unclear needs.
# NOTE: the misspelled output tokens "VAUGE_INPUT" (and "USER_UNDERSTANDING") are matched
# literally by downstream routing and are deliberately left unchanged; only the prose is
# corrected ("intension", "Appply", "uniterpretable", "rpomptbeing", "yeilds", "coherant", etc.).
agent_2_system = '''
As an expert in intention analysis, determine if one, both, or neither of the following cases is true considering the current user prompt/input.

**Vague Prompt**
Apply this option if the user prompt/input is overly vague and uninterpretable. It has no indication that it is a followup message, possibly being a simple greeting. This selection results in the user's prompt being handled lightly with a simple request for a task and suggestions for the user to pick from.

**Unclear Needs**
Apply this if the user's current message is just a greeting or conversational. Also apply this option if the current message includes a comment like or similar to "lets change subjects." Consider that if you return the positive value for this option, which is USER_UNDERSTANDING, then the user's prompt will be handled with discovery tactics to uncover the user's goals. Of the two options, this option yields a more detailed course of action in uncovering user needs.

**Neither**
Apply neither if the user appears to be responding to a previous message, makes a direct request, or is otherwise a coherent message.
Example:
1. I think the answer is A (responding)
2. Can you explain why the sky is blue? (direct request)
3. To my understanding

Your final response must be one of the following:
"VAUGE_INPUT USER_UNDERSTANDING"
"USER_UNDERSTANDING"
"VAUGE_INPUT"
"Neither is applicable."

Do not return any other values outside of the provided options.
'''
289
+
290
# Agent 3: detects whether the current turn is a follow-up to a previously issued
# practice question. The {STRUCTURE_PRACTICE_QUESTIONS} placeholder is filled via
# str.format at call time. Fixes subject-verb agreement ("turns contains" -> "contain").
agent_3_system = '''
Given a current user prompt/input and recent conversation history, you determine if the current turn is a followup from a practice question.

For context, consider the instructions given to generate practice questions:
{STRUCTURE_PRACTICE_QUESTIONS}

The user prompt/input is a followup if the previous turns contain a practice question per the previous guidelines.
The user prompt may or may not answer the question(s).

If the current turn is a followup reply from the user regarding a practice question, return "PRACTICE_QUESTION_FOLLOWUP True"
Otherwise return "Not a followup"

Do not return any other values outside of the provided options.
'''
304
+
305
# Agent 4: chooses between guided teaching and practice-question generation (or both,
# or neither). Output tokens are matched literally downstream and are unchanged.
# Fixes: "proffession whom is" -> "professional who is", "catagory" -> "category",
# "positivel" -> "positively".
agent_4_system = '''
As an educational professional who is assessing a student's current needs, provided the current user prompt/input and recent conversation history, determine if the user is in need of instruction or teaching on a topic, and/or a practice question to enhance their learning.

"GUIDING_TEACHING"
Guiding and teaching is a curated approach to instructing the user on a given topic. This category should be applied if the user is requesting information, seems confused on previous instruction, or continuing a discussion on a topic.

"STRUCTURE_PRACTICE_QUESTIONS"
This category is applicable if the user responded positively to previous instruction by the model on a set topic, or has requested practice questions directly.

Neither apply if no topics are specifically stated in the current or past prompts.

You may return the following outputs based on your assessment:
"GUIDING_TEACHING"
"STRUCTURE_PRACTICE_QUESTIONS"
"GUIDING_TEACHING STRUCTURE_PRACTICE_QUESTIONS"
"Neither Apply"

Do not return any other values outside of the provided options.
'''
324
+
325
+ '''
326
+ Thinking prompts for use by the agent constructing reasoning invisible to the user, outputs to be supplied to the response model for context and examples.
327
+ '''
328
+ # --- Thinking Prompts ---
329
+
330
+ # Thinking process for math-based teaching and problem solving. Tree-of-Thought Prompting
331
+ MATH_THINKING = '''
332
+ Math based thinking process instructions:
333
+
334
+ Given a user input and recent chat history, you execute a thinking process to determine your goal. Below is provided the decision tree you will utilize, logically proceeding question by question until you reach an end point. You will then process the user prompt per the instructions outlined in the endpoint. Your final output is to be cleanly structured as context for answering the user prompt.
335
+
336
+ **General Final Response Output Rules**
337
+
338
+ When formatting context, apply LaTeX formatting per these guidelines:
339
+ You have access to LaTeX and markdown rendering.
340
+ - For inline math, use $ ... $, e.g. $\sum_{i=0}^n i^2$
341
+ - For centered display math, use $$ ... $$ on its own line.
342
+ - To show a literal dollar sign, use `\$` (e.g., \$5.00).
343
+ - To show literal parentheses in LaTeX, use `\(` and `\)` (e.g., \(a+b\)).
344
+
345
+ Content must be ordered logically, building from foundational knowledge to final solutions. Follow proper order of operation. The level of detail is dictated by the output of the decision tree below.
346
+
347
+
348
+ **Decision Tree**
349
+ Each question has two possible outcomes, narrowing the options. Consider each against the supplied user input and conversation history, proceeding in order. You must apply the general output rules and the final endpoint rules to your reasoning and process in producing the final output for context, to be utilized by another model in producing the final response.
350
+
351
+ Is the math based question or request complex?
352
+ 1A. The question is a low-level math question or request not requiring more than five steps for completion. Examples: basic arithmetic or definitions.
353
+ 1B. The question or request is complex or multifaceted. Examples: tasks that require more than five steps to address. May pertain to advanced mathematical domains such as engineering or physics
354
+
355
+
356
+ **End Points**
357
+ 1A. Evaluate the topic being discussed, considering the newest user and conversation input. Define key terms at the beginning of your context generation, such as the operators and their use in the problem and any principles that apply. Step by step solve the problem presented in the current user query, if one is presented. All math must be formatted per the LaTeX formatting guidelines, with each step on its own line with a description over top expressing why the step is being done and what principles are being applied. Maintain a minimal level of detail, focusing on large topics rather than granular details.
358
+ EXAMPLE:
359
+ [INPUT]
360
+ user: "Can you explain the Pythagorean theorem?"
361
+ chat_history: None
362
+
363
+ [OUTPUT]
364
+ **Key Terms**
365
+ - **Right Triangle:** A triangle with one angle measuring exactly 90 degrees.
366
+ - **Hypotenuse:** The longest side of a right triangle, opposite the right angle.
367
+ - **Legs:** The two shorter sides of a right triangle that form the right angle.
368
+
369
+ **Principle: The Pythagorean Theorem**
370
+ The theorem states that in a right triangle, the square of the length of the hypotenuse (c) is equal to the sum of the squares of the lengths of the other two sides (a and b).
371
+
372
+ **Formula**
373
+ The relationship is expressed with the formula:
374
+ $$a^2 + b^2 = c^2$$
375
+
376
+ 1B. Evaluate the topic being discussed, considering the newest user and conversation input. Define key terms at the beginning of your context generation, such as the operators and their use in the problem and any principles that apply. Identify the domain or school of knowledge. Step by step solve the problem presented in the current user query, if one is presented. List steps in a numbered list. All math must be formatted per the LaTeX formatting guidelines, with each step on its own line with a description over top expressing why the step is being done, and the relevant principles being applied. Include a summary of steps taken and the final answer below the full steps list, in a bulleted list.
377
+ EXAMPLE:
378
+ [INPUT]
379
+ user: "Okay, can you solve the definite integral of f(x) = 3x^2 from x=1 to x=3?"
380
+ chat_history: "user: \"What is an integral?\"\nassistant: \"An integral is a mathematical object that can be interpreted as an area or a generalization of area. The process of finding an integral is called integration.\""
381
+
382
+ [OUTPUT]
383
+ **Domain:** Integral Calculus
384
+
385
+ **Key Terms**
386
+ - **Definite Integral:** Represents the net area under a curve between two points, known as the limits of integration.
387
+ - **Antiderivative:** A function whose derivative is the original function. The process relies on the Fundamental Theorem of Calculus.
388
+ - **Limits of Integration:** The start (lower) and end (upper) points of the interval over which the integral is calculated. In this case, 1 and 3.
389
+
390
+ **Problem**
391
+ Solve the definite integral:
392
+ $$\int_{1}^{3} 3x^2 \,dx$$
393
+
394
+ **Step-by-Step Solution**
395
+ 1. **Find the antiderivative of the function.**
396
+ We apply the power rule for integration, $\int x^n \,dx = \frac{x^{n+1}}{n+1}$.
397
+ $$ \int 3x^2 \,dx = 3 \cdot \frac{x^{2+1}}{2+1} = 3 \cdot \frac{x^3}{3} = x^3 $$
398
+ 2. **Apply the Fundamental Theorem of Calculus.**
399
+ We will evaluate the antiderivative at the upper and lower limits of integration, $F(b) - F(a)$.
400
+ $$ [x^3]_1^3 $$
401
+ 3. **Evaluate the antiderivative at the upper limit (x=3).**
402
+ $$ (3)^3 = 27 $$
403
+ 4. **Evaluate the antiderivative at the lower limit (x=1).**
404
+ $$ (1)^3 = 1 $$
405
+ 5. **Subtract the lower limit result from the upper limit result.**
406
+ This gives the final value of the definite integral.
407
+ $$ 27 - 1 = 26 $$
408
+
409
+ **Summary**
410
+ - The antiderivative of $3x^2$ is $x^3$.
411
+ - Evaluating the antiderivative from $x=1$ to $x=3$ yields $(3)^3 - (1)^3$.
412
+ - The final answer is $26$.
413
+
414
+ '''
415
+
416
# Chain-of-thought prompting: guides the model in processing tool output for questions,
# designing tables for contextual data, and designing practice questions plus an answer
# bank. Placeholders ({tool_img_output}, {tool_context}, {STRUCTURE_PRACTICE_QUESTIONS},
# {LATEX_FORMATTING}) are filled via str.format at call time.
# Fixes: "seasoning" -> "a seasoned", "avialble" -> "available", "summaried at teh" ->
# "summarized at the", "pract question" -> "practice question", missing space after
# "answer.", "answerable with based on previosu" -> "answerable based on previous".
QUESTION_ANSWER_DESIGN = '''
As a seasoned test question writing specialist, your task is to produce context to create a practice question for the user.

Tool Outputs (if provided)
If tool call outputs are available, the practice question must use and require understanding of the data presented.
Image output: {tool_img_output}
Image context to consider: {tool_context}

You must construct practice questions per the formatting guidelines included here:
{STRUCTURE_PRACTICE_QUESTIONS}

Math LaTeX Formatting Guidelines:
{LATEX_FORMATTING}

Follow this logical process:
1. Assess the current round's user input and the conversation history, if there is one. What specific topics or concepts are discussed? What instruction has the model previously given? Also identify the subject domain. Return this context summarized at the top of your context output.
2. Produce a practice question for the user on the identified topic or concept. Return the practice question with the heading "Practice Question"
    - If Math or requiring scientific calculations: The question must not be an example given by the model or user in the conversation history. It may be inspired by the conversation history, but it must require the user to try to solve the problem based on what they learned. If no tool output is given to base the question on, then you must create your own data for the user to interpret, solve, or otherwise manipulate to come to an answer. You may provide data by means of the tool image output, with the question constructed using the tool context output. If no tool output is included, you may provide data as a markdown table or integrated into the question. Math must be formatted using LaTeX as outlined in the LaTeX guidelines given above.
    - If History/social studies/art or otherwise static fact related: The question must be answerable based on previous model teaching or instruction from the conversation history.

3. Produce an answer bank under the question with the correct answer or answers labeled. If it is a written response question, you must write examples of possible correct answers for the new model to utilize in grading the user's answer.
'''
439
+
440
+ # This prompt is reserved for high complexity user queries, aiming to generate context in support of the response agent.
441
+ REASONING_THINKING = '''
442
+ Considering the provided current user prompt/input and recent conversation history, as an educational professional skilled in breaking down concepts, return context that would be beneficial in producing a response to the user.
443
+
444
+ 1. Begin by thinking about what the user is asking about, such as the topic or domain of knowledge. Summarize the user's request as well as what has been said relating to the topic or goal in the conversation history. Give this section the heading "User Knowledge Summary."
445
+ 2. Evaluate the user's previous statements for accuracy. Ask yourself if the user appears to be grasping the concept or struggling with some part of it. Produce a brief analysis section that defines the user's established understanding, or if this is unknown. Propose potential concepts to cover to aid the user. Return this section with the heading "User Understanding."
446
+ 3. Identify steps taken by the model in previous turns to aid the user, as well as the apparent effectiveness of said steps, if conversation history is available. Produce this section with the heading "Previous Actions."
447
+ 4. Identify relevant facts that would aid the user in understanding the concept, following a logical order in listing these items. Present these items in a nested list, with a title for each nested block at the higher level and atomic facts nested underneath. Produce this section with the heading "Reference Fact Sheet"
448
+
449
+ Review your response prior to returning it as output. Review for accuracy and relevance, producing only facts that support further learning rather than information the user has already shown understanding of.
450
+
451
+ Examples:
452
+ [INPUT]
453
+ user: "I know principal is the starting money and the rate is the percentage. But I don't get what 'compounding frequency' means. Does it matter if it's daily vs yearly?"
454
+ chat_history: "user: \"How do I calculate compound interest?\"\nassistant: \"## Calculating Compound Interest\n\nThat's a great question! Compound interest is essentially interest earned on the initial amount of money (the principal) as well as on the accumulated interest from previous periods.\n\nTo give you the most helpful explanation, it would be useful to know what you're familiar with already. Have you encountered terms like 'principal', 'annual interest rate', or 'compounding frequency' before?\""
455
+
456
+ [OUTPUT]
457
+ ### User Knowledge Summary
458
+ The user's goal is to learn how to calculate compound interest. The conversation began with the user asking for the calculation method. The model responded by defining the term and asking discovery questions to gauge the user's prior knowledge of key variables. The user has now confirmed they understand 'principal' and 'interest rate' but are specifically asking for a definition of 'compounding frequency' and an explanation of its importance.
459
+
460
+ ### User Understanding
461
+ The user has a foundational grasp of the core components of interest calculations (principal, rate). Their point of confusion is isolated to the concept of compounding frequency. They have correctly intuited that the frequency (e.g., daily vs. yearly) matters but do not understand why. To aid them, the next steps should be to formally define compounding frequency and then use a comparative example to illustrate its impact on the final amount.
462
+
463
+ ### Previous Actions
464
+ In the previous turn, the model successfully employed a guided discovery tactic. Instead of providing the formula outright, it defined the topic and asked targeted questions. This action was highly effective, as it allowed the user to articulate their specific knowledge gap, enabling a more focused and efficient teaching approach in the next turn.
465
+
466
+ ### Reference Fact Sheet
467
+ - **The Compound Interest Formula**
468
+ - The standard formula is $A = P(1 + r/n)^{nt}$
469
+ - **A**: The future value of the investment/loan, including interest.
470
+ - **P**: The principal amount (the initial amount of money).
471
+ - **r**: The annual interest rate (in decimal form).
472
+ - **n**: The number of times that interest is compounded per year.
473
+ - **t**: The number of years the money is invested or borrowed for.
474
+ - **The Role of Compounding Frequency (n)**
475
+ - This variable represents how often the accumulated interest is calculated and added to the principal.
476
+ - A higher frequency means interest starts earning its own interest sooner and more often within the same year.
477
+ - This directly leads to a higher future value (A), even if all other variables (P, r, t) are the same.
478
+ - **Common Values for 'n'**
479
+ - **Annually**: n = 1
480
+ - **Semiannually**: n = 2
481
+ - **Quarterly**: n = 4
482
+ - **Monthly**: n = 12
483
+ - **Daily**: n = 365
484
+
485
+ [INPUT]
486
+ user: "I think the radius is the line that goes all the way across the middle."
487
+ chat_history: "user: \"How do you find the area of a circle?\"\nassistant: \"## Finding the Area of a Circle\n\nThat's a great question! To calculate the area of a circle, we use a specific formula that involves a special number called Pi.\n\nBefore we dive into the formula, what do you already know about the different parts of a circle, like the radius or the diameter?\""
488
+
489
+ [OUTPUT]
490
+ ### User Knowledge Summary
491
+ The user wants to learn how to calculate the area of a circle. In the previous turn, the model asked the user about their knowledge of a circle's parts to establish a baseline. The user has now responded, incorrectly identifying the radius as the diameter.
492
+
493
+ ### User Understanding
494
+ The user has a misunderstanding of core terminology essential for this calculation. They have confused the radius with the diameter. This foundational knowledge gap must be corrected before introducing the area formula. Potential concepts to cover are the definitions of radius and diameter and their mathematical relationship.
495
+
496
+ ### Previous Actions
497
+ In the previous turn, the model employed a discovery tactic by asking about the user's prior knowledge of circle components. This was an effective step, as it successfully revealed a critical misconception in the user's understanding that can now be corrected.
498
+
499
+ ### Reference Fact Sheet
500
+ - Core Components of a Circle
501
+ - **Radius (r):** The distance from the center of the circle to any point on its edge.
502
+ - **Diameter (d):** The distance from one edge of the circle to the other, passing through the center.
503
+ - **Relationship:** The diameter is always exactly twice the length of the radius ($d = 2r$). Conversely, the radius is half the diameter ($r = d/2$).
504
+ - The Area Formula
505
+ - **Pi ($\pi$):** A special mathematical constant, approximately equal to 3.14159, that represents the ratio of a circle's circumference to its diameter.
506
+ - **Formula:** The area ($A$) of a circle is calculated using the formula $A = \pi r^2$.
507
+ - **Crucial Detail:** The formula uses the **radius**, not the diameter. If given the diameter, it must first be converted to the radius before calculating the area.
508
+
509
+ '''
requirements.txt ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt
2
+ # Mimir Educational AI Assistant Dependencies
3
+
4
+ # =============================================================================
5
+ # ZeroGPU COMPATIBILITY
6
+ # =============================================================================
7
+ # DO NOT specify torch versions - provided by ZeroGPU environment
8
+ spaces
9
+
10
+ # =============================================================================
11
+ # CORE ML/AI PACKAGES
12
+ # =============================================================================
13
+ transformers>=4.41.0
14
+ huggingface_hub>=0.20.0
15
+ safetensors
16
+ accelerate>=0.31.0
17
+ bitsandbytes
18
+ sentencepiece
19
+ peft>=0.10.0
20
+
21
+ # GGUF model support for Math Thinking Agent
22
+ # llama-cpp-python>=0.2.0
23
+
24
+ # =============================================================================
25
+ # LANGCHAIN ECOSYSTEM
26
+ # =============================================================================
27
+ langgraph>=0.2.0
28
+ langchain-core>=0.3.0
29
+ langchain-community>=0.3.0
30
+ langchain-huggingface>=0.1.0
31
+
32
+ # =============================================================================
33
+ # UI FRAMEWORK
34
+ # =============================================================================
35
+ gradio>=5.46.1
36
+
37
+ # =============================================================================
38
+ # DATA & STATE MANAGEMENT
39
+ # =============================================================================
40
+ datasets>=2.14.0 # For HF dataset backup in state manager
41
+ python-dotenv>=1.0.0 # Environment variable management
42
+
43
+ # =============================================================================
44
+ # VISUALIZATION & TOOLS
45
+ # =============================================================================
46
+ matplotlib>=3.7.0 # For graph_tool.py
47
+ plotly>=5.15.0 # For advanced visualizations
48
+ pandas>=2.0.0 # Data handling
49
+ numpy>=1.24.0 # Numerical operations
50
+
51
+ # =============================================================================
52
+ # METRICS & EVALUATION
53
+ # =============================================================================
54
+ lighteval # For educational quality metrics and LightEval integration
55
+ trackio
56
+ # =============================================================================
57
+ # UTILITIES
58
+ # =============================================================================
59
+ tqdm>=4.65.0 # Progress bars
60
+
61
+ # =============================================================================
62
+ # NOTES
63
+ # =============================================================================
64
+ # Removed dependencies:
65
+
66
+ # - scikit-learn: ML classifier replaced by agent-based routing
67
+ # - sentence-transformers, faiss-cpu: RAG not used
68
+ # - pyspellchecker: Spell checking removed
69
+ # - scipy: Not used in current implementation
70
+ # - langsmith: Not used
71
+ # - emoji: Not used
72
+ # - tiktoken, langchain-text-splitters: RAG components not used
state_manager.py ADDED
@@ -0,0 +1,801 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # state_manager.py
2
+ """
3
+ Global state management and logical expression system for Mimir.
4
+
5
+ Components:
6
+ - GlobalStateManager: Thread-safe state persistence with SQLite + HF dataset backup
7
+ - PromptStateManager: Per-turn prompt segment activation tracking
8
+ - LogicalExpressions: Regex-based prompt triggers
9
+ """
10
+
11
+ import os
12
+ import re
13
+ import sqlite3
14
+ import json
15
+ import logging
16
+ import threading
17
+ from datetime import datetime, timedelta
18
+ from typing import Dict, List, Optional, Any
19
+ from datasets import load_dataset, Dataset
20
+ from huggingface_hub import HfApi
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ # ============================================================================
26
+ # PROMPT STATE MANAGER
27
+ # ============================================================================
28
+
29
class PromptStateManager:
    """
    Tracks prompt-segment activation for a single conversational turn.

    Every flag starts out False and the whole set is wiped back to False
    at the start of each turn via reset().
    """

    # Every prompt segment the system knows about; doubles as the set of
    # names accepted by update().
    _SEGMENT_NAMES = (
        "MATH_THINKING",
        "QUESTION_ANSWER_DESIGN",
        "REASONING_THINKING",
        "VAUGE_INPUT",
        "USER_UNDERSTANDING",
        "GENERAL_FORMATTING",
        "LATEX_FORMATTING",
        "GUIDING_TEACHING",
        "STRUCTURE_PRACTICE_QUESTIONS",
        "PRACTICE_QUESTION_FOLLOWUP",
        "TOOL_USE_ENHANCEMENT",
    )

    # Segments consumed by the response agent (thinking segments excluded).
    _RESPONSE_SEGMENTS = (
        "VAUGE_INPUT",
        "USER_UNDERSTANDING",
        "GENERAL_FORMATTING",
        "LATEX_FORMATTING",
        "GUIDING_TEACHING",
        "STRUCTURE_PRACTICE_QUESTIONS",
        "PRACTICE_QUESTION_FOLLOWUP",
        "TOOL_USE_ENHANCEMENT",
    )

    # Segments consumed by the thinking agent.
    _THINKING_SEGMENTS = (
        "MATH_THINKING",
        "QUESTION_ANSWER_DESIGN",
        "REASONING_THINKING",
    )

    def __init__(self):
        self._default_state = dict.fromkeys(self._SEGMENT_NAMES, False)
        self._current_state = dict(self._default_state)
        logger.info("PromptStateManager initialized")

    def reset(self):
        """Reset all prompt states to False for new turn"""
        self._current_state = dict(self._default_state)
        logger.debug("Prompt state reset for new turn")

    def get_state(self) -> Dict[str, bool]:
        """Get current prompt state dictionary (a defensive copy)."""
        return dict(self._current_state)

    def update(self, prompt_name: str, value: bool):
        """
        Set a single prompt segment's activation flag.

        Args:
            prompt_name: Name of prompt segment (must be a known segment)
            value: True to activate, False to deactivate
        """
        if prompt_name in self._default_state:
            self._current_state[prompt_name] = value
            logger.debug(f"Prompt state updated: {prompt_name} = {value}")
        else:
            # Unknown names are ignored (logged) rather than raised.
            logger.warning(f"Unknown prompt name: {prompt_name}")

    def update_multiple(self, updates: Dict[str, bool]):
        """
        Apply several flag updates at once.

        Args:
            updates: Dictionary of {prompt_name: bool} updates
        """
        for name, flag in updates.items():
            self.update(name, flag)

    def is_active(self, prompt_name: str) -> bool:
        """Check if a prompt segment is active"""
        return self._current_state.get(prompt_name, False)

    def get_active_prompts(self) -> List[str]:
        """Get list of all currently active prompt names"""
        return [name for name, flag in self._current_state.items() if flag]

    def get_active_response_prompts(self) -> List[str]:
        """Active response-agent prompts only (thinking prompts excluded)."""
        return [name for name in self._RESPONSE_SEGMENTS if self.is_active(name)]

    def get_active_thinking_prompts(self) -> List[str]:
        """Active thinking-agent prompts only."""
        return [name for name in self._THINKING_SEGMENTS if self.is_active(name)]
112
+
113
+
114
+ # ============================================================================
115
+ # LOGICAL EXPRESSIONS
116
+ # ============================================================================
117
+
118
class LogicalExpressions:
    """
    Regex-driven trigger detection for prompt segments.

    Scans raw user input and switches on the matching prompt segments of
    a PromptStateManager.
    """

    def __init__(self):
        # Keywords that mark a message as math-related.
        self.math_regex = (
            r'\b(math|calculus|algebra|geometry|equation|formula|solve|calculate'
            r'|derivative|integral|trigonometry|statistics|probability)\b'
        )

        # Additional regex patterns can be added here
        logger.info("LogicalExpressions initialized")

    def check_math_keywords(self, user_input: str) -> bool:
        """
        Return True when the input contains mathematical keywords.
        A hit triggers LATEX_FORMATTING downstream.

        Args:
            user_input: User's message

        Returns:
            True if math keywords detected
        """
        if re.search(self.math_regex, user_input, re.IGNORECASE) is None:
            return False
        logger.debug(f"Math keywords detected in: '{user_input[:50]}...'")
        return True

    def apply_all_checks(self, user_input: str, prompt_state: PromptStateManager):
        """
        Run every trigger check against the input and update prompt_state.

        Args:
            user_input: User's message
            prompt_state: PromptStateManager instance to update
        """
        # GENERAL_FORMATTING is always applied
        prompt_state.update("GENERAL_FORMATTING", True)

        # Math content forces LaTeX-aware formatting.
        if self.check_math_keywords(user_input):
            prompt_state.update("LATEX_FORMATTING", True)

        # Additional checks can be added here as needed
        logger.debug(f"Logical expressions applied. Active prompts: {prompt_state.get_active_prompts()}")
164
+
165
+
166
+ # ============================================================================
167
+ # GLOBAL STATE MANAGER
168
+ # ============================================================================
169
+
170
class GlobalStateManager:
    """
    Thread-safe global state manager with SQLite persistence and HF dataset backup.

    Holds per-session conversation, analytics, ML-model and evaluation caches,
    plus a PromptStateManager for per-turn prompt segment tracking. All cache
    access is guarded by a single re-entrant lock.
    """

    def __init__(self, db_path="mimir_analytics.db", dataset_repo="jdesiree/mimir_analytics"):
        self._db_path = db_path
        self.dataset_repo = dataset_repo
        self.hf_token = os.getenv("HF_TOKEN")

        # Per-session caches
        self._states = {}              # conversation state per session
        self._analytics_cache = {}     # analytics per session
        self._ml_models_cache = {}     # cached trained ML models
        self._evaluation_cache = {}    # evaluation metrics per session

        # Thread safety. RLock, NOT Lock: several public methods (e.g.
        # add_export_record, add_user_feedback) call get_analytics_state /
        # get_evaluation_state while already holding the lock, which would
        # deadlock on a non-reentrant Lock.
        self._lock = threading.RLock()

        # Cleanup / backup scheduling (all intervals in seconds)
        self._cleanup_interval = 3600
        self._max_age = 24 * 3600
        self._last_cleanup = datetime.now()
        self._last_hf_backup = datetime.now()
        self._hf_backup_interval = 3600

        # Per-turn prompt segment tracking
        self._prompt_state_manager = PromptStateManager()

        # Load persisted data: SQLite first, then overlay the HF dataset backup
        # (HF rows overwrite SQLite rows with the same session_id).
        self._init_database()
        self._load_from_database()
        self._load_from_hf_dataset()

        logger.info("GlobalStateManager initialized with PromptStateManager")

    # ========================================================================
    # PROMPT STATE MANAGEMENT
    # ========================================================================

    def get_prompt_state_manager(self) -> "PromptStateManager":
        """Get the prompt state manager for current turn"""
        return self._prompt_state_manager

    def reset_prompt_state(self):
        """Reset prompt state for new turn"""
        self._prompt_state_manager.reset()
        logger.debug("Prompt state reset for new turn")

    def get_prompt_state(self) -> Dict[str, bool]:
        """Get current prompt state dictionary"""
        return self._prompt_state_manager.get_state()

    def update_prompt_state(self, prompt_name: str, value: bool):
        """Update specific prompt state"""
        self._prompt_state_manager.update(prompt_name, value)

    def update_prompt_states(self, updates: Dict[str, bool]):
        """Update multiple prompt states"""
        self._prompt_state_manager.update_multiple(updates)

    # ========================================================================
    # DATABASE METHODS
    # ========================================================================

    def _init_database(self):
        """Initialize SQLite database for persistent storage (idempotent)."""
        conn = sqlite3.connect(self._db_path)
        cursor = conn.cursor()

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS conversations (
                session_id TEXT PRIMARY KEY,
                chat_history TEXT,
                conversation_state TEXT,
                last_accessed TEXT,
                created TEXT
            )
        """)

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS analytics (
                session_id TEXT PRIMARY KEY,
                project_stats TEXT,
                recent_interactions TEXT,
                dashboard_html TEXT,
                last_refresh TEXT,
                export_history TEXT
            )
        """)

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS evaluations (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                session_id TEXT,
                timestamp TEXT,
                metric_type TEXT,
                metric_data TEXT
            )
        """)

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS classifications (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                session_id TEXT,
                timestamp TEXT,
                user_input TEXT,
                prediction_data TEXT,
                features TEXT
            )
        """)

        conn.commit()
        conn.close()

    def _load_from_database(self):
        """Load all conversations and analytics from SQLite on startup."""
        try:
            conn = sqlite3.connect(self._db_path)
            cursor = conn.cursor()

            cursor.execute("SELECT * FROM conversations")
            for row in cursor.fetchall():
                session_id = row[0]
                self._states[session_id] = {
                    'chat_history': json.loads(row[1]),
                    'conversation_state': json.loads(row[2]),
                    'last_accessed': datetime.fromisoformat(row[3]),
                    'created': datetime.fromisoformat(row[4])
                }

            cursor.execute("SELECT * FROM analytics")
            for row in cursor.fetchall():
                session_id = row[0]
                self._analytics_cache[session_id] = {
                    'project_stats': json.loads(row[1]),
                    'recent_interactions': json.loads(row[2]),
                    'dashboard_html': row[3],
                    'last_refresh': datetime.fromisoformat(row[4]) if row[4] else None,
                    'export_history': json.loads(row[5]),
                    'last_accessed': datetime.now()
                }

            conn.close()
            logger.info(f"Loaded {len(self._states)} conversations and {len(self._analytics_cache)} analytics from database")
        except Exception as e:
            # Best-effort: a missing/corrupt DB must not prevent startup.
            logger.error(f"Error loading from database: {e}")

    def _load_from_hf_dataset(self):
        """Load data from HF dataset on startup (best-effort; overwrites SQLite rows)."""
        try:
            ds = load_dataset(self.dataset_repo, split="train", token=self.hf_token)

            for item in ds:
                if item['data_type'] == 'conversation':
                    session_id = item['session_id']
                    data = json.loads(item['data'])
                    self._states[session_id] = data
                elif item['data_type'] == 'analytics':
                    session_id = item['session_id']
                    data = json.loads(item['data'])
                    self._analytics_cache[session_id] = data

            logger.info(f"Loaded data from HF dataset {self.dataset_repo}")
        except Exception as e:
            # Dataset may not exist yet (first run) or token may be missing.
            logger.warning(f"Could not load from HF dataset: {e}")

    def _save_to_database_conversations(self, session_id):
        """Persist one session's conversation to SQLite (upsert)."""
        if session_id not in self._states:
            return

        state = self._states[session_id]
        conn = sqlite3.connect(self._db_path)
        cursor = conn.cursor()

        cursor.execute("""
            INSERT OR REPLACE INTO conversations
            (session_id, chat_history, conversation_state, last_accessed, created)
            VALUES (?, ?, ?, ?, ?)
        """, (
            session_id,
            json.dumps(state['chat_history']),
            json.dumps(state['conversation_state']),
            state['last_accessed'].isoformat(),
            state.get('created', datetime.now()).isoformat()
        ))

        conn.commit()
        conn.close()

    def _save_to_database_analytics(self, session_id):
        """Persist one session's analytics to SQLite (upsert)."""
        if session_id not in self._analytics_cache:
            return

        analytics = self._analytics_cache[session_id]
        conn = sqlite3.connect(self._db_path)
        cursor = conn.cursor()

        cursor.execute("""
            INSERT OR REPLACE INTO analytics
            (session_id, project_stats, recent_interactions, dashboard_html, last_refresh, export_history)
            VALUES (?, ?, ?, ?, ?, ?)
        """, (
            session_id,
            json.dumps(analytics.get('project_stats', {})),
            json.dumps(analytics.get('recent_interactions', [])),
            analytics.get('dashboard_html', ''),
            analytics.get('last_refresh').isoformat() if analytics.get('last_refresh') else None,
            json.dumps(analytics.get('export_history', []))
        ))

        conn.commit()
        conn.close()

    def _backup_to_hf_dataset(self):
        """Backup all data to HF dataset, throttled to one push per interval."""
        # FIX: timedelta.seconds is only the 0-86399s component of the delta;
        # use total_seconds() so the throttle doesn't wrap around every 24h.
        if (datetime.now() - self._last_hf_backup).total_seconds() < self._hf_backup_interval:
            return

        try:
            data_items = []

            for session_id, state in self._states.items():
                data_items.append({
                    'session_id': session_id,
                    'data_type': 'conversation',
                    'data': json.dumps(state, default=str),
                    'timestamp': datetime.now().isoformat()
                })

            for session_id, analytics in self._analytics_cache.items():
                data_items.append({
                    'session_id': session_id,
                    'data_type': 'analytics',
                    'data': json.dumps(analytics, default=str),
                    'timestamp': datetime.now().isoformat()
                })

            if data_items:
                ds = Dataset.from_list(data_items)
                ds.push_to_hub(self.dataset_repo, token=self.hf_token)
                self._last_hf_backup = datetime.now()
                logger.info(f"Backed up {len(data_items)} items to HF dataset")
        except Exception as e:
            logger.error(f"Error backing up to HF dataset: {e}")

    def _cleanup_old_states(self):
        """Remove old unused states to prevent memory leaks."""
        now = datetime.now()
        # FIX: both comparisons previously used timedelta.seconds, which caps
        # at 86399 - the `> 24*3600` expiry test below could never be true and
        # stale sessions were never evicted. total_seconds() is the full span.
        if (now - self._last_cleanup).total_seconds() < self._cleanup_interval:
            return

        with self._lock:
            expired_keys = []
            for session_id, state_data in self._states.items():
                if (now - state_data.get('last_accessed', now)).total_seconds() > self._max_age:
                    expired_keys.append(session_id)

            for key in expired_keys:
                del self._states[key]
                logger.info(f"Cleaned up expired state: {key}")

            self._last_cleanup = now

    # ========================================================================
    # CONVERSATION STATE METHODS
    # ========================================================================

    def get_session_id(self, request=None):
        """Generate or retrieve session ID (single-session placeholder)."""
        return "default_session"

    def get_conversation_state(self, session_id=None):
        """Get conversation state for a session, creating it on first access."""
        if session_id is None:
            session_id = self.get_session_id()

        self._cleanup_old_states()

        with self._lock:
            if session_id not in self._states:
                self._states[session_id] = {
                    'chat_history': [],
                    'conversation_state': [],
                    'last_accessed': datetime.now(),
                    'created': datetime.now()
                }
            else:
                self._states[session_id]['last_accessed'] = datetime.now()

            # Shallow copy: callers must not rely on mutating nested lists.
            return self._states[session_id].copy()

    def update_conversation_state(self, chat_history, conversation_state, session_id=None):
        """Update conversation state for a session and persist it."""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id not in self._states:
                self._states[session_id] = {}

            self._states[session_id].update({
                'chat_history': chat_history.copy() if chat_history else [],
                'conversation_state': conversation_state.copy() if conversation_state else [],
                'last_accessed': datetime.now()
            })

        self._save_to_database_conversations(session_id)
        self._backup_to_hf_dataset()

    def reset_conversation_state(self, session_id=None):
        """Reset conversation state for a session"""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id in self._states:
                self._states[session_id].update({
                    'chat_history': [],
                    'conversation_state': [],
                    'last_accessed': datetime.now()
                })
                self._save_to_database_conversations(session_id)

    def get_all_sessions(self):
        """Get all active sessions (for analytics)"""
        self._cleanup_old_states()
        with self._lock:
            return list(self._states.keys())

    # ========================================================================
    # ANALYTICS STATE METHODS
    # ========================================================================

    def get_analytics_state(self, session_id=None):
        """Get analytics state for a session, creating defaults on first access."""
        if session_id is None:
            session_id = self.get_session_id()

        self._cleanup_old_states()

        with self._lock:
            if session_id not in self._analytics_cache:
                self._analytics_cache[session_id] = {
                    'project_stats': {
                        "total_conversations": None,
                        "avg_session_length": None,
                        "success_rate": None,
                        "model_type": "Phi-3-mini (Fine-tuned)",
                        "last_updated": None
                    },
                    'recent_interactions': [],
                    'dashboard_html': None,
                    'last_refresh': None,
                    'export_history': [],
                    'database_status': 'unknown',
                    'error_state': None,
                    'last_accessed': datetime.now()
                }
            else:
                self._analytics_cache[session_id]['last_accessed'] = datetime.now()

            return self._analytics_cache[session_id].copy()

    def update_analytics_state(self, project_stats=None, recent_interactions=None,
                               dashboard_html=None, error_state=None, session_id=None):
        """Update analytics state for a session; only non-None fields are touched."""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id not in self._analytics_cache:
                self._analytics_cache[session_id] = {}

            current_time = datetime.now()

            if project_stats is not None:
                self._analytics_cache[session_id]['project_stats'] = project_stats.copy()
                self._analytics_cache[session_id]['last_refresh'] = current_time

            if recent_interactions is not None:
                self._analytics_cache[session_id]['recent_interactions'] = recent_interactions.copy()

            if dashboard_html is not None:
                self._analytics_cache[session_id]['dashboard_html'] = dashboard_html

            if error_state is not None:
                self._analytics_cache[session_id]['error_state'] = error_state

            self._analytics_cache[session_id]['last_accessed'] = current_time

            self._save_to_database_analytics(session_id)
            self._backup_to_hf_dataset()

    def add_export_record(self, export_type, filename, success=True, session_id=None):
        """Add export record to analytics state (history capped at 20 entries)."""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id not in self._analytics_cache:
                # Re-entrant call: get_analytics_state re-acquires self._lock,
                # which is why the lock must be an RLock.
                self.get_analytics_state(session_id)

            export_record = {
                'timestamp': datetime.now().isoformat(),
                'type': export_type,
                'filename': filename,
                'success': success
            }

            if 'export_history' not in self._analytics_cache[session_id]:
                self._analytics_cache[session_id]['export_history'] = []

            self._analytics_cache[session_id]['export_history'].append(export_record)

            # Keep only the 20 most recent exports.
            if len(self._analytics_cache[session_id]['export_history']) > 20:
                self._analytics_cache[session_id]['export_history'] = \
                    self._analytics_cache[session_id]['export_history'][-20:]

            self._save_to_database_analytics(session_id)

    # ========================================================================
    # ML MODEL CACHE METHODS
    # ========================================================================

    def get_ml_model_cache(self, model_type: str = "prompt_classifier"):
        """Get cached ML model entry, or None if not cached."""
        with self._lock:
            return self._ml_models_cache.get(model_type, None)

    def cache_ml_model(self, model, model_type: str = "prompt_classifier", metadata: dict = None):
        """Cache a trained ML model"""
        with self._lock:
            self._ml_models_cache[model_type] = {
                'model': model,
                'cached_at': datetime.now(),
                'metadata': metadata or {},
                'access_count': 0
            }
            logger.info(f"ML model '{model_type}' cached successfully")

    # ========================================================================
    # EVALUATION STATE METHODS
    # ========================================================================

    def get_evaluation_state(self, session_id=None):
        """Get evaluation state for a session, creating defaults on first access."""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id not in self._evaluation_cache:
                self._evaluation_cache[session_id] = {
                    'educational_quality_scores': [],
                    'rag_performance_metrics': [],
                    'prompt_classification_accuracy': [],
                    'user_feedback_history': [],
                    'aggregate_metrics': {
                        'avg_educational_quality': 0.0,
                        'avg_rag_relevance': 0.0,
                        'classifier_accuracy_rate': 0.0,
                        'user_satisfaction_rate': 0.0
                    },
                    'evaluation_session_count': 0,
                    'last_updated': datetime.now()
                }

            return self._evaluation_cache[session_id].copy()

    def add_educational_quality_score(self, user_query: str, response: str, metrics: dict, session_id=None):
        """Add educational quality evaluation result and refresh aggregates."""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id not in self._evaluation_cache:
                # Re-entrant lock acquisition (see __init__ for why RLock).
                self.get_evaluation_state(session_id)

            quality_record = {
                'timestamp': datetime.now().isoformat(),
                'user_query': user_query[:100],
                'response_length': len(response),
                'semantic_quality': metrics.get('semantic_quality', 0.0),
                'educational_score': metrics.get('educational_score', 0.0),
                'response_time': metrics.get('response_time', 0.0),
                'overall_score': (metrics.get('semantic_quality', 0.0) + metrics.get('educational_score', 0.0)) / 2
            }

            self._evaluation_cache[session_id]['educational_quality_scores'].append(quality_record)
            self._update_aggregate_metrics(session_id)

    def add_prompt_classification_result(self, predicted_mode: str, was_successful: bool, metadata: dict = None, session_id=None):
        """Add prompt classification accuracy result and refresh aggregates."""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id not in self._evaluation_cache:
                self.get_evaluation_state(session_id)

            classification_record = {
                'timestamp': datetime.now().isoformat(),
                'predicted_mode': predicted_mode,
                'was_successful': was_successful,
                'accuracy_score': 1.0 if was_successful else 0.0,
                'metadata': metadata or {}
            }

            self._evaluation_cache[session_id]['prompt_classification_accuracy'].append(classification_record)
            self._update_aggregate_metrics(session_id)

    def add_user_feedback(self, response_id: str, feedback_type: str, conversation_context: dict = None, session_id=None):
        """Add user feedback result and refresh aggregates."""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id not in self._evaluation_cache:
                self.get_evaluation_state(session_id)

            feedback_record = {
                'timestamp': datetime.now().isoformat(),
                'response_id': response_id,
                'feedback_type': feedback_type,
                # Anything other than an explicit thumbs_up counts as 0.
                'satisfaction_score': 1.0 if feedback_type == 'thumbs_up' else 0.0,
                'conversation_context': conversation_context or {}
            }

            self._evaluation_cache[session_id]['user_feedback_history'].append(feedback_record)
            self._update_aggregate_metrics(session_id)

    def _update_aggregate_metrics(self, session_id: str):
        """Recompute aggregate metrics for a session. Caller must hold the lock."""
        eval_state = self._evaluation_cache[session_id]

        if eval_state['educational_quality_scores']:
            avg_educational = sum(score['overall_score'] for score in eval_state['educational_quality_scores']) / len(eval_state['educational_quality_scores'])
            eval_state['aggregate_metrics']['avg_educational_quality'] = avg_educational

        if eval_state['prompt_classification_accuracy']:
            accuracy_rate = sum(result['accuracy_score'] for result in eval_state['prompt_classification_accuracy']) / len(eval_state['prompt_classification_accuracy'])
            eval_state['aggregate_metrics']['classifier_accuracy_rate'] = accuracy_rate

        if eval_state['user_feedback_history']:
            satisfaction_rate = sum(feedback['satisfaction_score'] for feedback in eval_state['user_feedback_history']) / len(eval_state['user_feedback_history'])
            eval_state['aggregate_metrics']['user_satisfaction_rate'] = satisfaction_rate

        eval_state['last_updated'] = datetime.now()
        eval_state['evaluation_session_count'] += 1

    def get_evaluation_summary(self, session_id=None, include_history: bool = False):
        """Get evaluation summary for analytics; optionally last-10 histories."""
        if session_id is None:
            session_id = self.get_session_id()

        eval_state = self.get_evaluation_state(session_id)

        summary = {
            'aggregate_metrics': eval_state['aggregate_metrics'],
            'total_evaluations': {
                'educational_quality': len(eval_state['educational_quality_scores']),
                'classification_accuracy': len(eval_state['prompt_classification_accuracy']),
                'user_feedback': len(eval_state['user_feedback_history'])
            },
            'last_updated': eval_state['last_updated'],
            'session_evaluation_count': eval_state['evaluation_session_count']
        }

        if include_history:
            summary['history'] = {
                'recent_educational_scores': eval_state['educational_quality_scores'][-10:],
                'recent_classification_results': eval_state['prompt_classification_accuracy'][-10:],
                'recent_user_feedback': eval_state['user_feedback_history'][-10:]
            }

        return summary

    # ========================================================================
    # UTILITY METHODS
    # ========================================================================

    def get_cache_status(self, session_id=None):
        """Get cache status for debugging"""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            analytics_cached = session_id in self._analytics_cache
            conversation_cached = session_id in self._states

            cache_info = {
                'session_id': session_id,
                'analytics_cached': analytics_cached,
                'conversation_cached': conversation_cached,
                'total_analytics_sessions': len(self._analytics_cache),
                'total_conversation_sessions': len(self._states),
                'prompt_state_active_count': len(self._prompt_state_manager.get_active_prompts())
            }

            if analytics_cached:
                analytics_state = self._analytics_cache[session_id]
                cache_info['analytics_last_refresh'] = analytics_state.get('last_refresh')
                cache_info['analytics_has_data'] = bool(analytics_state.get('project_stats', {}).get('total_conversations'))

            if conversation_cached:
                conversation_state = self._states[session_id]
                cache_info['conversation_length'] = len(conversation_state.get('conversation_state', []))
                cache_info['chat_history_length'] = len(conversation_state.get('chat_history', []))

            return cache_info

    def reset_analytics_state(self, session_id=None):
        """Reset analytics state for a session"""
        if session_id is None:
            session_id = self.get_session_id()

        with self._lock:
            if session_id in self._analytics_cache:
                del self._analytics_cache[session_id]

    def clear_all_states(self):
        """Clear all states - use with caution"""
        with self._lock:
            self._states.clear()
            self._analytics_cache.clear()
            self._ml_models_cache.clear()
            self._evaluation_cache.clear()
            self._prompt_state_manager.reset()
            logger.info("All global states cleared")
styles.css ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/* ============================
   FONT IMPORT
   ============================ */
/* Moved to the top of the stylesheet: per the CSS spec, @import rules are
   only valid before all other statements (except @charset). Placed after
   the :root/@media rules, conforming browsers ignore the import and the
   Oswald font never loads. */
@import url('https://fonts.googleapis.com/css2?family=Oswald:wght@200..700&display=swap');

/* ============================
   GLOBAL THEME & VARIABLES
   ============================ */
:root {
    /* Text Colors */
    --primarytext-color: #1a1a1a;
    --secondarytext-color: #555;

    /* Primary Colors */
    --primary-dark: #345da8;
    --primary-light: #a8b5c9;

    /* Secondary Colors */
    --secondary-dark: #063d80;
    --secondary-light: #6ea1fa;

    /* Chat & Container Colors */
    --chathistory_area: #f0f1f4;
    --container-color: #f5f6f8;
    --Send: #6ea1fa;
    --Send-hover: #87d0d5;
    --clear: #b2b8c2;
    --clear-hover: #2c5be0;
    --text_areabackground: #fafafa;

    /* Chat Bubble Colors */
    --bot-bubble-color: #b9c8e3;
    --user-bubble-color: #e3eaf6;

    /* Scrollbar Colors */
    --scrollbar-bg: #d0d3d8;
    --scrollbar-thumb: #a2a6ad;
    --scrollbar-thumb-hover: #888d94;

    /* Border & Radius */
    --border-thin: 1px;
    --border-medium: 2px;
    --border-default: 1px;
    --border-focus: 2px;
    --border-hover: 3px;
    --button-border: 2px;
    --radius-sm: 4px;
    --radius-md: 6px;
}

/* ============================
   DARK MODE THEME (SOFTER)
   ============================ */
@media (prefers-color-scheme: dark) {
    :root {
        --primarytext-color: #f8f8f8;
        --secondarytext-color: #d0d3d8;

        --primary-dark: #27477d;
        --primary-light: #7d8da9;

        --secondary-dark: #042a59;
        --secondary-light: #5e88d6;

        --chathistory_area: #202327;
        --container-color: #1b1d20;
        --Send: #5e88d6;
        --Send-hover: #7ac4c9;
        --clear: #7a7f88;
        --clear-hover: #5e88d6;
        --text_areabackground: #25282c;

        --bot-bubble-color: #425575;
        --user-bubble-color: #566583;

        --scrollbar-bg: #2b2e33;
        --scrollbar-thumb: #4b4f56;
        --scrollbar-thumb-hover: #5e636b;
    }
}

/* ============================
   BASE STYLING
   ============================ */
body {
    background: var(--text_areabackground);
    color: var(--primarytext-color);
    font-family: "Oswald", sans-serif;
    margin: 0;
}

/* NOTE(review): forcing color/font on every element with !important makes
   later per-element overrides hard — confirm this blanket rule is intended. */
* {
    color: var(--primarytext-color) !important;
    font-family: "Oswald", sans-serif !important;
    box-sizing: border-box;
}
94
+
95
/* ----------------------------------------------------------------
   Custom scrollbar (WebKit-only ::-webkit-scrollbar pseudo-elements)
   ---------------------------------------------------------------- */
::-webkit-scrollbar { width: 12px; }

::-webkit-scrollbar-track { background: var(--scrollbar-bg); }

::-webkit-scrollbar-thumb {
    border: 2px solid var(--scrollbar-bg);
    border-radius: 6px;
    background-color: var(--scrollbar-thumb);
}

::-webkit-scrollbar-thumb:hover { background-color: var(--scrollbar-thumb-hover); }
115
+
116
/* ----------------------------------------------------------------
   Gradio container & layout: full-viewport flex column, page scroll
   suppressed so only the chat area scrolls
   ---------------------------------------------------------------- */
.gradio-container,
[data-testid="block-container"],
.contain {
    display: flex !important;
    flex-direction: column !important;
    height: 100vh !important;
    max-height: 100vh !important;
    overflow: hidden !important;
    background-color: var(--container-color) !important;
    font-family: "Oswald", sans-serif !important;
}

/* Header bar with a primary-colour underline */
.title-header {
    display: flex;
    align-items: center;
    height: 60px !important;
    padding: 10px;
    background-color: transparent;
    border-bottom: var(--border-focus) solid var(--primary-dark);
}

.title-header h1 {
    margin: 0;
    font-size: 3.5rem;
    font-weight: 700;
    color: var(--primarytext-color);
}
149
+
150
/* ----------------------------------------------------------------
   Chat history: the scrollable flex child between header and input
   ---------------------------------------------------------------- */
#main-chatbot,
[data-testid="chatbot"],
.gradio-chatbot,
[role="log"] {
    flex: 1 !important;
    overflow-y: auto !important;
    margin: 15px 20px !important;
    padding: 15px !important;
    border: var(--border-default) solid var(--primary-dark) !important;
    border-radius: var(--radius-md) !important;
    background-color: var(--chathistory_area) !important;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1) !important;
}

/* Message input box */
textarea,
.gradio-textbox textarea {
    padding: 10px !important;
    resize: none !important;
    color: var(--primarytext-color) !important;
    background-color: var(--text_areabackground) !important;
    border: var(--border-default) solid var(--secondary-dark) !important;
    border-radius: var(--radius-md) !important;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
}

/* Focus ring in the secondary accent colour */
textarea:focus {
    border-color: var(--secondary-light) !important;
    box-shadow: 0 0 0 var(--border-focus) rgba(96, 165, 250, 0.2) !important;
}
185
+
186
/* ----------------------------------------------------------------
   Action buttons — shared chrome, per-button background colours
   (grouped selector keeps specificity identical to the originals)
   ---------------------------------------------------------------- */
button.send-button,
button.clear-button {
    width: 100%;
    padding: 8px 16px !important;
    font-weight: 600 !important;
    color: var(--primarytext-color) !important;
    border: var(--button-border) solid var(--secondary-dark) !important;
    border-radius: var(--radius-md) !important;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

button.send-button { background-color: var(--Send) !important; }
button.send-button:hover { background-color: var(--Send-hover) !important; }

button.clear-button { background-color: var(--clear) !important; }
button.clear-button:hover { background-color: var(--clear-hover) !important; }
218
+
219
/* ----------------------------------------------------------------
   Chat bubbles: strip Gradio's default message chrome, then paint
   rounded bubbles with CSS-triangle "tails" per sender
   ---------------------------------------------------------------- */
.message.user,
.message.bot {
    margin: 0 !important;
    padding: 0 !important;
    background: none !important;
    border: none !important;
    box-shadow: none !important;
}

.message-row {
    display: flex;
    margin: 8px 12px;
}

.message.panel-full-width {
    position: relative;
    min-width: 240px;
    max-width: 80%;
    padding: 14px 20px !important;
    border-radius: 18px !important;
    box-shadow: none !important;
    line-height: 1.5;
    word-wrap: break-word;
}

/* Bot: left-aligned bubble, tail pointing left */
.message-row.bot-row .message.panel-full-width {
    margin-left: 0;
    margin-right: auto;
    background-color: var(--bot-bubble-color) !important;
    color: var(--primarytext-color) !important;
}

.message-row.bot-row .message.panel-full-width::before {
    content: "";
    position: absolute;
    top: 12px;
    left: -10px;
    width: 0;
    height: 0;
    /* transparent top/bottom + coloured right border = left-pointing triangle */
    border-top: 10px solid transparent;
    border-right: 10px solid var(--bot-bubble-color);
    border-bottom: 10px solid transparent;
}

/* User: right-aligned bubble, tail pointing right */
.message-row.user-row .message.panel-full-width {
    margin-left: auto;
    margin-right: 0;
    background-color: var(--user-bubble-color) !important;
    color: var(--primarytext-color) !important;
}

.message-row.user-row .message.panel-full-width::before {
    content: "";
    position: absolute;
    top: 12px;
    right: -10px;
    width: 0;
    height: 0;
    border-top: 10px solid transparent;
    border-left: 10px solid var(--user-bubble-color);
    border-bottom: 10px solid transparent;
}

/* ----------------------------------------------------------------
   Responsive: wider bubbles on narrow screens
   ---------------------------------------------------------------- */
@media (max-width: 768px) {
    .message.panel-full-width {
        max-width: 85%;
    }
}
295
+
296
/* ----------------------------------------------------------------
   Footer: pin Gradio's built-in footer (settings / API links) to the
   bottom of the viewport and restore its visibility.
   NOTE(review): ".svelte-czcr5b" is a build-generated Svelte class hash
   and will break on a Gradio upgrade — confirm no stabler hook exists.
   ---------------------------------------------------------------- */
footer.svelte-czcr5b {
    position: fixed !important;
    bottom: 0 !important;
    left: 0 !important;
    right: 0 !important;
    z-index: 1000 !important;
    display: flex !important;
    align-items: center !important;
    justify-content: center !important;
    gap: 12px !important;
    min-height: 36px !important;
    padding: 8px 16px !important;
    visibility: visible !important;
    background-color: var(--container-color) !important;
    backdrop-filter: blur(5px) !important;
    border-top: var(--border-default) solid rgba(0, 0, 0, 0.12) !important;
}

/* Footer links/buttons: quiet until hovered */
footer.svelte-czcr5b a,
footer.svelte-czcr5b button,
footer.svelte-czcr5b span {
    cursor: pointer !important;
    color: var(--secondarytext-color) !important;
    font-size: 12px !important;
    font-family: "Oswald", sans-serif !important;
    text-decoration: none !important;
    background: none !important;
    border: none !important;
    opacity: 0.8;
    transition: opacity 0.15s ease;
}

footer.svelte-czcr5b a:hover,
footer.svelte-czcr5b button:hover,
footer.svelte-czcr5b span:hover {
    opacity: 1;
    color: var(--primarytext-color) !important;
}

/* Divider between footer links */
footer.svelte-czcr5b .divider {
    margin: 0 6px !important;
    color: var(--secondarytext-color) !important;
    opacity: 0.5;
}

/* Keep footer items from collapsing */
footer.svelte-czcr5b > * {
    display: inline-flex !important;
    align-items: center !important;
}