Spaces:

jdesiree
/

Mimir

Sleeping

App Files Community

jdesiree committed on Oct 23, 2025

Commit

7ea174c

verified ·

1 Parent(s): 8e0d766

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -25

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ Architecture:
 - Prompt state tracking per turn
 - LightEval for metrics tracking
 - Logger for timing functions
-- OPTIMIZED: Single Qwen3-4B-Claude model for all agents (3.3GB, fast startup)
 """
 import os
 import re
@@ -54,13 +54,13 @@ import torch
 import gradio as gr
 from dotenv import load_dotenv
-# Agent architecture (now with shared Qwen3-Claude!)
 from agents import (
     ToolDecisionAgent,
     PromptRoutingAgents,
     ThinkingAgents,
     ResponseAgent,
-    get_shared_qwen3,  # Pre-warm shared Qwen3-Claude
 )
 # State management
@@ -95,27 +95,6 @@ from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, Tool
 # Tool for graphing
 from graph_tool import generate_plot
-# ============================================================================
-# LLAMA-CPP-PYTHON WHEEL INSTALLATION
-# ============================================================================
-wheel_url = "https://huggingface.co/spaces/jdesiree/Mimir/resolve/main/wheels/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl"
-# Check if the package is already installed
-try:
-    import llama_cpp_python
-    print("✓ llama_cpp_python is already installed.")
-except ImportError:
-    print("→ llama_cpp_python not found. Installing from wheel...")
-    try:
-        subprocess.check_call([
-            sys.executable, "-m", "pip", "install",
-            "--no-cache-dir",
-            wheel_url
-        ])
-        print("✓ Installation successful.")
-    except subprocess.CalledProcessError as e:
-        print(f"❌ ERROR: Installation failed: {e}")
 # ============================================================================
 # LIGHTEVAL FOR METRICS
@@ -179,6 +158,16 @@ def log_step(step_name: str, start_time: Optional[float] = None) -> float:
     return now
 # ============================================================================
 # GLOBAL INITIALIZATION
@@ -200,7 +189,7 @@ tool_agent = ToolDecisionAgent()
 routing_agents = PromptRoutingAgents()
 thinking_agents = ThinkingAgents()
 response_agent = ResponseAgent()
-logger.info("Agents initialized (using shared Qwen3-Claude)")
 # Pre-warm shared Qwen3-Claude (optional - happens on first agent call anyway)
 logger.info("Shared Qwen3-Claude agent ready (loads on first use)")

 - Prompt state tracking per turn
 - LightEval for metrics tracking
 - Logger for timing functions
+- OPTIMIZED: Single Llama-3.2-3B model for all agents (3.3GB, fast startup)
 """
 import os
 import re
 import gradio as gr
 from dotenv import load_dotenv
+# Agent architecture
 from agents import (
     ToolDecisionAgent,
     PromptRoutingAgents,
     ThinkingAgents,
     ResponseAgent,
+    get_shared_llama,  # Pre-warm llama
 )
 # State management
 # Tool for graphing
 from graph_tool import generate_plot
 # ============================================================================
 # LIGHTEVAL FOR METRICS
     return now
+# ============================================================================
+# MODEL INFORMATION
+# ============================================================================
+print("="*60)
+print("MIMIR - Using Llama-3.2-3B-Instruct")
+print("  Model: meta-llama/Llama-3.2-3B-Instruct")
+print("  Memory: ~1GB (4-bit quantized)")
+print("  Context: 128K tokens")
+print("  Architecture: Single unified model")
+print("="*60)
 # ============================================================================
 # GLOBAL INITIALIZATION
 routing_agents = PromptRoutingAgents()
 thinking_agents = ThinkingAgents()
 response_agent = ResponseAgent()
+logger.info("Agents initialized (using shared get_shared_llama)")
 # Pre-warm shared Qwen3-Claude (optional - happens on first agent call anyway)
 logger.info("Shared Qwen3-Claude agent ready (loads on first use)")