Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,7 @@ Architecture:
|
|
| 9 |
- Prompt state tracking per turn
|
| 10 |
- LightEval for metrics tracking
|
| 11 |
- Logger for timing functions
|
| 12 |
-
- OPTIMIZED: Single
|
| 13 |
"""
|
| 14 |
import os
|
| 15 |
import re
|
|
@@ -54,13 +54,13 @@ import torch
|
|
| 54 |
import gradio as gr
|
| 55 |
from dotenv import load_dotenv
|
| 56 |
|
| 57 |
-
# Agent architecture
|
| 58 |
from agents import (
|
| 59 |
ToolDecisionAgent,
|
| 60 |
PromptRoutingAgents,
|
| 61 |
ThinkingAgents,
|
| 62 |
ResponseAgent,
|
| 63 |
-
|
| 64 |
)
|
| 65 |
|
| 66 |
# State management
|
|
@@ -95,27 +95,6 @@ from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, Tool
|
|
| 95 |
# Tool for graphing
|
| 96 |
from graph_tool import generate_plot
|
| 97 |
|
| 98 |
-
# ============================================================================
|
| 99 |
-
# LLAMA-CPP-PYTHON WHEEL INSTALLATION
|
| 100 |
-
# ============================================================================
|
| 101 |
-
wheel_url = "https://huggingface.co/spaces/jdesiree/Mimir/resolve/main/wheels/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl"
|
| 102 |
-
|
| 103 |
-
# Check if the package is already installed
|
| 104 |
-
try:
|
| 105 |
-
import llama_cpp_python
|
| 106 |
-
print("✓ llama_cpp_python is already installed.")
|
| 107 |
-
except ImportError:
|
| 108 |
-
print("→ llama_cpp_python not found. Installing from wheel...")
|
| 109 |
-
|
| 110 |
-
try:
|
| 111 |
-
subprocess.check_call([
|
| 112 |
-
sys.executable, "-m", "pip", "install",
|
| 113 |
-
"--no-cache-dir",
|
| 114 |
-
wheel_url
|
| 115 |
-
])
|
| 116 |
-
print("✓ Installation successful.")
|
| 117 |
-
except subprocess.CalledProcessError as e:
|
| 118 |
-
print(f"❌ ERROR: Installation failed: {e}")
|
| 119 |
|
| 120 |
# ============================================================================
|
| 121 |
# LIGHTEVAL FOR METRICS
|
|
@@ -179,6 +158,16 @@ def log_step(step_name: str, start_time: Optional[float] = None) -> float:
|
|
| 179 |
|
| 180 |
return now
|
| 181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
# ============================================================================
|
| 184 |
# GLOBAL INITIALIZATION
|
|
@@ -200,7 +189,7 @@ tool_agent = ToolDecisionAgent()
|
|
| 200 |
routing_agents = PromptRoutingAgents()
|
| 201 |
thinking_agents = ThinkingAgents()
|
| 202 |
response_agent = ResponseAgent()
|
| 203 |
-
logger.info("Agents initialized (using shared
|
| 204 |
|
| 205 |
# Pre-warm shared Qwen3-Claude (optional - happens on first agent call anyway)
|
| 206 |
logger.info("Shared Qwen3-Claude agent ready (loads on first use)")
|
|
|
|
| 9 |
- Prompt state tracking per turn
|
| 10 |
- LightEval for metrics tracking
|
| 11 |
- Logger for timing functions
|
| 12 |
+
- OPTIMIZED: Single Llama-3.2-3B model for all agents (3.3GB, fast startup)
|
| 13 |
"""
|
| 14 |
import os
|
| 15 |
import re
|
|
|
|
| 54 |
import gradio as gr
|
| 55 |
from dotenv import load_dotenv
|
| 56 |
|
| 57 |
+
# Agent architecture
|
| 58 |
from agents import (
|
| 59 |
ToolDecisionAgent,
|
| 60 |
PromptRoutingAgents,
|
| 61 |
ThinkingAgents,
|
| 62 |
ResponseAgent,
|
| 63 |
+
get_shared_llama, # Pre-warm llama
|
| 64 |
)
|
| 65 |
|
| 66 |
# State management
|
|
|
|
| 95 |
# Tool for graphing
|
| 96 |
from graph_tool import generate_plot
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
# ============================================================================
|
| 100 |
# LIGHTEVAL FOR METRICS
|
|
|
|
| 158 |
|
| 159 |
return now
|
| 160 |
|
| 161 |
+
# ============================================================================
|
| 162 |
+
# MODEL INFORMATION
|
| 163 |
+
# ============================================================================
|
| 164 |
+
print("="*60)
|
| 165 |
+
print("MIMIR - Using Llama-3.2-3B-Instruct")
|
| 166 |
+
print(" Model: meta-llama/Llama-3.2-3B-Instruct")
|
| 167 |
+
print(" Memory: ~1GB (4-bit quantized)")
|
| 168 |
+
print(" Context: 128K tokens")
|
| 169 |
+
print(" Architecture: Single unified model")
|
| 170 |
+
print("="*60)
|
| 171 |
|
| 172 |
# ============================================================================
|
| 173 |
# GLOBAL INITIALIZATION
|
|
|
|
| 189 |
routing_agents = PromptRoutingAgents()
|
| 190 |
thinking_agents = ThinkingAgents()
|
| 191 |
response_agent = ResponseAgent()
|
| 192 |
+
logger.info("Agents initialized (using shared get_shared_llama)")
|
| 193 |
|
| 194 |
# Pre-warm shared Llama-3.2-3B (optional - happens on first agent call anyway)
|
| 195 |
logger.info("Shared Qwen3-Claude agent ready (loads on first use)")
|