jdesiree committed on
Commit
7ea174c
·
verified ·
1 Parent(s): 8e0d766

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -25
app.py CHANGED
@@ -9,7 +9,7 @@ Architecture:
9
  - Prompt state tracking per turn
10
  - LightEval for metrics tracking
11
  - Logger for timing functions
12
- - OPTIMIZED: Single Qwen3-4B-Claude model for all agents (3.3GB, fast startup)
13
  """
14
  import os
15
  import re
@@ -54,13 +54,13 @@ import torch
54
  import gradio as gr
55
  from dotenv import load_dotenv
56
 
57
- # Agent architecture (now with shared Qwen3-Claude!)
58
  from agents import (
59
  ToolDecisionAgent,
60
  PromptRoutingAgents,
61
  ThinkingAgents,
62
  ResponseAgent,
63
- get_shared_qwen3, # Pre-warm shared Qwen3-Claude
64
  )
65
 
66
  # State management
@@ -95,27 +95,6 @@ from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, Tool
95
  # Tool for graphing
96
  from graph_tool import generate_plot
97
 
98
- # ============================================================================
99
- # LLAMA-CPP-PYTHON WHEEL INSTALLATION
100
- # ============================================================================
101
- wheel_url = "https://huggingface.co/spaces/jdesiree/Mimir/resolve/main/wheels/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl"
102
-
103
- # Check if the package is already installed
104
- try:
105
- import llama_cpp_python
106
- print("✓ llama_cpp_python is already installed.")
107
- except ImportError:
108
- print("→ llama_cpp_python not found. Installing from wheel...")
109
-
110
- try:
111
- subprocess.check_call([
112
- sys.executable, "-m", "pip", "install",
113
- "--no-cache-dir",
114
- wheel_url
115
- ])
116
- print("✓ Installation successful.")
117
- except subprocess.CalledProcessError as e:
118
- print(f"❌ ERROR: Installation failed: {e}")
119
 
120
  # ============================================================================
121
  # LIGHTEVAL FOR METRICS
@@ -179,6 +158,16 @@ def log_step(step_name: str, start_time: Optional[float] = None) -> float:
179
 
180
  return now
181
 
 
 
 
 
 
 
 
 
 
 
182
 
183
  # ============================================================================
184
  # GLOBAL INITIALIZATION
@@ -200,7 +189,7 @@ tool_agent = ToolDecisionAgent()
200
  routing_agents = PromptRoutingAgents()
201
  thinking_agents = ThinkingAgents()
202
  response_agent = ResponseAgent()
203
- logger.info("Agents initialized (using shared Qwen3-Claude)")
204
 
205
  # Pre-warm shared Qwen3-Claude (optional - happens on first agent call anyway)
206
  logger.info("Shared Qwen3-Claude agent ready (loads on first use)")
 
9
  - Prompt state tracking per turn
10
  - LightEval for metrics tracking
11
  - Logger for timing functions
12
+ - OPTIMIZED: Single Llama-3.2-3B model for all agents (~1GB 4-bit quantized, fast startup)
13
  """
14
  import os
15
  import re
 
54
  import gradio as gr
55
  from dotenv import load_dotenv
56
 
57
+ # Agent architecture
58
  from agents import (
59
  ToolDecisionAgent,
60
  PromptRoutingAgents,
61
  ThinkingAgents,
62
  ResponseAgent,
63
+ get_shared_llama, # Pre-warm llama
64
  )
65
 
66
  # State management
 
95
  # Tool for graphing
96
  from graph_tool import generate_plot
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  # ============================================================================
100
  # LIGHTEVAL FOR METRICS
 
158
 
159
  return now
160
 
161
+ # ============================================================================
162
+ # MODEL INFORMATION
163
+ # ============================================================================
164
+ print("="*60)
165
+ print("MIMIR - Using Llama-3.2-3B-Instruct")
166
+ print(" Model: meta-llama/Llama-3.2-3B-Instruct")
167
+ print(" Memory: ~1GB (4-bit quantized)")
168
+ print(" Context: 128K tokens")
169
+ print(" Architecture: Single unified model")
170
+ print("="*60)
171
 
172
  # ============================================================================
173
  # GLOBAL INITIALIZATION
 
189
  routing_agents = PromptRoutingAgents()
190
  thinking_agents = ThinkingAgents()
191
  response_agent = ResponseAgent()
192
+ logger.info("Agents initialized (using shared Llama-3.2-3B via get_shared_llama)")
193
 
194
  # Pre-warm shared Llama-3.2-3B (optional - happens on first agent call anyway)
195
  logger.info("Shared Llama-3.2-3B agent ready (loads on first use)")