Spaces:
Running
Running
Commit ·
1476939
1
Parent(s): 0e323dc
Giving agent access to the model costs
Browse files
src/manager/agent_manager.py
CHANGED
|
@@ -141,8 +141,8 @@ class AgentManager():
|
|
| 141 |
|
| 142 |
def validate_budget(self, amount: float) -> None:
|
| 143 |
if not self.budget_manager.can_spend(amount):
|
| 144 |
-
raise ValueError(f"Do not have enough budget to create the
|
| 145 |
-
+f"Creating the
|
| 146 |
|
| 147 |
def create_agent_class(self, agent_name: str, base_model: str, system_prompt: str, description: str = "", create_cost: float = 0, invoke_cost: float = 0,
|
| 148 |
**additional_params) -> Agent:
|
|
|
|
| 141 |
|
| 142 |
def validate_budget(self, amount: float) -> None:
    """Ensure ``amount`` can be spent, raising if the budget manager refuses.

    Args:
        amount: The cost to check with ``self.budget_manager.can_spend``.

    Raises:
        ValueError: When ``can_spend(amount)`` is falsy. The message reports
            the requested amount and the value returned by
            ``get_current_remaining_budget()``.
    """
    budget = self.budget_manager
    # Guard clause: nothing to do when the spend is allowed.
    if budget.can_spend(amount):
        return

    # Only query the remaining budget on the failure path, as the message needs it.
    remaining = budget.get_current_remaining_budget()
    raise ValueError(
        "Do not have enough budget to create/use the agent. "
        f"Creating/Using the agent costs {amount} but only {remaining} is remaining"
    )
|
| 146 |
|
| 147 |
def create_agent_class(self, agent_name: str, base_model: str, system_prompt: str, description: str = "", create_cost: float = 0, invoke_cost: float = 0,
|
| 148 |
**additional_params) -> Agent:
|
src/manager/budget_manager.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from src.manager.utils.singleton import singleton
|
| 2 |
import torch
|
| 3 |
import psutil
|
|
|
|
| 4 |
@singleton
|
| 5 |
class BudgetManager():
|
| 6 |
TOTAL_BUDGET = 100
|
|
|
|
| 1 |
from src.manager.utils.singleton import singleton
|
| 2 |
import torch
|
| 3 |
import psutil
|
| 4 |
+
|
| 5 |
@singleton
|
| 6 |
class BudgetManager():
|
| 7 |
TOTAL_BUDGET = 100
|
src/manager/manager.py
CHANGED
|
@@ -163,6 +163,7 @@ class GeminiManager:
|
|
| 163 |
top_k = min(k, len(memories))
|
| 164 |
# Semantic Retrieval with GPU
|
| 165 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
|
|
| 166 |
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
|
| 167 |
doc_embeddings = model.encode(memories, convert_to_tensor=True, device=device)
|
| 168 |
query_embedding = model.encode(query, convert_to_tensor=True, device=device)
|
|
@@ -195,13 +196,13 @@ class GeminiManager:
|
|
| 195 |
logger.debug(f"Chat history: {chat_history}")
|
| 196 |
try:
|
| 197 |
response = suppress_output(self.generate_response)(chat_history)
|
|
|
|
| 198 |
except Exception as e:
|
| 199 |
-
logger.debug(f"Error generating response: {e}")
|
| 200 |
messages.append({
|
| 201 |
"role": "assistant",
|
| 202 |
"content": f"Error generating response: {e}"
|
| 203 |
})
|
| 204 |
-
logger.error(f"Error generating response
|
| 205 |
yield messages
|
| 206 |
return
|
| 207 |
logger.debug(f"Response: {response}")
|
|
|
|
| 163 |
top_k = min(k, len(memories))
|
| 164 |
# Semantic Retrieval with GPU
|
| 165 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 166 |
+
print(f"Using device: {device}")
|
| 167 |
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
|
| 168 |
doc_embeddings = model.encode(memories, convert_to_tensor=True, device=device)
|
| 169 |
query_embedding = model.encode(query, convert_to_tensor=True, device=device)
|
|
|
|
| 196 |
logger.debug(f"Chat history: {chat_history}")
|
| 197 |
try:
|
| 198 |
response = suppress_output(self.generate_response)(chat_history)
|
| 199 |
+
print(f"Response: {response}")
|
| 200 |
except Exception as e:
|
|
|
|
| 201 |
messages.append({
|
| 202 |
"role": "assistant",
|
| 203 |
"content": f"Error generating response: {e}"
|
| 204 |
})
|
| 205 |
+
logger.error(f"Error generating response", e)
|
| 206 |
yield messages
|
| 207 |
return
|
| 208 |
logger.debug(f"Response: {response}")
|
src/tools/default_tools/agent_cost_manager.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__all__ = ['AgentCostManager']
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class AgentCostManager():
    """Tool that reports the create/invoke cost of each supported base model.

    The price table lives in the class attribute ``costs``. Both ``get_costs``
    and ``run`` hand out an independent copy of that table, so a caller that
    edits the returned data cannot change the prices seen by later callers.
    """

    # No other tools are declared as prerequisites.
    # NOTE(review): presumably read by the tool loader - confirm against it.
    dependencies = []

    # Schema advertised for this tool; it takes no parameters.
    inputSchema = {
        "name": "AgentCostManager",
        "description": "Retrieves the cost of creating and invoking an agent. Please make sure to use this before creating an agent.",
        "parameters": {
            "type": "object",
            "properties": {},
            "required": [],
        }
    }

    # Model name -> description plus the cost to create and to invoke an agent.
    # NOTE(review): cost units are not stated here - presumably budget points
    # checked by the budget manager; confirm.
    costs = {
        "llama3.2": {
            "description": "3 Billion parameter model",
            "create_cost": 10,
            "invoke_cost": 20,
        },
        "mistral": {
            "description": "7 Billion parameter model",
            "create_cost": 20,
            "invoke_cost": 50,
        },
        "gemini-2.5-flash-preview-04-17": {
            "description": "Adaptive thinking, cost efficiency",
            "create_cost": 20,
            "invoke_cost": 50
        },
        "gemini-2.5-pro-preview-03-25": {
            "description": "Enhanced thinking and reasoning, multimodal understanding, advanced coding, and more",
            "create_cost": 20,
            "invoke_cost": 50
        },
        "gemini-2.0-flash": {
            "description": "Next generation features, speed, thinking, realtime streaming, and multimodal generation",
            "create_cost": 20,
            "invoke_cost": 50
        },
        "gemini-2.0-flash-lite": {
            "description": "Cost efficiency and low latency",
            "create_cost": 20,
            "invoke_cost": 50
        },
        "gemini-1.5-flash": {
            "description": "Fast and versatile performance across a diverse variety of tasks",
            "create_cost": 20,
            "invoke_cost": 50
        },
        "gemini-1.5-flash-8b": {
            "description": "High volume and lower intelligence tasks",
            "create_cost": 20,
            "invoke_cost": 50
        },
        "gemini-1.5-pro": {
            "description": "Complex reasoning tasks requiring more intelligence",
            "create_cost": 20,
            "invoke_cost": 50
        },
        "gemini-2.0-flash-live-001": {
            "description": "Low-latency bidirectional voice and video interactions",
            "create_cost": 20,
            "invoke_cost": 50
        }
    }

    def _costs_snapshot(self):
        """Return a deep copy of ``costs`` that is safe for callers to modify."""
        # Function-scope import keeps this block self-contained.
        import copy

        return copy.deepcopy(self.costs)

    def get_costs(self):
        """Return the per-model cost table.

        Returns:
            dict: Maps each model name to a dict with ``description``,
            ``create_cost`` and ``invoke_cost``. The result is a copy, so
            changing it does not affect the shared table.
        """
        return self._costs_snapshot()

    def run(self, **kwargs):
        """Return the cost table wrapped in a result dict.

        Args:
            **kwargs: Accepted and ignored; the schema declares no parameters.

        Returns:
            dict: ``status`` and ``message`` strings plus ``output``, which
            holds a copy of the per-model cost table.
        """
        return {
            "status": "success",
            "message": "Cost of creating and invoking an agent",
            "output": self._costs_snapshot(),
        }
|
src/tools/default_tools/agent_creater_tool.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from src.manager.agent_manager import AgentManager
|
| 2 |
from src.manager.config.model_selector import choose_best_model
|
| 3 |
from src.manager.utils.runtime_selector import detect_runtime_environment
|
|
|
|
| 4 |
__all__ = ['AgentCreator']
|
| 5 |
|
| 6 |
class AgentCreator():
|
|
@@ -20,7 +21,7 @@ class AgentCreator():
|
|
| 20 |
},
|
| 21 |
"base_model": {
|
| 22 |
"type": "string",
|
| 23 |
-
"description": "A base model from which the new agent mode is to be created.
|
| 24 |
},
|
| 25 |
"system_prompt": {
|
| 26 |
"type": "string",
|
|
@@ -31,78 +32,7 @@ class AgentCreator():
|
|
| 31 |
"description": "Description of the agent. This is a string that describes the agent and its capabilities. It should be a single line description.",
|
| 32 |
},
|
| 33 |
},
|
| 34 |
-
"required": ["agent_name", "system_prompt", "description"],
|
| 35 |
-
#"required": ["agent_name", "base_model", "system_prompt", "description"],
|
| 36 |
-
},
|
| 37 |
-
"creates": {
|
| 38 |
-
"selector": "base_model",
|
| 39 |
-
"types": {
|
| 40 |
-
"llama3.2":{
|
| 41 |
-
"description": "3 Billion parameter model",
|
| 42 |
-
"create_cost": 10,
|
| 43 |
-
"invoke_cost": 20,
|
| 44 |
-
},
|
| 45 |
-
"mistral":{
|
| 46 |
-
"description": "7 Billion parameter model",
|
| 47 |
-
"create_cost": 20,
|
| 48 |
-
"invoke_cost": 50,
|
| 49 |
-
},
|
| 50 |
-
"gemini-2.5-flash-preview-04-17": {
|
| 51 |
-
"description": "Adaptive thinking, cost efficiency",
|
| 52 |
-
"create_cost": 20,
|
| 53 |
-
"invoke_cost": 50
|
| 54 |
-
},
|
| 55 |
-
"gemini-2.5-pro-preview-03-25": {
|
| 56 |
-
"description": "Enhanced thinking and reasoning, multimodal understanding, advanced coding, and more",
|
| 57 |
-
"create_cost": 20,
|
| 58 |
-
"invoke_cost": 50
|
| 59 |
-
},
|
| 60 |
-
"gemini-2.0-flash": {
|
| 61 |
-
"description": "Next generation features, speed, thinking, realtime streaming, and multimodal generation",
|
| 62 |
-
"create_cost": 20,
|
| 63 |
-
"invoke_cost": 50
|
| 64 |
-
},
|
| 65 |
-
"gemini-2.0-flash-lite": {
|
| 66 |
-
"description": "Cost efficiency and low latency",
|
| 67 |
-
"create_cost": 20,
|
| 68 |
-
"invoke_cost": 50
|
| 69 |
-
},
|
| 70 |
-
"gemini-1.5-flash": {
|
| 71 |
-
"description": "Fast and versatile performance across a diverse variety of tasks",
|
| 72 |
-
"create_cost": 20,
|
| 73 |
-
"invoke_cost": 50
|
| 74 |
-
},
|
| 75 |
-
"gemini-1.5-flash-8b": {
|
| 76 |
-
"description": "High volume and lower intelligence tasks",
|
| 77 |
-
"create_cost": 20,
|
| 78 |
-
"invoke_cost": 50
|
| 79 |
-
},
|
| 80 |
-
"gemini-1.5-pro": {
|
| 81 |
-
"description": "Complex reasoning tasks requiring more intelligence",
|
| 82 |
-
"create_cost": 20,
|
| 83 |
-
"invoke_cost": 50
|
| 84 |
-
},
|
| 85 |
-
# "gemini-embedding-exp": {
|
| 86 |
-
# "description": "Measuring the relatedness of text strings",
|
| 87 |
-
# "create_cost": 20,
|
| 88 |
-
# "invoke_cost": 50
|
| 89 |
-
# },
|
| 90 |
-
# "imagen-3.0-generate-002": {
|
| 91 |
-
# "description": "Our most advanced image generation model",
|
| 92 |
-
# "create_cost": 20,
|
| 93 |
-
# "invoke_cost": 50
|
| 94 |
-
# },
|
| 95 |
-
# "veo-2.0-generate-001": {
|
| 96 |
-
# "description": "High quality video generation",
|
| 97 |
-
# "create_cost": 20,
|
| 98 |
-
# "invoke_cost": 50
|
| 99 |
-
# },
|
| 100 |
-
"gemini-2.0-flash-live-001": {
|
| 101 |
-
"description": "Low-latency bidirectional voice and video interactions",
|
| 102 |
-
"create_cost": 20,
|
| 103 |
-
"invoke_cost": 50
|
| 104 |
-
}
|
| 105 |
-
}
|
| 106 |
}
|
| 107 |
}
|
| 108 |
|
|
@@ -118,11 +48,12 @@ class AgentCreator():
|
|
| 118 |
|
| 119 |
system_prompt = kwargs.get("system_prompt")
|
| 120 |
description = kwargs.get("description")
|
| 121 |
-
|
| 122 |
-
|
|
|
|
| 123 |
print(f"[WARN] Auto-selected model '{base_model}' not in schema. Falling back to gemini-2.0-flash")
|
| 124 |
base_model = "gemini-2.0-flash"
|
| 125 |
-
invoke_cost =
|
| 126 |
|
| 127 |
agent_manager = AgentManager()
|
| 128 |
try:
|
|
|
|
| 1 |
from src.manager.agent_manager import AgentManager
|
| 2 |
from src.manager.config.model_selector import choose_best_model
|
| 3 |
from src.manager.utils.runtime_selector import detect_runtime_environment
|
| 4 |
+
from src.tools.default_tools.agent_cost_manager import AgentCostManager
|
| 5 |
__all__ = ['AgentCreator']
|
| 6 |
|
| 7 |
class AgentCreator():
|
|
|
|
| 21 |
},
|
| 22 |
"base_model": {
|
| 23 |
"type": "string",
|
| 24 |
+
"description": "A base model from which the new agent mode is to be created. Check the available models using the AgentCostManager tool.",
|
| 25 |
},
|
| 26 |
"system_prompt": {
|
| 27 |
"type": "string",
|
|
|
|
| 32 |
"description": "Description of the agent. This is a string that describes the agent and its capabilities. It should be a single line description.",
|
| 33 |
},
|
| 34 |
},
|
| 35 |
+
"required": ["agent_name", "base_model", "system_prompt", "description"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
}
|
| 37 |
}
|
| 38 |
|
|
|
|
| 48 |
|
| 49 |
system_prompt = kwargs.get("system_prompt")
|
| 50 |
description = kwargs.get("description")
|
| 51 |
+
model_costs = AgentCostManager().get_costs()
|
| 52 |
+
create_cost = model_costs[base_model]["create_cost"]
|
| 53 |
+
if base_model not in model_costs:
|
| 54 |
print(f"[WARN] Auto-selected model '{base_model}' not in schema. Falling back to gemini-2.0-flash")
|
| 55 |
base_model = "gemini-2.0-flash"
|
| 56 |
+
invoke_cost = model_costs[base_model]["invoke_cost"]
|
| 57 |
|
| 58 |
agent_manager = AgentManager()
|
| 59 |
try:
|