Spaces:
Sleeping
Sleeping
too many tokens used, reducing complexity
Browse files
app.py
CHANGED
|
@@ -105,85 +105,48 @@ class SlpMultiAgent:
|
|
| 105 |
MAX_QUESTION_LENGTH = 1000
|
| 106 |
short_question = question # [:MAX_QUESTION_LENGTH]
|
| 107 |
|
| 108 |
-
# Use
|
| 109 |
model = OpenAIServerModel(
|
| 110 |
-
model_id="gpt-3.5-turbo
|
| 111 |
-
temperature=0.
|
| 112 |
-
max_tokens=
|
| 113 |
)
|
| 114 |
|
| 115 |
-
# Create
|
| 116 |
research_agent = CodeAgent(
|
| 117 |
-
tools=[KnowledgeBaseTool()],
|
| 118 |
model=model,
|
| 119 |
-
additional_authorized_imports=["
|
| 120 |
-
max_steps=
|
| 121 |
name="ResearchAgent",
|
| 122 |
verbosity_level=0,
|
| 123 |
-
description="
|
| 124 |
)
|
| 125 |
|
| 126 |
-
|
| 127 |
tools=[],
|
| 128 |
model=model,
|
| 129 |
-
additional_authorized_imports=["math", "
|
| 130 |
-
max_steps=
|
| 131 |
-
name="
|
| 132 |
verbosity_level=0,
|
| 133 |
-
description="
|
| 134 |
-
)
|
| 135 |
-
|
| 136 |
-
logic_agent = CodeAgent(
|
| 137 |
-
tools=[],
|
| 138 |
-
model=model,
|
| 139 |
-
additional_authorized_imports=["itertools", "collections", "re", "string"],
|
| 140 |
-
max_steps=4,
|
| 141 |
-
name="LogicAgent",
|
| 142 |
-
verbosity_level=0,
|
| 143 |
-
description="Specializes in logical reasoning, pattern recognition, and problem decomposition."
|
| 144 |
-
)
|
| 145 |
-
|
| 146 |
-
language_agent = CodeAgent(
|
| 147 |
-
tools=[],
|
| 148 |
-
model=model,
|
| 149 |
-
additional_authorized_imports=["re", "string", "collections"],
|
| 150 |
-
max_steps=3,
|
| 151 |
-
name="LanguageAgent",
|
| 152 |
-
verbosity_level=0,
|
| 153 |
-
description="Specializes in text analysis, word puzzles, linguistics, and language patterns."
|
| 154 |
-
)
|
| 155 |
-
|
| 156 |
-
data_agent = CodeAgent(
|
| 157 |
-
tools=[],
|
| 158 |
-
model=model,
|
| 159 |
-
additional_authorized_imports=["pandas", "json", "csv", "collections", "statistics"],
|
| 160 |
-
max_steps=4,
|
| 161 |
-
name="DataAgent",
|
| 162 |
-
verbosity_level=0,
|
| 163 |
-
description="Specializes in data processing, sorting, filtering, and structured analysis."
|
| 164 |
)
|
| 165 |
|
| 166 |
manager_agent = CodeAgent(
|
| 167 |
model=OpenAIServerModel(
|
| 168 |
-
model_id="gpt-3.5-turbo
|
| 169 |
-
temperature=0.
|
| 170 |
-
max_tokens=
|
| 171 |
),
|
| 172 |
-
tools=[KnowledgeBaseTool()],
|
| 173 |
-
managed_agents=[research_agent,
|
| 174 |
name="ManagerAgent",
|
| 175 |
-
description="
|
| 176 |
-
additional_authorized_imports=[
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
"requests",
|
| 181 |
-
"re",
|
| 182 |
-
"math"
|
| 183 |
-
],
|
| 184 |
-
planning_interval=2,
|
| 185 |
-
verbosity_level=1,
|
| 186 |
-
max_steps=10, # More steps for complex coordination
|
| 187 |
final_answer_checks=[check_reasoning]
|
| 188 |
)
|
| 189 |
|
|
@@ -199,22 +162,15 @@ class SlpMultiAgent:
|
|
| 199 |
lambda: manager_agent.run(f"""
|
| 200 |
Question: {short_question}
|
| 201 |
|
| 202 |
-
You have
|
| 203 |
-
- ResearchAgent: Facts, history, knowledge lookup
|
| 204 |
-
- MathAgent: Calculations, statistics, numerical problems
|
| 205 |
-
- LogicAgent: Logical reasoning, patterns, problem decomposition
|
| 206 |
-
- LanguageAgent: Text analysis, word puzzles, linguistics
|
| 207 |
-
- DataAgent: Data processing, sorting, structured analysis
|
| 208 |
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
3. Delegate to most appropriate specialist agent(s)
|
| 213 |
-
4. Synthesize results into final answer
|
| 214 |
|
| 215 |
-
CRITICAL:
|
| 216 |
|
| 217 |
-
|
| 218 |
""")
|
| 219 |
)
|
| 220 |
break # Success, exit retry loop
|
|
@@ -256,24 +212,8 @@ class SlpMultiAgent:
|
|
| 256 |
return result if result else "Unable to determine answer."
|
| 257 |
|
| 258 |
def check_reasoning(final_answer, agent_memory):
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
if not final_answer or len(final_answer.strip()) < 1:
|
| 262 |
-
return False
|
| 263 |
-
|
| 264 |
-
# Check if it's just thoughts/reasoning instead of an answer
|
| 265 |
-
bad_patterns = ['### Thought:', '### Code:', 'I will', 'Let me', 'First, I', 'Next, I', 'Step 1:', 'Based on']
|
| 266 |
-
if any(pattern in final_answer for pattern in bad_patterns):
|
| 267 |
-
return False
|
| 268 |
-
|
| 269 |
-
# Check if answer is too long (likely contains reasoning)
|
| 270 |
-
if len(final_answer) > 300:
|
| 271 |
-
return False
|
| 272 |
-
|
| 273 |
-
return True # Pass if it looks like a real answer
|
| 274 |
-
except Exception as e:
|
| 275 |
-
print(f"Error in reasoning check: {e}")
|
| 276 |
-
return True # Default to passing on errors
|
| 277 |
|
| 278 |
|
| 279 |
async def run_and_submit_all(profile):
|
|
@@ -337,8 +277,8 @@ async def run_and_submit_all(profile):
|
|
| 337 |
answers_payload = []
|
| 338 |
print(f"Running agent on {len(questions_data)} questions...")
|
| 339 |
|
| 340 |
-
# Process questions
|
| 341 |
-
semaphore = asyncio.Semaphore(
|
| 342 |
|
| 343 |
async def process_question(item):
|
| 344 |
task_id = item.get("task_id")
|
|
|
|
| 105 |
MAX_QUESTION_LENGTH = 1000
|
| 106 |
short_question = question # [:MAX_QUESTION_LENGTH]
|
| 107 |
|
| 108 |
+
# Use cheaper, faster model
|
| 109 |
model = OpenAIServerModel(
|
| 110 |
+
model_id="gpt-3.5-turbo",
|
| 111 |
+
temperature=0.0, # Deterministic for consistency
|
| 112 |
+
max_tokens=400 # Reduced tokens for cost efficiency
|
| 113 |
)
|
| 114 |
|
| 115 |
+
# Create only essential agents with reduced complexity
|
| 116 |
research_agent = CodeAgent(
|
| 117 |
+
tools=[KnowledgeBaseTool()], # Remove search to avoid timeouts
|
| 118 |
model=model,
|
| 119 |
+
additional_authorized_imports=["re", "datetime"],
|
| 120 |
+
max_steps=2, # Reduced steps for cost
|
| 121 |
name="ResearchAgent",
|
| 122 |
verbosity_level=0,
|
| 123 |
+
description="Quick factual research and knowledge lookup."
|
| 124 |
)
|
| 125 |
|
| 126 |
+
solver_agent = CodeAgent(
|
| 127 |
tools=[],
|
| 128 |
model=model,
|
| 129 |
+
additional_authorized_imports=["math", "re", "collections", "itertools"],
|
| 130 |
+
max_steps=2, # Reduced steps
|
| 131 |
+
name="SolverAgent",
|
| 132 |
verbosity_level=0,
|
| 133 |
+
description="Problem solving, calculations, and logical reasoning."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
)
|
| 135 |
|
| 136 |
manager_agent = CodeAgent(
|
| 137 |
model=OpenAIServerModel(
|
| 138 |
+
model_id="gpt-3.5-turbo",
|
| 139 |
+
temperature=0.0,
|
| 140 |
+
max_tokens=500
|
| 141 |
),
|
| 142 |
+
tools=[KnowledgeBaseTool()], # Only knowledge base
|
| 143 |
+
managed_agents=[research_agent, solver_agent], # Only 2 agents
|
| 144 |
name="ManagerAgent",
|
| 145 |
+
description="Efficient manager for quick problem solving.",
|
| 146 |
+
additional_authorized_imports=["re", "math"],
|
| 147 |
+
planning_interval=1, # Faster planning
|
| 148 |
+
verbosity_level=0, # Reduce verbosity
|
| 149 |
+
max_steps=4, # Drastically reduced steps
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
final_answer_checks=[check_reasoning]
|
| 151 |
)
|
| 152 |
|
|
|
|
| 162 |
lambda: manager_agent.run(f"""
|
| 163 |
Question: {short_question}
|
| 164 |
|
| 165 |
+
You have ResearchAgent and SolverAgent. Be efficient:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
|
| 167 |
+
For factual questions: Use ResearchAgent
|
| 168 |
+
For calculations/logic: Use SolverAgent
|
| 169 |
+
For simple questions: Answer directly using knowledge_base()
|
|
|
|
|
|
|
| 170 |
|
| 171 |
+
CRITICAL: Always end with <code>final_answer("answer")</code>
|
| 172 |
|
| 173 |
+
Be fast and direct - no lengthy reasoning.
|
| 174 |
""")
|
| 175 |
)
|
| 176 |
break # Success, exit retry loop
|
|
|
|
| 212 |
return result if result else "Unable to determine answer."
|
| 213 |
|
| 214 |
def check_reasoning(final_answer, agent_memory):
|
| 215 |
+
# Skip expensive validation to save costs
|
| 216 |
+
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
|
| 219 |
async def run_and_submit_all(profile):
|
|
|
|
| 277 |
answers_payload = []
|
| 278 |
print(f"Running agent on {len(questions_data)} questions...")
|
| 279 |
|
| 280 |
+
# Process questions one at a time to avoid rate limits
|
| 281 |
+
semaphore = asyncio.Semaphore(1) # Process 1 question at a time
|
| 282 |
|
| 283 |
async def process_question(item):
|
| 284 |
task_id = item.get("task_id")
|