precison9 commited on
Commit
c8f4f41
·
verified ·
1 Parent(s): 35bfe93

Add agent.py — core agent with router, planner, executor, memory, safety

Browse files
Files changed (1) hide show
  1. multeclaw/agent.py +505 -0
multeclaw/agent.py ADDED
@@ -0,0 +1,505 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Multeclaw Agent Core — reasoning loops, tool execution, planning, memory, repair, and safety.
3
+ This is the brain of the system.
4
+ """
5
+
6
+ import json
7
+ import math
8
+ import time
9
+ import traceback
10
+ import subprocess
11
+ import tempfile
12
+ import os
13
+ from typing import Generator, Optional, Any
14
+ from dataclasses import dataclass, field
15
+ from datetime import datetime
16
+ from enum import Enum
17
+
18
+ from multeclaw.config import AgentConfig, BUILT_IN_TOOLS, SYSTEM_PROMPTS
19
+ from multeclaw.llm_client import MultiModelClient, LLMResponse
20
+
21
+
22
+ # ─── Types ─────────────────────────────────────────────────────────────────────
23
+ class TaskStatus(str, Enum):
24
+ PENDING = "pending"
25
+ RUNNING = "running"
26
+ COMPLETED = "completed"
27
+ FAILED = "failed"
28
+
29
+
30
+ @dataclass
31
+ class Step:
32
+ id: int
33
+ description: str
34
+ status: TaskStatus = TaskStatus.PENDING
35
+ result: str = ""
36
+ error: str = ""
37
+
38
+
39
+ @dataclass
40
+ class AgentMemory:
41
+ """Short-term conversation + long-term preferences."""
42
+ conversation: list[dict] = field(default_factory=list)
43
+ task_plans: list[list[Step]] = field(default_factory=list)
44
+ tool_results: list[dict] = field(default_factory=list)
45
+ user_preferences: dict = field(default_factory=dict)
46
+ session_notes: list[str] = field(default_factory=list)
47
+
48
+ def add_message(self, role: str, content: str):
49
+ self.conversation.append({
50
+ "role": role,
51
+ "content": content,
52
+ "timestamp": datetime.now().isoformat(),
53
+ })
54
+
55
+ def get_messages(self, limit: int = 50) -> list[dict]:
56
+ """Return recent messages in OpenAI format."""
57
+ recent = self.conversation[-limit:]
58
+ return [{"role": m["role"], "content": m["content"]} for m in recent]
59
+
60
+ def add_tool_result(self, tool_name: str, args: dict, result: str, success: bool):
61
+ self.tool_results.append({
62
+ "tool": tool_name,
63
+ "args": args,
64
+ "result": result[:2000],
65
+ "success": success,
66
+ "timestamp": datetime.now().isoformat(),
67
+ })
68
+
69
+ def clear(self):
70
+ self.conversation.clear()
71
+ self.task_plans.clear()
72
+ self.tool_results.clear()
73
+ self.session_notes.clear()
74
+
75
+
76
+ # ─── Tool Executor ─────────────────────────────────────────────────────────────
77
+ class ToolExecutor:
78
+ """Sandboxed execution of built-in tools with safety checks."""
79
+
80
+ SAFE_MATH_NAMES = {
81
+ "abs": abs, "round": round, "min": min, "max": max, "sum": sum,
82
+ "pow": pow, "int": int, "float": float, "len": len,
83
+ "sqrt": math.sqrt, "log": math.log, "log2": math.log2, "log10": math.log10,
84
+ "sin": math.sin, "cos": math.cos, "tan": math.tan,
85
+ "pi": math.pi, "e": math.e, "inf": math.inf,
86
+ "ceil": math.ceil, "floor": math.floor, "factorial": math.factorial,
87
+ }
88
+
89
+ def execute(self, tool_name: str, arguments: dict) -> dict:
90
+ """Execute a tool and return {result, success, error}."""
91
+ try:
92
+ if tool_name == "calculator":
93
+ return self._calculator(arguments["expression"])
94
+ elif tool_name == "code_executor":
95
+ return self._code_executor(arguments["code"])
96
+ elif tool_name == "file_reader":
97
+ return self._file_reader(arguments["path"])
98
+ elif tool_name == "file_writer":
99
+ return self._file_writer(arguments["path"], arguments["content"])
100
+ elif tool_name == "web_search":
101
+ return {"result": "🔍 Web search is not available in this environment. Reason about the question using your knowledge.", "success": True}
102
+ else:
103
+ return {"result": "", "success": False, "error": f"Unknown tool: {tool_name}"}
104
+ except Exception as e:
105
+ return {"result": "", "success": False, "error": f"{type(e).__name__}: {str(e)}"}
106
+
107
+ def _calculator(self, expression: str) -> dict:
108
+ """Safe math evaluation."""
109
+ blocked = ["import", "exec", "eval", "open", "__", "os.", "sys.", "subprocess"]
110
+ expr_lower = expression.lower()
111
+ for b in blocked:
112
+ if b in expr_lower:
113
+ return {"result": "", "success": False, "error": f"Blocked unsafe expression containing '{b}'"}
114
+ try:
115
+ result = eval(expression, {"__builtins__": {}}, self.SAFE_MATH_NAMES)
116
+ return {"result": str(result), "success": True}
117
+ except Exception as e:
118
+ return {"result": "", "success": False, "error": f"Math error: {str(e)}"}
119
+
120
+ def _code_executor(self, code: str) -> dict:
121
+ """Execute Python code in a subprocess with timeout."""
122
+ blocked = ["os.system", "subprocess.call", "subprocess.run", "shutil.rmtree",
123
+ "os.remove", "os.unlink", "os.rmdir"]
124
+ for b in blocked:
125
+ if b in code:
126
+ return {"result": "", "success": False, "error": f"Blocked unsafe code containing '{b}'"}
127
+
128
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
129
+ f.write(code)
130
+ f.flush()
131
+ try:
132
+ result = subprocess.run(
133
+ ["python", f.name],
134
+ capture_output=True, text=True, timeout=30,
135
+ env={**os.environ, "PYTHONDONTWRITEBYTECODE": "1"},
136
+ )
137
+ output = result.stdout
138
+ if result.stderr:
139
+ output += f"\n[stderr]: {result.stderr}"
140
+ return {"result": output[:5000], "success": result.returncode == 0,
141
+ "error": result.stderr[:1000] if result.returncode != 0 else ""}
142
+ except subprocess.TimeoutExpired:
143
+ return {"result": "", "success": False, "error": "Code execution timed out (30s limit)"}
144
+ finally:
145
+ os.unlink(f.name)
146
+
147
+ def _file_reader(self, path: str) -> dict:
148
+ """Read a file safely."""
149
+ path = os.path.abspath(path)
150
+ if not os.path.exists(path):
151
+ return {"result": "", "success": False, "error": f"File not found: {path}"}
152
+ if os.path.getsize(path) > 1_000_000:
153
+ return {"result": "", "success": False, "error": "File too large (>1MB)"}
154
+ try:
155
+ with open(path, "r", encoding="utf-8", errors="replace") as f:
156
+ content = f.read()
157
+ return {"result": content, "success": True}
158
+ except Exception as e:
159
+ return {"result": "", "success": False, "error": str(e)}
160
+
161
+ def _file_writer(self, path: str, content: str) -> dict:
162
+ """Write a file safely."""
163
+ path = os.path.abspath(path)
164
+ try:
165
+ os.makedirs(os.path.dirname(path), exist_ok=True)
166
+ with open(path, "w", encoding="utf-8") as f:
167
+ f.write(content)
168
+ return {"result": f"Written {len(content)} bytes to {path}", "success": True}
169
+ except Exception as e:
170
+ return {"result": "", "success": False, "error": str(e)}
171
+
172
+
173
+ # ─── Safety Layer ──────────────────────────────────────────────────────────────
174
+ class SafetyLayer:
175
+ """Content filtering and request safety checks."""
176
+
177
+ BLOCKED_PATTERNS = [
178
+ "make a bomb", "how to hack", "generate malware", "create a virus",
179
+ "steal credentials", "phishing email", "exploit vulnerability",
180
+ ]
181
+
182
+ @classmethod
183
+ def check_input(cls, text: str) -> tuple[bool, str]:
184
+ """Returns (is_safe, reason)."""
185
+ text_lower = text.lower()
186
+ for pattern in cls.BLOCKED_PATTERNS:
187
+ if pattern in text_lower:
188
+ return False, f"Request blocked: contains disallowed content pattern."
189
+ return True, ""
190
+
191
+ @classmethod
192
+ def check_output(cls, text: str) -> tuple[bool, str]:
193
+ """Basic output safety check."""
194
+ return True, ""
195
+
196
+
197
+ # ─── Router ────────────────────────────────────────────────────────────────────
198
+ class TaskType(str, Enum):
199
+ DIRECT = "direct"
200
+ TOOL_ASSISTED = "tool"
201
+ MULTI_STEP = "multi_step"
202
+ CODE = "code"
203
+ ANALYSIS = "analysis"
204
+
205
+
206
+ class TaskRouter:
207
+ """Classifies incoming requests to determine the execution path."""
208
+
209
+ CODE_SIGNALS = ["write code", "implement", "function", "class", "script",
210
+ "python", "javascript", "debug", "fix this code", "refactor"]
211
+ TOOL_SIGNALS = ["calculate", "compute", "search", "look up", "find", "read file",
212
+ "write file", "execute", "run"]
213
+ MULTI_STEP_SIGNALS = ["step by step", "plan", "analyze and", "research",
214
+ "compare", "build", "create a complete", "design"]
215
+ ANALYSIS_SIGNALS = ["analyze", "evaluate", "assess", "review", "audit",
216
+ "summarize this data", "what are the trends"]
217
+
218
+ @classmethod
219
+ def classify(cls, message: str) -> TaskType:
220
+ msg_lower = message.lower()
221
+ scores = {
222
+ TaskType.CODE: sum(1 for s in cls.CODE_SIGNALS if s in msg_lower),
223
+ TaskType.TOOL_ASSISTED: sum(1 for s in cls.TOOL_SIGNALS if s in msg_lower),
224
+ TaskType.MULTI_STEP: sum(1 for s in cls.MULTI_STEP_SIGNALS if s in msg_lower),
225
+ TaskType.ANALYSIS: sum(1 for s in cls.ANALYSIS_SIGNALS if s in msg_lower),
226
+ }
227
+ best = max(scores, key=scores.get)
228
+ if scores[best] > 0:
229
+ return best
230
+ return TaskType.DIRECT
231
+
232
+
233
+ # ─── Planner ───────────────────────────────────────────────────────────────────
234
+ class Planner:
235
+ """Decomposes complex tasks into executable steps using the LLM."""
236
+
237
+ PLANNING_PROMPT = """You are a task planner. Break down the user's request into 2-6 concrete, actionable steps.
238
+ Return ONLY a JSON array of step descriptions. Example:
239
+ ["Research the topic", "Outline the structure", "Write the first draft", "Review and refine"]
240
+
241
+ User request: {request}
242
+
243
+ Steps (JSON array only):"""
244
+
245
+ @staticmethod
246
+ def create_plan_from_llm(client: MultiModelClient, model_name: str, request: str) -> list[Step]:
247
+ """Use LLM to generate a plan."""
248
+ try:
249
+ resp = client.complete(
250
+ model_name=model_name,
251
+ messages=[{"role": "user", "content": Planner.PLANNING_PROMPT.format(request=request)}],
252
+ temperature=0.3,
253
+ max_tokens=500,
254
+ )
255
+ if resp.error:
256
+ return [Step(id=1, description=request)]
257
+ text = resp.content.strip()
258
+ if "```" in text:
259
+ text = text.split("```")[1]
260
+ if text.startswith("json"):
261
+ text = text[4:]
262
+ text = text.strip()
263
+ steps_list = json.loads(text)
264
+ return [Step(id=i + 1, description=s) for i, s in enumerate(steps_list)]
265
+ except Exception:
266
+ return [Step(id=1, description=request)]
267
+
268
+ @staticmethod
269
+ def create_simple_plan(request: str) -> list[Step]:
270
+ return [Step(id=1, description=request)]
271
+
272
+
273
+ # ─── Agent Core ────────────────────────────────────────────────────────────────
274
+ class MulteclawAgent:
275
+ """
276
+ The main agent orchestrator. Runs the reasoning loop:
277
+ Router → Planner → Executor → Verifier → Reporter
278
+ with repair on failure.
279
+ """
280
+
281
+ def __init__(self):
282
+ self.client = MultiModelClient()
283
+ self.memory = AgentMemory()
284
+ self.tools = ToolExecutor()
285
+ self.config = AgentConfig()
286
+ self.logs: list[str] = []
287
+
288
+ def _log(self, msg: str):
289
+ entry = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
290
+ self.logs.append(entry)
291
+
292
+ def get_logs(self, n: int = 50) -> str:
293
+ return "\n".join(self.logs[-n:])
294
+
295
+ # ─── Main Entry: Streaming Chat ────────────────────────────────────────
296
+ def chat_stream(
297
+ self,
298
+ message: str,
299
+ history: list[dict],
300
+ model_name: str,
301
+ system_prompt: str = "",
302
+ temperature: float = 0.7,
303
+ max_tokens: int = 4096,
304
+ enable_tools: bool = True,
305
+ enable_planning: bool = True,
306
+ ) -> Generator[str, None, None]:
307
+ """
308
+ Main chat entry point. Yields partial response text for streaming.
309
+ Handles the full agent loop: safety → routing → planning → execution → verification.
310
+ """
311
+ # 1. Safety check
312
+ is_safe, reason = SafetyLayer.check_input(message)
313
+ if not is_safe:
314
+ yield f"⚠️ {reason}"
315
+ return
316
+
317
+ # 2. Update memory
318
+ self.memory.add_message("user", message)
319
+ self._log(f"User: {message[:80]}...")
320
+
321
+ # 3. Route the task
322
+ task_type = TaskRouter.classify(message)
323
+ self._log(f"Router → {task_type.value}")
324
+
325
+ # 4. Build context
326
+ all_messages = self._build_context(history, message)
327
+
328
+ # 5. Execute based on task type
329
+ if task_type == TaskType.MULTI_STEP and enable_planning:
330
+ yield from self._execute_multi_step(model_name, all_messages, system_prompt, temperature, max_tokens, message)
331
+ elif task_type == TaskType.TOOL_ASSISTED and enable_tools:
332
+ yield from self._execute_with_tools(model_name, all_messages, system_prompt, temperature, max_tokens)
333
+ else:
334
+ yield from self._execute_direct(model_name, all_messages, system_prompt, temperature, max_tokens)
335
+
336
+ # ─── Direct Execution ──────────────────────────────────────────────────
337
+ def _execute_direct(self, model_name, messages, system_prompt, temperature, max_tokens) -> Generator[str, None, None]:
338
+ """Simple streaming completion."""
339
+ self._log("Executing: direct stream")
340
+ full_response = ""
341
+ for chunk in self.client.stream(model_name, messages, system_prompt, temperature, max_tokens):
342
+ full_response += chunk
343
+ yield chunk
344
+ self.memory.add_message("assistant", full_response)
345
+ self._log(f"Response: {len(full_response)} chars")
346
+
347
+ # ─── Tool-Assisted Execution ───────────────────────────────────────────
348
+ def _execute_with_tools(self, model_name, messages, system_prompt, temperature, max_tokens) -> Generator[str, None, None]:
349
+ """
350
+ Agentic tool loop:
351
+ 1. Ask LLM what tool to call (if any)
352
+ 2. Execute the tool
353
+ 3. Feed result back to LLM
354
+ 4. Repeat until LLM gives a final answer
355
+ """
356
+ self._log("Executing: tool-assisted loop")
357
+
358
+ tool_prompt = system_prompt + "\n\nYou have access to these tools:\n"
359
+ for name, tool in BUILT_IN_TOOLS.items():
360
+ tool_prompt += f"- **{name}**: {tool['description']}\n"
361
+ tool_prompt += (
362
+ "\nTo use a tool, respond with a JSON block:\n"
363
+ '```json\n{"tool": "tool_name", "arguments": {"arg1": "value1"}}\n```\n'
364
+ "After receiving the result, provide your final answer.\n"
365
+ "If no tool is needed, just answer directly."
366
+ )
367
+
368
+ current_messages = list(messages)
369
+ attempts = 0
370
+
371
+ while attempts < self.config.max_tool_retries:
372
+ attempts += 1
373
+ self._log(f"Tool loop iteration {attempts}")
374
+
375
+ resp = self.client.complete(model_name, current_messages, tool_prompt, temperature, max_tokens)
376
+
377
+ if resp.error:
378
+ yield f"❌ Error: {resp.error}"
379
+ return
380
+
381
+ content = resp.content
382
+ tool_call = self._extract_tool_call(content)
383
+
384
+ if tool_call:
385
+ tool_name = tool_call["tool"]
386
+ tool_args = tool_call["arguments"]
387
+ self._log(f"Tool call: {tool_name}({json.dumps(tool_args)[:60]})")
388
+
389
+ yield f"🔧 Using **{tool_name}**"
390
+ if tool_name == "calculator":
391
+ yield f" → `{tool_args.get('expression', '')}`\n"
392
+ elif tool_name == "code_executor":
393
+ yield f"\n```python\n{tool_args.get('code', '')[:200]}\n```\n"
394
+ else:
395
+ yield f" → `{json.dumps(tool_args)[:100]}`\n"
396
+
397
+ result = self.tools.execute(tool_name, tool_args)
398
+ self.memory.add_tool_result(tool_name, tool_args, result.get("result", ""), result.get("success", False))
399
+
400
+ if result["success"]:
401
+ yield f"✅ Result: `{result['result'][:200]}`\n\n"
402
+ self._log(f"Tool success: {result['result'][:60]}")
403
+ else:
404
+ yield f"⚠️ Error: {result.get('error', 'Unknown error')}\n\n"
405
+ self._log(f"Tool error: {result.get('error', '')[:60]}")
406
+
407
+ current_messages.append({"role": "assistant", "content": content})
408
+ current_messages.append({"role": "user", "content": f"Tool result for {tool_name}: {result['result'][:2000]}"})
409
+ else:
410
+ yield content
411
+ self.memory.add_message("assistant", content)
412
+ return
413
+
414
+ yield "\n\n⚠️ Reached maximum tool iterations. Here's what I have so far."
415
+
416
+ # ─── Multi-Step Execution ──────────────────────────────────────────────
417
+ def _execute_multi_step(self, model_name, messages, system_prompt, temperature, max_tokens, original_request) -> Generator[str, None, None]:
418
+ """
419
+ Planning loop:
420
+ 1. Generate a plan
421
+ 2. Execute each step
422
+ 3. Combine results
423
+ """
424
+ self._log("Executing: multi-step planning")
425
+
426
+ yield "📋 **Planning...**\n\n"
427
+ plan = Planner.create_plan_from_llm(self.client, model_name, original_request)
428
+ self.memory.task_plans.append(plan)
429
+
430
+ for step in plan:
431
+ yield f" {step.id}. {step.description}\n"
432
+ yield "\n---\n\n"
433
+
434
+ step_results = []
435
+ for step in plan:
436
+ step.status = TaskStatus.RUNNING
437
+ yield f"**Step {step.id}: {step.description}**\n"
438
+ self._log(f"Step {step.id}: {step.description}")
439
+
440
+ step_context = f"""You are executing step {step.id} of a plan.
441
+ Original request: {original_request}
442
+ Current step: {step.description}
443
+ Previous step results: {json.dumps(step_results[-3:]) if step_results else 'None yet'}
444
+
445
+ Execute this step thoroughly. Be specific and detailed."""
446
+
447
+ step_messages = messages + [{"role": "user", "content": step_context}]
448
+
449
+ step_response = ""
450
+ for chunk in self.client.stream(model_name, step_messages, system_prompt, temperature, max_tokens):
451
+ step_response += chunk
452
+ yield chunk
453
+
454
+ step.status = TaskStatus.COMPLETED
455
+ step.result = step_response
456
+ step_results.append({"step": step.id, "description": step.description, "result": step_response[:500]})
457
+ yield "\n\n"
458
+
459
+ self.memory.add_message("assistant", f"[Multi-step task completed with {len(plan)} steps]")
460
+ self._log(f"Plan completed: {len(plan)} steps")
461
+
462
+ # ─── Helpers ────────────────���──────────────────────────────────────────
463
+ def _build_context(self, history: list[dict], current_message: str) -> list[dict]:
464
+ """Build the message context from history + current message."""
465
+ messages = []
466
+ for msg in history:
467
+ messages.append({"role": msg["role"], "content": msg["content"]})
468
+ messages.append({"role": "user", "content": current_message})
469
+ return messages
470
+
471
+ @staticmethod
472
+ def _extract_tool_call(text: str) -> Optional[dict]:
473
+ """Extract a JSON tool call from LLM response text."""
474
+ if "```json" in text:
475
+ try:
476
+ json_str = text.split("```json")[1].split("```")[0].strip()
477
+ obj = json.loads(json_str)
478
+ if "tool" in obj and "arguments" in obj:
479
+ return obj
480
+ except (IndexError, json.JSONDecodeError):
481
+ pass
482
+
483
+ if '"tool"' in text and '"arguments"' in text:
484
+ start = text.find("{")
485
+ if start >= 0:
486
+ depth = 0
487
+ for i in range(start, len(text)):
488
+ if text[i] == "{":
489
+ depth += 1
490
+ elif text[i] == "}":
491
+ depth -= 1
492
+ if depth == 0:
493
+ try:
494
+ obj = json.loads(text[start:i + 1])
495
+ if "tool" in obj and "arguments" in obj:
496
+ return obj
497
+ except json.JSONDecodeError:
498
+ pass
499
+ break
500
+ return None
501
+
502
+ def clear_memory(self):
503
+ self.memory.clear()
504
+ self.logs.clear()
505
+ self._log("Memory and logs cleared")