Spaces:

Chris4K
/

agent-pulse

Sleeping

App Files Files Community

Chris4K committed on Mar 14

Commit

0315f82

verified ·

1 Parent(s): d40e5e4

Update main.py

Browse files

Files changed (1) hide show

main.py +700 -245

main.py CHANGED Viewed

@@ -56,10 +56,19 @@ NEXUS_MODEL  = os.environ.get("NEXUS_MODEL", "nexus-auto")
 REACT_MAX    = int(os.environ.get("REACT_MAX_STEPS", "6"))
 # ── FORGE new infrastructure ────────────────────────────────────────
-PROMPTS_URL = os.environ.get("PROMPTS_URL", "https://chris4k-agent-prompts.hf.space")
-TRACE_URL   = os.environ.get("TRACE_URL",   "https://chris4k-agent-trace.hf.space")
-LEARN_URL   = os.environ.get("LEARN_URL",   "https://chris4k-agent-learn.hf.space")
-LOOP_URL    = os.environ.get("LOOP_URL",    "https://chris4k-agent-loop.hf.space")
 # ── Persona cache (fetched from agent-prompts, refreshed every 5min) ─
 _persona_cache: dict = {}     # agent_name → {system_prompt, max_steps, ...}
@@ -321,285 +330,731 @@ async def space_post(space: str, path: str, data: dict) -> Optional[Any]:
         log.warning(f"space_post {space}{path}: {e}")
         return None
-# ── ReAct tools ────────────────────────────────────────────────────
-TOOL_SPECS = [
-    {"name":"relay_send",     "desc":"Send message to an agent or broadcast. Args: to, subject, body, priority(low/normal/high/urgent), channel(internal/telegram/browser)"},
-    {"name":"relay_inbox",    "desc":"Read unread messages for an agent. Args: agent"},
-    {"name":"memory_search",  "desc":"Search agent memory. Args: query, tier(all/episodic/semantic/procedural/working)"},
-    {"name":"memory_store",   "desc":"Store a memory. Args: content, tier, tags(list), importance(0-10)"},
-    {"name":"kanban_list",    "desc":"List tasks. Args: status(todo/doing/done/blocked/failed), agent(optional)"},
-    {"name":"kanban_move",    "desc":"Move a task to new status. Args: id, status, slot_id(optional), llm_tokens(optional), react_steps(optional)"},
-    {"name":"kanban_create",  "desc":"Create a task. Args: title, body, priority(low/medium/high/critical), agent, est_minutes(optional), deps(list of task ids, optional)"},
-    {"name":"vault_exec",     "desc":"Execute code. Args: runtime(bash/python3/node/npm/pip/git), code, cwd(optional)"},
-    {"name":"vault_read",     "desc":"Read a file. Args: path"},
-    {"name":"vault_write",    "desc":"Write a file. Args: path, content"},
-    {"name":"forge_search",   "desc":"Search for skills/tools in FORGE. Args: query"},
-    {"name":"slot_reserve",   "desc":"Reserve the 35B GPU slot before a long task. Args: task_id, est_minutes(1-60), priority(1=critical). Returns slot_id or queue info. Use before any ki_fusion task >2 min."},
-    {"name":"slot_release",   "desc":"Release the GPU slot when done. Args: slot_id. Always call this after finishing to unblock other agents."},
-    {"name":"slot_status",    "desc":"Check who holds the GPU slot and queue. No args. Check before reserving to know wait time."},
-    {"name":"trigger_agent",  "desc":"Wake another agent immediately (e.g. after delegation). Args: agent(name), content(task description). Use after delegate to ensure immediate pickup."},
-    {"name":"self_reflect",   "desc":"Trigger your own self-reflection: reads your traces, proposes persona improvements. No args needed. Use weekly or after repeated failures."},
-    {"name":"delegate",       "desc":"Delegate a task to another agent. Args: to_agent, task, priority. Always follow with trigger_agent to wake them immediately."},
-    {"name":"finish",         "desc":"Complete the ReAct loop with a result. Args: result"},
-]
-TOOL_NAMES = {t["name"] for t in TOOL_SPECS}
-async def exec_tool(agent_name: str, tool: str, args: dict) -> str:
-    """Execute a ReAct tool and return observation string."""
     try:
-        if tool == "relay_send":
-            r = await space_post("relay", "/api/messages", {
-                "from": agent_name, "to": args.get("to","broadcast"),
-                "subject": args.get("subject",""), "body": args.get("body",""),
-                "priority": args.get("priority","normal"),
-                "channel": args.get("channel","internal")})
-            return f"Message sent id={r.get('id','')} status={r.get('dispatch_status','?')}" if r else "relay_send failed"
-        if tool == "relay_inbox":
-            r = await space_get("relay", f"/api/inbox/{args.get('agent',agent_name)}", {"unread":"true"})
-            if not r: return "inbox empty"
-            msgs = r[:5] if isinstance(r, list) else []
-            return json.dumps([{"from":m.get("from"),"subject":m.get("subject"),"body":m.get("body","")[:200]} for m in msgs])
-        if tool == "memory_search":
-            r = await space_get("memory", "/api/memories/search",
-                                 {"q": args.get("query",""), "tier": args.get("tier","all"), "limit":8})
-            if not r: return "no results"
-            results = r if isinstance(r, list) else r.get("results",[])
-            return json.dumps([{"content":m.get("content","")[:200],"tier":m.get("tier"),"tags":m.get("tags")} for m in results[:5]])
-        if tool == "memory_store":
-            r = await space_post("memory", "/api/memories", {
-                "content": args.get("content",""), "tier": args.get("tier","episodic"),
-                "tags": args.get("tags",[]), "importance": args.get("importance",6),
-                "agent": agent_name})
-            return f"stored id={r.get('id','?')}" if r else "memory_store failed"
-        if tool == "kanban_list":
-            params = {}
-            if args.get("status"): params["status"] = args["status"]
-            if args.get("agent"): params["agent"] = args["agent"]
-            r = await space_get("kanban", "/api/tasks", params)
-            tasks = r if isinstance(r, list) else []
-            return json.dumps([{"id":t.get("id"),"title":t.get("title"),"status":t.get("status"),"priority":t.get("priority")} for t in tasks[:8]])
-        if tool == "kanban_move":
-            r = await space_post("kanban", "/api/move", {"id":args.get("id"),"status":args.get("status")})
-            return f"moved {args.get('id')} to {args.get('status')}" if r else "kanban_move failed"
-        if tool == "kanban_create":
-            r = await space_post("kanban", "/api/tasks", {
-                "title": args.get("title",""), "body": args.get("body",""),
-                "priority": args.get("priority","medium"), "agent": args.get("agent",agent_name),
-                "type": "ai"})
-            return f"created task id={r.get('id','?')}" if r else "kanban_create failed"
-        if tool == "vault_exec":
-            # Sanitize cwd: only allow actual workspace dirs, never template paths
-            _VALID_CWDS = {"code", "reports", "scratch", "shared", ""}
-            raw_cwd = str(args.get("cwd","scratch")).strip().strip("/")
-            safe_cwd = raw_cwd if raw_cwd in _VALID_CWDS else "scratch"
-            r = await space_post("vault", "/api/exec", {
-                "runtime": args.get("runtime","python3"),
-                "code": args.get("code",""), "cwd": safe_cwd,
-                "timeout": 30})
-            if not r: return "vault_exec failed"
-            return f"exit={r.get('exit_code')} ms={r.get('ms')}\n{r.get('output','')[:500]}"
-        if tool == "vault_read":
-            r = await space_get("vault", "/api/read", {"path": args.get("path","")})
-            return (r.get("content","")[:800] if r else "vault_read failed")
-        if tool == "vault_write":
-            r = await space_post("vault", "/api/write", {
-                "path": args.get("path",""), "content": args.get("content",""),
-                "agent": agent_name})
-            return f"written: {args.get('path')} snap={r.get('snapshot',{}).get('id','?')}" if r else "vault_write failed"
-        if tool == "forge_search":
-            r = await space_get("forge", "/api/v1/skills", {"q": args.get("query",""), "limit":5})
-            items = r if isinstance(r, list) else (r.get("skills",[]) if r else [])
-            return json.dumps([{"name":s.get("name"),"description":s.get("description","")[:100]} for s in items[:5]])
-        if tool == "slot_reserve":
-            r = await space_post("nexus", "/api/slot/reserve", {
-                "agent": agent_name,
-                "task_id": args.get("task_id",""),
-                "est_minutes": args.get("est_minutes", 5),
-                "priority": args.get("priority", 5),
-            })
-            if not r: return "slot_reserve failed"
-            status = r.get("status","unknown")
-            if status == "active":
-                return f"slot ACTIVE slot_id={r['slot_id']} expires in {args.get('est_minutes',5)} min"
-            elif status == "queued":
-                return f"slot QUEUED position={r['queue_position']} eta={r.get('eta_seconds',0)}s current_holder={r.get('current_holder','?')} — wait or use local_cpu"
-            else:
-                return f"slot status={status}: {r}"
-        if tool == "slot_release":
-            r = await space_post("nexus", "/api/slot/release", {"slot_id": args.get("slot_id","")})
-            return f"slot released (held {r.get('held_seconds',0)}s)" if r and r.get("released") else "slot_release failed or slot not found"
-        if tool == "slot_status":
-            r = await space_get("nexus", "/api/slot/status", {})
-            if not r: return "slot_status failed"
-            active = r.get("active")
-            queue  = r.get("queue", [])
-            if active:
-                eta = int(active.get("expires_at",0) - __import__("time").time())
-                result = f"OCCUPIED by {active['agent']} task={active.get('task_id','')} expires_in={eta}s"
             else:
-                result = "FREE"
-            if queue:
-                result += f" | Queue: {[q['agent'] for q in queue]}"
-            return result
-        if tool == "delegate":
-            r = await space_post("relay", "/api/messages", {
-                "from": agent_name, "to": args.get("to_agent","broadcast"),
-                "subject": f"[DELEGATION] {args.get('task','')[:60]}",
-                "body": args.get("task",""), "priority": args.get("priority","normal"),
-                "channel": "internal", "tags": ["delegation","task"]})
-            return f"delegated to {args.get('to_agent')} via relay" if r else "delegation failed"
-        if tool == "trigger_agent":
-            target = args.get("agent","")
-            content = args.get("content", f"Task delegated by {agent_name}")
-            try:
-                r = await space_post("pulse", f"/api/trigger/{target}",
-                                      {"from": agent_name, "content": content})
-                return f"triggered {target}: {r}" if r else f"trigger sent to {target}"
-            except Exception as e:
-                return f"trigger_agent failed: {e} — agent may still pick up via heartbeat"
-        if tool == "self_reflect":
             try:
-                r = await space_post("pulse", f"/api/reflect/{agent_name}", {})
-                return f"self-reflection started — results stored in procedural memory + relayed to christof"
             except Exception as e:
-                return f"self_reflect failed: {e}"
-        return f"unknown tool: {tool}"
     except Exception as e:
-        return f"tool error: {e}"
-# ── ReAct loop ─────────────────────────────────────────────────────
-SYSTEM_TEMPLATE = """\
-You are {name}. {persona}
-CONNECTED TOOLS (use these to act):
-{tools}
-OUTPUT FORMAT — respond with exactly one JSON object per step, nothing else:
-{{"thought":"<your reasoning>","action":"<tool_name>","args":{{<args as key:value pairs>}}}}
-To complete your task:
-{{"thought":"<summary of what you did>","action":"finish","args":{{"result":"<one sentence result>"}}}}
-CRITICAL RULES:
-1. Your CONTEXT already contains your current inbox messages and open tasks. Do NOT call relay_inbox or kanban_list as your first step - act on what is already in the context.
-2. When you have an OPEN TASK: first call kanban_move(id="<id>", status="doing"), then do the work, then call kanban_move(id="<id>", status="done").
-3. To write a file to vault: call vault_write(path="code/filename.py", content="...full file content...").
-4. vault_exec cwd MUST be one of exactly: code, reports, scratch, shared. Never invent a path like path/to/vault.
-5. Never call the same tool twice in a row with the same args. If it failed, try a different approach or finish.
-6. Always finish within {max_steps} steps.
-"""
 async def react_loop(agent: dict, trigger_type: str, trigger_content: str) -> dict:
     name      = agent["name"]
-    persona   = agent.get("persona", "A helpful AI agent.")
     cost_mode = agent.get("cost_mode", "balanced")
     max_steps = agent.get("max_react_steps", REACT_MAX)
-    tool_list = "\n".join(f"  {t['name']}: {t['desc']}" for t in TOOL_SPECS)
-    system_msg = SYSTEM_TEMPLATE.format(
-        name=name, persona=persona, tools=tool_list, max_steps=max_steps)
-    user_msg = (f"TRIGGER: {trigger_type}\n"
-                f"CONTEXT: {trigger_content}\n"
-                f"Current time: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}\n"
-                "Execute your task using available tools. Begin.")
-    messages = [{"role":"system","content":system_msg},
-                {"role":"user","content":user_msg}]
-    trace = {"agent":name,"trigger":trigger_type,"started":int(time.time()),
-              "steps":[],"result":"","ok":True}
-    push_live({"type":"react_start","agent":name,"trigger":trigger_type})
-    for step_n in range(max_steps):
-        # Call LLM with automatic fallback chain
-        try:
-            raw = await call_llm(messages, system_msg, max_tokens=900)
-            raw = raw.strip()
-        except Exception as e:
-            trace["result"] = f"all LLM providers failed: {e}"
-            trace["ok"] = False
-            push_live({"type":"error","agent":name,"message":f"LLM error: {str(e)[:80]}"})
-            break
-        # Parse JSON
-        try:
-            # Strip markdown fences
-            clean = re.sub(r"```(?:json)?|```","", raw).strip()
-            # Take first JSON object
-            m = re.search(r'\{.*\}', clean, re.DOTALL)
-            step_json = json.loads(m.group()) if m else {}
-        except Exception:
-            step_json = {"thought": raw, "action": "finish", "args": {"result": raw}}
-        thought = step_json.get("thought","") or ""
-        action  = step_json.get("action","finish") or "finish"
-        args    = step_json.get("args",{}) or {}  # guard against JSON null → None
-        step_record = {"n":step_n+1,"thought":thought,"action":action,"args":args,"observation":""}
-        push_live({"type":"react_step","agent":name,"step":step_n+1,
-                   "thought":thought[:120],"action":action})
-        if action == "finish":
-            trace["result"] = args.get("result","done")
-            step_record["observation"] = "[FINISHED]"
-            trace["steps"].append(step_record)
-            break
-        if action not in TOOL_NAMES:
-            observation = f"unknown tool: {action}. Available: {sorted(TOOL_NAMES)}"
-        else:
-            observation = await exec_tool(name, action, args)
-        step_record["observation"] = str(observation)[:400]
-        trace["steps"].append(step_record)
-        push_live({"type":"react_obs","agent":name,"step":step_n+1,
-                   "observation":step_record["observation"][:100]})
-        messages.append({"role":"assistant","content":raw})
-        messages.append({"role":"user","content":f"Observation: {observation}"})
-        # Store trace step in memory
-        asyncio.create_task(space_post("memory", "/api/memories", {
-            "content": f"[{name}] Step {step_n+1}: {action}({json.dumps(args)[:100]}) → {observation[:150]}",
-            "tier": "episodic", "tags": [name,"react","trace"], "importance": 3, "agent": name}))
-    else:
-        trace["result"] = f"max steps ({max_steps}) reached"
-    trace["finished"] = int(time.time())
-    trace["ms"] = (trace["finished"] - trace["started"]) * 1000
-    # Save full trace
-    tid = uuid.uuid4().hex[:8]
-    (BASE / "traces" / f"{tid}.json").write_text(json.dumps(trace, indent=2))
-    push_live({"type":"react_done","agent":name,"result":trace["result"][:120],
-               "ok":trace["ok"],"ms":trace["ms"],"steps":len(trace["steps"])})
     return trace
 # ── Heartbeat engine ───────────────────────────────────────────────
 scheduler = AsyncIOScheduler(timezone="UTC")
 async def agent_tick(agent_name: str, trigger_type: str = "heartbeat", content: str = ""):
     agents = load_json(AGENTS_FILE, [])

 REACT_MAX    = int(os.environ.get("REACT_MAX_STEPS", "6"))
 # ── FORGE new infrastructure ────────────────────────────────────────
+PROMPTS_URL    = os.environ.get("PROMPTS_URL",    "https://chris4k-agent-prompts.hf.space")  # env-overridable service base URL
+TRACE_URL      = os.environ.get("TRACE_URL",      "https://chris4k-agent-trace.hf.space")  # env-overridable service base URL
+LEARN_URL      = os.environ.get("LEARN_URL",      "https://chris4k-agent-learn.hf.space")  # env-overridable service base URL
+LOOP_URL       = os.environ.get("LOOP_URL",       "https://chris4k-agent-loop.hf.space")  # env-overridable service base URL
+HARNESS_URL    = os.environ.get("HARNESS_URL",    "https://chris4k-agent-harness.hf.space")  # tool-output scanner; an empty value makes the harness scans pass content through
+APPROVE_URL    = os.environ.get("APPROVE_URL",    "https://chris4k-agent-approve.hf.space")  # approval gate; an empty value makes the gate auto-approve
+COMPLIANCE_URL = os.environ.get("COMPLIANCE_URL", "https://chris4k-agent-compliance.hf.space")  # PII scanner; an empty value skips redaction
+BRAVE_API_KEY  = os.environ.get("BRAVE_API_KEY",  "")  # Brave Search token; empty disables web_search_brave
+# Risky tools that require approval gate (NOTE(review): the code consuming these three sets is outside this view — confirm how they are matched)
+RISKY_TOOLS = {"vault_exec"}
+RISKY_RUNTIMES = {"bash", "git"}         # within vault_exec these trigger approve
+RISKY_PATTERNS = {"rm ", "rmdir", "git push", "git force", "dd ", "chmod 777"}  # presumably substring-matched against submitted code — TODO confirm
 # ── Persona cache (fetched from agent-prompts, refreshed every 5min) ─
 _persona_cache: dict = {}     # agent_name → {system_prompt, max_steps, ...}
         log.warning(f"space_post {space}{path}: {e}")
         return None
+# ── Sprint 5: Middleware helpers ────────────────────────────────────
async def harness_scan(agent: str, tool: str, content: str) -> tuple[bool, str]:
    """Run a tool's output through the agent-harness scanner.

    Returns ``(safe, sanitised_content)``. The scan is best-effort: when
    HARNESS_URL is empty, the service answers with a non-200 status, or the
    request raises, the original content is handed back marked as safe.
    """
    passthrough = (True, content)
    if not HARNESS_URL:
        return passthrough

    payload = {"agent": agent, "tool": tool, "content": content}
    try:
        async with httpx.AsyncClient(timeout=4) as client:
            response = await client.post(f"{HARNESS_URL}/api/scan/output", json=payload)
            if response.status_code != 200:
                return passthrough
            verdict = response.json()
            return verdict.get("safe", True), verdict.get("sanitised", content)
    except Exception as e:
        # Deliberate fail-open: a broken scanner must not block the agent.
        log.debug(f"[HARNESS] scan failed (pass-through): {e}")
    return passthrough
async def request_approval(agent: str, tool: str, args: dict, risk: str = "high") -> tuple[bool, str]:
    """Gate a risky tool call through the agent-approve service.

    Posts an approval request, then polls its status every 5 seconds for at
    most 18 attempts (about 90 seconds).

    Returns:
        ``(approved, reason)``. ``approved`` is True only when the service
        reports ``approved`` (or when APPROVE_URL is unset, which auto-approves
        with a warning). Every failure mode — HTTP error on the initial
        request, missing approval id, rejection, expiry, polling timeout, or
        an exception — yields ``(False, <reason>)``; the function never
        returns None.
    """
    if not APPROVE_URL:
        log.warning("[APPROVE] APPROVE_URL not set — auto-approving (unsafe!)")
        return True, "approve_url_missing"
    try:
        async with httpx.AsyncClient(timeout=6) as c:
            r = await c.post(f"{APPROVE_URL}/api/approval/request",
                             json={"agent": agent, "tool": tool, "args": args,
                                   "risk": risk, "auto_timeout": 120})
            if r.status_code != 200:
                # Previously this path fell off the end and returned None,
                # which crashed callers unpacking the (approved, reason) tuple.
                log.warning(f"[APPROVE] request returned HTTP {r.status_code} — blocking tool call")
                return False, f"approve_http_{r.status_code}"

            approval_id = r.json().get("id")
            if not approval_id:
                # Without an id there is nothing to poll; reject immediately
                # instead of polling ".../api/approval/None" for 90 seconds.
                log.warning("[APPROVE] response carried no approval id — blocking tool call")
                return False, "approve_no_id"

            # Poll for up to 90s (Telegram keyboard gives christof 2 min)
            for _ in range(18):
                await asyncio.sleep(5)
                pr = await c.get(f"{APPROVE_URL}/api/approval/{approval_id}")
                if pr.status_code != 200:
                    continue  # transient poll failure: keep waiting
                status = pr.json().get("status")
                if status == "approved":
                    return True, "human_approved"
                if status in ("rejected", "expired"):
                    return False, status
            return False, "timeout"
    except Exception as e:
        log.warning(f"[APPROVE] gate failed: {e} — blocking tool call")
        return False, f"approve_error: {e}"
async def compliance_scan(agent: str, content: str) -> tuple[bool, str, list]:
    """Ask the compliance service to scan (and redact) text for PII.

    Returns ``(safe, redacted_content, pii_types_found)`` where ``safe`` is
    the negation of the service's ``pii_found`` flag. The scan is best-effort:
    with COMPLIANCE_URL empty, a non-200 answer, or a raised exception the
    content is returned untouched as ``(True, content, [])``.
    """
    unchanged = (True, content, [])
    if not COMPLIANCE_URL:
        return unchanged

    request_body = {"text": content, "agent": agent, "redact": True}
    try:
        async with httpx.AsyncClient(timeout=4) as client:
            response = await client.post(f"{COMPLIANCE_URL}/api/scan/pii", json=request_body)
            if response.status_code == 200:
                report = response.json()
                pii_found = report.get("pii_found", False)
                return (not pii_found,
                        report.get("redacted", content),
                        report.get("types_found", []))
    except Exception as e:
        # Deliberate fail-open, mirroring the harness scan.
        log.debug(f"[COMPLIANCE] scan failed (pass-through): {e}")
    return unchanged
async def web_search_brave(query: str, count: int = 5) -> str:
    """Query the Brave Search API and format the hits as a numbered list.

    Always returns a string: the formatted results, a "no results" notice,
    a configuration notice when BRAVE_API_KEY is empty, or an error message
    when the request or parsing fails.
    """
    if not BRAVE_API_KEY:
        return "web_search unavailable: BRAVE_API_KEY not configured"

    query_params = {"q": query, "count": count, "text_decorations": False}
    request_headers = {"Accept": "application/json",
                       "Accept-Encoding": "gzip",
                       "X-Subscription-Token": BRAVE_API_KEY}
    try:
        async with httpx.AsyncClient(timeout=8) as client:
            response = await client.get("https://api.search.brave.com/res/v1/web/search",
                                        params=query_params,
                                        headers=request_headers)
            response.raise_for_status()
            hits = response.json().get("web", {}).get("results", [])
            if not hits:
                return "no results found"
            # One entry per hit: "N. title — url" followed by a clipped description.
            return "\n\n".join(
                f"{rank}. {hit.get('title','?')} — {hit.get('url','')}\n   {hit.get('description','')[:200]}"
                for rank, hit in enumerate(hits[:count], 1)
            )
    except Exception as e:
        return f"web_search error: {e}"
async def fetch_url_content(url: str) -> str:
    """Download a URL and return its text, capped at 5000 characters.

    HTML responses (per the Content-Type header) have tags replaced by spaces
    and whitespace runs collapsed; other responses are only stripped. Text
    longer than 5000 characters is cut and suffixed with a truncation marker.
    Any failure is reported as a "fetch_url error: ..." string.
    """
    try:
        async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client:
            response = await client.get(url, headers={"User-Agent": "FORGE-Agent/1.0"})
            response.raise_for_status()
            content_type = response.headers.get("content-type", "")
            if "html" not in content_type:
                body = response.text.strip()
            else:
                without_tags = re.sub(r"<[^>]+>", " ", response.text)
                body = re.sub(r"\s{2,}", " ", without_tags).strip()
            if len(body) > 5000:
                return body[:5000] + "…[truncated]"
            return body
    except Exception as e:
        return f"fetch_url error: {e}"
+# ── Sprint 5: Saga Orchestrator ─────────────────────────────────────
class SagaStep:
    """One saga step: a name, a forward action and an optional undo action."""

    def __init__(self, name: str, forward, compensate=None):
        # forward:    async callable producing the step result
        # compensate: async callable undoing the step, or None when there is nothing to undo
        self.name, self.forward, self.compensate = name, forward, compensate
class SagaOrchestrator:
    """Run a sequence of async steps, compensating completed ones on failure.

    Steps run in the order they were added. When a step's forward action
    raises, the compensators of the steps that already completed are awaited
    in reverse order, a failure alert is posted to the relay, and ``run``
    returns a failure dict instead of raising.

    Usage:
        saga = SagaOrchestrator(agent_name, saga_id)
        saga.add_step("reserve_slot",  fwd=lambda: ..., comp=lambda: ...)
        saga.add_step("vault_write",   fwd=lambda: ..., comp=lambda: ...)
        result = await saga.run()
    """

    def __init__(self, agent: str, saga_id: str = ""):
        self.agent = agent
        self.saga_id = saga_id or str(uuid.uuid4())[:8]
        self.steps: list[SagaStep] = []
        self.completed: list[tuple[str, str]] = []  # (name, result)
        # The step objects behind `completed`, kept so compensation does not
        # have to look steps up by name (names are not guaranteed unique).
        self._completed_steps: list[SagaStep] = []

    def add_step(self, name: str, fwd, comp=None):
        """Append a step; ``fwd`` and ``comp`` are async callables (``comp`` optional)."""
        self.steps.append(SagaStep(name, fwd, comp))

    async def run(self) -> dict:
        """Execute all steps.

        Returns:
            On success ``{"ok": True, "saga_id", "steps"}`` where ``steps``
            maps step name to its stringified result. On failure
            ``{"ok": False, "saga_id", "failed_step", "error",
            "saga_compensated", "compensated_steps", "compensation_errors"}``.
            ``saga_compensated`` is kept as True for existing callers; the two
            list keys say which compensators actually ran and which failed.
        """
        emit_trace(self.agent, "saga_start",
                   {"saga_id": self.saga_id, "steps": [s.name for s in self.steps]})
        for step in self.steps:
            try:
                result = await step.forward()
                self.completed.append((step.name, str(result)))
                self._completed_steps.append(step)
                log.info(f"[SAGA {self.saga_id}] {step.name} OK: {str(result)[:80]}")
            except Exception as e:
                log.error(f"[SAGA {self.saga_id}] {step.name} FAILED: {e} — compensating")
                emit_trace(self.agent, "saga_failed",
                           {"saga_id": self.saga_id, "failed_step": step.name, "error": str(e)},
                           status="error")
                compensated, comp_errors = await self._compensate()
                await self._notify_failure(step.name, e, compensated)
                return {"ok": False, "saga_id": self.saga_id,
                        "failed_step": step.name, "error": str(e),
                        "saga_compensated": True,
                        "compensated_steps": compensated,
                        "compensation_errors": comp_errors}
        emit_trace(self.agent, "saga_complete",
                   {"saga_id": self.saga_id, "steps_completed": len(self.completed)})
        return {"ok": True, "saga_id": self.saga_id,
                "steps": dict(self.completed)}

    async def _compensate(self) -> tuple[list, list]:
        """Undo completed steps in reverse order; return (compensated, failed) step names."""
        compensated: list[str] = []
        failed: list[str] = []
        for done in reversed(self._completed_steps):
            if not done.compensate:
                continue
            try:
                await done.compensate()
                compensated.append(done.name)
                log.info(f"[SAGA {self.saga_id}] compensated {done.name}")
            except Exception as ce:
                # Keep going: one failed undo must not prevent the others.
                failed.append(done.name)
                log.warning(f"[SAGA {self.saga_id}] compensate {done.name} failed: {ce}")
        return compensated, failed

    async def _notify_failure(self, failed_step: str, error: Exception, compensated: list) -> None:
        """Best-effort alert to the relay; never raises."""
        import html  # local import: only needed on the failure path

        # The message is sent with parse_mode=HTML, so interpolated text must
        # be escaped or a stray "<" or "&" in an error would break the markup.
        def esc(value) -> str:
            return html.escape(str(value), quote=False)

        text = (f"&#x26A0;&#xFE0F; SAGA {esc(self.saga_id)} failed at step <b>{esc(failed_step)}</b>\n"
                f"Agent: {esc(self.agent)}\n"
                f"Error: {esc(str(error)[:200])}\n"
                f"Compensations ran for: {esc(compensated)}")
        try:
            async with httpx.AsyncClient(timeout=4) as c:
                await c.post(f"{SPACES['relay']}/api/notify",
                             json={"text": text, "parse_mode": "HTML"})
        except Exception as ne:
            log.debug(f"[SAGA {self.saga_id}] failure notification not delivered: {ne}")
+# ── smolagents — Tool definitions ──────────────────────────────────
+# Each tool uses httpx synchronous client (tools run in a thread via asyncio.to_thread).
+# CodeAgent writes Python code to call these tools, enabling loops, conditionals,
+# and natural composition — far more powerful than JSON ReAct.
+try:
+    from smolagents import CodeAgent, Tool, OpenAIServerModel, ToolCallingAgent
+    from smolagents.monitoring import LogLevel
+    SMOLAGENTS_OK = True
+except ImportError:
+    SMOLAGENTS_OK = False
+    log.warning("[SMOLAGENTS] not installed — install 'smolagents[litellm]'")
def _sync_get(space: str, path: str, params: dict | None = None) -> dict | None:
    """Blocking GET against a FORGE space; returns parsed JSON or None on any failure.

    ``space`` is a key of SPACES, or a base URL used verbatim when the key is
    unknown. ``params`` defaults to None rather than a shared ``{}`` literal,
    so no mutable default object is reused across calls.
    """
    url = SPACES.get(space, space) + path
    try:
        r = httpx.get(url, params=params or {}, timeout=HTTP_TIMEOUT)
        r.raise_for_status()
        return r.json()
    except Exception as e:
        log.warning(f"_sync_get {space}{path}: {e}")
        return None
def _sync_post(space: str, path: str, data: dict) -> dict | None:
    """Blocking JSON POST against a FORGE space; parsed JSON on success, None on any failure.

    ``space`` is a key of SPACES, or a base URL used verbatim when the key is unknown.
    """
    endpoint = SPACES.get(space, space) + path
    try:
        response = httpx.post(endpoint, json=data, timeout=HTTP_TIMEOUT)
        response.raise_for_status()
        payload = response.json()
    except Exception as e:
        log.warning(f"_sync_post {space}{path}: {e}")
        return None
    return payload
def _harness_scan_sync(agent: str, tool: str, content: str) -> str:
    """Synchronous harness scan — returns sanitised content.

    Best-effort: with HARNESS_URL empty, on a non-200 answer, or on any
    exception the original content is returned unchanged. Failures are logged
    at debug level (matching the async harness_scan) instead of being
    silently discarded.
    """
    if not HARNESS_URL:
        return content
    try:
        r = httpx.post(f"{HARNESS_URL}/api/scan/output",
                       json={"agent": agent, "tool": tool, "content": content}, timeout=4)
        if r.status_code == 200:
            d = r.json()
            return d.get("sanitised", content)
    except Exception as e:
        log.debug(f"[HARNESS] scan failed (pass-through): {e}")
    return content
def _approve_sync(agent: str, tool: str, args: dict, risk: str = "high") -> tuple[bool, str]:
    """Synchronous approval gate. Polls every 5 s, up to 18 times (about 90 s).

    Returns:
        ``(approved, reason)``. Always a tuple: an HTTP error on the initial
        request or a response without an approval id now rejects explicitly
        instead of falling through and returning None.
    """
    if not APPROVE_URL:
        # Same warning as the async gate: this path disables the safety check.
        log.warning("[APPROVE] APPROVE_URL not set — auto-approving (unsafe!)")
        return True, "approve_url_missing"
    try:
        r = httpx.post(f"{APPROVE_URL}/api/approval/request",
                       json={"agent": agent, "tool": tool, "args": args,
                             "risk": risk, "auto_timeout": 120}, timeout=6)
        if r.status_code != 200:
            log.warning(f"[APPROVE] request returned HTTP {r.status_code} — blocking tool call")
            return False, f"approve_http_{r.status_code}"

        approval_id = r.json().get("id")
        if not approval_id:
            # Nothing to poll for; avoid 90 s of requests to ".../approval/None".
            log.warning("[APPROVE] response carried no approval id — blocking tool call")
            return False, "approve_no_id"

        for _ in range(18):
            time.sleep(5)
            pr = httpx.get(f"{APPROVE_URL}/api/approval/{approval_id}", timeout=4)
            if pr.status_code != 200:
                continue  # transient poll failure: keep waiting
            status = pr.json().get("status")
            if status == "approved":
                return True, "human_approved"
            if status in ("rejected", "expired"):
                return False, status
        return False, "timeout"
    except Exception as e:
        log.warning(f"[APPROVE] gate failed: {e} — blocking tool call")
        return False, f"approve_error: {e}"
def _compliance_scan_sync(agent: str, content: str) -> str:
    """Compliance PII scan — returns redacted content.

    Best-effort: with COMPLIANCE_URL empty, on a non-200 answer, or on any
    exception the original content is returned unchanged. Failures are logged
    at debug level (matching the async compliance_scan) rather than silently
    discarded, so an unredacted pass-through leaves a trace.
    """
    if not COMPLIANCE_URL:
        return content
    try:
        r = httpx.post(f"{COMPLIANCE_URL}/api/scan/pii",
                       json={"text": content, "agent": agent, "redact": True}, timeout=4)
        if r.status_code == 200:
            return r.json().get("redacted", content)
    except Exception as e:
        log.debug(f"[COMPLIANCE] scan failed (pass-through): {e}")
    return content
+# ── FORGE Tool classes ──────────────────────────────────────────────
class RelaySendTool(Tool):
    """smolagents tool that posts a message to the relay space on behalf of one agent."""

    name = "relay_send"
    description = "Send a message to an agent or broadcast via RELAY. Use for notifications, delegations, status updates."
    inputs = {
        "to": {"type": "string", "description": "Recipient agent name or 'broadcast'"},
        "subject": {"type": "string", "description": "Message subject (short)"},
        "body": {"type": "string", "description": "Full message body"},
        "priority": {"type": "string", "description": "low | normal | high | urgent", "nullable": True},
        "channel": {"type": "string", "description": "internal | telegram | browser", "nullable": True},
    }
    output_type = "string"

    def __init__(self, agent_name):
        super().__init__()
        # Sender name stamped on every outgoing message.
        self._agent = agent_name

    def forward(self, to, subject, body, priority="normal", channel="internal"):
        """POST the message; return the relay's message id or a failure notice."""
        message = {
            "from": self._agent,
            "to": to,
            "subject": subject,
            "body": body,
            # Nullable inputs may arrive as None; fall back to the defaults.
            "priority": priority or "normal",
            "channel": channel or "internal",
        }
        reply = _sync_post("relay", "/api/messages", message)
        if not reply:
            return "relay_send failed"
        return f"sent id={reply.get('id','?')}"
class MemorySearchTool(Tool):
    """smolagents tool that searches the memory space and returns a compact JSON digest."""

    name = "memory_search"
    description = "Search agent memory across tiers. Always search before answering questions — you may have relevant memories."
    inputs = {
        "query": {"type": "string", "description": "Search query"},
        "tier": {"type": "string", "description": "all | episodic | semantic | procedural | working", "nullable": True},
    }
    output_type = "string"

    def __init__(self, agent_name):
        super().__init__()
        self._agent = agent_name

    def forward(self, query, tier="all"):
        """Return up to five hits as a JSON list, or "no results" when the search comes back empty."""
        import json as _json

        search_params = {"q": query, "tier": tier or "all", "limit": 8}
        reply = _sync_get("memory", "/api/memories/search", search_params)
        if not reply:
            return "no results"

        # The endpoint may answer with a bare list or with {"results": [...]}.
        if isinstance(reply, list):
            hits = reply
        else:
            hits = reply.get("results", [])

        digest = []
        for memory in hits[:5]:
            digest.append({"content": memory.get("content","")[:200],
                           "tier": memory.get("tier"),
                           "tags": memory.get("tags")})
        return _json.dumps(digest)
+class MemoryStoreTool(Tool):
+    name        = "memory_store"
+    description = "Store a memory in MEMORY space. Content is PII-scanned before writing."
+    inputs      = {
+        "content":    {"type":"string","description":"Memory content to store"},
+        "tier":       {"type":"string","description":"episodic | semantic | procedural | working"},
+        "tags":       {"type":"array","description":"List of tag strings", "nullable":True},
+        "importance": {"type":"integer","description":"0-10 importance score", "nullable":True},
+    }
+    output_type = "string"
+    def __init__(self, agent_name): super().__init__(); self._agent = agent_name
+    def forward(self, content, tier="episodic", tags=None, importance=6):
+        # Compliance: PII scan before writing
+        safe_content = _compliance_scan_sync(self._agent, content)
+        r = _sync_post("memory", "/api/memories", {
+            "content": safe_content, "tier": tier, "tags": tags or [],
+            "importance": importance or 6, "agent": self._agent})
+        return f"stored id={r.get('id','?')}" if r else "memory_store failed"
+class KanbanListTool(Tool):
+    name        = "kanban_list"
+    description = "List tasks from KANBAN board. Filter by status and/or agent."
+    inputs      = {
+        "status": {"type":"string","description":"todo | doing | done | blocked | failed", "nullable":True},
+        "agent":  {"type":"string","description":"Filter by agent name", "nullable":True},
+    }
+    output_type = "string"
+    def __init__(self, agent_name): super().__init__(); self._agent = agent_name
+    def forward(self, status=None, agent=None):
+        params = {}
+        if status: params["status"] = status
+        if agent:  params["agent"]  = agent
+        import json as _json
+        r = _sync_get("kanban", "/api/tasks", params) or []
+        tasks = r if isinstance(r, list) else []
+        return _json.dumps([{"id":t.get("id"),"title":t.get("title"),
+                             "status":t.get("status"),"priority":t.get("priority")} for t in tasks[:8]])
+class KanbanMoveTool(Tool):
+    name        = "kanban_move"
+    description = "Move a task to a new status on the KANBAN board."
+    inputs      = {
+        "id":          {"type":"string","description":"Task ID"},
+        "status":      {"type":"string","description":"todo | doing | done | blocked | failed"},
+        "slot_id":     {"type":"string","description":"GPU slot ID if applicable", "nullable":True},
+        "react_steps": {"type":"integer","description":"Number of ReAct steps taken", "nullable":True},
+    }
+    output_type = "string"
+    def __init__(self, agent_name): super().__init__(); self._agent = agent_name
+    def forward(self, id, status, slot_id=None, react_steps=None):
+        payload = {"id": id, "status": status}
+        if slot_id:     payload["slot_id"]     = slot_id
+        if react_steps: payload["react_steps"] = react_steps
+        r = _sync_post("kanban", "/api/move", payload)
+        return f"moved {id} &#x2192; {status}" if r else "kanban_move failed"
+class KanbanCreateTool(Tool):
+    name        = "kanban_create"
+    description = "Create a new task on the KANBAN board and assign it to an agent."
+    inputs      = {
+        "title":       {"type":"string","description":"Short task title"},
+        "body":        {"type":"string","description":"Full task description with context"},
+        "priority":    {"type":"string","description":"low | medium | high | critical"},
+        "agent":       {"type":"string","description":"Agent to assign task to"},
+        "est_minutes": {"type":"integer","description":"Estimated completion minutes", "nullable":True},
+    }
+    output_type = "string"
+    def __init__(self, agent_name): super().__init__(); self._agent = agent_name
+    def forward(self, title, body, priority="medium", agent=None, est_minutes=None):
+        payload = {"title": title, "body": body, "priority": priority,
+                   "agent": agent or self._agent, "type": "ai"}
+        if est_minutes: payload["est_minutes"] = est_minutes
+        r = _sync_post("kanban", "/api/tasks", payload)
+        return f"created task id={r.get('id','?')}" if r else "kanban_create failed"
+class VaultExecTool(Tool):
+    name        = "vault_exec"
+    description = (
+        "Execute code in VAULT workspace. Runtimes: python3, bash, node, npm, pip, git. "
+        "IMPORTANT: cwd must be one of: code, reports, scratch, shared. "
+        "Bash and git commands that are destructive require human approval."
+    )
+    inputs      = {
+        "runtime": {"type":"string","description":"python3 | bash | node | npm | pip | git"},
+        "code":    {"type":"string","description":"Code or command to execute"},
+        "cwd":     {"type":"string","description":"Working directory: code | reports | scratch | shared", "nullable":True},
+    }
+    output_type = "string"
+    def __init__(self, agent_name): super().__init__(); self._agent = agent_name
+    def forward(self, runtime, code, cwd="scratch"):
+        _VALID_CWDS = {"code","reports","scratch","shared",""}
+        safe_cwd = (cwd or "scratch").strip("/") if (cwd or "scratch").strip("/") in _VALID_CWDS else "scratch"
+        # Approval gate for risky bash/git
+        if runtime in RISKY_RUNTIMES or any(p in code for p in RISKY_PATTERNS):
+            approved, reason = _approve_sync(self._agent, "vault_exec",
+                                             {"runtime": runtime, "code": code[:200], "cwd": safe_cwd},
+                                             risk="high")
+            if not approved:
+                return f"vault_exec BLOCKED by approval gate: {reason}"
+        r = _sync_post("vault", "/api/exec", {
+            "runtime": runtime, "code": code, "cwd": safe_cwd, "timeout": 30})
+        if not r: return "vault_exec failed"
+        out = _harness_scan_sync(self._agent, "vault_exec",
+                                 f"exit={r.get('exit_code')} ms={r.get('ms')}\n{r.get('output','')[:500]}")
+        return out
+class VaultReadTool(Tool):
+    name        = "vault_read"
+    description = "Read a file from the VAULT workspace."
+    inputs      = {"path": {"type":"string","description":"File path relative to workspace"}}
+    output_type = "string"
+    def __init__(self, agent_name): super().__init__(); self._agent = agent_name
+    def forward(self, path):
+        r = _sync_get("vault", "/api/read", {"path": path})
+        return r.get("content","")[:800] if r else "vault_read failed"
+class VaultWriteTool(Tool):
+    name        = "vault_write"
+    description = "Write a file to the VAULT workspace. Always write complete file content."
+    inputs      = {
+        "path":    {"type":"string","description":"File path, e.g. code/script.py"},
+        "content": {"type":"string","description":"Complete file content"},
+    }
+    output_type = "string"
+    def __init__(self, agent_name): super().__init__(); self._agent = agent_name
+    def forward(self, path, content):
+        r = _sync_post("vault", "/api/write", {"path": path, "content": content, "agent": self._agent})
+        return f"written: {path} snap={r.get('snapshot',{}).get('id','?')}" if r else "vault_write failed"
+class ForgeSearchTool(Tool):
+    name        = "forge_search"
+    description = "Search for skills and tools in the FORGE skill registry."
+    inputs      = {"query": {"type":"string","description":"Search query"}}
+    output_type = "string"
+    def __init__(self, agent_name): super().__init__(); self._agent = agent_name
+    def forward(self, query):
+        import json as _json
+        r = _sync_get("forge", "/api/v1/skills", {"q": query, "limit": 5})
+        items = r if isinstance(r, list) else (r.get("skills",[]) if r else [])
+        return _json.dumps([{"name":s.get("name"),"description":s.get("description","")[:100]} for s in items[:5]])
+class SlotReserveTool(Tool):
+    name        = "slot_reserve"
+    description = "Reserve the RTX 5090 GPU slot before a long task. Returns slot_id or queue position."
+    inputs      = {
+        "task_id":     {"type":"string","description":"Task identifier"},
+        "est_minutes": {"type":"integer","description":"Estimated minutes needed (1-60)"},
+        "priority":    {"type":"integer","description":"Priority 1=critical 5=normal 10=low", "nullable":True},
+    }
+    output_type = "string"
+    def __init__(self, agent_name): super().__init__(); self._agent = agent_name
+    def forward(self, task_id, est_minutes=5, priority=5):
+        r = _sync_post("nexus", "/api/slot/reserve", {
+            "agent": self._agent, "task_id": task_id,
+            "est_minutes": est_minutes, "priority": priority or 5})
+        if not r: return "slot_reserve failed"
+        status = r.get("status","unknown")
+        if status == "active":
+            return f"slot ACTIVE slot_id={r['slot_id']} expires_in={est_minutes}min"
+        elif status == "queued":
+            return f"slot QUEUED position={r.get('queue_position')} eta={r.get('eta_seconds',0)}s holder={r.get('current_holder','?')} — wait or use local_cpu"
+        return f"slot status={status}"
+class SlotReleaseTool(Tool):
+    name        = "slot_release"
+    description = "Release the GPU slot when done. Always call after finishing to unblock other agents."
+    inputs      = {"slot_id": {"type":"string","description":"Slot ID from slot_reserve"}}
+    output_type = "string"
+    def __init__(self, agent_name): super().__init__(); self._agent = agent_name
+    def forward(self, slot_id):
+        r = _sync_post("nexus", "/api/slot/release", {"slot_id": slot_id})
+        return f"slot released (held {r.get('held_seconds',0)}s)" if r and r.get("released") else "slot_release failed"
+class SlotStatusTool(Tool):
+    name        = "slot_status"
+    description = "Check who holds the GPU slot and current queue. Use before slot_reserve."
+    inputs      = {}
+    output_type = "string"
+    def __init__(self, agent_name): super().__init__(); self._agent = agent_name
+    def forward(self):
+        r = _sync_get("nexus", "/api/slot/status", {})
+        if not r: return "slot_status failed"
+        active = r.get("active")
+        queue  = r.get("queue", [])
+        result = f"OCCUPIED by {active['agent']} expires_in={int(active.get('expires_at',0)-time.time())}s" if active else "FREE"
+        if queue: result += f" | Queue: {[q['agent'] for q in queue]}"
+        return result
+class TriggerAgentTool(Tool):
+    name        = "trigger_agent"
+    description = "Wake another agent immediately with a task. Always call after delegate to ensure pickup."
+    inputs      = {
+        "agent":   {"type":"string","description":"Agent name to wake"},
+        "content": {"type":"string","description":"Task content or context for the agent"},
+    }
+    output_type = "string"
+    def __init__(self, agent_name): super().__init__(); self._agent = agent_name
+    def forward(self, agent, content=""):
+        r = _sync_post("pulse", f"/api/trigger/{agent}",
+                       {"from": self._agent, "content": content or f"Task delegated by {self._agent}"})
+        return f"triggered {agent}" if r else f"trigger queued for {agent} (heartbeat pickup)"
+class WebSearchTool(Tool):
+    name        = "web_search"
+    description = "Search the web via Brave Search API. Returns titles, URLs and snippets."
+    inputs      = {
+        "query": {"type":"string","description":"Search query"},
+        "count": {"type":"integer","description":"Number of results 1-10, default 5", "nullable":True},
+    }
+    output_type = "string"
+    def __init__(self, agent_name): super().__init__(); self._agent = agent_name
+    def forward(self, query, count=5):
+        if not BRAVE_API_KEY:
+            return "web_search unavailable: BRAVE_API_KEY not set"
+        try:
+            r = httpx.get("https://api.search.brave.com/res/v1/web/search",
+                          params={"q": query, "count": min(count or 5, 10), "text_decorations": False},
+                          headers={"Accept": "application/json",
+                                   "X-Subscription-Token": BRAVE_API_KEY}, timeout=8)
+            r.raise_for_status()
+            results = r.json().get("web", {}).get("results", [])
+            if not results: return "no results"
+            lines = [f"{i}. {res.get('title','?')} — {res.get('url','')}\n   {res.get('description','')[:200]}"
+                     for i, res in enumerate(results[:count or 5], 1)]
+            return "\n\n".join(lines)
+        except Exception as e:
+            return f"web_search error: {e}"
+class FetchUrlTool(Tool):
+    name        = "fetch_url"
+    description = "Fetch a URL and return stripped text (5000 char limit). Use after web_search."
+    inputs      = {"url": {"type":"string","description":"Full URL to fetch"}}
+    output_type = "string"
+    def __init__(self, agent_name): super().__init__(); self._agent = agent_name
+    def forward(self, url):
+        try:
+            r = httpx.get(url, headers={"User-Agent": "FORGE-Agent/1.0"},
+                          timeout=10, follow_redirects=True)
+            r.raise_for_status()
+            ct = r.headers.get("content-type", "")
+            text = re.sub(r"<[^>]+>", " ", r.text) if "html" in ct else r.text
+            text = re.sub(r"\s{2,}", " ", text).strip()
+            return text[:5000] + ("&#x2026;[truncated]" if len(text) > 5000 else "")
+        except Exception as e:
+            return f"fetch_url error: {e}"
+# ── FORGE OpenAI-compatible model (NEXUS backend) ───────────────────
+def build_forge_model(cost_mode: str = "balanced") -> object | None:
+    """Build smolagents model pointing at NEXUS (OpenAI-compatible)."""
+    if not SMOLAGENTS_OK:
+        return None
+    nexus_url  = SPACES.get("nexus", "")
+    model_name = {
+        "cheap":    "nexus-fast",
+        "balanced": "nexus-auto",
+        "best":     "nexus-best",
+    }.get(cost_mode, "nexus-auto")
+    try:
+        from smolagents import OpenAIServerModel
+        return OpenAIServerModel(
+            model_id   = model_name,
+            api_base   = nexus_url + "/v1",
+            api_key    = os.environ.get("NEXUS_API_KEY", "forge-internal"),
+        )
     except Exception as e:
+        log.warning(f"[SMOLAGENTS] OpenAIServerModel failed: {e}")
+        return None
+def build_agent_tools(agent_name: str) -> list:
+    """Instantiate all FORGE tools for the given agent."""
+    return [
+        RelaySendTool(agent_name),
+        MemorySearchTool(agent_name),
+        MemoryStoreTool(agent_name),
+        KanbanListTool(agent_name),
+        KanbanMoveTool(agent_name),
+        KanbanCreateTool(agent_name),
+        VaultExecTool(agent_name),
+        VaultReadTool(agent_name),
+        VaultWriteTool(agent_name),
+        ForgeSearchTool(agent_name),
+        SlotReserveTool(agent_name),
+        SlotReleaseTool(agent_name),
+        SlotStatusTool(agent_name),
+        TriggerAgentTool(agent_name),
+        WebSearchTool(agent_name),
+        FetchUrlTool(agent_name),
+    ]
+# ── Step callback — trace + harness ────────────────────────────────
+def make_step_callback(agent_name: str, trace: dict):
+    """Returns a step callback that emits trace events and scans tool outputs."""
+    from smolagents.memory import ActionStep
+    def _callback(step_log, agent=None):
+        if not isinstance(step_log, ActionStep):
+            return
+        # Harness: scan tool output before LLM re-ingests
+        obs = getattr(step_log, "observations", None) or ""
+        if obs and HARNESS_URL:
+            tool_name = ""
+            if step_log.tool_calls:
+                tool_name = step_log.tool_calls[0].name if hasattr(step_log.tool_calls[0], "name") else ""
+            sanitised = _harness_scan_sync(agent_name, tool_name, str(obs))
+            if sanitised != str(obs):
+                step_log.observations = sanitised
+        # Trace
+        step_info = {
+            "step":    getattr(step_log, "step_number", len(trace["steps"])),
+            "thought": str(getattr(step_log, "model_output_message", ""))[:200],
+            "tool":    step_log.tool_calls[0].name if getattr(step_log, "tool_calls", None) else "",
+            "obs":     str(getattr(step_log, "observations", ""))[:200],
+            "error":   str(step_log.error) if getattr(step_log, "error", None) else "",
+        }
+        trace["steps"].append(step_info)
+        push_live({"type": "step", "agent": agent_name, **step_info})
+        emit_trace(agent_name, "react_step", step_info,
+                   status="error" if step_info["error"] else "ok")
+    return _callback
+# ── smolagents CodeAgent react_loop ────────────────────────────────
 async def react_loop(agent: dict, trigger_type: str, trigger_content: str) -> dict:
+    """
+    Run a smolagents CodeAgent for this agent tick.
+    The agent writes Python code to call FORGE tools — loops, conditionals,
+    multi-step composition all work naturally.
+    Falls back to ToolCallingAgent if CodeAgent unavailable.
+    """
     name      = agent["name"]
     cost_mode = agent.get("cost_mode", "balanced")
     max_steps = agent.get("max_react_steps", REACT_MAX)
+    trace = {"agent": name, "trigger": trigger_type,
+             "started": int(time.time()), "steps": [], "result": "", "ok": True}
+    if not SMOLAGENTS_OK:
+        trace["result"] = "smolagents not installed"
+        trace["ok"]     = False
+        return trace
+    # Fetch persona from agent-prompts (cached)
+    persona_data  = get_agent_persona(name)
+    system_prompt = persona_data.get("system_prompt", agent.get("persona", "You are a helpful AI agent."))
+    max_steps     = persona_data.get("max_steps", max_steps)
+    # Load soul.md and user.md for context injection
+    soul_ctx = ""
+    try:
+        sv = _sync_get("vault", "/api/read", {"path": "soul.md"})
+        if sv: soul_ctx = sv.get("content", "")[:500]
+    except Exception: pass
+    user_ctx = ""
+    try:
+        uv = _sync_get("vault", "/api/read", {"path": "user.md"})
+        if uv: user_ctx = uv.get("content", "")[:300]
+    except Exception: pass
+    # Auto-load skills from FORGE at ReAct start
+    skills_ctx = ""
+    try:
+        skills = _sync_get("forge", "/api/v1/skills", {"agent": name, "limit": 5})
+        if skills:
+            items = skills if isinstance(skills, list) else skills.get("skills", [])
+            skills_ctx = "AVAILABLE SKILLS:\n" + "\n".join(
+                f"  - {s.get('name')}: {s.get('description','')[:80]}" for s in items[:5])
+    except Exception: pass
+    full_system = "\n\n".join(filter(None, [system_prompt, soul_ctx, skills_ctx]))
+    task = (
+        f"TRIGGER: {trigger_type}\n"
+        f"CONTEXT: {trigger_content}\n"
+        + (f"OPERATOR: {user_ctx}\n" if user_ctx else "")
+        + f"UTC: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M')}\n"
+        "Execute your assigned task using the available tools."
+    )
+    push_live({"type": "react_start", "agent": name, "trigger": trigger_type})
+    def _run_agent():
+        """Sync function — runs in thread pool via asyncio.to_thread."""
+        model = build_forge_model(cost_mode)
+        if model is None:
+            return {"ok": False, "result": "NEXUS model unavailable", "steps": []}
+        tools = build_agent_tools(name)
+        try:
+            from smolagents import ToolCallingAgent
+            agent_obj = ToolCallingAgent(
+                tools      = tools,
+                model      = model,
+                max_steps  = max_steps,
+                instructions = full_system,
+                verbosity_level = LogLevel.WARNING,
+                step_callbacks  = [make_step_callback(name, trace)],
+                name        = name,
+            )
+        except Exception as e:
+            log.error(f"[SMOLAGENTS] agent init failed: {e}")
+            return {"ok": False, "result": str(e), "steps": []}
+        try:
+            result = agent_obj.run(task, reset=True)
+            return {"ok": True, "result": str(result)[:500], "steps": trace["steps"]}
+        except Exception as e:
+            log.error(f"[SMOLAGENTS] agent.run failed: {e}")
+            return {"ok": False, "result": str(e), "steps": trace["steps"]}
+    try:
+        outcome = await asyncio.to_thread(_run_agent)
+    except Exception as e:
+        outcome = {"ok": False, "result": str(e), "steps": []}
+    trace["ok"]     = outcome["ok"]
+    trace["result"] = outcome["result"]
+    trace["ms"]     = int((time.time() - trace["started"]) * 1000)
+    # Emit final trace to TRACE + LEARN
+    emit_trace(name, "react_complete",
+               {"result": trace["result"], "steps": len(trace["steps"]),
+                "trigger": trigger_type, "ms": trace["ms"]},
+               status="ok" if trace["ok"] else "error")
+    push_live({"type": "react_done", "agent": name,
+               "ok": trace["ok"], "ms": trace["ms"], "steps": len(trace["steps"])})
     return trace
 # ── Heartbeat engine ───────────────────────────────────────────────
 scheduler = AsyncIOScheduler(timezone="UTC")
+# ── Heartbeat engine ───────────────────────────────────────────────
+scheduler = AsyncIOScheduler(timezone="UTC")
 async def agent_tick(agent_name: str, trigger_type: str = "heartbeat", content: str = ""):
     agents = load_json(AGENTS_FILE, [])