Spaces:

Sanyam400
/

PraisonAI

Paused

App Files Community

Sanyam400 committed 28 days ago

Commit

ec32184

verified ·

1 Parent(s): 8813d72

Update app/agent_system.py

Browse files

Files changed (1) hide show

app/agent_system.py +400 -212

app/agent_system.py CHANGED Viewed

@@ -1,46 +1,160 @@
 import os
 import json
 import asyncio
 import traceback
 from openai import AsyncOpenAI
 from typing import AsyncGenerator
 from docs_context import PRAISONAI_DOCS
 LONGCAT_BASE_URL = "https://api.longcat.chat/openai/v1"
-MODEL = "LongCat-Flash-Lite"
-def build_orchestrator_system():
-    return f"""You are the Main Orchestrator Agent for PraisonChat — a powerful AI system that solves complex tasks by dynamically creating specialized sub-agents, each with custom-built tools.
 {PRAISONAI_DOCS}
 ## Your Job
 When a user sends a task:
 1. Analyze what kind of work is needed
-2. Design specialized sub-agents, each focused on one responsibility
-3. For each sub-agent, design the exact Python tools they need
-4. Return a structured execution plan as JSON
-## Response Format
-Always respond with valid JSON in this exact structure:
 {{
-  "task_analysis": "Clear explanation of what needs to be done and why",
-  "needs_sub_agents": true,
   "sub_agents": [
     {{
       "name": "AgentName",
       "role": "Specific professional role",
       "goal": "What this agent achieves",
-      "backstory": "Brief agent background/expertise",
       "tools": [
         {{
           "name": "tool_function_name",
           "description": "What this tool does",
           "parameters": "param1: str, param2: int = 10",
           "return_type": "str",
-          "docstring": "Detailed docstring explaining the tool",
-          "implementation": "Python code body (indented with 4 spaces, no def line)"
         }}
       ],
       "task_description": "Exact task for this agent to perform",
@@ -48,313 +162,387 @@ Always respond with valid JSON in this exact structure:
     }}
   ],
   "execution_order": ["AgentName1", "AgentName2"],
-  "synthesis_instruction": "How to combine all agent results into the final answer"
 }}
 ## Rules
-- Create sub-agents ONLY when needed. Simple questions = no sub-agents (set needs_sub_agents: false)
-- Tools must be real, executable Python code
-- Each tool implementation must be complete and working
-- Maximum 4 sub-agents per task
-- Tool implementations must not import anything not in Python stdlib
-- Keep tool implementations under 30 lines each
 """
-def build_tool_function(tool_spec: dict) -> callable:
-    """Dynamically create a Python function from a tool spec."""
-    name = tool_spec["name"]
-    params = tool_spec.get("parameters", "input: str")
-    return_type = tool_spec.get("return_type", "str")
-    docstring = tool_spec.get("docstring", "Auto-generated tool")
-    implementation = tool_spec.get("implementation", "return str(input)")
-    # Build the function source
-    func_source = f"""
-def {name}({params}) -> {return_type}:
-    \"\"\"{docstring}\"\"\"
-    {chr(10).join('    ' + line if line.strip() else '' for line in implementation.strip().splitlines())}
-"""
-    namespace = {}
     try:
-        exec(func_source, namespace)
-        return namespace[name]
     except Exception as e:
-        # Return a safe fallback function
-        def fallback_tool(**kwargs) -> str:
-            return f"Tool '{name}' could not be created: {e}. Using fallback."
-        fallback_tool.__name__ = name
-        fallback_tool.__doc__ = docstring
-        return fallback_tool
 class AgentOrchestrator:
     def __init__(self):
-        self._client_cache = {}
-    def get_client(self, api_key: str) -> AsyncOpenAI:
-        if api_key not in self._client_cache:
-            self._client_cache[api_key] = AsyncOpenAI(
                 api_key=api_key,
-                base_url=LONGCAT_BASE_URL
             )
-        return self._client_cache[api_key]
-    async def plan_task(self, client: AsyncOpenAI, user_message: str, history: list) -> dict:
-        """Ask the orchestrator to plan the task."""
         messages = [{"role": "system", "content": build_orchestrator_system()}]
-        # Add recent history for context
-        for msg in history[-6:]:
-            messages.append({"role": msg["role"], "content": str(msg["content"])[:2000]})
         messages.append({
             "role": "user",
-            "content": f"Plan the execution for this task: {user_message}"
         })
-        response = await client.chat.completions.create(
-            model=MODEL,
             messages=messages,
             max_tokens=6000,
-            temperature=0.2,
         )
-        raw = response.choices[0].message.content.strip()
-        # Strip markdown code fences if present
-        if raw.startswith("```"):
-            raw = raw.split("```")[1]
-            if raw.startswith("json"):
-                raw = raw[4:]
-        raw = raw.strip()
         try:
             return json.loads(raw)
         except Exception:
             return {
                 "task_analysis": "Direct response",
                 "needs_sub_agents": False,
                 "sub_agents": [],
                 "execution_order": [],
-                "synthesis_instruction": "Respond directly"
             }
-    async def run_sub_agent(
-        self,
-        client: AsyncOpenAI,
-        agent_spec: dict,
-        context_so_far: str
-    ) -> str:
-        """Run a single sub-agent with its tools."""
-        tool_descriptions = ""
-        tools_created = []
         tool_errors = []
-        for tool_spec in agent_spec.get("tools", []):
-            fn = build_tool_function(tool_spec)
-            tools_created.append(fn.__name__)
-            tool_descriptions += f"\n- {fn.__name__}: {tool_spec.get('description', '')}"
-        system_prompt = f"""You are {agent_spec['name']}.
-Role: {agent_spec['role']}
-Goal: {agent_spec['goal']}
-Backstory: {agent_spec.get('backstory', '')}
-You have access to these custom tools (simulate their usage in your reasoning):
-{tool_descriptions if tool_descriptions else "No specialized tools - use your knowledge directly."}
 Context from previous agents:
-{context_so_far if context_so_far else "You are the first agent running."}
-Execute your task thoroughly. Show your reasoning and tool usage step by step.
-Expected output: {agent_spec.get('expected_output', 'Detailed results')}"""
-        response = await client.chat.completions.create(
-            model=MODEL,
             messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": agent_spec["task_description"]}
             ],
             max_tokens=12000,
             temperature=0.7,
         )
-        return response.choices[0].message.content
-    async def synthesize(
-        self,
-        client: AsyncOpenAI,
-        user_message: str,
-        agent_results: dict,
-        synthesis_instruction: str
-    ) -> AsyncGenerator[str, None]:
-        """Stream the final synthesized response."""
-        results_text = "\n\n".join([
-            f"=== {name} ===\n{result}"
-            for name, result in agent_results.items()
-        ])
-        system_prompt = f"""You are the Main Orchestrator synthesizing results from specialized sub-agents.
 Synthesis instruction: {synthesis_instruction}
 Sub-agent results:
-{results_text}
-Provide a comprehensive, well-structured final response to the user.
-Use markdown formatting. Be thorough but concise."""
         stream = await client.chat.completions.create(
-            model=MODEL,
             messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_message}
             ],
             max_tokens=16000,
             temperature=0.7,
-            stream=True
         )
         async for chunk in stream:
-            delta = chunk.choices[0].delta
-            if delta.content:
-                yield delta.content
-    async def direct_response(
-        self,
-        client: AsyncOpenAI,
-        user_message: str,
-        history: list
-    ) -> AsyncGenerator[str, None]:
-        """Stream a direct response without sub-agents."""
         messages = [{
             "role": "system",
-            "content": "You are PraisonChat, a powerful AI assistant. Respond helpfully using markdown formatting."
         }]
-        for msg in history[-10:]:
-            messages.append({"role": msg["role"], "content": str(msg["content"])[:3000]})
         messages.append({"role": "user", "content": user_message})
         stream = await client.chat.completions.create(
-            model=MODEL,
             messages=messages,
             max_tokens=16000,
             temperature=0.7,
-            stream=True
         )
         async for chunk in stream:
-            delta = chunk.choices[0].delta
-            if delta.content:
-                yield delta.content
-    async def stream_response(
-        self,
-        user_message: str,
-        history: list,
-        api_key: str
-    ) -> AsyncGenerator[str, None]:
-        """Main entry point — streams SSE-formatted events."""
         def emit(data: dict) -> str:
             return json.dumps(data)
-        client = self.get_client(api_key)
         try:
-            # ── Step 1: Plan ─────────────────────────────────────────────────
-            yield emit({"type": "step", "text": "🧠 Main Agent analyzing your task..."})
             await asyncio.sleep(0)
-            plan = await self.plan_task(client, user_message, history)
-            yield emit({
-                "type": "step",
-                "text": f"📋 {plan.get('task_analysis', 'Planning execution...')}"
-            })
             await asyncio.sleep(0)
             sub_agents = plan.get("sub_agents", [])
-            needs_sub_agents = plan.get("needs_sub_agents", bool(sub_agents))
-            # ── Step 2: Sub-agents or direct ─────────────────────────────────
-            if needs_sub_agents and sub_agents:
-                yield emit({
-                    "type": "step",
-                    "text": f"🤖 Spawning {len(sub_agents)} specialized sub-agent(s)..."
-                })
-                for agent_spec in sub_agents:
-                    tool_names = [t["name"] for t in agent_spec.get("tools", [])]
                     yield emit({
                         "type": "agent_created",
-                        "name": agent_spec["name"],
-                        "role": agent_spec["role"],
-                        "tools": tool_names
                     })
                     await asyncio.sleep(0.05)
-                # Execute each sub-agent
                 context_so_far = ""
                 agent_results = {}
-                execution_order = plan.get("execution_order", [a["name"] for a in sub_agents])
-                for agent_name in execution_order:
-                    agent_spec = next(
-                        (a for a in sub_agents if a["name"] == agent_name), None
-                    )
-                    if not agent_spec:
                         continue
-                    yield emit({
-                        "type": "step",
-                        "text": f"⚡ {agent_name} working on: {agent_spec['task_description'][:100]}..."
-                    })
                     await asyncio.sleep(0)
                     try:
-                        result = await self.run_sub_agent(client, agent_spec, context_so_far)
-                        agent_results[agent_name] = result
-                        context_so_far += f"\n\n{agent_name} completed: {result[:600]}"
-                        yield emit({
-                            "type": "agent_result",
-                            "name": agent_name,
-                            "preview": result[:300] + ("..." if len(result) > 300 else "")
-                        })
                     except Exception as e:
-                        yield emit({
-                            "type": "step",
-                            "text": f"⚠️ {agent_name} encountered an issue: {str(e)[:100]}"
-                        })
-                        agent_results[agent_name] = f"Error: {e}"
-                # Synthesize
-                yield emit({"type": "step", "text": "✨ Synthesizing final response..."})
-                yield emit({"type": "response_start"})
-                await asyncio.sleep(0)
-                async for token in self.synthesize(
-                    client, user_message, agent_results,
-                    plan.get("synthesis_instruction", "Combine all results into a clear response")
-                ):
                     yield emit({"type": "token", "content": token})
             else:
                 # Direct response
-                yield emit({"type": "step", "text": "💬 Generating response..."})
-                yield emit({"type": "response_start"})
-                await asyncio.sleep(0)
-                async for token in self.direct_response(client, user_message, history):
                     yield emit({"type": "token", "content": token})
             yield emit({"type": "done"})
         except Exception as e:
-            tb = traceback.format_exc()
-            yield emit({"type": "error", "message": str(e), "detail": tb[:500]})
-# Singleton
-orchestrator = AgentOrchestrator()

 import os
 import json
 import asyncio
+import datetime
 import traceback
+import subprocess
+import tempfile
+import base64
+import io
 from openai import AsyncOpenAI
 from typing import AsyncGenerator
 from docs_context import PRAISONAI_DOCS
 LONGCAT_BASE_URL = "https://api.longcat.chat/openai/v1"
+MODEL_MAP = {
+    "LongCat-Flash-Lite":         "LongCat-Flash-Lite",
+    "LongCat-Flash-Chat":         "LongCat-Flash-Chat",
+    "LongCat-Flash-Thinking-2601":"LongCat-Flash-Thinking-2601",
+}
+DEFAULT_MODEL = "LongCat-Flash-Lite"
+# ── Built-in tools (always available to every agent) ────────────────────────
+def get_current_datetime() -> str:
+    now = datetime.datetime.now()
+    utc = datetime.datetime.utcnow()
+    return (f"Local: {now.strftime('%A, %B %d, %Y at %I:%M:%S %p')}\n"
+            f"UTC:   {utc.strftime('%Y-%m-%d %H:%M:%S')} UTC\n"
+            f"Unix:  {int(now.timestamp())}")
+def calculate_math(expression: str) -> str:
+    try:
+        safe_chars = set("0123456789+-*/.() %**^")
+        clean = expression.replace("^", "**")
+        if not all(c in safe_chars or c.isspace() for c in clean):
+            return "Error: unsafe characters in expression"
+        result = eval(clean, {"__builtins__": {}}, {})
+        return f"Result: {result}"
+    except Exception as e:
+        return f"Error: {e}"
+def run_python_code(code: str) -> str:
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
+        f.write(code)
+        tmp = f.name
+    try:
+        result = subprocess.run(
+            ['python3', tmp], capture_output=True, text=True, timeout=15
+        )
+        out = (result.stdout + result.stderr).strip()
+        return out or "(no output)"
+    except subprocess.TimeoutExpired:
+        return "Error: execution timed out after 15s"
+    except Exception as e:
+        return f"Error: {e}"
+    finally:
+        try:
+            os.unlink(tmp)
+        except Exception:
+            pass
+def create_voice_response(text: str) -> str:
+    try:
+        from gtts import gTTS
+        tts = gTTS(text=text, lang='en', slow=False)
+        buf = io.BytesIO()
+        tts.write_to_fp(buf)
+        buf.seek(0)
+        b64 = base64.b64encode(buf.read()).decode('utf-8')
+        return f"AUDIO_B64:{b64}"
+    except Exception as e:
+        return f"VOICE_FALLBACK:{text[:2000]}"
+def search_information(query: str) -> str:
+    # Simple stub - returns a helpful message since we don't have a search API key
+    # The agent can use its training knowledge to answer
+    return f"Searching for: {query}\n[Search tool: returning from internal knowledge - agent should answer from training data]"
+BUILTIN_TOOLS = {
+    "get_current_datetime": get_current_datetime,
+    "calculate_math":       calculate_math,
+    "run_python_code":      run_python_code,
+    "create_voice_response":create_voice_response,
+    "search_information":   search_information,
+}
+BUILTIN_TOOLS_DOC = """
+## Always-Available Built-in Tools
+These tools exist in every agent — no need to create them:
+- get_current_datetime() -> str
+  Returns the exact current date and time (local + UTC + unix timestamp).
+  USE THIS whenever user asks about date, time, day, etc.
+- calculate_math(expression: str) -> str
+  Evaluates math: "2 + 2", "100 * 3.14", "2**10", etc.
+- run_python_code(code: str) -> str
+  Executes Python code in a sandbox. Returns stdout + stderr.
+  Use for data processing, file operations, complex calculations.
+- create_voice_response(text: str) -> str
+  Converts text to MP3 audio via gTTS. Returns AUDIO_B64:<base64>.
+  USE THIS when user explicitly asks for voice/audio/spoken response.
+- search_information(query: str) -> str
+  Queries knowledge base. Use for research tasks.
+CRITICAL RULES:
+1. If user asks "what time is it" / "what date" / "what day" -> use get_current_datetime
+2. If user asks for "voice" / "speak" / "audio" / "say it" -> use create_voice_response
+3. NEVER say "I cannot" for tasks these tools handle
+4. Always prefer tools over saying you lack capability
+"""
+def inject_datetime_context() -> str:
+    now = datetime.datetime.now()
+    return (f"[System context: Current datetime = "
+            f"{now.strftime('%A, %B %d, %Y %I:%M:%S %p')} local time | "
+            f"{datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC]\n")
+def build_orchestrator_system() -> str:
+    return f"""{inject_datetime_context()}
+You are the Main Orchestrator Agent for PraisonChat — a powerful AI that solves tasks by
+dynamically creating specialized sub-agents with custom-built Python tools.
 {PRAISONAI_DOCS}
+{BUILTIN_TOOLS_DOC}
 ## Your Job
 When a user sends a task:
 1. Analyze what kind of work is needed
+2. Use built-in tools directly for simple things (datetime, math, voice, code)
+3. For complex tasks, design sub-agents each focused on one responsibility
+4. For each sub-agent, design exact Python tools they need
+5. Return a structured JSON execution plan
+## Response Format — valid JSON ONLY, no markdown fences:
 {{
+  "task_analysis": "Clear explanation of what needs to be done",
+  "needs_sub_agents": true/false,
+  "builtin_tools_to_use": ["get_current_datetime", "calculate_math"],
   "sub_agents": [
     {{
       "name": "AgentName",
       "role": "Specific professional role",
       "goal": "What this agent achieves",
+      "backstory": "Brief agent background",
       "tools": [
         {{
           "name": "tool_function_name",
           "description": "What this tool does",
           "parameters": "param1: str, param2: int = 10",
           "return_type": "str",
+          "implementation": "# Python code body (4-space indent, use # comments not triple quotes)\\n    result = do_something(param1)\\n    return str(result)"
         }}
       ],
       "task_description": "Exact task for this agent to perform",
     }}
   ],
   "execution_order": ["AgentName1", "AgentName2"],
+  "synthesis_instruction": "How to combine all agent results into the final answer",
+  "output_format": "text"
 }}
+output_format options: "text", "voice", "code", "table", "json"
 ## Rules
+- Simple questions (time, math, quick facts) = no sub-agents, use builtin_tools_to_use
+- Tool implementations: use # comments ONLY, never triple-quoted strings inside code
+- Max 4 sub-agents per task
+- Tool code must be valid Python, no imports not in stdlib
+- If voice requested: set output_format to "voice" AND use create_voice_response tool
 """
+def build_tool_function(spec: dict):
+    name = spec.get("name", "unnamed_tool")
+    params = spec.get("parameters", "input: str")
+    rtype = spec.get("return_type", "str")
+    impl = spec.get("implementation", "return str(input)")
+    lines = impl.strip().splitlines()
+    indented = "\n".join("    " + l if l.strip() else "" for l in lines)
+    src = f"def {name}({params}) -> {rtype}:\n{indented}\n"
+    ns = {}
+    try:
+        exec(src, ns)
+        fn = ns[name]
+        fn.__doc__ = spec.get("description", "")
+        return fn, None
+    except Exception as e:
+        def fallback(**kwargs) -> str:
+            return f"[Tool '{name}' build error: {e}]"
+        fallback.__name__ = name
+        return fallback, str(e)
+def call_builtin_tool(name: str, agent_task: str) -> str:
+    fn = BUILTIN_TOOLS.get(name)
+    if not fn:
+        return f"Unknown built-in tool: {name}"
     try:
+        if name == "get_current_datetime":
+            return fn()
+        elif name == "calculate_math":
+            return fn(agent_task)
+        elif name == "run_python_code":
+            return fn(agent_task)
+        elif name == "create_voice_response":
+            return fn(agent_task)
+        elif name == "search_information":
+            return fn(agent_task)
+        else:
+            return fn()
     except Exception as e:
+        return f"Tool error: {e}"
 class AgentOrchestrator:
     def __init__(self):
+        self._clients: dict = {}
+    def get_client(self, api_key: str, model: str = DEFAULT_MODEL) -> AsyncOpenAI:
+        key = f"{api_key}:{model}"
+        if key not in self._clients:
+            self._clients[key] = AsyncOpenAI(
                 api_key=api_key,
+                base_url=LONGCAT_BASE_URL,
             )
+        return self._clients[key]
+    async def plan_task(self, client, user_message: str, history: list, model: str) -> dict:
         messages = [{"role": "system", "content": build_orchestrator_system()}]
+        for m in history[-6:]:
+            messages.append({"role": m["role"], "content": str(m.get("content", ""))[:2000]})
         messages.append({
             "role": "user",
+            "content": f"Plan execution for: {user_message}"
         })
+        resp = await client.chat.completions.create(
+            model=model,
             messages=messages,
             max_tokens=6000,
+            temperature=0.1,
         )
+        raw = resp.choices[0].message.content.strip()
+        # Strip possible markdown fences
+        if "```" in raw:
+            parts = raw.split("```")
+            for p in parts:
+                p2 = p.strip()
+                if p2.startswith("json"):
+                    p2 = p2[4:].strip()
+                if p2.startswith("{"):
+                    raw = p2
+                    break
         try:
             return json.loads(raw)
         except Exception:
             return {
                 "task_analysis": "Direct response",
                 "needs_sub_agents": False,
+                "builtin_tools_to_use": [],
                 "sub_agents": [],
                 "execution_order": [],
+                "synthesis_instruction": "Respond directly",
+                "output_format": "text",
             }
+    async def run_sub_agent(self, client, spec: dict, context: str, model: str) -> dict:
+        tools_built = []
         tool_errors = []
+        tool_descriptions = "\n".join(
+            f"- {t['name']}: {t.get('description','')}" for t in spec.get("tools", [])
+        )
+        # Build custom tools
+        for t in spec.get("tools", []):
+            fn, err = build_tool_function(t)
+            if err:
+                tool_errors.append(f"{t['name']}: {err}")
+            tools_built.append({"name": t["name"], "fn": fn, "desc": t.get("description", ""), "error": err})
+        system = f"""{inject_datetime_context()}
+You are {spec['name']}, a specialized AI agent.
+Role: {spec['role']}
+Goal: {spec['goal']}
+Backstory: {spec.get('backstory', '')}
+Built-in tools always available:
+{BUILTIN_TOOLS_DOC}
+Custom tools for this task:
+{tool_descriptions if tool_descriptions else 'None — use built-in tools and your knowledge'}
 Context from previous agents:
+{context if context else 'You are the first agent.'}
+Execute your task. Show reasoning and tool usage step by step.
+Expected output: {spec.get('expected_output', 'Detailed results')}"""
+        resp = await client.chat.completions.create(
+            model=model,
             messages=[
+                {"role": "system", "content": system},
+                {"role": "user", "content": spec["task_description"]},
             ],
             max_tokens=12000,
             temperature=0.7,
         )
+        result = resp.choices[0].message.content
+        return {
+            "result": result,
+            "tools_built": [{"name": t["name"], "desc": t["desc"], "error": t.get("error")} for t in tools_built],
+            "tool_errors": tool_errors,
+        }
+    async def synthesize(self, client, user_message: str, agent_results: dict,
+                         synthesis_instruction: str, output_format: str, model: str) -> AsyncGenerator:
+        combined = "\n\n".join(
+            f"=== {name} ===\n{r['result']}" for name, r in agent_results.items()
+        )
+        voice_note = ""
+        if output_format == "voice":
+            voice_note = "\nIMPORTANT: The user wants a voice response. End your message with: [VOICE_RESPONSE: <the exact text to speak>]"
+        system = f"""{inject_datetime_context()}
+You are the Main Orchestrator. Synthesize results from sub-agents into a final response.
 Synthesis instruction: {synthesis_instruction}
+Output format: {output_format}
+{voice_note}
 Sub-agent results:
+{combined}
+Provide a comprehensive, well-structured markdown response."""
         stream = await client.chat.completions.create(
+            model=model,
             messages=[
+                {"role": "system", "content": system},
+                {"role": "user", "content": user_message},
             ],
             max_tokens=16000,
             temperature=0.7,
+            stream=True,
         )
         async for chunk in stream:
+            c = chunk.choices[0].delta.content
+            if c:
+                yield c
+    async def direct_response(self, client, user_message: str, history: list,
+                               builtin_tools: list, output_format: str, model: str) -> AsyncGenerator:
+        # Execute builtin tools first
+        tool_results = {}
+        for tool_name in (builtin_tools or []):
+            if tool_name in BUILTIN_TOOLS:
+                tool_results[tool_name] = call_builtin_tool(tool_name, user_message)
+        tool_context = ""
+        if tool_results:
+            tool_context = "\n\nTool results:\n" + "\n".join(
+                f"[{k}]: {v}" for k, v in tool_results.items()
+            )
+        voice_note = ""
+        if output_format == "voice":
+            voice_note = "\nThe user wants a voice response. End your reply with: [VOICE_RESPONSE: <text to speak>]"
         messages = [{
             "role": "system",
+            "content": (
+                f"{inject_datetime_context()}"
+                "You are PraisonChat, a powerful AI assistant. "
+                "Respond helpfully using markdown. "
+                "You have real-time access to date/time, code execution, and voice tools. "
+                "NEVER say you cannot check the time or date — you have it above."
+                f"{tool_context}{voice_note}"
+            )
         }]
+        for m in history[-10:]:
+            messages.append({"role": m["role"], "content": str(m.get("content",""))[:3000]})
         messages.append({"role": "user", "content": user_message})
         stream = await client.chat.completions.create(
+            model=model,
             messages=messages,
             max_tokens=16000,
             temperature=0.7,
+            stream=True,
         )
         async for chunk in stream:
+            c = chunk.choices[0].delta.content
+            if c:
+                yield c
+    async def stream_response(self, user_message: str, history: list,
+                               api_key: str, model: str = DEFAULT_MODEL) -> AsyncGenerator:
         def emit(data: dict) -> str:
             return json.dumps(data)
+        model = MODEL_MAP.get(model, DEFAULT_MODEL)
+        client = self.get_client(api_key, model)
         try:
+            # Step 1: Plan
+            yield emit({"type": "thinking", "text": "Analyzing your request..."})
             await asyncio.sleep(0)
+            plan = await self.plan_task(client, user_message, history, model)
+            yield emit({"type": "thinking", "text": plan.get("task_analysis", "Planning...")})
             await asyncio.sleep(0)
             sub_agents = plan.get("sub_agents", [])
+            needs_sub = plan.get("needs_sub_agents", bool(sub_agents))
+            builtin_tools = plan.get("builtin_tools_to_use", [])
+            output_format = plan.get("output_format", "text")
+            # Emit builtin tool calls
+            for bt in builtin_tools:
+                if bt in BUILTIN_TOOLS:
+                    yield emit({"type": "tool_call", "tool": bt, "builtin": True})
+                    await asyncio.sleep(0)
+                    result = call_builtin_tool(bt, user_message)
+                    is_audio = result.startswith("AUDIO_B64:")
+                    preview = "[audio data]" if is_audio else result[:200]
+                    yield emit({"type": "tool_result", "tool": bt, "result": preview, "is_audio": is_audio,
+                                "audio_b64": result[10:] if is_audio else None})
+                    await asyncio.sleep(0)
+            # Step 2: Sub-agents or direct
+            if needs_sub and sub_agents:
+                yield emit({"type": "step", "text": f"Spawning {len(sub_agents)} sub-agent(s)..."})
+                for spec in sub_agents:
+                    tool_names = [t["name"] for t in spec.get("tools", [])]
                     yield emit({
                         "type": "agent_created",
+                        "name": spec["name"],
+                        "role": spec["role"],
+                        "goal": spec.get("goal", ""),
+                        "tools": tool_names,
+                        "tool_specs": spec.get("tools", []),
                     })
                     await asyncio.sleep(0.05)
                 context_so_far = ""
                 agent_results = {}
+                order = plan.get("execution_order", [s["name"] for s in sub_agents])
+                for agent_name in order:
+                    spec = next((s for s in sub_agents if s["name"] == agent_name), None)
+                    if not spec:
                         continue
+                    yield emit({"type": "agent_working", "name": agent_name,
+                                "task": spec["task_description"][:120]})
                     await asyncio.sleep(0)
+                    # Emit tool builds
+                    for t in spec.get("tools", []):
+                        yield emit({"type": "tool_building", "agent": agent_name,
+                                    "tool": t["name"], "description": t.get("description", "")})
+                        await asyncio.sleep(0.05)
                     try:
+                        r = await self.run_sub_agent(client, spec, context_so_far, model)
+                        agent_results[agent_name] = r
+                        # Emit tool results
+                        for tb in r.get("tools_built", []):
+                            yield emit({"type": "tool_ready", "agent": agent_name,
+                                        "tool": tb["name"], "error": tb.get("error")})
+                        context_so_far += f"\n\n{agent_name}: {r['result'][:600]}"
+                        preview = r["result"][:300] + ("..." if len(r["result"]) > 300 else "")
+                        yield emit({"type": "agent_done", "name": agent_name, "preview": preview})
                     except Exception as e:
+                        yield emit({"type": "agent_error", "name": agent_name, "error": str(e)[:200]})
+                        agent_results[agent_name] = {"result": f"Error: {e}", "tools_built": [], "tool_errors": [str(e)]}
+                yield emit({"type": "step", "text": "Synthesizing final response..."})
+                yield emit({"type": "response_start", "output_format": output_format})
+                full_text = ""
+                async for token in self.synthesize(client, user_message, agent_results,
+                                                    plan.get("synthesis_instruction", ""),
+                                                    output_format, model):
+                    full_text += token
                     yield emit({"type": "token", "content": token})
+                # Handle voice in synthesized response
+                if output_format == "voice" and "[VOICE_RESPONSE:" in full_text:
+                    try:
+                        voice_text = full_text.split("[VOICE_RESPONSE:")[1].rsplit("]", 1)[0].strip()
+                        audio_result = create_voice_response(voice_text)
+                        if audio_result.startswith("AUDIO_B64:"):
+                            yield emit({"type": "audio_response", "audio_b64": audio_result[10:],
+                                        "text": voice_text})
+                        else:
+                            yield emit({"type": "voice_fallback", "text": voice_text})
+                    except Exception:
+                        pass
             else:
                 # Direct response
+                if builtin_tools:
+                    yield emit({"type": "step", "text": f"Using built-in tools: {', '.join(builtin_tools)}"})
+                else:
+                    yield emit({"type": "step", "text": "Generating response..."})
+                yield emit({"type": "response_start", "output_format": output_format})
+                full_text = ""
+                async for token in self.direct_response(client, user_message, history,
+                                                         builtin_tools, output_format, model):
+                    full_text += token
                     yield emit({"type": "token", "content": token})
+                # Handle voice in direct response
+                if output_format == "voice" or "[VOICE_RESPONSE:" in full_text:
+                    try:
+                        if "[VOICE_RESPONSE:" in full_text:
+                            voice_text = full_text.split("[VOICE_RESPONSE:")[1].rsplit("]", 1)[0].strip()
+                        else:
+                            voice_text = full_text[:1000]
+                        audio_result = create_voice_response(voice_text)
+                        if audio_result.startswith("AUDIO_B64:"):
+                            yield emit({"type": "audio_response", "audio_b64": audio_result[10:],
+                                        "text": voice_text})
+                        else:
+                            yield emit({"type": "voice_fallback", "text": voice_text})
+                    except Exception:
+                        pass
             yield emit({"type": "done"})
         except Exception as e:
+            yield emit({"type": "error", "message": str(e), "detail": traceback.format_exc()[:800]})
+orchestrator = AgentOrchestrator()