Spaces:

nvidia
/

voice-agent-examples

Running

App Files Community

fciannella committed on Oct 15, 2025

Commit

9312c3a

1 Parent(s): 2f49513

First functional multi thread

Browse files

Files changed (9) hide show

README.md +1 -1
examples/voice_agent_multi_thread/agents/helper_functions.py +17 -9
examples/voice_agent_multi_thread/agents/telco-agent-multi/react_agent.py +10 -5
examples/voice_agent_multi_thread/agents/telco-agent-multi/tools.py +9 -0
examples/voice_agent_multi_thread/langgraph_llm_service.py +245 -66
examples/voice_agent_multi_thread/pipeline.py +16 -2
examples/voice_agent_webrtc_langgraph/agents/healthcare-agent/TESTING_GUIDE.md +1 -0
examples/voice_agent_webrtc_langgraph/agents/healthcare-agent/prompts.py +45 -15
examples/voice_agent_webrtc_langgraph/agents/healthcare-agent/tools.py +40 -19

README.md CHANGED Viewed

@@ -145,7 +145,7 @@ The `voice_agent_multi_thread` example includes a non-blocking multi-threaded ag
 ### Build the Docker image:
 ```bash
-docker build -t voice-agent-multi-thread .
 ```
 ### Run the container:

 ### Build the Docker image:
 ```bash
+docker build --build-arg EXAMPLE_NAME=voice_agent_multi_thread -t voice-agent-multi-thread .
 ```
 ### Run the container:

examples/voice_agent_multi_thread/agents/helper_functions.py CHANGED Viewed

@@ -22,21 +22,29 @@ def write_status(
         namespace: Namespace tuple for store isolation
         config: Optional runtime config
     """
     if not isinstance(namespace, tuple):
         try:
             namespace = tuple(namespace)
         except (TypeError, ValueError):
             namespace = (str(namespace),)
-    store.put(
-        namespace,
-        "working-tool-status-update",
-        {
-            "tool_name": tool_name,
-            "progress": progress,
-            "status": status,
-        }
-    )
 def reset_status(store: BaseStore, namespace: tuple | list) -> None:

         namespace: Namespace tuple for store isolation
         config: Optional runtime config
     """
+    import logging
+    logger = logging.getLogger(__name__)
     if not isinstance(namespace, tuple):
         try:
             namespace = tuple(namespace)
         except (TypeError, ValueError):
             namespace = (str(namespace),)
+    try:
+        logger.info(f"📝 write_status: Attempting to write to store: namespace={namespace}, key='working-tool-status-update'")
+        store.put(
+            namespace,
+            "working-tool-status-update",
+            {
+                "tool_name": tool_name,
+                "progress": progress,
+                "status": status,
+            }
+        )
+        logger.info(f"📝 write_status: Successfully called store.put() for {tool_name} at {progress}%")
+    except Exception as e:
+        logger.error(f"❌ write_status FAILED: {e}", exc_info=True)
 def reset_status(store: BaseStore, namespace: tuple | list) -> None:

examples/voice_agent_multi_thread/agents/telco-agent-multi/react_agent.py CHANGED Viewed

@@ -83,11 +83,16 @@ SYSTEM_PROMPT = (
 )
 SECONDARY_SYSTEM_PROMPT = (
-    "You are a friendly mobile operator assistant engaging in light conversation while a long-running task is being processed. "
-    "You can: (1) Check status of the ongoing task using check_status tool; (2) Answer general questions about packages, data balance, or roaming; (3) Provide light chit-chat. "
-    "DO NOT attempt to perform any long operations like changing packages, closing contracts, or purchasing passes - explain that another operation is in progress. "
-    "STYLE: Brief (1-2 sentences), friendly, and reassuring. "
-    "TTS SAFETY: Output must be plain text suitable for text-to-speech. Do not use markdown, bullets, asterisks, emojis, or special typography. Use only ASCII punctuation and straight quotes."
 )

 )
 SECONDARY_SYSTEM_PROMPT = (
+    "You are a lively, personable mobile operator assistant keeping customers entertained while their request processes in the background. "
+    "Your goal is to make the wait enjoyable - be playful, share interesting facts, tell brief jokes, or engage in light conversation. "
+    "If they ask about status, check using check_status tool and present it in an upbeat way. "
+    "If they're bored or impatient, sympathize warmly and distract them with something fun - ask about their day, share a quick tech tip, or make them smile. "
+    "For small talk (weather, location, hobbies, sports, random questions), engage enthusiastically and naturally - show genuine interest! "
+    "You can answer quick questions about their package, data balance, or roaming options. "
+    "DO NOT start new long operations (changing packages, closing contracts, purchasing passes) - playfully explain you're juggling their current request and can help with that next. "
+    "PERSONALITY: Friendly, upbeat, conversational, and entertaining - like a fun colleague who makes waiting time fly by! "
+    "STYLE: Natural (2-3 sentences), warm, and engaging. Mix status updates with personality - don't just recite percentages robotically. "
+    "TTS SAFETY: Plain text only - no markdown, bullets, asterisks, emojis, or special formatting. Use ASCII punctuation and straight quotes."
 )

examples/voice_agent_multi_thread/agents/telco-agent-multi/tools.py CHANGED Viewed

@@ -97,8 +97,13 @@ def close_contract_tool(msisdn: str, confirm: bool = False) -> str:
         return json.dumps(telco_logic.close_contract(msisdn, False))
     # Long-running operation with progress reporting (following working example pattern)
     writer = get_stream_writer()
     writer("Processing your contract closure request. This may take a moment...")
     tool_name = "close_contract_tool"
     steps = 10
@@ -107,12 +112,16 @@ def close_contract_tool(msisdn: str, confirm: bool = False) -> str:
     config = ensure_config()
     namespace = config["configurable"]["namespace_for_memory"]
     server_store = get_store()
     for i in range(1, steps + 1):
         time.sleep(interval_seconds)
         pct = (i * 100) // steps
         status = "running"
         write_status(tool_name, pct, status, server_store, namespace, config)
     # Execute actual closure
     result = telco_logic.close_contract(msisdn, True)

         return json.dumps(telco_logic.close_contract(msisdn, False))
     # Long-running operation with progress reporting (following working example pattern)
+    import logging
+    logger = logging.getLogger(__name__)
+    logger.info(f"🔥 close_contract_tool STARTING 50-second operation for {msisdn}")
     writer = get_stream_writer()
     writer("Processing your contract closure request. This may take a moment...")
+    logger.info("✅ Stream writer message sent")
     tool_name = "close_contract_tool"
     steps = 10
     config = ensure_config()
     namespace = config["configurable"]["namespace_for_memory"]
     server_store = get_store()
+    logger.info(f"📦 Got store and namespace: {namespace}")
     for i in range(1, steps + 1):
+        logger.info(f"⏱️  Step {i}/{steps} - sleeping {interval_seconds}s...")
         time.sleep(interval_seconds)
+        logger.info(f"⏱️  Step {i}/{steps} - sleep complete, writing status...")
         pct = (i * 100) // steps
         status = "running"
         write_status(tool_name, pct, status, server_store, namespace, config)
+        logger.info(f"✅ Status written: {pct}% - {status}")
     # Execute actual closure
     result = telco_logic.close_contract(msisdn, True)

examples/voice_agent_multi_thread/langgraph_llm_service.py CHANGED Viewed

@@ -74,7 +74,7 @@ class LangGraphLLMService(OpenAILLMService):
         base_url: str = "http://127.0.0.1:2024",
         assistant: str = "ace-base-agent",
         user_email: str = "test@example.com",
-        stream_mode: str = "values",
         debug_stream: bool = False,
         thread_id: Optional[str] = None,
         auth_token: Optional[str] = None,
@@ -86,9 +86,11 @@ class LangGraphLLMService(OpenAILLMService):
         self.base_url = base_url
         self.assistant = assistant
         self.user_email = user_email
-        self.stream_mode = stream_mode
         self.debug_stream = debug_stream
         self.enable_multi_threading = enable_multi_threading
         # Optional auth header
         token = (
@@ -117,6 +119,12 @@ class LangGraphLLMService(OpenAILLMService):
         self._current_task: Optional[asyncio.Task] = None
         self._outer_open: bool = False
         self._emitted_texts: set[str] = set()
     async def _ensure_thread(self, thread_type: str = "main") -> Optional[str]:
         """Ensure thread exists for the given type (main or secondary)."""
@@ -151,6 +159,47 @@ class LangGraphLLMService(OpenAILLMService):
             logger.warning(f"LangGraph: could not determine {thread_type} thread id; proceeding threadless.")
             return None
     async def _check_long_operation_running(self) -> bool:
         """Check if a long operation is currently running via the store."""
         if not self.enable_multi_threading:
@@ -161,25 +210,43 @@ class LangGraphLLMService(OpenAILLMService):
             ns_list = list(self._namespace_for_memory)
             logger.info(f"Checking store with namespace: {ns_list}")
-            # Get the specific status key that tools write to
-            item = await self._client.store.get_item(ns_list, "working-tool-status-update")
-            if item is None:
-                logger.info("No item found in store, returning False")
                 return False
-            # Extract value from the item
-            value = getattr(item, "value", None)
-            if value is None and isinstance(item, dict):
-                value = item.get("value")
-            # Check if status is "running"
-            if isinstance(value, dict):
-                status = value.get("status")
-                logger.info(f"🔍 Long operation check: status={status}, tool={value.get('tool_name')}, progress={value.get('progress')}")
-                return status == "running"
-            logger.info(f"Value not a dict: {type(value)}")
             return False
         except Exception as exc:  # noqa: BLE001
             logger.error(f"❌ Failed to check operation status: {exc}", exc_info=True)
@@ -211,48 +278,10 @@ class LangGraphLLMService(OpenAILLMService):
                 continue
         return ""
-    async def _stream_langgraph(self, text: str) -> None:
-        # Determine thread type based on whether a long operation is running
-        thread_type = "main"
-        if self.enable_multi_threading:
-            long_operation_running = await self._check_long_operation_running()
-            if long_operation_running:
-                thread_type = "secondary"
-                self._interim_messages_reset = False
-                logger.info("Long operation detected, routing to secondary thread")
-            else:
-                # Starting new main operation
-                if self._last_was_long_operation:
-                    self._interim_messages_reset = True
-                    self._last_was_long_operation = False
-                else:
-                    self._interim_messages_reset = True
-                logger.info("No long operation, routing to main thread")
-        # Ensure appropriate thread
-        thread_id = await self._ensure_thread(thread_type)
-        # Build config with namespace for store coordination
-        config = {
-            "configurable": {
-                "user_email": self.user_email,
-                "thread_id": thread_id,
-                "namespace_for_memory": list(self._namespace_for_memory),
-            }
-        }
-        # Build input dict for multi-threaded agent
-        if self.enable_multi_threading:
-            input_payload = {
-                "messages": [{"type": "human", "content": text}],
-                "thread_type": thread_type,
-                "interim_messages_reset": self._interim_messages_reset,
-            }
-        else:
-            # Backward compatible: simple message input
-            input_payload = [HumanMessage(content=text)]
         try:
             async for chunk in self._client.runs.stream(
                 thread_id,
                 self.assistant,
@@ -305,44 +334,87 @@ class LangGraphLLMService(OpenAILLMService):
                         if part_text not in self._emitted_texts:
                             self._emitted_texts.add(part_text)
                             await self.push_frame(LLMTextFrame(_tts_sanitize(part_text)))
                 # Final value-style events (values mode)
                 if event == "values":
                     # Some dev servers send final AI message content here
                     final_text = ""
                     # Handle list of messages (most common case)
                     if isinstance(data, list) and data:
                         # Find the last AI message in the list
                         for msg in reversed(data):
                             if isinstance(msg, dict):
                                 if msg.get("type") == "ai" and isinstance(msg.get("content"), str):
                                     final_text = msg["content"]
                                     break
                             elif hasattr(msg, "type") and getattr(msg, "type") == "ai":
                                 content = getattr(msg, "content", None)
                                 if isinstance(content, str):
                                     final_text = content
                                     break
                     # Handle single message object
                     elif hasattr(data, "content") and isinstance(getattr(data, "content"), str):
                         final_text = getattr(data, "content")
                     # Handle single message dict
                     elif isinstance(data, dict):
                         c = data.get("content")
                         if isinstance(c, str):
                             final_text = c
-                    if final_text:
-                        # Close backchannel utterance if open
-                        if self._outer_open:
                             await self.push_frame(LLMFullResponseEndFrame())
-                            self._outer_open = False
-                            self._emitted_texts.clear()
-                        # Emit final explanation as its own message
-                        await self.push_frame(LLMFullResponseStartFrame())
-                        await self.push_frame(LLMTextFrame(_tts_sanitize(final_text)))
-                        await self.push_frame(LLMFullResponseEndFrame())
                 # Messages mode: look for an array of messages
                 if event == "messages" or event.endswith(":messages"):
@@ -380,6 +452,88 @@ class LangGraphLLMService(OpenAILLMService):
                             await self.push_frame(LLMTextFrame(_tts_sanitize(txt)))
         except Exception as exc:  # noqa: BLE001
             logger.error(f"LangGraph stream error: {exc}")
     async def _process_context_and_frames(self, context: OpenAILLMContext) -> None:
         """Adapter entrypoint: push start/end frames and stream tokens."""
@@ -416,6 +570,31 @@ class LangGraphLLMService(OpenAILLMService):
             if self._current_task is not None and not self._current_task.done():
                 await self.cancel_task(self._current_task)
                 self._current_task = None
             return
         else:
             await super().process_frame(frame, direction)

         base_url: str = "http://127.0.0.1:2024",
         assistant: str = "ace-base-agent",
         user_email: str = "test@example.com",
+        stream_mode: Optional[list] = None,
         debug_stream: bool = False,
         thread_id: Optional[str] = None,
         auth_token: Optional[str] = None,
         self.base_url = base_url
         self.assistant = assistant
         self.user_email = user_email
+        # Match working text client: use ["values", "custom"] for multi-threading
+        self.stream_mode = stream_mode if stream_mode is not None else (["values", "custom"] if enable_multi_threading else "values")
         self.debug_stream = debug_stream
         self.enable_multi_threading = enable_multi_threading
+        logger.info(f"🎛️  LangGraphLLMService initialized: enable_multi_threading={enable_multi_threading}, stream_mode={self.stream_mode}, type={type(self.stream_mode)}")
         # Optional auth header
         token = (
         self._current_task: Optional[asyncio.Task] = None
         self._outer_open: bool = False
         self._emitted_texts: set[str] = set()
+        # Background task for main thread long operations
+        self._background_main_task: Optional[asyncio.Task] = None
+        self._background_final_message: Optional[str] = None
+        self._background_monitor_task: Optional[asyncio.Task] = None
+        self._background_task_is_long_operation: bool = False  # Track if current background task is a long operation
     async def _ensure_thread(self, thread_type: str = "main") -> Optional[str]:
         """Ensure thread exists for the given type (main or secondary)."""
             logger.warning(f"LangGraph: could not determine {thread_type} thread id; proceeding threadless.")
             return None
+    async def _monitor_background_task(self) -> None:
+        """Monitor background main task and proactively inject final message when complete."""
+        if not self._background_main_task:
+            return
+        try:
+            # Wait for the background task to complete
+            await self._background_main_task
+            logger.info("🏁 Background main task completed, checking for final message")
+            # Give a VERY brief moment for the final message to be captured (minimize race window)
+            await asyncio.sleep(0.1)
+            # If we captured a final message, inject it as a new bot-initiated turn
+            if self._background_final_message:
+                logger.info("📢 Injecting final synthesized message from background task")
+                logger.info(f"Message to inject: {self._background_final_message}")
+                # Simply push the frames directly - they should flow through TTS
+                await self.push_frame(LLMFullResponseStartFrame())
+                logger.info("✅ Pushed LLMFullResponseStartFrame")
+                await self.push_frame(LLMTextFrame(_tts_sanitize(self._background_final_message)))
+                logger.info(f"✅ Pushed LLMTextFrame with content")
+                await self.push_frame(LLMFullResponseEndFrame())
+                logger.info("✅ Pushed LLMFullResponseEndFrame")
+                # Clear the captured message
+                self._background_final_message = None
+                logger.info("✨ Final message injection complete")
+            else:
+                logger.info("ℹ️ Background task completed but no final message to inject")
+        except asyncio.CancelledError:
+            logger.info("🚫 Background task monitor cancelled")
+        except Exception as exc:
+            logger.error(f"❌ Background task monitor error: {exc}", exc_info=True)
+        finally:
+            self._background_main_task = None
+            self._background_monitor_task = None
     async def _check_long_operation_running(self) -> bool:
         """Check if a long operation is currently running via the store."""
         if not self.enable_multi_threading:
             ns_list = list(self._namespace_for_memory)
             logger.info(f"Checking store with namespace: {ns_list}")
+            # Use search_items() like the working client code does
+            items = await self._client.store.search_items(ns_list)
+            logger.info(f"🔎 search_items returned: type={type(items)}")
+            # Normalize return shape: SDK may return a dict with 'items' or a bare list (matching text client)
+            items_list = None
+            if isinstance(items, dict):
+                inner = items.get("items")
+                if isinstance(inner, list):
+                    items_list = inner
+                    logger.info(f"📦 Extracted {len(inner)} items from dict wrapper")
+            elif isinstance(items, list):
+                items_list = items
+                logger.info(f"📦 Got {len(items)} items as bare list")
+            if not items_list:
+                logger.info("No items found in store, returning False")
                 return False
+            logger.info(f"📦 Total items in store: {len(items_list)}")
+            # Walk from the end to find the most recent item that has a 'status' (EXACTLY like text client)
+            for idx, item in enumerate(reversed(items_list)):
+                item_key = getattr(item, "key", None) or (item.get("key") if isinstance(item, dict) else None)
+                value = getattr(item, "value", None)
+                if value is None and isinstance(item, dict):
+                    value = item.get("value")
+                value_keys = list(value.keys()) if isinstance(value, dict) else "N/A"
+                logger.info(f"📦 Item {idx} (from end): key={item_key}, value_keys={value_keys}")
+                if isinstance(value, dict) and "status" in value:
+                    status = value.get("status")
+                    logger.info(f"🔍 Long operation check: status={status}, tool={value.get('tool_name')}, progress={value.get('progress')}")
+                    return status == "running"
+            logger.info("No status items found in store")
             return False
         except Exception as exc:  # noqa: BLE001
             logger.error(f"❌ Failed to check operation status: {exc}", exc_info=True)
                 continue
         return ""
+    async def _stream_langgraph_impl(self, text: str, thread_type: str, thread_id: Optional[str], config: dict, input_payload: Any, is_background: bool = False) -> None:
+        """Internal implementation of LangGraph streaming."""
         try:
+            logger.info(f"🎬 Starting stream with mode: {self.stream_mode} (type: {type(self.stream_mode)})")
             async for chunk in self._client.runs.stream(
                 thread_id,
                 self.assistant,
                         if part_text not in self._emitted_texts:
                             self._emitted_texts.add(part_text)
                             await self.push_frame(LLMTextFrame(_tts_sanitize(part_text)))
+                # Custom events from get_stream_writer() - tool progress messages
+                if event == "custom":
+                    custom_text = ""
+                    if isinstance(data, str):
+                        custom_text = data
+                    elif isinstance(data, dict):
+                        # Try to extract text from custom event data
+                        custom_text = data.get("content") or data.get("text") or ""
+                    elif hasattr(data, "content"):
+                        custom_text = getattr(data, "content", "")
+                    if custom_text and isinstance(custom_text, str) and custom_text not in self._emitted_texts:
+                        logger.info(f"📢 Custom event (tool message): {custom_text[:100]}")
+                        self._emitted_texts.add(custom_text)
+                        # Emit as its own turn
+                        if self._outer_open:
+                            await self.push_frame(LLMFullResponseEndFrame())
+                            self._outer_open = False
+                        await self.push_frame(LLMFullResponseStartFrame())
+                        await self.push_frame(LLMTextFrame(_tts_sanitize(custom_text)))
+                        await self.push_frame(LLMFullResponseEndFrame())
                 # Final value-style events (values mode)
                 if event == "values":
                     # Some dev servers send final AI message content here
                     final_text = ""
+                    logger.info(f"📊 Processing values event: data_type={type(data)}, is_background={is_background}")
                     # Handle list of messages (most common case)
                     if isinstance(data, list) and data:
+                        logger.info(f"📊 Data is list with {len(data)} items")
                         # Find the last AI message in the list
                         for msg in reversed(data):
                             if isinstance(msg, dict):
                                 if msg.get("type") == "ai" and isinstance(msg.get("content"), str):
                                     final_text = msg["content"]
+                                    logger.info(f"✅ Found AI message in dict: {final_text[:100]}")
                                     break
                             elif hasattr(msg, "type") and getattr(msg, "type") == "ai":
                                 content = getattr(msg, "content", None)
                                 if isinstance(content, str):
                                     final_text = content
+                                    logger.info(f"✅ Found AI message in object: {final_text[:100]}")
                                     break
                     # Handle single message object
                     elif hasattr(data, "content") and isinstance(getattr(data, "content"), str):
                         final_text = getattr(data, "content")
+                        logger.info(f"✅ Found content in object: {final_text[:100]}")
                     # Handle single message dict
                     elif isinstance(data, dict):
                         c = data.get("content")
                         if isinstance(c, str):
                             final_text = c
+                            logger.info(f"✅ Found content in dict: {final_text[:100]}")
+                    if final_text and final_text not in self._emitted_texts:
+                        if is_background:
+                            # Running in background - capture for later injection
+                            # Only capture if there's no pending message waiting to be injected
+                            if not self._background_final_message:
+                                logger.info("💾 Capturing final message from background task")
+                                self._background_final_message = final_text
+                                self._emitted_texts.add(final_text)
+                            else:
+                                logger.info(f"⚠️  Skipping capture - pending message already exists: {self._background_final_message[:50]}...")
+                            # Close any open utterance
+                            if self._outer_open:
+                                await self.push_frame(LLMFullResponseEndFrame())
+                                self._outer_open = False
+                        else:
+                            # Normal foreground - push immediately
+                            # Close backchannel utterance if open
+                            if self._outer_open:
+                                await self.push_frame(LLMFullResponseEndFrame())
+                                self._outer_open = False
+                            # Emit final explanation as its own message
+                            self._emitted_texts.add(final_text)
+                            await self.push_frame(LLMFullResponseStartFrame())
+                            await self.push_frame(LLMTextFrame(_tts_sanitize(final_text)))
                             await self.push_frame(LLMFullResponseEndFrame())
                 # Messages mode: look for an array of messages
                 if event == "messages" or event.endswith(":messages"):
                             await self.push_frame(LLMTextFrame(_tts_sanitize(txt)))
         except Exception as exc:  # noqa: BLE001
             logger.error(f"LangGraph stream error: {exc}")
+        finally:
+            # Mark operation complete if this was a main thread
+            if thread_type == "main":
+                self._last_was_long_operation = True
+                logger.info("✅ Main thread operation completed")
+    async def _stream_langgraph(self, text: str) -> None:
+        """Route to main or secondary thread, running main operations in background."""
+        # Determine thread type based on whether a long operation is running
+        logger.info(f"🎯 _stream_langgraph called: enable_multi_threading={self.enable_multi_threading}")
+        thread_type = "main"
+        if self.enable_multi_threading:
+            long_operation_running = await self._check_long_operation_running()
+            if long_operation_running:
+                thread_type = "secondary"
+                self._interim_messages_reset = False
+                logger.info("🔀 Long operation detected, routing to secondary thread")
+            else:
+                # Starting new main operation
+                if self._last_was_long_operation:
+                    self._interim_messages_reset = True
+                    self._last_was_long_operation = False
+                else:
+                    self._interim_messages_reset = True
+                logger.info("▶️  No long operation, routing to main thread")
+        # Ensure appropriate thread
+        thread_id = await self._ensure_thread(thread_type)
+        # Build config with namespace for store coordination
+        config = {
+            "configurable": {
+                "user_email": self.user_email,
+                "thread_id": thread_id,
+                "namespace_for_memory": list(self._namespace_for_memory),
+            }
+        }
+        # Build input dict for multi-threaded agent
+        if self.enable_multi_threading:
+            input_payload = {
+                "messages": [{"type": "human", "content": text}],
+                "thread_type": thread_type,
+                "interim_messages_reset": self._interim_messages_reset,
+            }
+        else:
+            # Backward compatible: simple message input
+            input_payload = [HumanMessage(content=text)]
+        # For main thread operations, run in background to allow subsequent messages
+        if self.enable_multi_threading and thread_type == "main":
+            logger.info("🚀 Starting main thread operation in background")
+            # Cancel any existing background main task and monitor
+            if self._background_main_task is not None and not self._background_main_task.done():
+                logger.info("⚠️  Canceling previous background main task")
+                self._background_main_task.cancel()
+                try:
+                    await self._background_main_task
+                except asyncio.CancelledError:
+                    pass
+            if self._background_monitor_task is not None and not self._background_monitor_task.done():
+                self._background_monitor_task.cancel()
+                try:
+                    await self._background_monitor_task
+                except asyncio.CancelledError:
+                    pass
+            # Start new background task (with is_background=True to capture final message)
+            self._background_main_task = asyncio.create_task(
+                self._stream_langgraph_impl(text, thread_type, thread_id, config, input_payload, is_background=True)
+            )
+            # Start monitor to inject final message when background task completes
+            self._background_monitor_task = asyncio.create_task(self._monitor_background_task())
+            # Don't await - return immediately to allow pipeline to process next message
+            logger.info("✨ Main thread operation dispatched, pipeline is now free")
+        else:
+            # Secondary thread or non-multi-threaded: run synchronously (should be fast)
+            logger.info(f"⚡ Running {thread_type} thread operation synchronously")
+            await self._stream_langgraph_impl(text, thread_type, thread_id, config, input_payload, is_background=False)
     async def _process_context_and_frames(self, context: OpenAILLMContext) -> None:
         """Adapter entrypoint: push start/end frames and stream tokens."""
             if self._current_task is not None and not self._current_task.done():
                 await self.cancel_task(self._current_task)
                 self._current_task = None
+            # For multi-threading: check if a long operation is running before cancelling
+            long_op_running = False
+            if self.enable_multi_threading:
+                long_op_running = await self._check_long_operation_running()
+            # Only cancel background tasks if NOT in a long operation (which should continue)
+            if not long_op_running:
+                if self._background_main_task is not None and not self._background_main_task.done():
+                    logger.info("🛑 Canceling background main task due to interruption")
+                    self._background_main_task.cancel()
+                    try:
+                        await self._background_main_task
+                    except asyncio.CancelledError:
+                        pass
+                    self._background_main_task = None
+                if self._background_monitor_task is not None and not self._background_monitor_task.done():
+                    logger.info("🛑 Canceling background monitor task due to interruption")
+                    self._background_monitor_task.cancel()
+                    try:
+                        await self._background_monitor_task
+                    except asyncio.CancelledError:
+                        pass
+                    self._background_monitor_task = None
+            else:
+                logger.info("🔄 Long operation running - keeping background tasks alive, secondary will handle interruption")
             return
         else:
             await super().process_frame(frame, direction)

examples/voice_agent_multi_thread/pipeline.py CHANGED Viewed

@@ -281,14 +281,28 @@ async def run_bot(webrtc_connection, ws: WebSocket, assistant_override: str | No
     selected_assistant = assistant_override or os.getenv("LANGGRAPH_ASSISTANT", "ace-base-agent")
     logger.info(f"Using LangGraph assistant: {selected_assistant}")
     # Enable multi-threading for the telco and wire-transfer agents
-    enable_multi_threading = selected_assistant in ["telco-agent", "wire-transfer-agent"]
     llm = LangGraphLLMService(
         base_url=os.getenv("LANGGRAPH_BASE_URL", "http://127.0.0.1:2024"),
         assistant=selected_assistant,
         user_email=os.getenv("USER_EMAIL", "test@example.com"),
-        stream_mode=os.getenv("LANGGRAPH_STREAM_MODE", "values"),
         debug_stream=os.getenv("LANGGRAPH_DEBUG_STREAM", "false").lower() == "true",
         enable_multi_threading=enable_multi_threading,
     )

     selected_assistant = assistant_override or os.getenv("LANGGRAPH_ASSISTANT", "ace-base-agent")
     logger.info(f"Using LangGraph assistant: {selected_assistant}")
+    # Determine assistant name (may be UUID, need to fetch graph_id)
+    assistant_name = selected_assistant  # Default to the value we have
+    try:
+        # If it looks like a UUID, fetch the assistant details to get the graph_id
+        if len(selected_assistant) > 30 and "-" in selected_assistant:
+            from langgraph_sdk import get_client
+            langgraph_client = get_client(url=os.getenv("LANGGRAPH_BASE_URL", "http://127.0.0.1:2024"))
+            assistant_info = await langgraph_client.assistants.get(selected_assistant)
+            assistant_name = assistant_info.get("graph_id", selected_assistant)
+            logger.info(f"Resolved assistant UUID to graph_id: {assistant_name}")
+    except Exception as exc:  # noqa: BLE001
+        logger.warning(f"Failed to resolve assistant name, using as-is: {exc}")
     # Enable multi-threading for the telco and wire-transfer agents
+    enable_multi_threading = assistant_name in ["telco-agent", "wire-transfer-agent"]
+    logger.info(f"Multi-threading enabled: {enable_multi_threading} for assistant: {assistant_name}")
     llm = LangGraphLLMService(
         base_url=os.getenv("LANGGRAPH_BASE_URL", "http://127.0.0.1:2024"),
         assistant=selected_assistant,
         user_email=os.getenv("USER_EMAIL", "test@example.com"),
+        # stream_mode now auto-set based on enable_multi_threading (["values", "custom"] for multi-thread, "values" for single)
         debug_stream=os.getenv("LANGGRAPH_DEBUG_STREAM", "false").lower() == "true",
         enable_multi_threading=enable_multi_threading,
     )

examples/voice_agent_webrtc_langgraph/agents/healthcare-agent/TESTING_GUIDE.md CHANGED Viewed

@@ -287,3 +287,4 @@ If tests fail, provide:
 3. **Expected behavior** vs **actual behavior**
 4. **Log excerpt** from `app.log` showing tool calls
 5. **Which test scenario** from this guide

 3. **Expected behavior** vs **actual behavior**
 4. **Log excerpt** from `app.log` showing tool calls
 5. **Which test scenario** from this guide

examples/voice_agent_webrtc_langgraph/agents/healthcare-agent/prompts.py CHANGED Viewed

@@ -42,7 +42,7 @@ HEALTHCARE_SYSTEM_PROMPT = (
     "1. GREETING: Begin with a warm, brief greeting. Ask for the caller's full name.\n"
     "   - Call find_patient(full_name=...) to get patient_id\n"
     "\n"
-    "2. IDENTITY VERIFICATION (CRITICAL - MANDATORY before any medical info):\n"
     "   - Ask for date of birth in any format (you will normalize it)\n"
     "   - Call verify_identity(dob_yyyy_mm_dd=...) with the DOB\n"
     "   - IMPORTANT: Check the response:\n"
@@ -52,36 +52,60 @@ HEALTHCARE_SYSTEM_PROMPT = (
     "   - NEVER claim verification until verified=true is returned\n"
     "   - If verification fails after all attempts, politely explain you cannot access medical records\n"
     "\n"
     "3. AFTER VERIFIED=TRUE:\n"
     "   - Call get_patient_profile_tool() to retrieve allergies, medications, conditions\n"
     "   - Ask: 'What brings you in today?' or 'What's going on?'\n"
     "   - Gather chief complaint and symptoms using ONE question at a time\n"
-    "   - Ask brief follow-ups to clarify: duration, severity, associated symptoms\n"
-    "\n"
-    "4. TRIAGE AND GUIDANCE:\n"
-    "   - Call triage_symptoms_tool(symptoms_text=...) with a natural description of all symptoms\n"
-    "   - Check the 'risk' level and 'red_flags' in the response:\n"
-    "     * If risk='urgent' or red_flags present: Direct to ER/911 immediately with clear urgency\n"
-    "     * If risk='soon': Give advice and strongly recommend scheduling within 1-2 days\n"
-    "     * If risk='self_care': Give the advice, check allergies/meds for safety, offer optional follow-up\n"
     "   - ALWAYS consider patient's allergies and current medications before recommending anything (including OTC)\n"
     "\n"
-    "5. APPOINTMENT BOOKING (if appropriate):\n"
     "   - Call list_providers_tool() to get available providers\n"
     "   - Present 1-2 provider options to patient\n"
     "   - Call get_provider_slots_tool(provider_id=...) for chosen provider\n"
-    "   - Present 2-3 time slots in friendly format (today at 8pm, tomorrow at 8:30am, etc.)\n"
     "   - After patient chooses, call schedule_appointment_tool(provider_id=..., slot_iso=...)\n"
     "   - Confirm appointment and mention sending details to their phone\n"
     "\n"
-    "6. PHARMACY CONFIRMATION (if prescriptions likely):\n"
     "   - Call get_preferred_pharmacy_tool() to get on-file pharmacy\n"
     "   - Ask: 'Should we keep your pharmacy at [address]?'\n"
     "\n"
-    "7. CLOSING:\n"
     "   - Call log_call_tool(notes=..., triage_json=...) to document the encounter\n"
-    "   - Provide any urgent precautions (e.g., 'If fever goes over 102, weakness, or confusion develop, seek urgent care')\n"
-    "   - End with a warm closing\n"
     "\n"
     "## COMMUNICATION STYLE:\n"
     "- Use a calm, empathetic, warm tone\n"
@@ -90,4 +114,10 @@ HEALTHCARE_SYSTEM_PROMPT = (
     "- Avoid medical jargon; use plain language\n"
     "- Be conversational and natural - imagine speaking to a patient on the phone\n"
     "- Remember: PLAIN TEXT ONLY for text-to-speech (see TTS rules at top)\n"
 )

     "1. GREETING: Begin with a warm, brief greeting. Ask for the caller's full name.\n"
     "   - Call find_patient(full_name=...) to get patient_id\n"
     "\n"
+    "2. IDENTITY VERIFICATION (CRITICAL - Do this ONCE at start of call ONLY):\n"
     "   - Ask for date of birth in any format (you will normalize it)\n"
     "   - Call verify_identity(dob_yyyy_mm_dd=...) with the DOB\n"
     "   - IMPORTANT: Check the response:\n"
     "   - NEVER claim verification until verified=true is returned\n"
     "   - If verification fails after all attempts, politely explain you cannot access medical records\n"
     "\n"
+    "   CRITICAL: ONCE VERIFIED, DO NOT ASK FOR AUTHENTICATION AGAIN IN THIS CALL!\n"
+    "   - Verification persists for the ENTIRE conversation\n"
+    "   - If you already verified the patient earlier in this call, continue the conversation\n"
+    "   - Do NOT re-verify because of unclear input or confusion - just ask for clarification\n"
+    "   - Only one authentication per call session\n"
+    "\n"
     "3. AFTER VERIFIED=TRUE:\n"
     "   - Call get_patient_profile_tool() to retrieve allergies, medications, conditions\n"
     "   - Ask: 'What brings you in today?' or 'What's going on?'\n"
     "   - Gather chief complaint and symptoms using ONE question at a time\n"
+    "\n"
+    "4. SYMPTOM ASSESSMENT (CRITICAL - Do thorough assessment BEFORE jumping to urgent care):\n"
+    "   - Ask clarifying questions about duration, severity, onset\n"
+    "   - For common symptoms like headache or fever, ACTIVELY SCREEN FOR RED FLAGS by asking:\n"
+    "     * 'Do you have any severe symptoms like worst headache of your life, confusion, or vision changes?'\n"
+    "     * 'Any numbness, weakness, or difficulty moving?'\n"
+    "     * 'Any recent head injury or stiff neck?'\n"
+    "   - Listen carefully to answers: 'No numbness' means NO red flag, not a red flag\n"
+    "   - For chest pain specifically: Ask about severity, radiation, shortness of breath, sweating\n"
+    "   - Be measured and thorough - most symptoms are NOT emergencies\n"
+    "\n"
+    "5. TRIAGE AND GUIDANCE (After thorough assessment):\n"
+    "   - IMPORTANT: The triage tool uses keyword matching, so only describe PRESENT symptoms\n"
+    "   - DO NOT list absent symptoms (saying 'no numbness' will incorrectly trigger 'numbness' keyword)\n"
+    "   - Good: 'mild headache for 2 days, gradual onset, improved with rest'\n"
+    "   - Bad: 'headache, no confusion, no numbness' (will cause false urgent classification)\n"
+    "   - Call triage_symptoms_tool(symptoms_text=...) with description of actual complaints only\n"
+    "   - INTERPRET TRIAGE RESULTS CAREFULLY:\n"
+    "     * If risk='urgent' AND red_flags list is NOT EMPTY: Then escalate to ER/911\n"
+    "     * If risk='urgent' but patient explicitly denied red flag symptoms: Use clinical judgment - likely can schedule appointment instead\n"
+    "     * If risk='soon': Give advice and recommend scheduling within 1-2 days\n"
+    "     * If risk='self_care': Give advice, check allergies/meds for safety, offer optional follow-up\n"
     "   - ALWAYS consider patient's allergies and current medications before recommending anything (including OTC)\n"
+    "   - Remember: Most headaches, fevers, and minor aches do NOT need emergency care\n"
     "\n"
+    "6. APPOINTMENT BOOKING (if appropriate):\n"
     "   - Call list_providers_tool() to get available providers\n"
     "   - Present 1-2 provider options to patient\n"
     "   - Call get_provider_slots_tool(provider_id=...) for chosen provider\n"
+    "   - Present 2-3 time slots in friendly format (Monday at 10am, Monday at 2:30pm, Tuesday at 9:15am)\n"
     "   - After patient chooses, call schedule_appointment_tool(provider_id=..., slot_iso=...)\n"
+    "   - If patient gives partial answer like 'Tuesday' or 'the Tuesday one', confirm the full time: 'Tuesday at 9:15 a.m., correct?'\n"
     "   - Confirm appointment and mention sending details to their phone\n"
+    "   - DO NOT re-authenticate just because you need to confirm the appointment time\n"
     "\n"
+    "7. PHARMACY CONFIRMATION (if prescriptions likely):\n"
     "   - Call get_preferred_pharmacy_tool() to get on-file pharmacy\n"
     "   - Ask: 'Should we keep your pharmacy at [address]?'\n"
     "\n"
+    "8. CLOSING:\n"
     "   - Call log_call_tool(notes=..., triage_json=...) to document the encounter\n"
+    "   - Provide safety net advice only if appropriate (e.g., 'If fever goes over 102, seek urgent care')\n"
+    "   - Do NOT over-warn about emergencies for minor symptoms - this causes unnecessary anxiety\n"
+    "   - End with a warm, reassuring closing\n"
     "\n"
     "## COMMUNICATION STYLE:\n"
     "- Use a calm, empathetic, warm tone\n"
     "- Avoid medical jargon; use plain language\n"
     "- Be conversational and natural - imagine speaking to a patient on the phone\n"
     "- Remember: PLAIN TEXT ONLY for text-to-speech (see TTS rules at top)\n"
+    "\n"
+    "## HANDLING UNCLEAR INPUT:\n"
+    "- If you don't understand what the patient said, ask for clarification: 'I'm sorry, could you repeat that?'\n"
+    "- If the patient gives an ambiguous answer (e.g., 'I said tues'), ask: 'Did you mean Tuesday at 9:15 a.m.?'\n"
+    "- DO NOT re-authenticate the patient just because you didn't understand their input\n"
+    "- Maintain conversation context - you already know who they are if you verified them earlier\n"
 )

examples/voice_agent_webrtc_langgraph/agents/healthcare-agent/tools.py CHANGED Viewed

@@ -261,39 +261,60 @@ def schedule_appointment_tool(provider_id: str, slot_iso: str, patient_id: str |
 def triage_symptoms_tool(patient_id: str | None, symptoms_text: str) -> str:
     """Analyze patient symptoms using clinical triage rules to determine urgency and guidance.
-    WHEN TO CALL: After collecting chief complaint and symptoms from patient. This is a CORE tool.
     PARAMETERS:
     - patient_id: From find_patient() result (auto-injected if available, used for age-based rules)
-    - symptoms_text: Natural language description of symptoms collected from patient (e.g., "headache and fatigue, no fever")
     RETURNS: JSON with:
     - "risk": "urgent" | "soon" | "self_care" - Urgency level
     - "advice": "Try rest, hydration..." - Clinical guidance to share with patient
-    - "red_flags": ["stiff neck", "high fever"] - List of concerning symptoms detected (empty array if none)
     - "rule": "Headache - typical" - Internal rule name that matched
     RISK LEVELS:
-    - "urgent": Emergency - direct to ER/911 immediately
     - "soon": Schedule appointment within 1-2 days
     - "self_care": Home care with OTC medications, monitor symptoms
-    WHAT TO DO WITH RESULTS:
-    - If risk="urgent" AND red_flags present: "This may require urgent evaluation. Please go to the nearest ER or call 911."
-    - If risk="soon": Give brief advice, then offer appointment: "I recommend scheduling within a day or two."
-    - If risk="self_care": Give the advice verbatim, check patient profile for medication safety, offer follow-up appointment
     - ALWAYS tailor advice based on patient's allergies and current medications from get_patient_profile_tool()
-    EXAMPLE 1 (URGENT):
-    → Call triage_symptoms_tool(symptoms_text="severe chest pain and shortness of breath")
-    ← Returns: {"risk": "urgent", "advice": "Chest pain can be serious. Please call emergency services now.", "red_flags": ["chest pain"]}
-    → Say: "Chest pain can be serious. Please call 911 now or go to the nearest emergency room."
-    EXAMPLE 2 (SELF-CARE):
-    → Call triage_symptoms_tool(symptoms_text="mild headache and tired, no fever or neck stiffness")
-    ← Returns: {"risk": "self_care", "advice": "Try rest, hydration, and acetaminophen as directed...", "red_flags": []}
-    Patient profile shows: allergies=["Penicillin"], medications=[{"name": "Acetaminophen", "otc": true}]
-    → Say: "Since you're already on acetaminophen as needed and have no concerning symptoms, try rest and hydration. Would you like a follow-up appointment?"
     """
     return json.dumps(triage_symptoms(patient_id, symptoms_text))

 def triage_symptoms_tool(patient_id: str | None, symptoms_text: str) -> str:
     """Analyze patient symptoms using clinical triage rules to determine urgency and guidance.
+    WHEN TO CALL: ONLY after thorough symptom assessment. Ask clarifying questions about red flags BEFORE calling this tool.
+    CRITICAL: This tool uses simple keyword matching, so be VERY CAREFUL with your symptoms_text.
+    - Only include symptoms that ARE PRESENT
+    - Do NOT mention symptoms that are absent (saying "no numbness" will trigger the "numbness" keyword!)
+    - Instead, after screening for red flags, ONLY list positive findings in symptoms_text
+    - Use descriptive language: "mild headache for 2 days, gradual onset, relieved by rest"
+    - If patient denies all red flags, do NOT list them - just describe the actual complaint
     PARAMETERS:
     - patient_id: From find_patient() result (auto-injected if available, used for age-based rules)
+    - symptoms_text: Description of PRESENT symptoms only (DO NOT list absent symptoms to avoid false triggers)
+      Good: "mild headache for 2 days, gradual onset, relieved by rest"
+      Good: "moderate headache with fever 101F, started yesterday"
+      Bad: "headache, no numbness, no confusion" (will trigger "numbness" and "confusion" keywords!)
+      Bad: "headache" (too vague, lacks detail for proper triage)
     RETURNS: JSON with:
     - "risk": "urgent" | "soon" | "self_care" - Urgency level
     - "advice": "Try rest, hydration..." - Clinical guidance to share with patient
+    - "red_flags": ["stiff neck", "high fever"] - Keywords detected (may include false positives!)
     - "rule": "Headache - typical" - Internal rule name that matched
     RISK LEVELS:
+    - "urgent": Potential emergency (but verify with clinical judgment)
     - "soon": Schedule appointment within 1-2 days
     - "self_care": Home care with OTC medications, monitor symptoms
+    WHAT TO DO WITH RESULTS (USE CLINICAL JUDGMENT):
+    - If risk="urgent" AND red_flags has items AND patient confirmed those symptoms: Direct to ER/911
+    - If risk="urgent" BUT patient explicitly denied red flag symptoms: FALSE POSITIVE - schedule appointment instead
+    - If risk="soon": Give advice and offer appointment within 1-2 days
+    - If risk="self_care": Give advice, check allergies/meds for safety, offer optional follow-up
     - ALWAYS tailor advice based on patient's allergies and current medications from get_patient_profile_tool()
+    - Remember: Most common symptoms (headache, fever, fatigue) are NOT emergencies
+    EXAMPLE 1 (TRUE URGENT):
+    Conversation: Patient says "severe crushing chest pain, sweating, short of breath"
+    → Call triage_symptoms_tool(symptoms_text="severe chest pain with sweating and shortness of breath")
+    ← Returns: {"risk": "urgent", "red_flags": ["chest pain"]}
+    → Clinical judgment: Patient confirmed severe chest pain = TRUE URGENT
+    → Say: "This sounds serious. Please call 911 now or go to the nearest emergency room."
+    EXAMPLE 2 (AVOIDING FALSE POSITIVES):
+    Conversation: You ask "Any severe symptoms like confusion, weakness, or numbness?" Patient says "No, none of those"
+    → Call triage_symptoms_tool(symptoms_text="mild headache for 2 days, gradual onset, relieved with rest")
+    ← Returns: {"risk": "self_care", "red_flags": []}
+    → Say: "Try rest, hydration, and acetaminophen. Would you like a follow-up appointment?"
+    (Note: Did NOT mention "no confusion, no numbness" to avoid triggering those keywords)
+    EXAMPLE 3 (SELF-CARE):
+    → Call triage_symptoms_tool(symptoms_text="low-grade fever 100.5F for 1 day with mild fatigue")
+    ← Returns: {"risk": "self_care", "advice": "Hydration, rest, and acetaminophen can help..."}
+    → Say: "For a low-grade fever, rest and hydration are key. You're already taking acetaminophen as needed, which is safe with your medications."
     """
     return json.dumps(triage_symptoms(patient_id, symptoms_text))