Spaces:
Build error
Build error
Commit
·
e67faa0
1
Parent(s):
decb45c
Realtime Flow
Browse files- src/agent_session/main.py +17 -83
src/agent_session/main.py
CHANGED
|
@@ -1,11 +1,6 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
Agent Session for Avurna Flow (Final Corrected Version)
|
| 4 |
-
|
| 5 |
-
This script sets up a FastAPI web server to manage a voice-based AI agent
|
| 6 |
-
that connects to a LiveKit room. The core issue of the TypeError is resolved
|
| 7 |
-
by implementing the LLMStateWrapper, which correctly handles the asynchronous
|
| 8 |
-
generator returned by the LLM's chat method.
|
| 9 |
"""
|
| 10 |
import os
|
| 11 |
import json
|
|
@@ -14,93 +9,62 @@ from fastapi import FastAPI, BackgroundTasks
|
|
| 14 |
from pydantic import BaseModel
|
| 15 |
import uvicorn
|
| 16 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 17 |
|
| 18 |
-
# Import LiveKit and plugin components
|
| 19 |
from livekit.rtc import Room
|
| 20 |
from livekit.agents import Agent, AgentSession
|
| 21 |
from livekit.agents.llm import LLM
|
| 22 |
from livekit.agents.stt.stream_adapter import StreamAdapter
|
| 23 |
from livekit.plugins.google import LLM as GoogleLLM
|
| 24 |
from livekit.plugins.groq import STT
|
| 25 |
-
from livekit.plugins.hume import TTS, VoiceByName
|
| 26 |
from livekit.plugins.silero import VAD
|
| 27 |
|
| 28 |
from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
|
| 29 |
from src.utils import validate_env_vars
|
| 30 |
|
| 31 |
-
# --- FastAPI Application Setup ---
|
| 32 |
app = FastAPI()
|
| 33 |
-
|
| 34 |
-
# Configure CORS (Cross-Origin Resource Sharing) to allow all origins
|
| 35 |
origins = ["*"]
|
| 36 |
-
app.add_middleware(
|
| 37 |
-
|
| 38 |
-
allow_origins=origins,
|
| 39 |
-
allow_credentials=True,
|
| 40 |
-
allow_methods=["*"],
|
| 41 |
-
allow_headers=["*"]
|
| 42 |
-
)
|
| 43 |
-
|
| 44 |
-
# --- Pydantic Model for API Request ---
|
| 45 |
class JoinRoomRequest(BaseModel):
|
| 46 |
-
"""Defines the expected data structure for a /join-room request."""
|
| 47 |
room_name: str
|
| 48 |
agent_token: str
|
| 49 |
|
| 50 |
-
# --- Custom Agent Definition ---
|
| 51 |
class VoiceAssistant(Agent):
|
| 52 |
-
"""A simple voice assistant agent with a predefined system prompt."""
|
| 53 |
def __init__(self):
|
| 54 |
super().__init__(instructions=SYSTEM_PROMPT)
|
| 55 |
|
| 56 |
-
# --- Utility Function for State Publishing ---
|
| 57 |
async def send_agent_state(room: Room, state: str):
|
| 58 |
-
"""Publishes the agent's current state to the room via data channel."""
|
| 59 |
try:
|
| 60 |
-
# The message is structured as a JSON object for easy parsing by clients
|
| 61 |
msg = json.dumps({"type": "agent_state", "state": state})
|
| 62 |
await room.local_participant.publish_data(msg)
|
| 63 |
print(f"DEBUG: Sent agent state: {state}")
|
| 64 |
except Exception as e:
|
| 65 |
print(f"DEBUG: Error publishing agent state: {e}")
|
| 66 |
|
| 67 |
-
# --- THE DEFINITIVE FIX: LLM Wrapper ---
|
| 68 |
class LLMStateWrapper(LLM):
|
| 69 |
-
"""
|
| 70 |
-
Wraps an LLM instance to correctly handle its async generator `chat` method
|
| 71 |
-
and to inject agent state updates ("thinking", "listening") into the process.
|
| 72 |
-
This class solves the `TypeError: 'async_generator' object does not support
|
| 73 |
-
the asynchronous context manager protocol`.
|
| 74 |
-
"""
|
| 75 |
def __init__(self, llm: LLM, room: Room):
|
| 76 |
super().__init__()
|
| 77 |
self._llm = llm
|
| 78 |
self._room = room
|
| 79 |
|
|
|
|
|
|
|
| 80 |
async def chat(self, **kwargs):
|
| 81 |
-
|
| 82 |
-
This method is called by the AgentSession. It intercepts the call to the LLM,
|
| 83 |
-
sends the 'thinking' state, properly iterates over the LLM's async generator,
|
| 84 |
-
and then sends the 'listening' state upon completion.
|
| 85 |
-
"""
|
| 86 |
await send_agent_state(self._room, "thinking")
|
| 87 |
-
|
| 88 |
-
# Extract 'history' and pass all arguments along to the underlying LLM.
|
| 89 |
-
# This makes the wrapper resilient to future library updates.
|
| 90 |
-
history = kwargs.pop('history', [])
|
| 91 |
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
-
# --- Main Agent Session Logic ---
|
| 99 |
async def run_agent_session(room_name: str, agent_token: str):
|
| 100 |
-
"""
|
| 101 |
-
This function contains the main logic for connecting the agent to a
|
| 102 |
-
LiveKit room and running the session.
|
| 103 |
-
"""
|
| 104 |
livekit_url = os.getenv("LIVEKIT_URL")
|
| 105 |
room = Room()
|
| 106 |
|
|
@@ -109,35 +73,23 @@ async def run_agent_session(room_name: str, agent_token: str):
|
|
| 109 |
await room.connect(livekit_url, agent_token)
|
| 110 |
print("DEBUG: 2. Connection successful.")
|
| 111 |
|
| 112 |
-
# Agent is ready to receive input
|
| 113 |
await send_agent_state(room, "listening")
|
| 114 |
|
| 115 |
print("DEBUG: 3. Initializing plugins...")
|
| 116 |
-
|
| 117 |
-
# Wrap the Google LLM with our custom state-aware wrapper
|
| 118 |
llm_wrapper = LLMStateWrapper(llm=GoogleLLM(model="gemini-1.5-flash"), room=room)
|
| 119 |
-
|
| 120 |
-
# Configure Voice Activity Detection (VAD)
|
| 121 |
vad = VAD.load(min_speech_duration=0.1)
|
| 122 |
-
|
| 123 |
-
# Configure Speech-to-Text (STT) with the VAD adapter
|
| 124 |
stt = StreamAdapter(stt=STT(model="whisper-large-v3-turbo"), vad=vad)
|
| 125 |
-
|
| 126 |
-
# Configure Text-to-Speech (TTS)
|
| 127 |
tts = TTS(voice=VoiceByName(name="Tiktok Fashion Influencer"), instant_mode=True)
|
| 128 |
-
|
| 129 |
print("DEBUG: 4. Plugins initialized.")
|
| 130 |
|
| 131 |
print("DEBUG: 5. Creating AgentSession...")
|
| 132 |
session = AgentSession(vad=vad, stt=stt, llm=llm_wrapper, tts=tts)
|
| 133 |
print("DEBUG: 6. AgentSession created.")
|
| 134 |
|
| 135 |
-
print("DEBUG: 7. Starting session
|
| 136 |
-
# The .start() method begins the main processing loop for the agent
|
| 137 |
await session.start(agent=VoiceAssistant(), room=room)
|
| 138 |
|
| 139 |
print("DEBUG: 8. Session started. Generating initial greeting...")
|
| 140 |
-
# Proactively generate a greeting to start the conversation
|
| 141 |
await send_agent_state(room, "speaking")
|
| 142 |
await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
|
| 143 |
|
|
@@ -147,37 +99,19 @@ async def run_agent_session(room_name: str, agent_token: str):
|
|
| 147 |
print(f"FATAL ERROR in agent session: {e}")
|
| 148 |
print(traceback.format_exc())
|
| 149 |
finally:
|
| 150 |
-
# Ensure cleanup and disconnection on exit or error
|
| 151 |
print(f"DEBUG: Agent session for room {room_name} is ending. Cleaning up.")
|
| 152 |
await room.disconnect()
|
| 153 |
|
| 154 |
-
# --- FastAPI API Endpoints ---
|
| 155 |
@app.post("/join-room")
|
| 156 |
async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
|
| 157 |
-
"""
|
| 158 |
-
API endpoint to trigger an agent to join a room.
|
| 159 |
-
It runs the agent session as a background task to not block the HTTP response.
|
| 160 |
-
"""
|
| 161 |
print(f"DEBUG: Received POST request to /join-room for: {req.room_name}")
|
| 162 |
background_tasks.add_task(run_agent_session, req.room_name, req.agent_token)
|
| 163 |
return {"status": "agent_triggered"}
|
| 164 |
|
| 165 |
@app.get("/")
|
| 166 |
async def root():
|
| 167 |
-
"""A simple health check endpoint."""
|
| 168 |
return {"status": "avurna_agent_server_online"}
|
| 169 |
|
| 170 |
-
# --- Main Execution Block ---
|
| 171 |
if __name__ == "__main__":
|
| 172 |
-
|
| 173 |
-
validate_env_vars([
|
| 174 |
-
"HUME_API_KEY",
|
| 175 |
-
"LIVEKIT_URL",
|
| 176 |
-
"LIVEKIT_API_KEY",
|
| 177 |
-
"LIVEKIT_API_SECRET",
|
| 178 |
-
"GROQ_API_KEY",
|
| 179 |
-
"GOOGLE_API_KEY"
|
| 180 |
-
])
|
| 181 |
-
|
| 182 |
-
# Run the FastAPI server using Uvicorn
|
| 183 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
Agent Session for Avurna Flow (Final Corrected Version)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
"""
|
| 5 |
import os
|
| 6 |
import json
|
|
|
|
| 9 |
from pydantic import BaseModel
|
| 10 |
import uvicorn
|
| 11 |
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
+
from contextlib import asynccontextmanager # --- KEY: Import the required decorator ---
|
| 13 |
|
|
|
|
| 14 |
from livekit.rtc import Room
|
| 15 |
from livekit.agents import Agent, AgentSession
|
| 16 |
from livekit.agents.llm import LLM
|
| 17 |
from livekit.agents.stt.stream_adapter import StreamAdapter
|
| 18 |
from livekit.plugins.google import LLM as GoogleLLM
|
| 19 |
from livekit.plugins.groq import STT
|
| 20 |
+
from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
|
| 21 |
from livekit.plugins.silero import VAD
|
| 22 |
|
| 23 |
from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
|
| 24 |
from src.utils import validate_env_vars
|
| 25 |
|
|
|
|
| 26 |
# --- FastAPI application setup ---
app = FastAPI()

# CORS: allow any origin so browser clients served from other hosts can call
# this API directly.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers for credentialed requests (the wildcard cannot be
# echoed back with credentials) — confirm whether credentials are needed.
origins = ["*"]
app.add_middleware(CORSMiddleware, allow_origins=origins, allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
|
| 29 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
class JoinRoomRequest(BaseModel):
    """Request body for the POST /join-room endpoint."""

    # Name of the LiveKit room the agent should join.
    room_name: str
    # Access token authorizing the agent to connect to that room.
    agent_token: str
| 33 |
|
|
|
|
| 34 |
class VoiceAssistant(Agent):
    """Voice assistant agent configured with the project's system prompt."""

    def __init__(self):
        # All behavior is inherited from Agent; only the instructions differ.
        super().__init__(instructions=SYSTEM_PROMPT)
| 37 |
|
|
|
|
| 38 |
async def send_agent_state(room: Room, state: str):
    """Publish the agent's current state to the room's data channel.

    The state (e.g. "thinking", "listening", "speaking") is serialized as a
    small JSON message so clients can parse it easily. Failures are logged
    and swallowed deliberately: a missed state update must never take down
    the agent session.
    """
    try:
        payload = json.dumps({"type": "agent_state", "state": state})
        await room.local_participant.publish_data(payload)
        print(f"DEBUG: Sent agent state: {state}")
    except Exception as e:
        print(f"DEBUG: Error publishing agent state: {e}")
| 45 |
|
|
|
|
| 46 |
class LLMStateWrapper(LLM):
    """Wraps an LLM so that agent-state updates bracket each chat turn.

    ``chat`` publishes "thinking" before delegating to the wrapped LLM and
    "listening" once the caller's ``async with`` context exits (i.e. after
    the response stream has been fully consumed downstream).
    """

    def __init__(self, llm: LLM, room: Room):
        # llm:  the real LLM implementation to delegate to.
        # room: LiveKit room used for publishing state messages.
        super().__init__()
        self._llm = llm
        self._room = room

    # AgentSession consumes the LLM via `async with llm.chat(...)`;
    # @asynccontextmanager turns this generator into a valid async context
    # manager so that protocol is satisfied (this is the commit's fix for
    # the earlier 'async_generator does not support the asynchronous context
    # manager protocol' TypeError).
    @asynccontextmanager
    async def chat(self, **kwargs):
        # Entered: tell clients the agent is processing their input.
        await send_agent_state(self._room, "thinking")
        try:
            # Delegate to the wrapped LLM and hand its stream to the caller.
            # NOTE(review): assumes self._llm.chat(**kwargs) itself returns an
            # async context manager yielding the token stream — confirm
            # against the installed livekit-agents version.
            async with self._llm.chat(**kwargs) as stream:
                yield stream
        finally:
            # Exited (normally or via error/cancellation): back to listening.
            await send_agent_state(self._room, "listening")
| 66 |
|
|
|
|
| 67 |
async def run_agent_session(room_name: str, agent_token: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
livekit_url = os.getenv("LIVEKIT_URL")
|
| 69 |
room = Room()
|
| 70 |
|
|
|
|
| 73 |
await room.connect(livekit_url, agent_token)
|
| 74 |
print("DEBUG: 2. Connection successful.")
|
| 75 |
|
|
|
|
| 76 |
await send_agent_state(room, "listening")
|
| 77 |
|
| 78 |
print("DEBUG: 3. Initializing plugins...")
|
|
|
|
|
|
|
| 79 |
llm_wrapper = LLMStateWrapper(llm=GoogleLLM(model="gemini-1.5-flash"), room=room)
|
|
|
|
|
|
|
| 80 |
vad = VAD.load(min_speech_duration=0.1)
|
|
|
|
|
|
|
| 81 |
stt = StreamAdapter(stt=STT(model="whisper-large-v3-turbo"), vad=vad)
|
|
|
|
|
|
|
| 82 |
tts = TTS(voice=VoiceByName(name="Tiktok Fashion Influencer"), instant_mode=True)
|
|
|
|
| 83 |
print("DEBUG: 4. Plugins initialized.")
|
| 84 |
|
| 85 |
print("DEBUG: 5. Creating AgentSession...")
|
| 86 |
session = AgentSession(vad=vad, stt=stt, llm=llm_wrapper, tts=tts)
|
| 87 |
print("DEBUG: 6. AgentSession created.")
|
| 88 |
|
| 89 |
+
print("DEBUG: 7. Starting session...")
|
|
|
|
| 90 |
await session.start(agent=VoiceAssistant(), room=room)
|
| 91 |
|
| 92 |
print("DEBUG: 8. Session started. Generating initial greeting...")
|
|
|
|
| 93 |
await send_agent_state(room, "speaking")
|
| 94 |
await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
|
| 95 |
|
|
|
|
| 99 |
print(f"FATAL ERROR in agent session: {e}")
|
| 100 |
print(traceback.format_exc())
|
| 101 |
finally:
|
|
|
|
| 102 |
print(f"DEBUG: Agent session for room {room_name} is ending. Cleaning up.")
|
| 103 |
await room.disconnect()
|
| 104 |
|
|
|
|
| 105 |
@app.post("/join-room")
|
| 106 |
async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
print(f"DEBUG: Received POST request to /join-room for: {req.room_name}")
|
| 108 |
background_tasks.add_task(run_agent_session, req.room_name, req.agent_token)
|
| 109 |
return {"status": "agent_triggered"}
|
| 110 |
|
| 111 |
@app.get("/")
|
| 112 |
async def root():
|
|
|
|
| 113 |
return {"status": "avurna_agent_server_online"}
|
| 114 |
|
|
|
|
| 115 |
if __name__ == "__main__":
|
| 116 |
+
validate_env_vars(["HUME_API_KEY", "LIVEKIT_URL", "LIVEKIT_API_KEY", "LIVEKIT_API_SECRET", "GROQ_API_KEY", "GOOGLE_API_KEY"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|