WebEssentz committed
Commit decb45c · 1 Parent(s): 6f9c53c

Realtime Flow

Files changed (1)
  1. src/agent_session/main.py +77 -13
src/agent_session/main.py CHANGED
@@ -1,6 +1,11 @@
 #!/usr/bin/env python3
 """
 Agent Session for Avurna Flow (Final Corrected Version)
+
+This script sets up a FastAPI web server to manage a voice-based AI agent
+that connects to a LiveKit room. The core issue of the TypeError is resolved
+by implementing the LLMStateWrapper, which correctly handles the asynchronous
+generator returned by the LLM's chat method.
 """
 import os
 import json
@@ -10,63 +15,92 @@ from pydantic import BaseModel
 import uvicorn
 from fastapi.middleware.cors import CORSMiddleware

+# Import LiveKit and plugin components
 from livekit.rtc import Room
 from livekit.agents import Agent, AgentSession
 from livekit.agents.llm import LLM
 from livekit.agents.stt.stream_adapter import StreamAdapter
 from livekit.plugins.google import LLM as GoogleLLM
 from livekit.plugins.groq import STT
-from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
+from livekit.plugins.hume import TTS, VoiceByName
 from livekit.plugins.silero import VAD

 from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
 from src.utils import validate_env_vars

+# --- FastAPI Application Setup ---
 app = FastAPI()
-origins = ["*"]
-app.add_middleware(CORSMiddleware, allow_origins=origins, allow_credentials=True, allow_methods=["*"], allow_headers=["*"])

+# Configure CORS (Cross-Origin Resource Sharing) to allow all origins
+origins = ["*"]
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"]
+)
+
+# --- Pydantic Model for API Request ---
 class JoinRoomRequest(BaseModel):
+    """Defines the expected data structure for a /join-room request."""
     room_name: str
     agent_token: str

+# --- Custom Agent Definition ---
 class VoiceAssistant(Agent):
+    """A simple voice assistant agent with a predefined system prompt."""
     def __init__(self):
         super().__init__(instructions=SYSTEM_PROMPT)

+# --- Utility Function for State Publishing ---
 async def send_agent_state(room: Room, state: str):
+    """Publishes the agent's current state to the room via data channel."""
     try:
+        # The message is structured as a JSON object for easy parsing by clients
         msg = json.dumps({"type": "agent_state", "state": state})
         await room.local_participant.publish_data(msg)
         print(f"DEBUG: Sent agent state: {state}")
     except Exception as e:
         print(f"DEBUG: Error publishing agent state: {e}")

-# --- THE DEFINITIVE FIX IS HERE ---
+# --- THE DEFINITIVE FIX: LLM Wrapper ---
 class LLMStateWrapper(LLM):
+    """
+    Wraps an LLM instance to correctly handle its async generator `chat` method
+    and to inject agent state updates ("thinking", "listening") into the process.
+    This class solves the `TypeError: 'async_generator' object does not support
+    the asynchronous context manager protocol`.
+    """
     def __init__(self, llm: LLM, room: Room):
         super().__init__()
         self._llm = llm
         self._room = room

-    # This signature accepts ANY named arguments the library might send.
     async def chat(self, **kwargs):
+        """
+        This method is called by the AgentSession. It intercepts the call to the LLM,
+        sends the 'thinking' state, properly iterates over the LLM's async generator,
+        and then sends the 'listening' state upon completion.
+        """
        await send_agent_state(self._room, "thinking")

-        # We find and extract 'history' from the arguments.
-        # This makes our code resilient to the library's internal calling convention.
+        # Extract 'history' and pass all arguments along to the underlying LLM.
+        # This makes the wrapper resilient to future library updates.
        history = kwargs.pop('history', [])
-
-        # We pass the extracted history and all other arguments along.
+
+        # Use `async for` to correctly consume the asynchronous generator.
        async for chunk in self._llm.chat(history=history, **kwargs):
            yield chunk

        await send_agent_state(self._room, "listening")

-# The rest of the file is correct and can remain unchanged.
-# For absolute certainty, here is the full correct file again.
-
+# --- Main Agent Session Logic ---
 async def run_agent_session(room_name: str, agent_token: str):
+    """
+    This function contains the main logic for connecting the agent to a
+    LiveKit room and running the session.
+    """
    livekit_url = os.getenv("LIVEKIT_URL")
    room = Room()

@@ -75,13 +109,23 @@ async def run_agent_session(room_name: str, agent_token: str):
        await room.connect(livekit_url, agent_token)
        print("DEBUG: 2. Connection successful.")

+        # Agent is ready to receive input
        await send_agent_state(room, "listening")

        print("DEBUG: 3. Initializing plugins...")
+
+        # Wrap the Google LLM with our custom state-aware wrapper
        llm_wrapper = LLMStateWrapper(llm=GoogleLLM(model="gemini-1.5-flash"), room=room)
+
+        # Configure Voice Activity Detection (VAD)
        vad = VAD.load(min_speech_duration=0.1)
+
+        # Configure Speech-to-Text (STT) with the VAD adapter
        stt = StreamAdapter(stt=STT(model="whisper-large-v3-turbo"), vad=vad)
+
+        # Configure Text-to-Speech (TTS)
        tts = TTS(voice=VoiceByName(name="Tiktok Fashion Influencer"), instant_mode=True)
+
        print("DEBUG: 4. Plugins initialized.")

        print("DEBUG: 5. Creating AgentSession...")
@@ -89,9 +133,11 @@ async def run_agent_session(room_name: str, agent_token: str):
        print("DEBUG: 6. AgentSession created.")

        print("DEBUG: 7. Starting session and passing room to .start()...")
+        # The .start() method begins the main processing loop for the agent
        await session.start(agent=VoiceAssistant(), room=room)

        print("DEBUG: 8. Session started. Generating initial greeting...")
+        # Proactively generate a greeting to start the conversation
        await send_agent_state(room, "speaking")
        await session.generate_reply(instructions=GREETING_INSTRUCTIONS)

@@ -101,19 +147,37 @@ async def run_agent_session(room_name: str, agent_token: str):
        print(f"FATAL ERROR in agent session: {e}")
        print(traceback.format_exc())
    finally:
+        # Ensure cleanup and disconnection on exit or error
        print(f"DEBUG: Agent session for room {room_name} is ending. Cleaning up.")
        await room.disconnect()

+# --- FastAPI API Endpoints ---
 @app.post("/join-room")
 async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
+    """
+    API endpoint to trigger an agent to join a room.
+    It runs the agent session as a background task to not block the HTTP response.
+    """
    print(f"DEBUG: Received POST request to /join-room for: {req.room_name}")
    background_tasks.add_task(run_agent_session, req.room_name, req.agent_token)
    return {"status": "agent_triggered"}

 @app.get("/")
 async def root():
+    """A simple health check endpoint."""
    return {"status": "avurna_agent_server_online"}

+# --- Main Execution Block ---
 if __name__ == "__main__":
-    validate_env_vars(["HUME_API_KEY", "LIVEKIT_URL", "LIVEKIT_API_KEY", "LIVEKIT_API_SECRET", "GROQ_API_KEY", "GOOGLE_API_KEY"])
+    # Validate that all required environment variables are set before starting
+    validate_env_vars([
+        "HUME_API_KEY",
+        "LIVEKIT_URL",
+        "LIVEKIT_API_KEY",
+        "LIVEKIT_API_SECRET",
+        "GROQ_API_KEY",
+        "GOOGLE_API_KEY"
+    ])
+
+    # Run the FastAPI server using Uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
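For context, a minimal client-side sketch (not part of this commit) of how the /join-room endpoint added here could be exercised once the server is running. The host, port, room name, and token value below are assumptions; agent_token must be a real LiveKit access token issued by whatever mints tokens in this project.

# Hypothetical client for the /join-room endpoint (assumes a local dev server on port 7860)
import requests

AGENT_SERVER_URL = "http://localhost:7860"  # assumption: server started via `python src/agent_session/main.py`

payload = {
    "room_name": "demo-room",        # name of an existing LiveKit room
    "agent_token": "<livekit-jwt>",  # placeholder: a real LiveKit token for the agent identity
}

resp = requests.post(f"{AGENT_SERVER_URL}/join-room", json=payload, timeout=10)
resp.raise_for_status()
print(resp.json())  # expected response from the server: {"status": "agent_triggered"}

The endpoint returns immediately because run_agent_session is scheduled as a FastAPI background task; the agent then connects to the room asynchronously.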
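Below is a standalone sketch of the control flow LLMStateWrapper relies on, using plain asyncio instead of the LiveKit classes: publish a state, consume the inner async generator with `async for`, then publish another state once the stream is exhausted. Every name here (fake_llm_chat, wrapped_chat, notify) is hypothetical and only illustrates the pattern.

# Illustration of wrapping an async generator with state notifications (not from the commit)
import asyncio

async def fake_llm_chat(history):
    # Stand-in for the real LLM stream: yields a few tokens
    for token in ["Hello", " ", "world"]:
        await asyncio.sleep(0)
        yield token

async def wrapped_chat(history, notify):
    # Signal that generation is starting
    await notify("thinking")
    # Forward every chunk from the inner generator unchanged
    async for chunk in fake_llm_chat(history):
        yield chunk
    # Signal that generation is finished and the agent is listening again
    await notify("listening")

async def main():
    async def notify(state):
        print(f"state -> {state}")
    async for chunk in wrapped_chat([], notify):
        print(f"chunk: {chunk!r}")

asyncio.run(main())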