Spaces:

WebEssentz
/

gent

Build error

App Files Files Community

WebEssentz committed on Jul 13, 2025

Commit

71198d5

1 Parent(s): 9270a01

Realtime Flow

Browse files

Files changed (1) hide show

src/agent_session/main.py +45 -17

src/agent_session/main.py CHANGED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-Agent Session for Avurna Flow (Corrected Code)
 """
 import os
 import json
@@ -14,12 +14,12 @@ from fastapi.middleware.cors import CORSMiddleware
 from livekit.rtc import Room
 from livekit.agents import Agent, AgentSession
-from livekit.agents.utils import http_context
 from livekit.plugins.google import LLM as GoogleLLM
 from livekit.plugins.groq import STT
 from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
 from livekit.plugins.silero import VAD
-from livekit.agents.stt import StreamAdapter
 from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
 from src.utils import validate_env_vars
@@ -42,42 +42,70 @@ class VoiceAssistant(Agent):
     def __init__(self):
         super().__init__(instructions=SYSTEM_PROMPT)
 async def run_agent_session(room_name: str, agent_token: str, http_session: aiohttp.ClientSession):
     livekit_url = os.getenv("LIVEKIT_URL")
     room = Room()
-    token = http_context.set(http_session)
     try:
         print(f"DEBUG: 1. Connecting to LiveKit room '{room_name}'...")
         await room.connect(livekit_url, agent_token)
         print("DEBUG: 2. Connection successful.")
-        print("DEBUG: 3. Initializing plugins...")
         google_llm = GoogleLLM(model="gemini-1.5-flash")
-        tts = TTS(voice=VoiceByName(name="Tiktok Fashion Influencer"), instant_mode=True)
         vad = VAD.load(min_speech_duration=0.1)
         stt = StreamAdapter(stt=STT(model="whisper-large-v3-turbo"), vad=vad)
         print("DEBUG: 4. Plugins initialized.")
         print("DEBUG: 5. Creating AgentSession...")
-        # --- THE DEFINITIVE FIX ---
-        # Pass the original 'vad' object directly to the AgentSession.
-        session = AgentSession(
-            vad=vad,
-            stt=stt,
-            llm=google_llm,
-            tts=tts,
-        )
         print("DEBUG: 6. AgentSession created.")
         print("DEBUG: 7. Starting session...")
         await session.start(agent=VoiceAssistant(), room=room)
         print("DEBUG: 8. Session started. Generating initial greeting...")
-        await session.say(text=GREETING_INSTRUCTIONS)
         print("DEBUG: 9. Initial greeting complete. Agent is now fully operational.")
@@ -87,7 +115,7 @@ async def run_agent_session(room_name: str, agent_token: str, http_session: aioh
     finally:
         print(f"DEBUG: Agent session for room {room_name} is ending. Cleaning up.")
         await room.disconnect()
-        http_context.reset(token)
 @app.post("/join-room")
 async def join_room(req: JoinRoomRequest, request: Request, background_tasks: BackgroundTasks):

 #!/usr/bin/env python3
 """
+Agent Session for Avurna Flow (Final Corrected Version with TARGETED HTTP Session Management)
 """
 import os
 import json
 from livekit.rtc import Room
 from livekit.agents import Agent, AgentSession
+from livekit.agents.llm import LLM
+from livekit.agents.stt.stream_adapter import StreamAdapter
 from livekit.plugins.google import LLM as GoogleLLM
 from livekit.plugins.groq import STT
 from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
 from livekit.plugins.silero import VAD
 from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
 from src.utils import validate_env_vars
     def __init__(self):
         """Initialize the agent, passing SYSTEM_PROMPT as its instructions."""
         super().__init__(instructions=SYSTEM_PROMPT)
async def send_agent_state(room: Room, state: str):
    """Publish the agent's current state to the room, best-effort.

    Serializes ``{"type": "agent_state", "state": state}`` to JSON and sends
    it through ``room.local_participant.publish_data``.

    Args:
        room: Room whose local participant publishes the message.
        state: State label to report (callers in this file use
            "listening", "thinking" and "speaking").

    Any exception raised while building or publishing the message is printed
    and swallowed, so a failed state update never interrupts the caller.
    """
    try:
        payload = json.dumps(dict(type="agent_state", state=state))
        publisher = room.local_participant
        await publisher.publish_data(payload)
        print(f"DEBUG: Sent agent state: {state}")
    except Exception as exc:
        print(f"DEBUG: Error publishing agent state: {exc}")
class LLMStateWrapper(LLM):
    """LLM wrapper that reports agent state around every chat call.

    Delegates ``chat`` to the wrapped LLM, sending a "thinking" state to the
    room before the call and a "listening" state once the chat context exits.
    """

    def __init__(self, llm: LLM, room: Room):
        """Store the wrapped LLM and the room that receives state updates.

        Args:
            llm: The LLM whose ``chat`` is delegated to.
            room: Room passed to ``send_agent_state`` for each update.
        """
        super().__init__()
        self._room = room
        self._llm = llm

    @asynccontextmanager
    async def chat(self, **kwargs):
        """Open a chat stream on the wrapped LLM, bracketed by state updates.

        All keyword arguments are forwarded unchanged to the wrapped LLM's
        ``chat``. Yields the stream produced by entering that call's async
        context.
        """
        await send_agent_state(self._room, "thinking")
        try:
            inner_chat = self._llm.chat(**kwargs)
            async with inner_chat as llm_stream:
                yield llm_stream
        finally:
            # Runs on normal exit and on error, so the reported state never
            # stays stuck on "thinking".
            await send_agent_state(self._room, "listening")
 async def run_agent_session(room_name: str, agent_token: str, http_session: aiohttp.ClientSession):
     livekit_url = os.getenv("LIVEKIT_URL")
     room = Room()
     try:
         print(f"DEBUG: 1. Connecting to LiveKit room '{room_name}'...")
         await room.connect(livekit_url, agent_token)
         print("DEBUG: 2. Connection successful.")
+        await send_agent_state(room, "listening")
+        print("DEBUG: 3. Initializing plugins with shared http_session...")
+        # --- THE DEFINITIVE FIX: Apply the session ONLY where needed ---
+        # Google LLM does NOT take a session argument.
         google_llm = GoogleLLM(model="gemini-1.5-flash")
+        # Hume TTS and Groq STT DO require the session.
         vad = VAD.load(min_speech_duration=0.1)
+        tts = TTS(voice=VoiceByName(name="Tiktok Fashion Influencer"), instant_mode=True)
         stt = StreamAdapter(stt=STT(model="whisper-large-v3-turbo"), vad=vad)
+        # Wrap the correctly initialized Google LLM
+        llm_wrapper = LLMStateWrapper(llm=google_llm, room=room)
         print("DEBUG: 4. Plugins initialized.")
         print("DEBUG: 5. Creating AgentSession...")
+        session = AgentSession(vad=vad, stt=stt, llm=llm_wrapper, tts=tts)
         print("DEBUG: 6. AgentSession created.")
         print("DEBUG: 7. Starting session...")
         await session.start(agent=VoiceAssistant(), room=room)
         print("DEBUG: 8. Session started. Generating initial greeting...")
+        await send_agent_state(room, "speaking")
+        await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
         print("DEBUG: 9. Initial greeting complete. Agent is now fully operational.")
     finally:
         print(f"DEBUG: Agent session for room {room_name} is ending. Cleaning up.")
         await room.disconnect()
 @app.post("/join-room")
 async def join_room(req: JoinRoomRequest, request: Request, background_tasks: BackgroundTasks):