Spaces:
Build error
Build error
Commit
·
dbcd819
1
Parent(s):
44afed5
Realtime Flow
Browse files- src/agent_session/main.py +34 -46
src/agent_session/main.py
CHANGED
|
@@ -1,17 +1,18 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
Agent Session for Avurna Flow, wrapped in a FastAPI server. (
|
| 4 |
"""
|
| 5 |
-
import asyncio
|
| 6 |
import os
|
| 7 |
import json
|
| 8 |
-
import traceback
|
| 9 |
from fastapi import FastAPI, BackgroundTasks
|
| 10 |
from pydantic import BaseModel
|
| 11 |
import uvicorn
|
| 12 |
from fastapi.middleware.cors import CORSMiddleware
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
| 15 |
from livekit.agents.llm import LLM
|
| 16 |
from livekit.agents.stt.stream_adapter import StreamAdapter
|
| 17 |
from livekit.plugins.google import LLM as GoogleLLM
|
|
@@ -22,16 +23,9 @@ from livekit.plugins.silero import VAD
|
|
| 22 |
from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
|
| 23 |
from src.utils import validate_env_vars
|
| 24 |
|
| 25 |
-
# --- FastAPI and CORS setup (unchanged) ---
|
| 26 |
app = FastAPI()
|
| 27 |
origins = ["*"]
|
| 28 |
-
app.add_middleware(
|
| 29 |
-
CORSMiddleware,
|
| 30 |
-
allow_origins=origins,
|
| 31 |
-
allow_credentials=True,
|
| 32 |
-
allow_methods=["*"],
|
| 33 |
-
allow_headers=["*"],
|
| 34 |
-
)
|
| 35 |
|
| 36 |
class JoinRoomRequest(BaseModel):
|
| 37 |
room_name: str
|
|
@@ -39,50 +33,45 @@ class JoinRoomRequest(BaseModel):
|
|
| 39 |
|
| 40 |
class VoiceAssistant(Agent):
|
| 41 |
def __init__(self):
|
| 42 |
-
super().__init__(
|
| 43 |
|
| 44 |
-
async def send_agent_state(
|
| 45 |
try:
|
| 46 |
msg = json.dumps({"type": "agent_state", "state": state})
|
| 47 |
-
await
|
| 48 |
print(f"DEBUG: Sent agent state: {state}")
|
| 49 |
except Exception as e:
|
| 50 |
print(f"DEBUG: Error publishing agent state: {e}")
|
| 51 |
|
| 52 |
-
# --- KEY FIX 2: Correctly yield from the wrapped LLM chat method ---
|
| 53 |
class LLMStateWrapper(LLM):
|
| 54 |
-
def __init__(self, llm: LLM,
|
| 55 |
super().__init__()
|
| 56 |
self._llm = llm
|
| 57 |
-
self.
|
| 58 |
|
| 59 |
async def chat(self, history):
|
| 60 |
-
await send_agent_state(self.
|
| 61 |
-
# The `yield from` is crucial for streaming responses.
|
| 62 |
async for chunk in self._llm.chat(history):
|
| 63 |
yield chunk
|
| 64 |
-
|
| 65 |
-
|
| 66 |
|
| 67 |
async def run_agent_session(room_name: str, agent_token: str):
|
| 68 |
-
# --- KEY FIX 1: Add aggressive "breadcrumb" logging ---
|
| 69 |
-
print(f"DEBUG: Starting run_agent_session for room: {room_name}")
|
| 70 |
livekit_url = os.getenv("LIVEKIT_URL")
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
# The 'room_name' argument is removed as it's derived from the token.
|
| 74 |
-
ctx = JobContext(token=agent_token)
|
| 75 |
-
|
| 76 |
try:
|
| 77 |
-
print("DEBUG: 1. Connecting to LiveKit
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
| 81 |
|
| 82 |
-
print("DEBUG: 3. Initializing plugins
|
| 83 |
-
|
| 84 |
llm=GoogleLLM(model="gemini-1.5-flash", temperature=0.5),
|
| 85 |
-
|
| 86 |
)
|
| 87 |
vad = VAD.load(min_speech_duration=0.1, min_silence_duration=0.5)
|
| 88 |
stt = StreamAdapter(stt=STT(model="whisper-large-v3-turbo", language="en"), vad=vad)
|
|
@@ -90,36 +79,35 @@ async def run_agent_session(room_name: str, agent_token: str):
|
|
| 90 |
print("DEBUG: 4. Plugins initialized.")
|
| 91 |
|
| 92 |
print("DEBUG: 5. Creating AgentSession...")
|
|
|
|
| 93 |
session = AgentSession(
|
|
|
|
| 94 |
vad=vad,
|
| 95 |
stt=stt,
|
| 96 |
-
llm=
|
| 97 |
tts=tts,
|
| 98 |
)
|
| 99 |
print("DEBUG: 6. AgentSession created. Starting session now...")
|
| 100 |
-
await session.start(agent=VoiceAssistant()
|
| 101 |
|
| 102 |
print("DEBUG: 7. Session started. Generating initial greeting...")
|
| 103 |
-
await send_agent_state(
|
| 104 |
await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
|
| 105 |
-
# Note: The state is now set back to 'listening' inside the LLM wrapper
|
| 106 |
|
| 107 |
-
print("DEBUG: 8. Initial greeting complete. Agent is now fully operational.")
|
| 108 |
|
| 109 |
except Exception as e:
|
| 110 |
-
# This will now print the FULL error to your Hugging Face logs
|
| 111 |
print(f"FATAL ERROR in agent session: {e}")
|
| 112 |
print(traceback.format_exc())
|
| 113 |
finally:
|
| 114 |
print(f"DEBUG: Agent session for room {room_name} is ending. Cleaning up.")
|
| 115 |
-
await
|
| 116 |
-
|
| 117 |
|
| 118 |
@app.post("/join-room")
|
| 119 |
async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
|
| 120 |
-
print(f"DEBUG: Received request
|
| 121 |
background_tasks.add_task(run_agent_session, req.room_name, req.agent_token)
|
| 122 |
-
return {"status": "
|
| 123 |
|
| 124 |
@app.get("/")
|
| 125 |
async def root():
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
+
Agent Session for Avurna Flow, wrapped in a FastAPI server. (Corrected Connection Logic)
|
| 4 |
"""
|
|
|
|
| 5 |
import os
|
| 6 |
import json
|
| 7 |
+
import traceback
|
| 8 |
from fastapi import FastAPI, BackgroundTasks
|
| 9 |
from pydantic import BaseModel
|
| 10 |
import uvicorn
|
| 11 |
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
|
| 13 |
+
# --- KEY CHANGE: Import the correct connection tools ---
|
| 14 |
+
from livekit.rtc import Room, aio
|
| 15 |
+
from livekit.agents import Agent, AgentSession
|
| 16 |
from livekit.agents.llm import LLM
|
| 17 |
from livekit.agents.stt.stream_adapter import StreamAdapter
|
| 18 |
from livekit.plugins.google import LLM as GoogleLLM
|
|
|
|
| 23 |
from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
|
| 24 |
from src.utils import validate_env_vars
|
| 25 |
|
|
|
|
| 26 |
# --- FastAPI application with a fully permissive CORS policy ---
app = FastAPI()
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
class JoinRoomRequest(BaseModel):
|
| 31 |
room_name: str
|
|
|
|
| 33 |
|
| 34 |
class VoiceAssistant(Agent):
    """Voice agent configured with the application's system prompt."""

    def __init__(self):
        # FIX: SYSTEM_PROMPT is imported but was never passed, leaving the
        # agent without instructions (livekit-agents' Agent expects them).
        super().__init__(instructions=SYSTEM_PROMPT)
|
| 37 |
|
| 38 |
+
async def send_agent_state(room: Room, state: str):
    """Broadcast the agent's current state to room participants.

    Publishes a JSON payload ({"type": "agent_state", "state": ...}) on the
    room's data channel. Best-effort by design: any failure is logged and
    swallowed so a state update can never crash the session.
    """
    try:
        payload = json.dumps({"type": "agent_state", "state": state})
        await room.local_participant.publish_data(payload)
        print(f"DEBUG: Sent agent state: {state}")
    except Exception as e:
        print(f"DEBUG: Error publishing agent state: {e}")
|
| 45 |
|
|
|
|
| 46 |
class LLMStateWrapper(LLM):
    """Decorator around an LLM that broadcasts agent-state transitions.

    Sends "thinking" before inference starts and "listening" once the wrapped
    model's stream is exhausted, while re-yielding every chunk unchanged.
    """

    def __init__(self, llm: LLM, room: Room):
        super().__init__()
        self._llm = llm
        self._room = room

    async def chat(self, history):
        # Announce that the agent is busy before any tokens are produced.
        await send_agent_state(self._room, "thinking")

        # Re-yield each chunk so callers still get a streaming response.
        async for chunk in self._llm.chat(history):
            yield chunk

        # NOTE(review): this fires when the LLM stream ends, which may be
        # before TTS playback finishes — confirm "listening" isn't shown to
        # clients while audio is still playing.
        await send_agent_state(self._room, "listening")
|
| 58 |
|
| 59 |
async def run_agent_session(room_name: str, agent_token: str):
    """Connect to a LiveKit room and run the voice-agent session to completion.

    Args:
        room_name: Room name, used for logging only — the room actually
            joined is derived from the token's grants.
        agent_token: LiveKit access token authorizing the agent to join.

    All errors are caught and logged in full (the Space's logs are the only
    visibility into a background task); the room is always disconnected.
    """
    livekit_url = os.getenv("LIVEKIT_URL")
    room = Room()  # Create a Room object
    try:
        print(f"DEBUG: 1. Connecting to LiveKit room '{room_name}' at {livekit_url}...")
        # FIX: `livekit.rtc` exposes no `aio.connect` helper — that call is a
        # likely cause of the build/startup error. The documented way to join
        # is Room.connect(url, token).
        await room.connect(livekit_url, agent_token)
        print("DEBUG: 2. Connection successful. Agent is in the room.")

        await send_agent_state(room, "listening")

        print("DEBUG: 3. Initializing plugins...")
        llm_wrapper = LLMStateWrapper(
            llm=GoogleLLM(model="gemini-1.5-flash", temperature=0.5),
            room=room,
        )
        vad = VAD.load(min_speech_duration=0.1, min_silence_duration=0.5)
        stt = StreamAdapter(stt=STT(model="whisper-large-v3-turbo", language="en"), vad=vad)
        # NOTE(review): `tts` is defined on an unchanged line omitted from
        # this diff view (new-file line 78); it is referenced as-is below —
        # confirm it exists in the full file.
        print("DEBUG: 4. Plugins initialized.")

        print("DEBUG: 5. Creating AgentSession...")
        # --- KEY FIX: Pass the 'room' object directly to AgentSession ---
        session = AgentSession(
            room=room,  # Pass the connected room
            vad=vad,
            stt=stt,
            llm=llm_wrapper,
            tts=tts,
        )
        print("DEBUG: 6. AgentSession created. Starting session now...")
        await session.start(agent=VoiceAssistant())

        print("DEBUG: 7. Session started. Generating initial greeting...")
        await send_agent_state(room, "speaking")
        await session.generate_reply(instructions=GREETING_INSTRUCTIONS)

        print("DEBUG: 8. Initial greeting complete. Agent is now fully operational and listening.")

    except Exception as e:
        # Print the FULL traceback so the hosting logs show the real failure.
        print(f"FATAL ERROR in agent session: {e}")
        print(traceback.format_exc())
    finally:
        print(f"DEBUG: Agent session for room {room_name} is ending. Cleaning up.")
        await room.disconnect()
|
|
|
|
| 105 |
|
| 106 |
@app.post("/join-room")
async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
    """Schedule an agent session for the requested room and return at once.

    The session itself runs as a FastAPI background task.
    """
    target_room = req.room_name
    print(f"DEBUG: Received POST request to /join-room for: {target_room}")
    # NOTE(review): assumes JoinRoomRequest also declares `agent_token` on a
    # line hidden by the diff view — confirm against the full model.
    background_tasks.add_task(run_agent_session, target_room, req.agent_token)
    return {"status": "agent_triggered"}
|
| 111 |
|
| 112 |
@app.get("/")
|
| 113 |
async def root():
|