WebEssentz committed on
Commit
8e67cf0
·
1 Parent(s): 51a1cba

Realtime Flow

Browse files
Files changed (1) hide show
  1. src/agent_session/main.py +58 -75
src/agent_session/main.py CHANGED
@@ -1,4 +1,3 @@
1
- # Generated with 💚 by Avurna AI (2025)
2
  #!/usr/bin/env python3
3
 
4
  """
@@ -16,8 +15,7 @@ from pydantic import BaseModel
16
  import uvicorn
17
  from fastapi.middleware.cors import CORSMiddleware
18
  from livekit.rtc import Room
19
- from livekit.agents import Agent, AgentSession, JobContext
20
- from livekit.agents.llm import LLM
21
  from livekit.agents.stt.stream_adapter import StreamAdapter
22
  from livekit.plugins.google import LLM as GoogleLLM
23
  from livekit.plugins.groq import STT
@@ -27,7 +25,7 @@ from livekit.plugins.silero import VAD
27
  from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
28
  from src.utils import validate_env_vars
29
 
30
- # FastAPI app setup (unchanged)
31
  @asynccontextmanager
32
  async def lifespan(app: FastAPI):
33
  async with aiohttp.ClientSession() as session:
@@ -50,93 +48,78 @@ async def send_agent_state(room: Room, state: str):
50
  except Exception as e:
51
  print(f"DEBUG: Error publishing agent state: {e}")
52
 
53
class LLMStateWrapper(LLM):
    """Decorate an LLM so the room is notified of the agent's state.

    Publishes "thinking" to the room right before inference starts and
    restores "listening" once the chat stream is finished; the actual
    model call is delegated unchanged to the wrapped LLM.
    """

    def __init__(self, llm: LLM, room: Room):
        super().__init__()
        self._wrapped = llm
        self._target_room = room

    @asynccontextmanager
    async def chat(self, **kwargs):
        # NOTE(review): this assumes the wrapped LLM.chat is usable as an
        # async context manager — confirm against the installed livekit
        # agents version.
        await send_agent_state(self._target_room, "thinking")
        try:
            async with self._wrapped.chat(**kwargs) as stream:
                yield stream
        finally:
            # Restore the idle state no matter how the caller exits.
            await send_agent_state(self._target_room, "listening")
67
-
68
- # The VoiceAssistant is now simpler, as setup is handled externally.
69
class VoiceAssistant(Agent):
    """Voice agent bound to the project-wide system prompt.

    Pipeline wiring (STT/LLM/TTS) is performed externally, so this class
    only carries the instructions and a post-connection hook.
    """

    def __init__(self):
        super().__init__(instructions=SYSTEM_PROMPT)

    async def on_connected(self, ctx: JobContext):
        """Log the successful connection and advertise the idle state."""
        print("DEBUG: VoiceAssistant agent has connected to the room.")
        await send_agent_state(ctx.room, "listening")
78
-
79
-
80
- # --- THIS FUNCTION IS NOW CORRECTED BASED ON THE HUME DOCS ---
81
async def run_agent_session(room_name: str, agent_token: str):
    """Connect to a LiveKit room and run the full voice-agent pipeline.

    Builds the VAD, STT, LLM and TTS plugins, starts an AgentSession in
    the given room and speaks the greeting. Failures are logged rather
    than propagated (this runs as a background task), and the HTTP
    session and room connection are always cleaned up on exit.

    Args:
        room_name: Room to join (used for logging; the token scopes the
            actual room).
        agent_token: LiveKit access token for the agent participant.
    """
    livekit_url = os.getenv("LIVEKIT_URL")
    room = Room()
    session = None

    try:
        # Fail fast with a clear message instead of an opaque connect error.
        if not livekit_url:
            raise RuntimeError("LIVEKIT_URL environment variable is not set")

        print(f"DEBUG: 1. Connecting to LiveKit room '{room_name}'...")
        await room.connect(livekit_url, agent_token)
        print("DEBUG: 2. Connection successful.")

        # HTTP session shared with the TTS plugin; must outlive the agent.
        session = aiohttp.ClientSession()

        # 1. Instantiate all plugins.
        vad = VAD.load(min_speech_duration=0.1, min_silence_duration=0.5)
        tts = TTS(
            voice=VoiceByName(
                name="Male English Actor",
                provider=VoiceProvider.hume,
            ),
            instant_mode=True,
            http_session=session,
        )
        stt = StreamAdapter(stt=STT(model="whisper-large-v3-turbo"), vad=vad)
        google_llm = GoogleLLM(model="gemini-2.5-flash")
        llm_wrapper = LLMStateWrapper(llm=google_llm, room=room)

        # 2. Assemble the session from the plugins.
        agent_session = AgentSession(
            stt=stt,
            llm=llm_wrapper,
            tts=tts,
            vad=vad,
        )

        # 3. Start the agent and deliver the greeting.
        agent = VoiceAssistant()
        print("DEBUG: Starting AgentSession...")
        await send_agent_state(room, "speaking")
        await agent_session.start(agent=agent, room=room)
        await agent_session.generate_reply(instructions=GREETING_INSTRUCTIONS)
        print("DEBUG: Initial greeting complete. Agent is now fully operational.")
        # NOTE(review): control falls straight through to cleanup after the
        # greeting — confirm AgentSession keeps itself alive in background
        # tasks, otherwise the agent disconnects right after speaking.
    except Exception as e:
        print(f"FATAL ERROR in agent session: {e}")
        print(traceback.format_exc())
    finally:
        print(f"DEBUG: Agent session for room {room_name} is ending.")
        # The room disconnect must happen even if closing the HTTP
        # session raises (the original skipped it in that case).
        try:
            if session:
                await session.close()
        finally:
            await room.disconnect()
134
-
135
 
136
@app.post("/join-room")
async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
    """Kick off a voice-agent session for the requested room.

    The agent runs as a FastAPI background task so the HTTP response
    returns immediately.
    """
    print(f"DEBUG: Received POST request to /join-room for: {req.room_name}")
    background_tasks.add_task(run_agent_session, req.room_name, req.agent_token)
    return {"status": "agent_triggered"}
141
 
142
  @app.get("/")
 
 
1
  #!/usr/bin/env python3
2
 
3
  """
 
15
  import uvicorn
16
  from fastapi.middleware.cors import CORSMiddleware
17
  from livekit.rtc import Room
18
+ from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
 
19
  from livekit.agents.stt.stream_adapter import StreamAdapter
20
  from livekit.plugins.google import LLM as GoogleLLM
21
  from livekit.plugins.groq import STT
 
25
  from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
26
  from src.utils import validate_env_vars
27
 
28
+ # FastAPI app setup
29
  @asynccontextmanager
30
  async def lifespan(app: FastAPI):
31
  async with aiohttp.ClientSession() as session:
 
48
  except Exception as e:
49
  print(f"DEBUG: Error publishing agent state: {e}")
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
class VoiceAssistant(Agent):
    """Voice agent that carries the project-wide system prompt.

    All pipeline configuration lives in the entrypoint; this class only
    supplies the instructions.
    """

    def __init__(self):
        super().__init__(instructions=SYSTEM_PROMPT)
54
 
55
async def entrypoint(ctx: JobContext) -> None:
    """Connect to the room and run the STT → LLM → TTS voice pipeline.

    Greets the user once the session has started.
    """
    await ctx.connect()

    # Voice-activity detection, shared between the session and the
    # StreamAdapter that buffers audio for the non-streaming STT model.
    vad = VAD.load(
        min_speech_duration=0.1,
        min_silence_duration=0.5,
    )

    speech_to_text = StreamAdapter(
        stt=STT(
            model="whisper-large-v3-turbo",
            language="en",
        ),
        vad=vad,
    )
    language_model = GoogleLLM(
        model="gemini-2.5-flash",
        temperature=0.5,
    )
    text_to_speech = TTS(
        voice=VoiceByName(
            name="Male English Actor",
            provider=VoiceProvider.hume,
        ),
        instant_mode=True,
    )

    session = AgentSession(
        vad=vad,
        stt=speech_to_text,
        llm=language_model,
        tts=text_to_speech,
    )

    await session.start(agent=VoiceAssistant(), room=ctx.room)
    await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
91
+
92
+ # Custom entrypoint for FastAPI integration
93
async def run_agent_with_room(room_name: str, agent_token: str):
    """Run the agent `entrypoint` against one specific room (FastAPI path).

    Wraps the room in a minimal JobContext-shaped object so the normal
    `entrypoint` can be reused outside the livekit worker CLI. Failures
    are logged (this runs as a background task) and the room is always
    disconnected if it was connected.

    Args:
        room_name: Name of the room the agent should join.
        agent_token: LiveKit access token for the agent participant.
    """
    livekit_url = os.getenv("LIVEKIT_URL")

    # Minimal stand-in for livekit.agents.JobContext — only provides the
    # attributes `entrypoint` actually touches (`connect()` and `.room`).
    # NOTE(review): any future use of other JobContext members inside
    # `entrypoint` will break this shim.
    class MockJobContext:
        def __init__(self, room_name: str, agent_token: str):
            self.room_name = room_name
            self.agent_token = agent_token
            self.room = Room()
            # Tracks whether connect() succeeded so cleanup can be safe.
            self.connected = False

        async def connect(self):
            # Fail fast with a clear message instead of an opaque
            # connect error when the env var is missing.
            if not livekit_url:
                raise RuntimeError("LIVEKIT_URL environment variable is not set")
            await self.room.connect(livekit_url, self.agent_token)
            self.connected = True

    ctx = MockJobContext(room_name, agent_token)

    try:
        await entrypoint(ctx)
    except Exception as e:
        print(f"FATAL ERROR in agent session: {e}")
        print(traceback.format_exc())
    finally:
        # Only disconnect a room that actually connected; the original
        # unconditionally disconnected even after a failed connect.
        if ctx.connected:
            await ctx.room.disconnect()
 
 
 
 
118
 
119
@app.post("/join-room")
async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
    """Trigger the voice agent to join the requested room.

    The session runs as a FastAPI background task so this endpoint
    responds immediately.
    """
    print(f"DEBUG: Received POST request to /join-room for: {req.room_name}")
    background_tasks.add_task(run_agent_with_room, req.room_name, req.agent_token)
    return {"status": "agent_triggered"}
124
 
125
  @app.get("/")