WebEssentz committed on
Commit
ee7e520
·
1 Parent(s): 7af59d7

Realtime Flow

Browse files
Files changed (1) hide show
  1. src/agent_session/main.py +49 -37
src/agent_session/main.py CHANGED
@@ -4,13 +4,14 @@ Agent Session for Avurna Flow, wrapped in a FastAPI server.
4
  """
5
  import asyncio
6
  import os
7
- import json # Import the json library
8
  from fastapi import FastAPI, BackgroundTasks
9
- from fastapi.middleware.cors import CORSMiddleware
10
  from pydantic import BaseModel
11
  import uvicorn
 
12
 
13
  from livekit.agents import Agent, AgentSession, JobContext
 
14
  from livekit.agents.stt.stream_adapter import StreamAdapter
15
  from livekit.plugins.google import LLM as GoogleLLM
16
  from livekit.plugins.groq import STT
@@ -23,13 +24,12 @@ from src.utils import validate_env_vars
23
  app = FastAPI()
24
 
25
  origins = ["*"]
26
-
27
  app.add_middleware(
28
  CORSMiddleware,
29
  allow_origins=origins,
30
  allow_credentials=True,
31
- allow_methods=["*"], # Allows all methods (GET, POST, etc.)
32
- allow_headers=["*"], # Allows all headers
33
  )
34
 
35
  class JoinRoomRequest(BaseModel):
@@ -40,9 +40,7 @@ class VoiceAssistant(Agent):
40
  def __init__(self):
41
  super().__init__(instructions=SYSTEM_PROMPT)
42
 
43
- # --- KEY CHANGE: Function to send state to the frontend ---
44
  async def send_agent_state(ctx: JobContext, state: str):
45
- """Helper to notify the frontend of the agent's current state."""
46
  try:
47
  msg = json.dumps({"type": "agent_state", "state": state})
48
  await ctx.room.local_participant.publish_data(msg)
@@ -50,41 +48,55 @@ async def send_agent_state(ctx: JobContext, state: str):
50
  except Exception as e:
51
  print(f"Error publishing agent state: {e}")
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  async def run_agent_session(room_name: str, agent_token: str):
54
  livekit_url = os.getenv("LIVEKIT_URL")
55
  ctx = JobContext(room_name=room_name, livekit_url=livekit_url, token=agent_token)
56
 
57
- await ctx.connect()
58
- await send_agent_state(ctx, "listening") # Start in listening state
59
-
60
- vad = VAD.load(min_speech_duration=0.1, min_silence_duration=0.5)
61
-
62
- # --- This is a conceptual wrapper for the LLM to inject state messages ---
63
- # We will wrap the original LLM to send messages before and after it runs.
64
- original_llm = GoogleLLM(model="gemini-1.5-flash", temperature=0.5)
65
-
66
- async def llm_wrapper_fnc(history):
67
- await send_agent_state(ctx, "thinking") # State change before LLM call
68
- result = await original_llm.chat(history)
69
- await send_agent_state(ctx, "speaking") # State change after LLM, before TTS
70
- return result
71
-
72
- session = AgentSession(
73
- vad=vad,
74
- stt=StreamAdapter(stt=STT(model="whisper-large-v3-turbo", language="en"), vad=vad),
75
- llm=llm_wrapper_fnc, # Use our wrapped function
76
- tts=TTS(voice=VoiceByName(name="Tiktok Fashion Influencer", provider=VoiceProvider.hume), instant_mode=True),
77
- )
78
-
79
- print(f"Agent starting session in room: {room_name}")
80
- await session.start(agent=VoiceAssistant(), room=ctx.room)
81
-
82
- # We'll also wrap the initial greeting
83
- await send_agent_state(ctx, "speaking")
84
- await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
85
- await send_agent_state(ctx, "listening") # Return to listening after greeting
86
 
87
- print(f"Agent session ended for room: {room_name}")
 
 
 
 
88
 
89
 
90
  @app.post("/join-room")
 
4
  """
5
  import asyncio
6
  import os
7
+ import json
8
  from fastapi import FastAPI, BackgroundTasks
 
9
  from pydantic import BaseModel
10
  import uvicorn
11
+ from fastapi.middleware.cors import CORSMiddleware
12
 
13
  from livekit.agents import Agent, AgentSession, JobContext
14
+ from livekit.agents.llm import LLM
15
  from livekit.agents.stt.stream_adapter import StreamAdapter
16
  from livekit.plugins.google import LLM as GoogleLLM
17
  from livekit.plugins.groq import STT
 
24
  app = FastAPI()
25
 
26
  origins = ["*"]
 
27
  app.add_middleware(
28
  CORSMiddleware,
29
  allow_origins=origins,
30
  allow_credentials=True,
31
+ allow_methods=["*"],
32
+ allow_headers=["*"],
33
  )
34
 
35
  class JoinRoomRequest(BaseModel):
 
40
  def __init__(self):
41
  super().__init__(instructions=SYSTEM_PROMPT)
42
 
 
43
  async def send_agent_state(ctx: JobContext, state: str):
 
44
  try:
45
  msg = json.dumps({"type": "agent_state", "state": state})
46
  await ctx.room.local_participant.publish_data(msg)
 
48
  except Exception as e:
49
  print(f"Error publishing agent state: {e}")
50
 
51
+ # --- KEY FIX: Create a proper wrapper CLASS for the LLM ---
52
+ # This class conforms to the interface that AgentSession expects.
53
+ class LLMStateWrapper(LLM):
54
+ def __init__(self, llm: LLM, ctx: JobContext):
55
+ super().__init__()
56
+ self._llm = llm
57
+ self._ctx = ctx
58
+
59
+ async def chat(self, history):
60
+ await send_agent_state(self._ctx, "thinking")
61
+ res_stream = self._llm.chat(history)
62
+ await send_agent_state(self._ctx, "speaking")
63
+ return res_stream
64
+
65
+ # --- Main agent session logic ---
66
  async def run_agent_session(room_name: str, agent_token: str):
67
  livekit_url = os.getenv("LIVEKIT_URL")
68
  ctx = JobContext(room_name=room_name, livekit_url=livekit_url, token=agent_token)
69
 
70
+ try:
71
+ await ctx.connect()
72
+ await send_agent_state(ctx, "listening")
73
+
74
+ # --- KEY FIX: Instantiate the wrapper class correctly ---
75
+ llm_state_wrapper = LLMStateWrapper(
76
+ llm=GoogleLLM(model="gemini-1.5-flash", temperature=0.5),
77
+ ctx=ctx
78
+ )
79
+
80
+ vad = VAD.load(min_speech_duration=0.1, min_silence_duration=0.5)
81
+ session = AgentSession(
82
+ vad=vad,
83
+ stt=StreamAdapter(stt=STT(model="whisper-large-v3-turbo", language="en"), vad=vad),
84
+ llm=llm_state_wrapper, # Use the class instance here
85
+ tts=TTS(voice=VoiceByName(name="Tiktok Fashion Influencer", provider=VoiceProvider.hume), instant_mode=True),
86
+ )
87
+
88
+ print(f"Agent starting session in room: {room_name}")
89
+ await session.start(agent=VoiceAssistant(), room=ctx.room)
90
+
91
+ await send_agent_state(ctx, "speaking")
92
+ await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
93
+ await send_agent_state(ctx, "listening")
 
 
 
 
 
94
 
95
+ except Exception as e:
96
+ print(f"An error occurred during the agent session: {e}")
97
+ finally:
98
+ print(f"Agent session ended for room: {room_name}. Cleaning up.")
99
+ await ctx.disconnect()
100
 
101
 
102
  @app.post("/join-room")