Spaces:

WebEssentz
/

gent

Build error

App Files Community

WebEssentz committed on Jul 13, 2025

Commit

aab8473

1 Parent(s): 054b7cc

Realtime Flow

Browse files

Files changed (3) hide show

Dockerfile +0 -1
requirements.txt +1 -1
src/agent_session/main.py +35 -46

Dockerfile CHANGED Viewed

@@ -1,5 +1,4 @@
 # Dockerfile - Phase 1 Version
-# Generated with 💚 by Avurna AI (2025)
 # Use the official Python 3.11 slim image
 FROM python:3.11-slim

 # Dockerfile - Phase 1 Version
 # Use the official Python 3.11 slim image
 FROM python:3.11-slim

requirements.txt CHANGED Viewed

@@ -5,4 +5,4 @@ livekit-plugins-hume
 livekit-plugins-silero
 fastapi
 uvicorn
-livekit

 livekit-plugins-silero
 fastapi
 uvicorn
+livekit

src/agent_session/main.py CHANGED Viewed

@@ -4,11 +4,12 @@ Agent Session for Avurna Flow, wrapped in a FastAPI server.
 """
 import asyncio
 import os
 from fastapi import FastAPI, BackgroundTasks
 from pydantic import BaseModel
 import uvicorn
-from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions
 from livekit.agents.stt.stream_adapter import StreamAdapter
 from livekit.plugins.google import LLM as GoogleLLM
 from livekit.plugins.groq import STT
@@ -18,88 +19,76 @@ from livekit.plugins.silero import VAD
 from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
 from src.utils import validate_env_vars
-# --- FastAPI App Definition ---
 app = FastAPI()
-# --- Pydantic Model for the request body ---
 class JoinRoomRequest(BaseModel):
     room_name: str
     agent_token: str
-# --- The Core Agent Logic (mostly unchanged) ---
 class VoiceAssistant(Agent):
     def __init__(self):
         super().__init__(instructions=SYSTEM_PROMPT)
 async def run_agent_session(room_name: str, agent_token: str):
-    """
-    This function contains the core logic to connect and run the agent in a LiveKit room.
-    """
     livekit_url = os.getenv("LIVEKIT_URL")
-    # This context will be used by the agent to connect to the room
-    ctx = JobContext(
-        room_name=room_name,
-        livekit_url=livekit_url,
-        token=agent_token,
-    )
     await ctx.connect()
     vad = VAD.load(min_speech_duration=0.1, min_silence_duration=0.5)
     session = AgentSession(
         vad=vad,
-        stt=StreamAdapter(
-            stt=STT(model="whisper-large-v3-turbo", language="en"),
-            vad=vad,
-        ),
-        llm=GoogleLLM(model="gemini-1.5-flash", temperature=0.5), # Changed to 1.5-flash for more power
-        tts=TTS(
-            voice=VoiceByName(name="Tiktok Fashion Influencer", provider=VoiceProvider.hume),
-            instant_mode=True
-        ),
     )
     print(f"Agent starting session in room: {room_name}")
     await session.start(agent=VoiceAssistant(), room=ctx.room)
     await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
     print(f"Agent session ended for room: {room_name}")
-# --- FastAPI Endpoint ---
 @app.post("/join-room")
 async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
-    """
-    This endpoint is called by the frontend to trigger the agent.
-    It immediately returns a success message and starts the agent in the background.
-    """
     print(f"Received request for agent to join room: {req.room_name}")
-    # Add the long-running agent session as a background task
     background_tasks.add_task(run_agent_session, req.room_name, req.agent_token)
     return {"status": "agent_joining"}
-# --- Health Check Endpoint (good practice) ---
 @app.get("/")
 async def root():
     return {"status": "avurna_agent_server_online"}
-# --- Main execution block ---
 if __name__ == "__main__":
-    # Validate environment variables on startup
     validate_env_vars([
-        "HUME_API_KEY",
-        "LIVEKIT_URL",
-        "LIVEKIT_API_KEY",
-        "LIVEKIT_API_SECRET",
-        "GROQ_API_KEY",
-        "GOOGLE_API_KEY", # Assuming you meant GOOGLE_API_KEY from your original file
     ])
-    # Run the FastAPI server using uvicorn
-    # Hugging Face Spaces requires the app to run on port 7860
-    # and host 0.0.0.0 to be accessible from the internet.
     uvicorn.run(app, host="0.0.0.0", port=7860)

 """
 import asyncio
 import os
+import json # Import the json library
 from fastapi import FastAPI, BackgroundTasks
 from pydantic import BaseModel
 import uvicorn
+from livekit.agents import Agent, AgentSession, JobContext
 from livekit.agents.stt.stream_adapter import StreamAdapter
 from livekit.plugins.google import LLM as GoogleLLM
 from livekit.plugins.groq import STT
 from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
 from src.utils import validate_env_vars
 app = FastAPI()
 class JoinRoomRequest(BaseModel):
     room_name: str
     agent_token: str
 class VoiceAssistant(Agent):
     def __init__(self):
         super().__init__(instructions=SYSTEM_PROMPT)
+# --- KEY CHANGE: Function to send state to the frontend ---
+async def send_agent_state(ctx: JobContext, state: str):
+    """Helper to notify the frontend of the agent's current state."""
+    try:
+        msg = json.dumps({"type": "agent_state", "state": state})
+        await ctx.room.local_participant.publish_data(msg)
+        print(f"Sent agent state: {state}")
+    except Exception as e:
+        print(f"Error publishing agent state: {e}")
 async def run_agent_session(room_name: str, agent_token: str):
     livekit_url = os.getenv("LIVEKIT_URL")
+    ctx = JobContext(room_name=room_name, livekit_url=livekit_url, token=agent_token)
     await ctx.connect()
+    await send_agent_state(ctx, "listening") # Start in listening state
     vad = VAD.load(min_speech_duration=0.1, min_silence_duration=0.5)
+    # --- This is a conceptual wrapper for the LLM to inject state messages ---
+    # We will wrap the original LLM to send messages before and after it runs.
+    original_llm = GoogleLLM(model="gemini-1.5-flash", temperature=0.5)
+    async def llm_wrapper_fnc(history):
+        await send_agent_state(ctx, "thinking") # State change before LLM call
+        result = await original_llm.chat(history)
+        await send_agent_state(ctx, "speaking") # State change after LLM, before TTS
+        return result
     session = AgentSession(
         vad=vad,
+        stt=StreamAdapter(stt=STT(model="whisper-large-v3-turbo", language="en"), vad=vad),
+        llm=llm_wrapper_fnc, # Use our wrapped function
+        tts=TTS(voice=VoiceByName(name="Tiktok Fashion Influencer", provider=VoiceProvider.hume), instant_mode=True),
     )
     print(f"Agent starting session in room: {room_name}")
     await session.start(agent=VoiceAssistant(), room=ctx.room)
+    # We'll also wrap the initial greeting
+    await send_agent_state(ctx, "speaking")
     await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
+    await send_agent_state(ctx, "listening") # Return to listening after greeting
     print(f"Agent session ended for room: {room_name}")
 @app.post("/join-room")
 async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
     print(f"Received request for agent to join room: {req.room_name}")
     background_tasks.add_task(run_agent_session, req.room_name, req.agent_token)
     return {"status": "agent_joining"}
 @app.get("/")
 async def root():
     return {"status": "avurna_agent_server_online"}
 if __name__ == "__main__":
     validate_env_vars([
+        "HUME_API_KEY", "LIVEKIT_URL", "LIVEKIT_API_KEY", "LIVEKIT_API_SECRET",
+        "GROQ_API_KEY", "GOOGLE_API_KEY",
     ])
     uvicorn.run(app, host="0.0.0.0", port=7860)