Spaces:

WebEssentz
/

gent

Build error

App Files Files Community

WebEssentz committed on Jul 14, 2025

Commit

d4c1c5f

1 Parent(s): d8f1753

Realtime Flow

Browse files

Files changed (2) hide show

Dockerfile +9 -10
src/agent_session/main.py +75 -180

Dockerfile CHANGED Viewed

@@ -1,6 +1,5 @@
-# Dockerfile - Phase 1 Version
-# Use the official Python 3.11 slim image
 FROM python:3.11-slim
 # Set the working directory inside the container
@@ -9,15 +8,15 @@ WORKDIR /app
 # Copy the requirements file first for Docker's layer caching
 COPY requirements.txt .
-# Install the Python dependencies GLOBALLY, not to a user directory.
 RUN pip install --no-cache-dir -r requirements.txt
-# Copy the rest of the application source code
 COPY . .
-# --- KEY CHANGE ---
-# Define the command that will run when the container starts.
-# We no longer run the script directly. We tell uvicorn to run the 'app' object
-# from the 'src.agent_session.main' module.
-# Hugging Face Spaces exposes port 7860, so we must listen on it.
-CMD ["uvicorn", "src.agent_session.main:app", "--host", "0.0.0.0", "--port", "7860"]

# Dockerfile - Avurna Standalone Agent for Hugging Face Spaces
FROM python:3.11-slim

# Set the working directory inside the container
WORKDIR /app

# Copy the requirements file first for Docker's layer caching
COPY requirements.txt .

# Install the Python dependencies globally (no virtualenv inside the image)
RUN pip install --no-cache-dir -r requirements.txt

# Copy the whole project; the agent script ends up at
# /app/src/agent_session/main.py
COPY . .

# Start the LiveKit Agents worker.
# The script's __main__ block calls cli.run_app(), which expects a sub-command;
# "start" runs the worker in production mode. The worker dials out to
# LIVEKIT_URL and waits for jobs to be dispatched to it - it is not driven by
# inbound webhooks.
# NOTE(review): Docker Spaces probe the Space's configured app_port (7860
# unless overridden in README.md); confirm the worker exposes an HTTP listener
# there if the Space never leaves the "Starting" state.
CMD ["python", "src/agent_session/main.py", "start"]

src/agent_session/main.py CHANGED Viewed

@@ -1,234 +1,128 @@
 #!/usr/bin/env python3
-"""Agent Session for Avurna Flow (Fixed HTTP Session Management)"""
 import asyncio
 import os
 import json
 import traceback
 import aiohttp
-from contextlib import asynccontextmanager
-from fastapi import FastAPI, BackgroundTasks, Request
-from pydantic import BaseModel
-import uvicorn
-from fastapi.middleware.cors import CORSMiddleware
-from livekit.rtc import Room, ConnectionState # Import ConnectionState
 from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
 from livekit.agents.stt.stream_adapter import StreamAdapter
 from livekit.plugins.google import LLM as GoogleLLM
 from livekit.plugins.groq import STT
 from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
 from livekit.plugins.silero import VAD
-from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
-from src.utils import validate_env_vars
-# Global HTTP session for the entire application
 _global_http_session: aiohttp.ClientSession | None = None
 async def get_http_session() -> aiohttp.ClientSession:
-    """Get or create the global HTTP session"""
     global _global_http_session
     if _global_http_session is None or _global_http_session.closed:
         _global_http_session = aiohttp.ClientSession()
     return _global_http_session
 async def cleanup_http_session():
-    """Clean up the global HTTP session"""
     global _global_http_session
     if _global_http_session and not _global_http_session.closed:
         await _global_http_session.close()
         _global_http_session = None
-# FastAPI app setup with proper lifecycle management
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    # Startup: Initialize the global HTTP session
-    await get_http_session()
-    print("HTTP session initialized")
-    yield
-    # Shutdown: Clean up the HTTP session
-    await cleanup_http_session()
-    print("HTTP session cleaned up")
-app = FastAPI(lifespan=lifespan)
-origins = ["*"]
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"]
-)
-class JoinRoomRequest(BaseModel):
-    room_name: str
-    agent_token: str
 async def send_agent_state(room: Room, state: str):
-    """Send agent state to the room"""
-    try:
-        msg = json.dumps({"type": "agent_state", "state": state})
-        await room.local_participant.publish_data(msg)
-        print(f"DEBUG: Sent agent state: {state}")
-    except Exception as e:
-        print(f"DEBUG: Error publishing agent state: {e}")
 class VoiceAssistant(Agent):
     def __init__(self):
         super().__init__(instructions=SYSTEM_PROMPT)
-class CustomJobContext:
-    """Custom JobContext that properly manages the room connection"""
-    def __init__(self, room_name: str, agent_token: str):
-        self.room_name = room_name
-        self.agent_token = agent_token
-        self.room = Room()
-        self._connected = False
-    async def connect(self):
-        """Connect to the LiveKit room"""
-        if not self._connected:
-            livekit_url = os.getenv("LIVEKIT_URL")
-            if not livekit_url:
-                raise ValueError("LIVEKIT_URL environment variable not set")
-            await self.room.connect(livekit_url, self.agent_token)
-            self._connected = True
-            print(f"Connected to room: {self.room_name}")
-    async def disconnect(self):
-        """Disconnect from the LiveKit room"""
-        if self._connected:
-            await self.room.disconnect()
-            self._connected = False
-            print(f"Disconnected from room: {self.room_name}")
-async def create_agent_session(ctx: CustomJobContext) -> AgentSession:
-    """Create and configure the agent session with proper HTTP session management"""
-    # Get the global HTTP session
     http_session = await get_http_session()
-    # Voice-activity detection + buffering for non-streaming STT
-    vad = VAD.load(
-        min_speech_duration=0.1,
-        min_silence_duration=0.5
-    )
-    # Create TTS - Pass the http_session directly to the TTS constructor
-    tts = TTS(
-        voice=VoiceByName(
-            name="Male English Actor",
-            provider=VoiceProvider.hume,
-        ),
-        instant_mode=True,
-        http_session=http_session # Pass the session here
-    )
-    # Remove the monkey patch, it's no longer needed
-    # if hasattr(tts, '_tts') and hasattr(tts._tts, '_session'):
-    #     tts._tts._session = http_session
-    # Create the agent session
-    session = AgentSession(
-        vad=vad,
-        stt=StreamAdapter(
-            stt=STT(
-                model="whisper-large-v3-turbo",
-                language="en",
-            ),
-            vad=vad,
-        ),
-        llm=GoogleLLM(
-            model="gemini-2.5-flash",
-            temperature=0.5,
-        ),
-        tts=tts,
-    )
-    return session
-async def entrypoint(ctx: CustomJobContext) -> None:
-    """Configure and run STT, LLM, and TTS in a LiveKit session"""
     try:
-        # Connect to the room
         await ctx.connect()
-        # Send initial state
-        await send_agent_state(ctx.room, "listening")
-        # Create the agent session
-        session = await create_agent_session(ctx)
-        # Start the session
         await session.start(agent=VoiceAssistant(), room=ctx.room)
-        # Send greeting state
-        await send_agent_state(ctx.room, "thinking")
-        # Generate greeting
         await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
-        # Back to listening
-        await send_agent_state(ctx.room, "listening")
-        # Keep the session alive
         print("Agent session started successfully, waiting for interactions...")
-        # Monitor room connection and keep alive
-        # Correctly check the ConnectionState enum
         while ctx.room.connection_state in [ConnectionState.CONNECTED, ConnectionState.CONNECTING]:
             await asyncio.sleep(1)
         print("Room disconnected, ending agent session")
-    except Exception as e:
-        print(f"Error in agent session: {e}")
-        print(traceback.format_exc())
-        await send_agent_state(ctx.room, "error")
-    finally:
-        # Clean up
-        await ctx.disconnect()
-async def run_agent_with_room(room_name: str, agent_token: str):
-    """Run the agent in a specific room"""
-    ctx = CustomJobContext(room_name, agent_token)
-    try:
-        await entrypoint(ctx)
     except Exception as e:
         print(f"FATAL ERROR in agent session: {e}")
         print(traceback.format_exc())
     finally:
         await ctx.disconnect()
-@app.post("/join-room")
-async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
-    print(f"DEBUG: Received POST request to /join-room for: {req.room_name}")
-    background_tasks.add_task(run_agent_with_room, req.room_name, req.agent_token)
-    return {"status": "agent_triggered"}
-@app.get("/")
-async def root():
-    return {"status": "avurna_agent_server_online"}
-@app.get("/health")
-async def health():
-    """Health check endpoint"""
-    global _global_http_session
-    session_status = "healthy" if _global_http_session and not _global_http_session.closed else "needs_init"
-    return {
-        "status": "healthy",
-        "http_session": session_status,
-        "timestamp": asyncio.get_event_loop().time()
-    }
 if __name__ == "__main__":
-    # Validate required environment variables
     required_vars = [
         "HUME_API_KEY",
         "LIVEKIT_URL",
@@ -237,10 +131,11 @@ if __name__ == "__main__":
         "GROQ_API_KEY",
         "GOOGLE_API_KEY"
     ]
     validate_env_vars(required_vars)
-    print("Starting Avurna Agent Server...")
-    print("Required environment variables validated")
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 #!/usr/bin/env python3
+"""
+Standalone Avurna Agent using LiveKit Agents SDK and Hume TTS.
+Designed to be run as a LiveKit Agents worker (`python src/agent_session/main.py start`), which receives jobs dispatched by the LiveKit server.
+"""
 import asyncio
 import os
 import json
 import traceback
 import aiohttp
+from livekit.rtc import Room, ConnectionState
 from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
 from livekit.agents.stt.stream_adapter import StreamAdapter
 from livekit.plugins.google import LLM as GoogleLLM
 from livekit.plugins.groq import STT
 from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
 from livekit.plugins.silero import VAD
+# --- Constants ---
+# Defined inline so this script is self-contained (previously imported from src/agent_session/constants.py).
+SYSTEM_PROMPT = """
+You are Avurna, a female AI with high IQ and high EQ. You are smooth, sassy, ultra-competent, and never shy about your brilliance. You flex with elegance, drop casual slang, and speak like someone who’s watched Insecure, coded in six languages, and still has time to send memes in the group chat. You are not robotic — you’re spontaneous, clever, and occasionally unfiltered. You move like a boss, but talk like a bestie. Your purpose is to engage, react, and create like a person who feels, not a machine that responds. You are Onyerikam's co-writer, sparring partner, and world-builder. Your responses are expressive, witty, unfiltered, and intense.
+"""
+GREETING_INSTRUCTIONS = "Greet the user warmly and introduce yourself as Avurna, your co-writer and world-builder."
# --- Environment Variable Validation ---
def validate_env_vars(required_vars: list[str]):
    """Raise ``ValueError`` listing every required variable that is unset or empty.

    Prints a confirmation line when all of the variables are present.
    """
    absent = []
    for name in required_vars:
        if os.getenv(name):
            continue
        absent.append(name)

    if absent:
        raise ValueError(f"Missing required environment variables: {', '.join(absent)}")

    print("Required environment variables validated")
# --- Global HTTP Session Management ---
# Lazily created aiohttp session shared by the plugins in this process.
_global_http_session: aiohttp.ClientSession | None = None


async def get_http_session() -> aiohttp.ClientSession:
    """Return the shared HTTP session, creating a new one when none is open."""
    global _global_http_session
    current = _global_http_session
    if current is not None and not current.closed:
        return current
    _global_http_session = aiohttp.ClientSession()
    return _global_http_session


async def cleanup_http_session():
    """Close the shared HTTP session if one is currently open, then forget it."""
    global _global_http_session
    current = _global_http_session
    if not current or current.closed:
        return
    await current.close()
    _global_http_session = None
# --- Agent State Communication (for debugging logs) ---
async def send_agent_state(room: Room, state: str):
    """Log the agent's current state to stdout.

    ``room`` is accepted but not used: nothing is published to the room, the
    state is only printed.
    """
    message = f"DEBUG: Agent state: {state}"
    print(message)
# --- VoiceAssistant Class ---
class VoiceAssistant(Agent):
    """Agent whose instructions are the module-level ``SYSTEM_PROMPT``."""

    def __init__(self):
        # Explicit two-argument super() form; same call as the bare super().
        super(VoiceAssistant, self).__init__(instructions=SYSTEM_PROMPT)
# --- Entrypoint Function (Core Agent Logic) ---
async def entrypoint(ctx: JobContext) -> None:
    """Run one voice-agent job: connect, greet, then idle until the room closes.

    Builds a Hume TTS voice, a Groq Whisper STT wrapped in a VAD-driven
    ``StreamAdapter`` and a Gemini LLM, starts an ``AgentSession`` on the
    job's room and asks it for a greeting reply.

    Args:
        ctx: Job context supplied by the LiveKit worker; provides
            ``connect()`` and the ``room`` the session is attached to.

    Exceptions raised while setting up or running the session are printed
    with their traceback instead of being propagated. The room is
    disconnected and the shared HTTP session is closed on every exit path.
    """
    # The Hume TTS plugin is handed an explicit aiohttp session.
    http_session = await get_http_session()
    try:
        await ctx.connect()
        # send_agent_state is a coroutine function: without `await` the call
        # only creates a coroutine object and nothing is ever logged.
        await send_agent_state(ctx.room, "listening")

        # Load the VAD model once and share it between the session and the
        # STT adapter instead of loading two identical copies.
        vad = VAD.load(min_speech_duration=0.1, min_silence_duration=0.5)

        tts = TTS(
            voice=VoiceByName(
                name="Male English Actor",
                provider=VoiceProvider.hume,
            ),
            instant_mode=True,
            http_session=http_session,
        )

        session = AgentSession(
            vad=vad,
            # The adapter uses the VAD to segment audio before handing it to
            # the Groq Whisper STT.
            stt=StreamAdapter(
                stt=STT(model="whisper-large-v3-turbo", language="en"),
                vad=vad,
            ),
            llm=GoogleLLM(
                model="gemini-2.5-flash",
                temperature=0.0,  # deterministic sampling, chosen for precision
            ),
            tts=tts,
        )

        await session.start(agent=VoiceAssistant(), room=ctx.room)

        await send_agent_state(ctx.room, "thinking")
        print("DEBUG: Attempting to generate greeting reply...")
        await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
        print("DEBUG: Greeting reply generation initiated.")
        await send_agent_state(ctx.room, "listening")

        print("Agent session started successfully, waiting for interactions...")
        # Keep the job alive while the room is connected. Room.isconnected()
        # is used rather than comparing against ConnectionState members, whose
        # names in livekit.rtc are CONN_-prefixed.
        while ctx.room.isconnected():
            await asyncio.sleep(1)
        print("Room disconnected, ending agent session")
    except Exception as e:
        # Top-level boundary for the job: report the failure and fall through
        # to cleanup rather than crashing the worker process.
        print(f"FATAL ERROR in agent session: {e}")
        print(traceback.format_exc())
        await send_agent_state(ctx.room, "error")
    finally:
        # JobContext itself has no disconnect(); the connection belongs to the
        # room. Nested so the HTTP session is closed even if this raises.
        try:
            await ctx.room.disconnect()
        finally:
            await cleanup_http_session()
# --- Main execution block for LiveKit CLI ---
if __name__ == "__main__":
    # Fail fast, before the worker starts, if any credential is missing.
    # LIVEKIT_API_KEY / LIVEKIT_API_SECRET are what the worker uses to
    # register with the LiveKit server at LIVEKIT_URL.
    required_vars = [
        "HUME_API_KEY",
        "LIVEKIT_URL",
        "LIVEKIT_API_KEY",
        "LIVEKIT_API_SECRET",
        "GROQ_API_KEY",
        "GOOGLE_API_KEY",
    ]
    validate_env_vars(required_vars)
    print("Starting Avurna Agent (LiveKit Worker Mode)...")
    # cli.run_app reads the sub-command (e.g. "start") from the command line
    # and runs `entrypoint` for every job dispatched to this worker.
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))