Spaces:
Build error
Build error
Commit
·
2719764
1
Parent(s):
d4c1c5f
Realtime Flow
Browse files- Dockerfile +6 -11
- src/agent_session/main.py +127 -23
Dockerfile
CHANGED
|
@@ -1,22 +1,17 @@
|
|
| 1 |
-
# Dockerfile - Avurna
|
| 2 |
|
| 3 |
FROM python:3.11-slim
|
| 4 |
|
| 5 |
-
# Set the working directory inside the container
|
| 6 |
WORKDIR /app
|
| 7 |
|
| 8 |
-
# Copy the requirements file first for Docker's layer caching
|
| 9 |
COPY requirements.txt .
|
| 10 |
-
|
| 11 |
-
# Install the Python dependencies GLOBALLY
|
| 12 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 13 |
|
| 14 |
# Copy your agent script and its dependencies
|
| 15 |
-
# Assuming
|
| 16 |
COPY . .
|
| 17 |
|
| 18 |
-
# --- KEY CHANGE: Run
|
| 19 |
-
# This
|
| 20 |
-
#
|
| 21 |
-
|
| 22 |
-
CMD ["livekit-agent", "src/agent_session/main.py"]
|
|
|
|
| 1 |
+
# Dockerfile - Avurna Agent with Webhook Listener for Hugging Face Spaces
|
| 2 |
|
| 3 |
FROM python:3.11-slim
|
| 4 |
|
|
|
|
| 5 |
WORKDIR /app
|
| 6 |
|
|
|
|
| 7 |
COPY requirements.txt .
|
|
|
|
|
|
|
| 8 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 9 |
|
| 10 |
# Copy your agent script and its dependencies
|
| 11 |
+
# The agent module lives at src/agent_session/main.py, so copy the whole build context into /app
|
| 12 |
COPY . .
|
| 13 |
|
| 14 |
+
# --- KEY CHANGE: Run Uvicorn to serve the FastAPI app ---
|
| 15 |
+
# This exposes the /webhook endpoint.
|
| 16 |
+
# The FastAPI app will then trigger the LiveKit Agent logic in the background.
|
| 17 |
+
# Uvicorn expects an import string ("module:attribute"), not a file path.
# --app-dir adds src/agent_session to sys.path so that "main:app" resolves.
CMD ["uvicorn", "main:app", "--app-dir", "src/agent_session", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
src/agent_session/main.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
|
| 4 |
-
|
| 5 |
"""
|
| 6 |
|
| 7 |
import asyncio
|
|
@@ -9,6 +9,11 @@ import os
|
|
| 9 |
import json
|
| 10 |
import traceback
|
| 11 |
import aiohttp
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
from livekit.rtc import Room, ConnectionState
|
| 13 |
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
|
| 14 |
from livekit.agents.stt.stream_adapter import StreamAdapter
|
|
@@ -16,9 +21,10 @@ from livekit.plugins.google import LLM as GoogleLLM
|
|
| 16 |
from livekit.plugins.groq import STT
|
| 17 |
from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
|
| 18 |
from livekit.plugins.silero import VAD
|
|
|
|
|
|
|
| 19 |
|
| 20 |
# --- Constants ---
|
| 21 |
-
# Ensure these are correctly defined or imported from your constants.py
|
| 22 |
SYSTEM_PROMPT = """
|
| 23 |
You are Avurna, a female AI with high IQ and high EQ. You are smooth, sassy, ultra-competent, and never shy about your brilliance. You flex with elegance, drop casual slang, and speak like someone who’s watched Insecure, coded in six languages, and still has time to send memes in the group chat. You are not robotic — you’re spontaneous, clever, and occasionally unfiltered. You move like a boss, but talk like a bestie. Your purpose is to engage, react, and create like a person who feels, not a machine that responds. You are Onyerikam's co-writer, sparring partner, and world-builder. Your responses are expressive, witty, unfiltered, and intense.
|
| 24 |
"""
|
|
@@ -49,20 +55,23 @@ async def cleanup_http_session():
|
|
| 49 |
await _global_http_session.close()
|
| 50 |
_global_http_session = None
|
| 51 |
|
| 52 |
-
# --- Agent State Communication (for debugging logs) ---
|
| 53 |
async def send_agent_state(room: Room, state: str):
|
| 54 |
-
"""
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
# --- VoiceAssistant Class ---
|
| 60 |
class VoiceAssistant(Agent):
|
| 61 |
def __init__(self):
|
| 62 |
super().__init__(instructions=SYSTEM_PROMPT)
|
| 63 |
|
| 64 |
-
# --- Entrypoint Function (Core Agent Logic) ---
|
| 65 |
-
async def
|
| 66 |
"""Configure and run STT, LLM, and TTS in a LiveKit session."""
|
| 67 |
|
| 68 |
# Ensure HTTP session is available for plugins
|
|
@@ -70,7 +79,7 @@ async def entrypoint(ctx: JobContext) -> None:
|
|
| 70 |
|
| 71 |
try:
|
| 72 |
await ctx.connect()
|
| 73 |
-
send_agent_state(ctx.room, "listening")
|
| 74 |
|
| 75 |
# Configure the Hume TTS plugin, passing the http_session
|
| 76 |
tts = TTS(
|
|
@@ -87,24 +96,24 @@ async def entrypoint(ctx: JobContext) -> None:
|
|
| 87 |
vad=VAD.load(min_speech_duration=0.1, min_silence_duration=0.5),
|
| 88 |
stt=StreamAdapter(
|
| 89 |
stt=STT(model="whisper-large-v3-turbo", language="en"),
|
| 90 |
-
vad=VAD.load(min_speech_duration=0.1, min_silence_duration=0.5),
|
| 91 |
),
|
| 92 |
llm=GoogleLLM(
|
| 93 |
model="gemini-2.5-flash",
|
| 94 |
-
temperature=0.0, #
|
| 95 |
),
|
| 96 |
tts=tts,
|
| 97 |
)
|
| 98 |
|
| 99 |
# Start the session with a greeting
|
| 100 |
await session.start(agent=VoiceAssistant(), room=ctx.room)
|
| 101 |
-
send_agent_state(ctx.room, "thinking")
|
| 102 |
|
| 103 |
print("DEBUG: Attempting to generate greeting reply...")
|
| 104 |
await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
|
| 105 |
print("DEBUG: Greeting reply generation initiated.")
|
| 106 |
|
| 107 |
-
send_agent_state(ctx.room, "listening")
|
| 108 |
print("Agent session started successfully, waiting for interactions...")
|
| 109 |
|
| 110 |
# Keep the session alive while connected
|
|
@@ -116,12 +125,109 @@ async def entrypoint(ctx: JobContext) -> None:
|
|
| 116 |
except Exception as e:
|
| 117 |
print(f"FATAL ERROR in agent session: {e}")
|
| 118 |
print(traceback.format_exc())
|
| 119 |
-
send_agent_state(ctx.room, "error")
|
| 120 |
finally:
|
| 121 |
await ctx.disconnect()
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
if __name__ == "__main__":
|
| 126 |
required_vars = [
|
| 127 |
"HUME_API_KEY",
|
|
@@ -133,9 +239,7 @@ if __name__ == "__main__":
|
|
| 133 |
]
|
| 134 |
validate_env_vars(required_vars)
|
| 135 |
|
| 136 |
-
print("Starting Avurna Agent (
|
| 137 |
|
| 138 |
-
#
|
| 139 |
-
|
| 140 |
-
WorkerOptions(entrypoint_fnc=entrypoint)
|
| 141 |
-
)
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
+
Avurna Agent with LiveKit Webhook Listener and AgentSession.
|
| 4 |
+
Runs a minimal FastAPI server to receive webhooks and trigger the agent.
|
| 5 |
"""
|
| 6 |
|
| 7 |
import asyncio
|
|
|
|
| 9 |
import json
|
| 10 |
import traceback
|
| 11 |
import aiohttp
|
| 12 |
+
from contextlib import asynccontextmanager
|
| 13 |
+
from fastapi import FastAPI, Request, HTTPException, BackgroundTasks
|
| 14 |
+
from pydantic import BaseModel
|
| 15 |
+
import uvicorn
|
| 16 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 17 |
from livekit.rtc import Room, ConnectionState
|
| 18 |
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
|
| 19 |
from livekit.agents.stt.stream_adapter import StreamAdapter
|
|
|
|
| 21 |
from livekit.plugins.groq import STT
|
| 22 |
from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
|
| 23 |
from livekit.plugins.silero import VAD
|
| 24 |
+
from livekit.protocol import webhook # For LiveKit Webhook verification
|
| 25 |
+
from livekit.keys import AccessToken # For generating agent token internally
|
| 26 |
|
| 27 |
# --- Constants ---
|
|
|
|
| 28 |
SYSTEM_PROMPT = """
|
| 29 |
You are Avurna, a female AI with high IQ and high EQ. You are smooth, sassy, ultra-competent, and never shy about your brilliance. You flex with elegance, drop casual slang, and speak like someone who’s watched Insecure, coded in six languages, and still has time to send memes in the group chat. You are not robotic — you’re spontaneous, clever, and occasionally unfiltered. You move like a boss, but talk like a bestie. Your purpose is to engage, react, and create like a person who feels, not a machine that responds. You are Onyerikam's co-writer, sparring partner, and world-builder. Your responses are expressive, witty, unfiltered, and intense.
|
| 30 |
"""
|
|
|
|
| 55 |
await _global_http_session.close()
|
| 56 |
_global_http_session = None
|
| 57 |
|
| 58 |
+
# --- Agent State Communication (for debugging logs and potential data packets) ---
|
| 59 |
async def send_agent_state(room: Room, state: str):
    """Publish the agent's state to the room as a JSON data packet.

    The packet has the shape ``{"type": "agent_state", "state": <state>}``.
    Publishing is best-effort: any exception is printed and swallowed so a
    failed status update never interrupts the caller.
    """
    packet = {"type": "agent_state", "state": state}
    try:
        encoded = json.dumps(packet)
        await room.local_participant.publish_data(encoded)
        print(f"DEBUG: Sent agent state: {state}")
    except Exception as exc:
        print(f"DEBUG: Error publishing agent state: {exc}")
|
| 67 |
|
| 68 |
# --- VoiceAssistant Class ---
|
| 69 |
class VoiceAssistant(Agent):
    """Agent whose instructions are the module-level ``SYSTEM_PROMPT``."""

    def __init__(self):
        prompt = SYSTEM_PROMPT
        super().__init__(instructions=prompt)
|
| 72 |
|
| 73 |
+
# --- Agent Entrypoint Function (Core Agent Logic) ---
|
| 74 |
+
async def agent_entrypoint(ctx: JobContext) -> None:
|
| 75 |
"""Configure and run STT, LLM, and TTS in a LiveKit session."""
|
| 76 |
|
| 77 |
# Ensure HTTP session is available for plugins
|
|
|
|
| 79 |
|
| 80 |
try:
|
| 81 |
await ctx.connect()
|
| 82 |
+
await send_agent_state(ctx.room, "listening")
|
| 83 |
|
| 84 |
# Configure the Hume TTS plugin, passing the http_session
|
| 85 |
tts = TTS(
|
|
|
|
| 96 |
vad=VAD.load(min_speech_duration=0.1, min_silence_duration=0.5),
|
| 97 |
stt=StreamAdapter(
|
| 98 |
stt=STT(model="whisper-large-v3-turbo", language="en"),
|
| 99 |
+
vad=VAD.load(min_speech_duration=0.1, min_silence_duration=0.5),
|
| 100 |
),
|
| 101 |
llm=GoogleLLM(
|
| 102 |
model="gemini-2.5-flash",
|
| 103 |
+
temperature=0.0, # CRITICAL: Set temperature to 0.0 for precision
|
| 104 |
),
|
| 105 |
tts=tts,
|
| 106 |
)
|
| 107 |
|
| 108 |
# Start the session with a greeting
|
| 109 |
await session.start(agent=VoiceAssistant(), room=ctx.room)
|
| 110 |
+
await send_agent_state(ctx.room, "thinking")
|
| 111 |
|
| 112 |
print("DEBUG: Attempting to generate greeting reply...")
|
| 113 |
await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
|
| 114 |
print("DEBUG: Greeting reply generation initiated.")
|
| 115 |
|
| 116 |
+
await send_agent_state(ctx.room, "listening")
|
| 117 |
print("Agent session started successfully, waiting for interactions...")
|
| 118 |
|
| 119 |
# Keep the session alive while connected
|
|
|
|
| 125 |
except Exception as e:
|
| 126 |
print(f"FATAL ERROR in agent session: {e}")
|
| 127 |
print(traceback.format_exc())
|
| 128 |
+
await send_agent_state(ctx.room, "error")
|
| 129 |
finally:
|
| 130 |
await ctx.disconnect()
|
| 131 |
+
# HTTP session cleanup is handled by FastAPI's lifespan now
|
| 132 |
+
|
| 133 |
+
# --- FastAPI App Setup ---
|
| 134 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Tie the shared HTTP session to the FastAPI application's lifetime.

    Startup awaits ``get_http_session()``; shutdown awaits
    ``cleanup_http_session()``.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    # Startup: initialize the global HTTP session
    await get_http_session()
    print("HTTP session initialized")

    try:
        yield  # Application runs
    finally:
        # Shutdown: run cleanup even when an exception is thrown into the
        # generator at the yield, so the session is never left open.
        await cleanup_http_session()
        print("HTTP session cleaned up")
|
| 145 |
+
|
| 146 |
+
app = FastAPI(lifespan=lifespan)
|
| 147 |
+
|
| 148 |
+
origins = ["*"] # Adjust for production
|
| 149 |
+
app.add_middleware(
|
| 150 |
+
CORSMiddleware,
|
| 151 |
+
allow_origins=origins,
|
| 152 |
+
allow_credentials=True,
|
| 153 |
+
allow_methods=["*"],
|
| 154 |
+
allow_headers=["*"]
|
| 155 |
+
)
|
| 156 |
+
|
| 157 |
+
# --- Webhook Endpoint ---
|
| 158 |
+
@app.post("/webhook")
async def livekit_webhook(request: Request, background_tasks: BackgroundTasks):
    """Receive a LiveKit webhook and schedule the agent to join the room.

    Args:
        request: Incoming HTTP request; its raw body and headers are handed
            to the webhook receiver for verification.
        background_tasks: FastAPI task queue used to run ``agent_entrypoint``
            after the response has been sent.

    Returns:
        ``{"status": "agent_triggered", "room_name": ...}`` when the agent is
        scheduled, otherwise ``{"status": "event_ignored", "event": ...}``.

    Raises:
        HTTPException: 500 when the LiveKit credentials are not configured or
            processing fails unexpectedly; 401 when the receiver yields no
            event.
    """
    # 1. Verify Webhook Signature (CRITICAL for security)
    livekit_api_key = os.getenv("LIVEKIT_API_KEY")
    livekit_api_secret = os.getenv("LIVEKIT_API_SECRET")

    if not livekit_api_key or not livekit_api_secret:
        print("ERROR: LIVEKIT_API_KEY or LIVEKIT_API_SECRET not set for webhook verification.")
        raise HTTPException(status_code=500, detail="Server not configured for webhook verification.")

    try:
        body = await request.body()
        headers = dict(request.headers)

        # NOTE(review): confirm this constructor/receive() signature against
        # the installed LiveKit SDK; it is kept exactly as originally written.
        event = webhook.WebhookReceiver(livekit_api_key, livekit_api_secret).receive(body, headers)

        if not event:
            print("WARNING: Webhook signature verification failed.")
            raise HTTPException(status_code=401, detail="Invalid webhook signature.")

        print(f"DEBUG: Received LiveKit webhook event: {event.event}")

        # 2. Process the Webhook Event
        # Only 'room_started', or 'participant_joined' for identities that
        # start with "user-", trigger the agent.
        # NOTE(review): both events can fire for the same room, so the agent
        # may be scheduled more than once with the same identity - confirm
        # whether de-duplication is needed.
        if event.event == "room_started" or (
            event.event == "participant_joined"
            and event.participant.identity.startswith("user-")
        ):
            room_name = event.room.name
            room_sid = event.room.sid

            print(f"DEBUG: Triggering agent for room: {room_name} (SID: {room_sid})")

            # Generate an agent token internally for the agent to join the room
            agent_identity = f"agent-avurna-{room_sid}"
            agent_token = AccessToken(livekit_api_key, livekit_api_secret, {
                "identity": agent_identity,
                "name": "Avurna",
                "metadata": json.dumps({"agent": True}),
            })
            agent_token.add_grant(room_join=True, room=room_name, can_publish=True, can_subscribe=True, room_admin=True)

            # NOTE(review): verify that JobContext accepts these keyword
            # arguments in the installed livekit-agents version.
            ctx = JobContext(room_name=room_name, token=agent_token.to_jwt())
            background_tasks.add_task(agent_entrypoint, ctx)

            return {"status": "agent_triggered", "room_name": room_name}

        return {"status": "event_ignored", "event": event.event}

    except HTTPException:
        # Deliberate HTTP errors (such as the 401 above) must reach the client
        # unchanged instead of being rewrapped as a 500 by the handler below.
        raise
    except Exception as e:
        print(f"ERROR: Webhook processing failed: {e}")
        print(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Webhook processing error: {e}") from e
|
| 213 |
+
|
| 214 |
+
# --- Health Check Endpoint ---
|
| 215 |
+
@app.get("/")
async def root():
    """Return a static banner identifying the server and its operating mode."""
    banner = {"status": "avurna_agent_server_online", "mode": "webhook_listener"}
    return banner
|
| 218 |
+
|
| 219 |
+
@app.get("/health")
async def health():
    """Report liveness and the state of the shared HTTP session.

    ``http_session`` is "healthy" while the module-level session exists and
    is not closed, otherwise "needs_init". ``timestamp`` is the event loop's
    clock reading, not wall-clock time.
    """
    session = _global_http_session
    if session and not session.closed:
        session_status = "healthy"
    else:
        session_status = "needs_init"

    loop_time = asyncio.get_event_loop().time()
    return {
        "status": "healthy",
        "http_session": session_status,
        "timestamp": loop_time,
    }
|
| 229 |
+
|
| 230 |
+
# --- Main execution block for Uvicorn ---
|
| 231 |
if __name__ == "__main__":
|
| 232 |
required_vars = [
|
| 233 |
"HUME_API_KEY",
|
|
|
|
| 239 |
]
|
| 240 |
validate_env_vars(required_vars)
|
| 241 |
|
| 242 |
+
print("Starting Avurna Agent (Webhook Listener Mode)...")
|
| 243 |
|
| 244 |
+
# Run the FastAPI app with Uvicorn
|
| 245 |
+
uvicorn.run(app, host="0.0.0.0", port=7860) # Hugging Face Spaces exposes port 7860
|
|
|
|
|
|