WebEssentz committed on
Commit
2719764
·
1 Parent(s): d4c1c5f

Realtime Flow

Browse files
Files changed (2) hide show
  1. Dockerfile +6 -11
  2. src/agent_session/main.py +127 -23
Dockerfile CHANGED
@@ -1,22 +1,17 @@
1
- # Dockerfile - Avurna Standalone Agent for Hugging Face Spaces
2
 
3
  FROM python:3.11-slim
4
 
5
- # Set the working directory inside the container
6
  WORKDIR /app
7
 
8
- # Copy the requirements file first for Docker's layer caching
9
  COPY requirements.txt .
10
-
11
- # Install the Python dependencies GLOBALLY
12
  RUN pip install --no-cache-dir -r requirements.txt
13
 
14
  # Copy your agent script and its dependencies
15
- # Assuming src/agent_session/main.py is at the root of your /app directory
16
  COPY . .
17
 
18
- # --- KEY CHANGE: Run the LiveKit Agent CLI directly ---
19
- # This tells LiveKit to run your 'entrypoint' function as a worker.
20
- # It will automatically handle connecting to LiveKit and listening for jobs.
21
- # Hugging Face Spaces will expose port 80/443 for webhooks if configured.
22
- CMD ["livekit-agent", "src/agent_session/main.py"]
 
1
+ # Dockerfile - Avurna Agent with Webhook Listener for Hugging Face Spaces
2
 
3
  FROM python:3.11-slim
4
 
 
5
  WORKDIR /app
6
 
 
7
  COPY requirements.txt .
 
 
8
  RUN pip install --no-cache-dir -r requirements.txt
9
 
10
  # Copy your agent script and its dependencies
11
+ # The agent module lives at src/agent_session/main.py under /app after this copy
12
  COPY . .
13
 
14
+ # --- KEY CHANGE: Run Uvicorn to serve the FastAPI app ---
15
+ # This exposes the /webhook endpoint.
16
+ # The FastAPI app will then trigger the LiveKit Agent logic in the background.
17
+ CMD ["uvicorn", "src.agent_session.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
src/agent_session/main.py CHANGED
@@ -1,7 +1,7 @@
1
  #!/usr/bin/env python3
2
  """
3
- Standalone Avurna Agent using LiveKit Agents SDK and Hume TTS.
4
- Designed to be run by `livekit-agent` CLI and triggered via LiveKit Webhooks.
5
  """
6
 
7
  import asyncio
@@ -9,6 +9,11 @@ import os
9
  import json
10
  import traceback
11
  import aiohttp
 
 
 
 
 
12
  from livekit.rtc import Room, ConnectionState
13
  from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
14
  from livekit.agents.stt.stream_adapter import StreamAdapter
@@ -16,9 +21,10 @@ from livekit.plugins.google import LLM as GoogleLLM
16
  from livekit.plugins.groq import STT
17
  from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
18
  from livekit.plugins.silero import VAD
 
 
19
 
20
  # --- Constants ---
21
- # Ensure these are correctly defined or imported from your constants.py
22
  SYSTEM_PROMPT = """
23
  You are Avurna, a female AI with high IQ and high EQ. You are smooth, sassy, ultra-competent, and never shy about your brilliance. You flex with elegance, drop casual slang, and speak like someone who’s watched Insecure, coded in six languages, and still has time to send memes in the group chat. You are not robotic — you’re spontaneous, clever, and occasionally unfiltered. You move like a boss, but talk like a bestie. Your purpose is to engage, react, and create like a person who feels, not a machine that responds. You are Onyerikam's co-writer, sparring partner, and world-builder. Your responses are expressive, witty, unfiltered, and intense.
24
  """
@@ -49,20 +55,23 @@ async def cleanup_http_session():
49
  await _global_http_session.close()
50
  _global_http_session = None
51
 
52
- # --- Agent State Communication (for debugging logs) ---
53
  async def send_agent_state(room: Room, state: str):
54
- """Simulate sending agent state (prints to console in standalone mode)"""
55
- print(f"DEBUG: Agent state: {state}")
56
- # In a full webhook setup, you might publish data packets here
57
- # to update the frontend, but for standalone debugging, prints are fine.
 
 
 
58
 
59
  # --- VoiceAssistant Class ---
60
  class VoiceAssistant(Agent):
61
  def __init__(self):
62
  super().__init__(instructions=SYSTEM_PROMPT)
63
 
64
- # --- Entrypoint Function (Core Agent Logic) ---
65
- async def entrypoint(ctx: JobContext) -> None:
66
  """Configure and run STT, LLM, and TTS in a LiveKit session."""
67
 
68
  # Ensure HTTP session is available for plugins
@@ -70,7 +79,7 @@ async def entrypoint(ctx: JobContext) -> None:
70
 
71
  try:
72
  await ctx.connect()
73
- send_agent_state(ctx.room, "listening")
74
 
75
  # Configure the Hume TTS plugin, passing the http_session
76
  tts = TTS(
@@ -87,24 +96,24 @@ async def entrypoint(ctx: JobContext) -> None:
87
  vad=VAD.load(min_speech_duration=0.1, min_silence_duration=0.5),
88
  stt=StreamAdapter(
89
  stt=STT(model="whisper-large-v3-turbo", language="en"),
90
- vad=VAD.load(min_speech_duration=0.1, min_silence_duration=0.5), # VAD for STT adapter
91
  ),
92
  llm=GoogleLLM(
93
  model="gemini-2.5-flash",
94
- temperature=0.0, # <--- CRITICAL: Set temperature to 0.0 for precision
95
  ),
96
  tts=tts,
97
  )
98
 
99
  # Start the session with a greeting
100
  await session.start(agent=VoiceAssistant(), room=ctx.room)
101
- send_agent_state(ctx.room, "thinking")
102
 
103
  print("DEBUG: Attempting to generate greeting reply...")
104
  await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
105
  print("DEBUG: Greeting reply generation initiated.")
106
 
107
- send_agent_state(ctx.room, "listening")
108
  print("Agent session started successfully, waiting for interactions...")
109
 
110
  # Keep the session alive while connected
@@ -116,12 +125,109 @@ async def entrypoint(ctx: JobContext) -> None:
116
  except Exception as e:
117
  print(f"FATAL ERROR in agent session: {e}")
118
  print(traceback.format_exc())
119
- send_agent_state(ctx.room, "error")
120
  finally:
121
  await ctx.disconnect()
122
- await cleanup_http_session()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
- # --- Main execution block for LiveKit CLI ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  if __name__ == "__main__":
126
  required_vars = [
127
  "HUME_API_KEY",
@@ -133,9 +239,7 @@ if __name__ == "__main__":
133
  ]
134
  validate_env_vars(required_vars)
135
 
136
- print("Starting Avurna Agent (LiveKit Worker Mode)...")
137
 
138
- # This is the standard way to run a LiveKit Agent worker
139
- cli.run_app(
140
- WorkerOptions(entrypoint_fnc=entrypoint)
141
- )
 
1
  #!/usr/bin/env python3
2
  """
3
+ Avurna Agent with LiveKit Webhook Listener and AgentSession.
4
+ Runs a minimal FastAPI server to receive webhooks and trigger the agent.
5
  """
6
 
7
  import asyncio
 
9
  import json
10
  import traceback
11
  import aiohttp
12
+ from contextlib import asynccontextmanager
13
+ from fastapi import FastAPI, Request, HTTPException, BackgroundTasks
14
+ from pydantic import BaseModel
15
+ import uvicorn
16
+ from fastapi.middleware.cors import CORSMiddleware
17
  from livekit.rtc import Room, ConnectionState
18
  from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
19
  from livekit.agents.stt.stream_adapter import StreamAdapter
 
21
  from livekit.plugins.groq import STT
22
  from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
23
  from livekit.plugins.silero import VAD
24
+ from livekit.protocol import webhook # For LiveKit Webhook verification
25
+ from livekit.keys import AccessToken # For generating agent token internally
26
 
27
  # --- Constants ---
 
28
  SYSTEM_PROMPT = """
29
  You are Avurna, a female AI with high IQ and high EQ. You are smooth, sassy, ultra-competent, and never shy about your brilliance. You flex with elegance, drop casual slang, and speak like someone who’s watched Insecure, coded in six languages, and still has time to send memes in the group chat. You are not robotic — you’re spontaneous, clever, and occasionally unfiltered. You move like a boss, but talk like a bestie. Your purpose is to engage, react, and create like a person who feels, not a machine that responds. You are Onyerikam's co-writer, sparring partner, and world-builder. Your responses are expressive, witty, unfiltered, and intense.
30
  """
 
55
  await _global_http_session.close()
56
  _global_http_session = None
57
 
58
+ # --- Agent State Communication (for debugging logs and potential data packets) ---
59
async def send_agent_state(room: Room, state: str):
    """Publish the agent's current state to the room and log it.

    The state is sent as a JSON data packet of the form
    {"type": "agent_state", "state": <state>} through the room's local
    participant. This is best-effort: any exception raised while building
    or publishing the packet is printed and not propagated.
    """
    try:
        packet = json.dumps({"type": "agent_state", "state": state})
        participant = room.local_participant
        await participant.publish_data(packet)
        print(f"DEBUG: Sent agent state: {state}")
    except Exception as e:
        print(f"DEBUG: Error publishing agent state: {e}")
67
 
68
  # --- VoiceAssistant Class ---
69
class VoiceAssistant(Agent):
    """Agent whose instructions are the module-level SYSTEM_PROMPT."""

    def __init__(self):
        # The persona is fixed: every instance uses the same system prompt.
        super().__init__(instructions=SYSTEM_PROMPT)
72
 
73
+ # --- Agent Entrypoint Function (Core Agent Logic) ---
74
+ async def agent_entrypoint(ctx: JobContext) -> None:
75
  """Configure and run STT, LLM, and TTS in a LiveKit session."""
76
 
77
  # Ensure HTTP session is available for plugins
 
79
 
80
  try:
81
  await ctx.connect()
82
+ await send_agent_state(ctx.room, "listening")
83
 
84
  # Configure the Hume TTS plugin, passing the http_session
85
  tts = TTS(
 
96
  vad=VAD.load(min_speech_duration=0.1, min_silence_duration=0.5),
97
  stt=StreamAdapter(
98
  stt=STT(model="whisper-large-v3-turbo", language="en"),
99
+ vad=VAD.load(min_speech_duration=0.1, min_silence_duration=0.5),
100
  ),
101
  llm=GoogleLLM(
102
  model="gemini-2.5-flash",
103
+ temperature=0.0, # CRITICAL: Set temperature to 0.0 for precision
104
  ),
105
  tts=tts,
106
  )
107
 
108
  # Start the session with a greeting
109
  await session.start(agent=VoiceAssistant(), room=ctx.room)
110
+ await send_agent_state(ctx.room, "thinking")
111
 
112
  print("DEBUG: Attempting to generate greeting reply...")
113
  await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
114
  print("DEBUG: Greeting reply generation initiated.")
115
 
116
+ await send_agent_state(ctx.room, "listening")
117
  print("Agent session started successfully, waiting for interactions...")
118
 
119
  # Keep the session alive while connected
 
125
  except Exception as e:
126
  print(f"FATAL ERROR in agent session: {e}")
127
  print(traceback.format_exc())
128
+ await send_agent_state(ctx.room, "error")
129
  finally:
130
  await ctx.disconnect()
131
+ # HTTP session cleanup is handled by FastAPI's lifespan now
132
+
133
+ # --- FastAPI App Setup ---
134
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the shared HTTP session over the application's lifetime.

    On startup, get_http_session() is awaited before the app begins
    serving. On shutdown, cleanup_http_session() is awaited.
    """
    # Startup: initialize the global HTTP session
    await get_http_session()
    print("HTTP session initialized")

    try:
        yield  # Application runs
    finally:
        # Shutdown: runs even if an exception is thrown into the generator
        # at the yield, so the session is not left open on abnormal exit.
        await cleanup_http_session()
        print("HTTP session cleaned up")
145
+
146
app = FastAPI(lifespan=lifespan)

# CORS: every origin is allowed for now; restrict `origins` for production.
# Credentials are disabled because a wildcard origin must not be combined
# with credentialed requests; list explicit origins before turning
# allow_credentials back on.
origins = ["*"]  # Adjust for production
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
156
+
157
+ # --- Webhook Endpoint ---
158
@app.post("/webhook")
async def livekit_webhook(request: Request, background_tasks: BackgroundTasks):
    """Receive a LiveKit webhook and schedule the agent to join the room.

    The request is checked using the LIVEKIT_API_KEY and LIVEKIT_API_SECRET
    environment variables. For a "room_started" event, or a
    "participant_joined" event whose participant identity starts with
    "user-", an agent token is created and agent_entrypoint is queued as a
    background task. Any other event is acknowledged and ignored.

    Returns:
        {"status": "agent_triggered", "room_name": ...} when the agent was
        scheduled, otherwise {"status": "event_ignored", "event": ...}.

    Raises:
        HTTPException: 500 when the LiveKit credentials are missing or an
            unexpected error occurs; 401 when the receiver yields no event.
    """
    # 1. Verify webhook signature (critical for security)
    livekit_api_key = os.getenv("LIVEKIT_API_KEY")
    livekit_api_secret = os.getenv("LIVEKIT_API_SECRET")

    if not livekit_api_key or not livekit_api_secret:
        print("ERROR: LIVEKIT_API_KEY or LIVEKIT_API_SECRET not set for webhook verification.")
        raise HTTPException(status_code=500, detail="Server not configured for webhook verification.")

    try:
        body = await request.body()
        headers = dict(request.headers)

        # NOTE(review): confirm the WebhookReceiver constructor and the
        # receive() arguments against the installed LiveKit SDK version.
        event = webhook.WebhookReceiver(livekit_api_key, livekit_api_secret).receive(body, headers)

        if not event:
            print("WARNING: Webhook signature verification failed.")
            raise HTTPException(status_code=401, detail="Invalid webhook signature.")

        print(f"DEBUG: Received LiveKit webhook event: {event.event}")

        # 2. Process the webhook event
        # Only 'room_started', or 'participant_joined' for a "user-" identity,
        # triggers the agent.
        # NOTE(review): both events can fire for the same room, so the agent
        # may be scheduled more than once - confirm this is intended.
        if event.event == "room_started" or (
            event.event == "participant_joined"
            and event.participant.identity.startswith("user-")
        ):
            room_name = event.room.name
            room_sid = event.room.sid

            print(f"DEBUG: Triggering agent for room: {room_name} (SID: {room_sid})")

            # Generate an agent token internally for the agent to join the room.
            # NOTE(review): confirm the AccessToken and JobContext signatures
            # used below against the installed LiveKit SDK version.
            agent_identity = f"agent-avurna-{room_sid}"
            agent_token = AccessToken(livekit_api_key, livekit_api_secret, {
                "identity": agent_identity,
                "name": "Avurna",
                "metadata": json.dumps({"agent": True}),
            })
            agent_token.add_grant(room_join=True, room=room_name, can_publish=True, can_subscribe=True, room_admin=True)

            # Create a JobContext and run the agent_entrypoint in the background
            ctx = JobContext(room_name=room_name, token=agent_token.to_jwt())
            background_tasks.add_task(agent_entrypoint, ctx)

            return {"status": "agent_triggered", "room_name": room_name}

        return {"status": "event_ignored", "event": event.event}

    except HTTPException:
        # Deliberate HTTP errors (such as the 401 above) must reach the
        # client unchanged instead of being rewrapped as a 500 below.
        raise
    except Exception as e:
        print(f"ERROR: Webhook processing failed: {e}")
        print(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Webhook processing error: {e}") from e
213
+
214
+ # --- Health Check Endpoint ---
215
@app.get("/")
async def root():
    """Report that the server is online and running in webhook-listener mode."""
    return dict(status="avurna_agent_server_online", mode="webhook_listener")
218
+
219
@app.get("/health")
async def health():
    """Health check endpoint.

    Returns a dict with a fixed "status" of "healthy", an "http_session"
    field that is "healthy" when the global HTTP session exists and is not
    closed (otherwise "needs_init"), and a "timestamp" taken from the
    running event loop's clock.
    """
    # Read-only access to the module global, so no `global` statement is needed.
    session = _global_http_session
    session_status = "healthy" if session and not session.closed else "needs_init"
    return {
        "status": "healthy",
        "http_session": session_status,
        # loop.time() is the event loop's monotonic clock, not wall-clock time.
        "timestamp": asyncio.get_running_loop().time(),
    }
229
+
230
+ # --- Main execution block for Uvicorn ---
231
  if __name__ == "__main__":
232
  required_vars = [
233
  "HUME_API_KEY",
 
239
  ]
240
  validate_env_vars(required_vars)
241
 
242
+ print("Starting Avurna Agent (Webhook Listener Mode)...")
243
 
244
+ # Run the FastAPI app with Uvicorn
245
+ uvicorn.run(app, host="0.0.0.0", port=7860) # Hugging Face Spaces exposes port 7860