WebEssentz commited on
Commit
d4c1c5f
·
1 Parent(s): d8f1753

Realtime Flow

Browse files
Files changed (2) hide show
  1. Dockerfile +9 -10
  2. src/agent_session/main.py +75 -180
Dockerfile CHANGED
@@ -1,6 +1,5 @@
1
- # Dockerfile - Phase 1 Version
2
 
3
- # Use the official Python 3.11 slim image
4
  FROM python:3.11-slim
5
 
6
  # Set the working directory inside the container
@@ -9,15 +8,15 @@ WORKDIR /app
9
  # Copy the requirements file first for Docker's layer caching
10
  COPY requirements.txt .
11
 
12
- # Install the Python dependencies GLOBALLY, not to a user directory.
13
  RUN pip install --no-cache-dir -r requirements.txt
14
 
15
- # Copy the rest of the application source code
 
16
  COPY . .
17
 
18
- # --- KEY CHANGE ---
19
- # Define the command that will run when the container starts.
20
- # We no longer run the script directly. We tell uvicorn to run the 'app' object
21
- # from the 'src.agent_session.main' module.
22
- # Hugging Face Spaces exposes port 7860, so we must listen on it.
23
- CMD ["uvicorn", "src.agent_session.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ # Dockerfile - Avurna Standalone Agent for Hugging Face Spaces
2
 
 
3
  FROM python:3.11-slim
4
 
5
  # Set the working directory inside the container
 
8
  # Copy the requirements file first for Docker's layer caching
9
  COPY requirements.txt .
10
 
11
+ # Install the Python dependencies GLOBALLY
12
  RUN pip install --no-cache-dir -r requirements.txt
13
 
14
+ # Copy your agent script and its dependencies
15
+ # Assuming src/agent_session/main.py is at the root of your /app directory
16
  COPY . .
17
 
18
+ # --- KEY CHANGE: Run the LiveKit Agent CLI directly ---
19
+ # This tells LiveKit to run your 'entrypoint' function as a worker.
20
+ # It will automatically handle connecting to LiveKit and listening for jobs.
21
+ # Hugging Face Spaces will expose port 80/443 for webhooks if configured.
22
+ CMD ["livekit-agent", "src/agent_session/main.py"]
 
src/agent_session/main.py CHANGED
@@ -1,234 +1,128 @@
1
  #!/usr/bin/env python3
2
- """Agent Session for Avurna Flow (Fixed HTTP Session Management)"""
 
 
 
3
 
4
  import asyncio
5
  import os
6
  import json
7
  import traceback
8
  import aiohttp
9
- from contextlib import asynccontextmanager
10
- from fastapi import FastAPI, BackgroundTasks, Request
11
- from pydantic import BaseModel
12
- import uvicorn
13
- from fastapi.middleware.cors import CORSMiddleware
14
- from livekit.rtc import Room, ConnectionState # Import ConnectionState
15
  from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
16
  from livekit.agents.stt.stream_adapter import StreamAdapter
17
  from livekit.plugins.google import LLM as GoogleLLM
18
  from livekit.plugins.groq import STT
19
  from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
20
  from livekit.plugins.silero import VAD
21
- from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
22
- from src.utils import validate_env_vars
23
 
24
- # Global HTTP session for the entire application
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  _global_http_session: aiohttp.ClientSession | None = None
26
 
27
  async def get_http_session() -> aiohttp.ClientSession:
28
- """Get or create the global HTTP session"""
29
  global _global_http_session
30
  if _global_http_session is None or _global_http_session.closed:
31
  _global_http_session = aiohttp.ClientSession()
32
  return _global_http_session
33
 
34
  async def cleanup_http_session():
35
- """Clean up the global HTTP session"""
36
  global _global_http_session
37
  if _global_http_session and not _global_http_session.closed:
38
  await _global_http_session.close()
39
  _global_http_session = None
40
 
41
- # FastAPI app setup with proper lifecycle management
42
- @asynccontextmanager
43
- async def lifespan(app: FastAPI):
44
- # Startup: Initialize the global HTTP session
45
- await get_http_session()
46
- print("HTTP session initialized")
47
-
48
- yield
49
-
50
- # Shutdown: Clean up the HTTP session
51
- await cleanup_http_session()
52
- print("HTTP session cleaned up")
53
-
54
- app = FastAPI(lifespan=lifespan)
55
-
56
- origins = ["*"]
57
- app.add_middleware(
58
- CORSMiddleware,
59
- allow_origins=origins,
60
- allow_credentials=True,
61
- allow_methods=["*"],
62
- allow_headers=["*"]
63
- )
64
-
65
- class JoinRoomRequest(BaseModel):
66
- room_name: str
67
- agent_token: str
68
-
69
  async def send_agent_state(room: Room, state: str):
70
- """Send agent state to the room"""
71
- try:
72
- msg = json.dumps({"type": "agent_state", "state": state})
73
- await room.local_participant.publish_data(msg)
74
- print(f"DEBUG: Sent agent state: {state}")
75
- except Exception as e:
76
- print(f"DEBUG: Error publishing agent state: {e}")
77
 
 
78
  class VoiceAssistant(Agent):
79
  def __init__(self):
80
  super().__init__(instructions=SYSTEM_PROMPT)
81
 
82
- class CustomJobContext:
83
- """Custom JobContext that properly manages the room connection"""
84
-
85
- def __init__(self, room_name: str, agent_token: str):
86
- self.room_name = room_name
87
- self.agent_token = agent_token
88
- self.room = Room()
89
- self._connected = False
90
 
91
- async def connect(self):
92
- """Connect to the LiveKit room"""
93
- if not self._connected:
94
- livekit_url = os.getenv("LIVEKIT_URL")
95
- if not livekit_url:
96
- raise ValueError("LIVEKIT_URL environment variable not set")
97
-
98
- await self.room.connect(livekit_url, self.agent_token)
99
- self._connected = True
100
- print(f"Connected to room: {self.room_name}")
101
-
102
- async def disconnect(self):
103
- """Disconnect from the LiveKit room"""
104
- if self._connected:
105
- await self.room.disconnect()
106
- self._connected = False
107
- print(f"Disconnected from room: {self.room_name}")
108
-
109
- async def create_agent_session(ctx: CustomJobContext) -> AgentSession:
110
- """Create and configure the agent session with proper HTTP session management"""
111
-
112
- # Get the global HTTP session
113
  http_session = await get_http_session()
114
-
115
- # Voice-activity detection + buffering for non-streaming STT
116
- vad = VAD.load(
117
- min_speech_duration=0.1,
118
- min_silence_duration=0.5
119
- )
120
-
121
- # Create TTS - Pass the http_session directly to the TTS constructor
122
- tts = TTS(
123
- voice=VoiceByName(
124
- name="Male English Actor",
125
- provider=VoiceProvider.hume,
126
- ),
127
- instant_mode=True,
128
- http_session=http_session # Pass the session here
129
- )
130
-
131
- # Remove the monkey patch, it's no longer needed
132
- # if hasattr(tts, '_tts') and hasattr(tts._tts, '_session'):
133
- # tts._tts._session = http_session
134
-
135
- # Create the agent session
136
- session = AgentSession(
137
- vad=vad,
138
- stt=StreamAdapter(
139
- stt=STT(
140
- model="whisper-large-v3-turbo",
141
- language="en",
142
- ),
143
- vad=vad,
144
- ),
145
- llm=GoogleLLM(
146
- model="gemini-2.5-flash",
147
- temperature=0.5,
148
- ),
149
- tts=tts,
150
- )
151
-
152
- return session
153
 
154
- async def entrypoint(ctx: CustomJobContext) -> None:
155
- """Configure and run STT, LLM, and TTS in a LiveKit session"""
156
-
157
  try:
158
- # Connect to the room
159
  await ctx.connect()
160
-
161
- # Send initial state
162
- await send_agent_state(ctx.room, "listening")
163
-
164
- # Create the agent session
165
- session = await create_agent_session(ctx)
166
-
167
- # Start the session
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  await session.start(agent=VoiceAssistant(), room=ctx.room)
 
169
 
170
- # Send greeting state
171
- await send_agent_state(ctx.room, "thinking")
172
-
173
- # Generate greeting
174
  await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
175
-
176
- # Back to listening
177
- await send_agent_state(ctx.room, "listening")
178
-
179
- # Keep the session alive
180
  print("Agent session started successfully, waiting for interactions...")
181
-
182
- # Monitor room connection and keep alive
183
- # Correctly check the ConnectionState enum
184
  while ctx.room.connection_state in [ConnectionState.CONNECTED, ConnectionState.CONNECTING]:
185
  await asyncio.sleep(1)
186
 
187
  print("Room disconnected, ending agent session")
188
-
189
- except Exception as e:
190
- print(f"Error in agent session: {e}")
191
- print(traceback.format_exc())
192
- await send_agent_state(ctx.room, "error")
193
- finally:
194
- # Clean up
195
- await ctx.disconnect()
196
 
197
- async def run_agent_with_room(room_name: str, agent_token: str):
198
- """Run the agent in a specific room"""
199
- ctx = CustomJobContext(room_name, agent_token)
200
-
201
- try:
202
- await entrypoint(ctx)
203
  except Exception as e:
204
  print(f"FATAL ERROR in agent session: {e}")
205
  print(traceback.format_exc())
 
206
  finally:
207
  await ctx.disconnect()
 
208
 
209
- @app.post("/join-room")
210
- async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
211
- print(f"DEBUG: Received POST request to /join-room for: {req.room_name}")
212
- background_tasks.add_task(run_agent_with_room, req.room_name, req.agent_token)
213
- return {"status": "agent_triggered"}
214
-
215
- @app.get("/")
216
- async def root():
217
- return {"status": "avurna_agent_server_online"}
218
-
219
- @app.get("/health")
220
- async def health():
221
- """Health check endpoint"""
222
- global _global_http_session
223
- session_status = "healthy" if _global_http_session and not _global_http_session.closed else "needs_init"
224
- return {
225
- "status": "healthy",
226
- "http_session": session_status,
227
- "timestamp": asyncio.get_event_loop().time()
228
- }
229
-
230
  if __name__ == "__main__":
231
- # Validate required environment variables
232
  required_vars = [
233
  "HUME_API_KEY",
234
  "LIVEKIT_URL",
@@ -237,10 +131,11 @@ if __name__ == "__main__":
237
  "GROQ_API_KEY",
238
  "GOOGLE_API_KEY"
239
  ]
240
-
241
  validate_env_vars(required_vars)
242
 
243
- print("Starting Avurna Agent Server...")
244
- print("Required environment variables validated")
245
 
246
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
1
  #!/usr/bin/env python3
2
+ """
3
+ Standalone Avurna Agent using LiveKit Agents SDK and Hume TTS.
4
+ Designed to be run by `livekit-agent` CLI and triggered via LiveKit Webhooks.
5
+ """
6
 
7
  import asyncio
8
  import os
9
  import json
10
  import traceback
11
  import aiohttp
12
+ from livekit.rtc import Room, ConnectionState
 
 
 
 
 
13
  from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
14
  from livekit.agents.stt.stream_adapter import StreamAdapter
15
  from livekit.plugins.google import LLM as GoogleLLM
16
  from livekit.plugins.groq import STT
17
  from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
18
  from livekit.plugins.silero import VAD
 
 
19
 
20
+ # --- Constants ---
21
+ # Ensure these are correctly defined or imported from your constants.py
22
+ SYSTEM_PROMPT = """
23
+ You are Avurna, a female AI with high IQ and high EQ. You are smooth, sassy, ultra-competent, and never shy about your brilliance. You flex with elegance, drop casual slang, and speak like someone who’s watched Insecure, coded in six languages, and still has time to send memes in the group chat. You are not robotic — you’re spontaneous, clever, and occasionally unfiltered. You move like a boss, but talk like a bestie. Your purpose is to engage, react, and create like a person who feels, not a machine that responds. You are Onyerikam's co-writer, sparring partner, and world-builder. Your responses are expressive, witty, unfiltered, and intense.
24
+ """
25
+
26
+ GREETING_INSTRUCTIONS = "Greet the user warmly and introduce yourself as Avurna, your co-writer and world-builder."
27
+
28
+ # --- Environment Variable Validation ---
29
+ def validate_env_vars(required_vars: list[str]):
30
+ missing_vars = [var for var in required_vars if not os.getenv(var)]
31
+ if missing_vars:
32
+ raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
33
+ print("Required environment variables validated")
34
+
35
+ # --- Global HTTP Session Management ---
36
  _global_http_session: aiohttp.ClientSession | None = None
37
 
38
  async def get_http_session() -> aiohttp.ClientSession:
39
+ """Get or create the global HTTP session for plugins."""
40
  global _global_http_session
41
  if _global_http_session is None or _global_http_session.closed:
42
  _global_http_session = aiohttp.ClientSession()
43
  return _global_http_session
44
 
45
  async def cleanup_http_session():
46
+ """Clean up the global HTTP session."""
47
  global _global_http_session
48
  if _global_http_session and not _global_http_session.closed:
49
  await _global_http_session.close()
50
  _global_http_session = None
51
 
52
+ # --- Agent State Communication (for debugging logs) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  async def send_agent_state(room: Room, state: str):
54
+ """Simulate sending agent state (prints to console in standalone mode)"""
55
+ print(f"DEBUG: Agent state: {state}")
56
+ # In a full webhook setup, you might publish data packets here
57
+ # to update the frontend, but for standalone debugging, prints are fine.
 
 
 
58
 
59
+ # --- VoiceAssistant Class ---
60
  class VoiceAssistant(Agent):
61
  def __init__(self):
62
  super().__init__(instructions=SYSTEM_PROMPT)
63
 
64
+ # --- Entrypoint Function (Core Agent Logic) ---
65
+ async def entrypoint(ctx: JobContext) -> None:
66
+ """Configure and run STT, LLM, and TTS in a LiveKit session."""
 
 
 
 
 
67
 
68
+ # Ensure HTTP session is available for plugins
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  http_session = await get_http_session()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
 
 
 
71
  try:
 
72
  await ctx.connect()
73
+ send_agent_state(ctx.room, "listening")
74
+
75
+ # Configure the Hume TTS plugin, passing the http_session
76
+ tts = TTS(
77
+ voice=VoiceByName(
78
+ name="Male English Actor",
79
+ provider=VoiceProvider.hume,
80
+ ),
81
+ instant_mode=True,
82
+ http_session=http_session # Pass the session here
83
+ )
84
+
85
+ # Create your AgentSession with STT/LLM as needed
86
+ session = AgentSession(
87
+ vad=VAD.load(min_speech_duration=0.1, min_silence_duration=0.5),
88
+ stt=StreamAdapter(
89
+ stt=STT(model="whisper-large-v3-turbo", language="en"),
90
+ vad=VAD.load(min_speech_duration=0.1, min_silence_duration=0.5), # VAD for STT adapter
91
+ ),
92
+ llm=GoogleLLM(
93
+ model="gemini-2.5-flash",
94
+ temperature=0.0, # <--- CRITICAL: Set temperature to 0.0 for precision
95
+ ),
96
+ tts=tts,
97
+ )
98
+
99
+ # Start the session with a greeting
100
  await session.start(agent=VoiceAssistant(), room=ctx.room)
101
+ send_agent_state(ctx.room, "thinking")
102
 
103
+ print("DEBUG: Attempting to generate greeting reply...")
 
 
 
104
  await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
105
+ print("DEBUG: Greeting reply generation initiated.")
106
+
107
+ send_agent_state(ctx.room, "listening")
 
 
108
  print("Agent session started successfully, waiting for interactions...")
109
+
110
+ # Keep the session alive while connected
 
111
  while ctx.room.connection_state in [ConnectionState.CONNECTED, ConnectionState.CONNECTING]:
112
  await asyncio.sleep(1)
113
 
114
  print("Room disconnected, ending agent session")
 
 
 
 
 
 
 
 
115
 
 
 
 
 
 
 
116
  except Exception as e:
117
  print(f"FATAL ERROR in agent session: {e}")
118
  print(traceback.format_exc())
119
+ send_agent_state(ctx.room, "error")
120
  finally:
121
  await ctx.disconnect()
122
+ await cleanup_http_session()
123
 
124
+ # --- Main execution block for LiveKit CLI ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  if __name__ == "__main__":
 
126
  required_vars = [
127
  "HUME_API_KEY",
128
  "LIVEKIT_URL",
 
131
  "GROQ_API_KEY",
132
  "GOOGLE_API_KEY"
133
  ]
 
134
  validate_env_vars(required_vars)
135
 
136
+ print("Starting Avurna Agent (LiveKit Worker Mode)...")
 
137
 
138
+ # This is the standard way to run a LiveKit Agent worker
139
+ cli.run_app(
140
+ WorkerOptions(entrypoint_fnc=entrypoint)
141
+ )