WebEssentz commited on
Commit
aab8473
·
1 Parent(s): 054b7cc

Realtime Flow

Browse files
Files changed (3) hide show
  1. Dockerfile +0 -1
  2. requirements.txt +1 -1
  3. src/agent_session/main.py +35 -46
Dockerfile CHANGED
@@ -1,5 +1,4 @@
1
  # Dockerfile - Phase 1 Version
2
- # Generated with 💚 by Avurna AI (2025)
3
 
4
  # Use the official Python 3.11 slim image
5
  FROM python:3.11-slim
 
1
  # Dockerfile - Phase 1 Version
 
2
 
3
  # Use the official Python 3.11 slim image
4
  FROM python:3.11-slim
requirements.txt CHANGED
@@ -5,4 +5,4 @@ livekit-plugins-hume
5
  livekit-plugins-silero
6
  fastapi
7
  uvicorn
8
- livekit
 
5
  livekit-plugins-silero
6
  fastapi
7
  uvicorn
8
+ livekit
src/agent_session/main.py CHANGED
@@ -4,11 +4,12 @@ Agent Session for Avurna Flow, wrapped in a FastAPI server.
4
  """
5
  import asyncio
6
  import os
 
7
  from fastapi import FastAPI, BackgroundTasks
8
  from pydantic import BaseModel
9
  import uvicorn
10
 
11
- from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions
12
  from livekit.agents.stt.stream_adapter import StreamAdapter
13
  from livekit.plugins.google import LLM as GoogleLLM
14
  from livekit.plugins.groq import STT
@@ -18,88 +19,76 @@ from livekit.plugins.silero import VAD
18
  from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
19
  from src.utils import validate_env_vars
20
 
21
- # --- FastAPI App Definition ---
22
  app = FastAPI()
23
 
24
- # --- Pydantic Model for the request body ---
25
  class JoinRoomRequest(BaseModel):
26
  room_name: str
27
  agent_token: str
28
 
29
- # --- The Core Agent Logic (mostly unchanged) ---
30
  class VoiceAssistant(Agent):
31
  def __init__(self):
32
  super().__init__(instructions=SYSTEM_PROMPT)
33
 
 
 
 
 
 
 
 
 
 
 
34
  async def run_agent_session(room_name: str, agent_token: str):
35
- """
36
- This function contains the core logic to connect and run the agent in a LiveKit room.
37
- """
38
  livekit_url = os.getenv("LIVEKIT_URL")
39
-
40
- # This context will be used by the agent to connect to the room
41
- ctx = JobContext(
42
- room_name=room_name,
43
- livekit_url=livekit_url,
44
- token=agent_token,
45
- )
46
 
47
  await ctx.connect()
 
48
 
49
  vad = VAD.load(min_speech_duration=0.1, min_silence_duration=0.5)
 
 
 
 
 
 
 
 
 
 
 
50
  session = AgentSession(
51
  vad=vad,
52
- stt=StreamAdapter(
53
- stt=STT(model="whisper-large-v3-turbo", language="en"),
54
- vad=vad,
55
- ),
56
- llm=GoogleLLM(model="gemini-1.5-flash", temperature=0.5), # Changed to 1.5-flash for more power
57
- tts=TTS(
58
- voice=VoiceByName(name="Tiktok Fashion Influencer", provider=VoiceProvider.hume),
59
- instant_mode=True
60
- ),
61
  )
62
 
63
  print(f"Agent starting session in room: {room_name}")
64
  await session.start(agent=VoiceAssistant(), room=ctx.room)
 
 
 
65
  await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
 
 
66
  print(f"Agent session ended for room: {room_name}")
67
 
68
 
69
- # --- FastAPI Endpoint ---
70
  @app.post("/join-room")
71
  async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
72
- """
73
- This endpoint is called by the frontend to trigger the agent.
74
- It immediately returns a success message and starts the agent in the background.
75
- """
76
  print(f"Received request for agent to join room: {req.room_name}")
77
-
78
- # Add the long-running agent session as a background task
79
  background_tasks.add_task(run_agent_session, req.room_name, req.agent_token)
80
-
81
  return {"status": "agent_joining"}
82
 
83
-
84
- # --- Health Check Endpoint (good practice) ---
85
  @app.get("/")
86
  async def root():
87
  return {"status": "avurna_agent_server_online"}
88
 
89
-
90
- # --- Main execution block ---
91
  if __name__ == "__main__":
92
- # Validate environment variables on startup
93
  validate_env_vars([
94
- "HUME_API_KEY",
95
- "LIVEKIT_URL",
96
- "LIVEKIT_API_KEY",
97
- "LIVEKIT_API_SECRET",
98
- "GROQ_API_KEY",
99
- "GOOGLE_API_KEY", # Assuming you meant GOOGLE_API_KEY from your original file
100
  ])
101
-
102
- # Run the FastAPI server using uvicorn
103
- # Hugging Face Spaces requires the app to run on port 7860
104
- # and host 0.0.0.0 to be accessible from the internet.
105
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
4
  """
5
  import asyncio
6
  import os
7
+ import json # Import the json library
8
  from fastapi import FastAPI, BackgroundTasks
9
  from pydantic import BaseModel
10
  import uvicorn
11
 
12
+ from livekit.agents import Agent, AgentSession, JobContext
13
  from livekit.agents.stt.stream_adapter import StreamAdapter
14
  from livekit.plugins.google import LLM as GoogleLLM
15
  from livekit.plugins.groq import STT
 
19
  from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
20
  from src.utils import validate_env_vars
21
 
 
22
  app = FastAPI()
23
 
 
24
  class JoinRoomRequest(BaseModel):
25
  room_name: str
26
  agent_token: str
27
 
 
28
  class VoiceAssistant(Agent):
29
  def __init__(self):
30
  super().__init__(instructions=SYSTEM_PROMPT)
31
 
32
+ # --- KEY CHANGE: Function to send state to the frontend ---
33
+ async def send_agent_state(ctx: JobContext, state: str):
34
+ """Helper to notify the frontend of the agent's current state."""
35
+ try:
36
+ msg = json.dumps({"type": "agent_state", "state": state})
37
+ await ctx.room.local_participant.publish_data(msg)
38
+ print(f"Sent agent state: {state}")
39
+ except Exception as e:
40
+ print(f"Error publishing agent state: {e}")
41
+
42
  async def run_agent_session(room_name: str, agent_token: str):
 
 
 
43
  livekit_url = os.getenv("LIVEKIT_URL")
44
+ ctx = JobContext(room_name=room_name, livekit_url=livekit_url, token=agent_token)
 
 
 
 
 
 
45
 
46
  await ctx.connect()
47
+ await send_agent_state(ctx, "listening") # Start in listening state
48
 
49
  vad = VAD.load(min_speech_duration=0.1, min_silence_duration=0.5)
50
+
51
+ # --- This is a conceptual wrapper for the LLM to inject state messages ---
52
+ # We will wrap the original LLM to send messages before and after it runs.
53
+ original_llm = GoogleLLM(model="gemini-1.5-flash", temperature=0.5)
54
+
55
+ async def llm_wrapper_fnc(history):
56
+ await send_agent_state(ctx, "thinking") # State change before LLM call
57
+ result = await original_llm.chat(history)
58
+ await send_agent_state(ctx, "speaking") # State change after LLM, before TTS
59
+ return result
60
+
61
  session = AgentSession(
62
  vad=vad,
63
+ stt=StreamAdapter(stt=STT(model="whisper-large-v3-turbo", language="en"), vad=vad),
64
+ llm=llm_wrapper_fnc, # Use our wrapped function
65
+ tts=TTS(voice=VoiceByName(name="Tiktok Fashion Influencer", provider=VoiceProvider.hume), instant_mode=True),
 
 
 
 
 
 
66
  )
67
 
68
  print(f"Agent starting session in room: {room_name}")
69
  await session.start(agent=VoiceAssistant(), room=ctx.room)
70
+
71
+ # We'll also wrap the initial greeting
72
+ await send_agent_state(ctx, "speaking")
73
  await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
74
+ await send_agent_state(ctx, "listening") # Return to listening after greeting
75
+
76
  print(f"Agent session ended for room: {room_name}")
77
 
78
 
 
79
  @app.post("/join-room")
80
  async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
 
 
 
 
81
  print(f"Received request for agent to join room: {req.room_name}")
 
 
82
  background_tasks.add_task(run_agent_session, req.room_name, req.agent_token)
 
83
  return {"status": "agent_joining"}
84
 
 
 
85
  @app.get("/")
86
  async def root():
87
  return {"status": "avurna_agent_server_online"}
88
 
 
 
89
  if __name__ == "__main__":
 
90
  validate_env_vars([
91
+ "HUME_API_KEY", "LIVEKIT_URL", "LIVEKIT_API_KEY", "LIVEKIT_API_SECRET",
92
+ "GROQ_API_KEY", "GOOGLE_API_KEY",
 
 
 
 
93
  ])
 
 
 
 
94
  uvicorn.run(app, host="0.0.0.0", port=7860)