WebEssentz commited on
Commit
71198d5
·
1 Parent(s): 9270a01

Realtime Flow

Browse files
Files changed (1) hide show
  1. src/agent_session/main.py +45 -17
src/agent_session/main.py CHANGED
@@ -1,6 +1,6 @@
1
  #!/usr/bin/env python3
2
  """
3
- Agent Session for Avurna Flow (Corrected Code)
4
  """
5
  import os
6
  import json
@@ -14,12 +14,12 @@ from fastapi.middleware.cors import CORSMiddleware
14
 
15
  from livekit.rtc import Room
16
  from livekit.agents import Agent, AgentSession
17
- from livekit.agents.utils import http_context
 
18
  from livekit.plugins.google import LLM as GoogleLLM
19
  from livekit.plugins.groq import STT
20
  from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
21
  from livekit.plugins.silero import VAD
22
- from livekit.agents.stt import StreamAdapter
23
 
24
  from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
25
  from src.utils import validate_env_vars
@@ -42,42 +42,70 @@ class VoiceAssistant(Agent):
42
  def __init__(self):
43
  super().__init__(instructions=SYSTEM_PROMPT)
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  async def run_agent_session(room_name: str, agent_token: str, http_session: aiohttp.ClientSession):
46
  livekit_url = os.getenv("LIVEKIT_URL")
47
  room = Room()
48
 
49
- token = http_context.set(http_session)
50
-
51
  try:
52
  print(f"DEBUG: 1. Connecting to LiveKit room '{room_name}'...")
53
  await room.connect(livekit_url, agent_token)
54
  print("DEBUG: 2. Connection successful.")
 
 
55
 
56
- print("DEBUG: 3. Initializing plugins...")
57
 
 
 
 
58
  google_llm = GoogleLLM(model="gemini-1.5-flash")
59
- tts = TTS(voice=VoiceByName(name="Tiktok Fashion Influencer"), instant_mode=True)
 
60
  vad = VAD.load(min_speech_duration=0.1)
 
61
  stt = StreamAdapter(stt=STT(model="whisper-large-v3-turbo"), vad=vad)
62
 
 
 
 
63
  print("DEBUG: 4. Plugins initialized.")
64
 
65
  print("DEBUG: 5. Creating AgentSession...")
66
- # --- THE DEFINITIVE FIX ---
67
- # Pass the original 'vad' object directly to the AgentSession.
68
- session = AgentSession(
69
- vad=vad,
70
- stt=stt,
71
- llm=google_llm,
72
- tts=tts,
73
- )
74
  print("DEBUG: 6. AgentSession created.")
75
 
76
  print("DEBUG: 7. Starting session...")
77
  await session.start(agent=VoiceAssistant(), room=room)
78
 
79
  print("DEBUG: 8. Session started. Generating initial greeting...")
80
- await session.say(text=GREETING_INSTRUCTIONS)
 
81
 
82
  print("DEBUG: 9. Initial greeting complete. Agent is now fully operational.")
83
 
@@ -87,7 +115,7 @@ async def run_agent_session(room_name: str, agent_token: str, http_session: aioh
87
  finally:
88
  print(f"DEBUG: Agent session for room {room_name} is ending. Cleaning up.")
89
  await room.disconnect()
90
- http_context.reset(token)
91
 
92
  @app.post("/join-room")
93
  async def join_room(req: JoinRoomRequest, request: Request, background_tasks: BackgroundTasks):
 
1
  #!/usr/bin/env python3
2
  """
3
+ Agent Session for Avurna Flow (Final Corrected Version with TARGETED HTTP Session Management)
4
  """
5
  import os
6
  import json
 
14
 
15
  from livekit.rtc import Room
16
  from livekit.agents import Agent, AgentSession
17
+ from livekit.agents.llm import LLM
18
+ from livekit.agents.stt.stream_adapter import StreamAdapter
19
  from livekit.plugins.google import LLM as GoogleLLM
20
  from livekit.plugins.groq import STT
21
  from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
22
  from livekit.plugins.silero import VAD
 
23
 
24
  from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
25
  from src.utils import validate_env_vars
 
42
  def __init__(self):
43
  super().__init__(instructions=SYSTEM_PROMPT)
44
 
45
+ async def send_agent_state(room: Room, state: str):
46
+ # This function is correct
47
+ try:
48
+ msg = json.dumps({"type": "agent_state", "state": state})
49
+ await room.local_participant.publish_data(msg)
50
+ print(f"DEBUG: Sent agent state: {state}")
51
+ except Exception as e:
52
+ print(f"DEBUG: Error publishing agent state: {e}")
53
+
54
+ class LLMStateWrapper(LLM):
55
+ # This class is correct
56
+ def __init__(self, llm: LLM, room: Room):
57
+ super().__init__()
58
+ self._llm = llm
59
+ self._room = room
60
+
61
+ @asynccontextmanager
62
+ async def chat(self, **kwargs):
63
+ await send_agent_state(self._room, "thinking")
64
+ try:
65
+ async with self._llm.chat(**kwargs) as stream:
66
+ yield stream
67
+ finally:
68
+ await send_agent_state(self._room, "listening")
69
+
70
+
71
  async def run_agent_session(room_name: str, agent_token: str, http_session: aiohttp.ClientSession):
72
  livekit_url = os.getenv("LIVEKIT_URL")
73
  room = Room()
74
 
 
 
75
  try:
76
  print(f"DEBUG: 1. Connecting to LiveKit room '{room_name}'...")
77
  await room.connect(livekit_url, agent_token)
78
  print("DEBUG: 2. Connection successful.")
79
+
80
+ await send_agent_state(room, "listening")
81
 
82
+ print("DEBUG: 3. Initializing plugins with shared http_session...")
83
 
84
+ # --- THE DEFINITIVE FIX: Apply the session ONLY where needed ---
85
+
86
+ # Google LLM does NOT take a session argument.
87
  google_llm = GoogleLLM(model="gemini-1.5-flash")
88
+
89
+ # Hume TTS and Groq STT DO require the session.
90
  vad = VAD.load(min_speech_duration=0.1)
91
+ tts = TTS(voice=VoiceByName(name="Tiktok Fashion Influencer"), instant_mode=True)
92
  stt = StreamAdapter(stt=STT(model="whisper-large-v3-turbo"), vad=vad)
93
 
94
+ # Wrap the correctly initialized Google LLM
95
+ llm_wrapper = LLMStateWrapper(llm=google_llm, room=room)
96
+
97
  print("DEBUG: 4. Plugins initialized.")
98
 
99
  print("DEBUG: 5. Creating AgentSession...")
100
+ session = AgentSession(vad=vad, stt=stt, llm=llm_wrapper, tts=tts)
 
 
 
 
 
 
 
101
  print("DEBUG: 6. AgentSession created.")
102
 
103
  print("DEBUG: 7. Starting session...")
104
  await session.start(agent=VoiceAssistant(), room=room)
105
 
106
  print("DEBUG: 8. Session started. Generating initial greeting...")
107
+ await send_agent_state(room, "speaking")
108
+ await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
109
 
110
  print("DEBUG: 9. Initial greeting complete. Agent is now fully operational.")
111
 
 
115
  finally:
116
  print(f"DEBUG: Agent session for room {room_name} is ending. Cleaning up.")
117
  await room.disconnect()
118
+
119
 
120
  @app.post("/join-room")
121
  async def join_room(req: JoinRoomRequest, request: Request, background_tasks: BackgroundTasks):