WebEssentz committed
Commit a18ad88 · 1 Parent(s): ee7e520

Realtime Flow

Files changed (1)
  1. src/agent_session/main.py +34 -21
src/agent_session/main.py CHANGED
@@ -1,10 +1,11 @@
 #!/usr/bin/env python3
 """
-Agent Session for Avurna Flow, wrapped in a FastAPI server.
+Agent Session for Avurna Flow, wrapped in a FastAPI server. (DEBUGGING ENABLED)
 """
 import asyncio
 import os
 import json
+import traceback  # Import traceback to print full errors
 from fastapi import FastAPI, BackgroundTasks
 from pydantic import BaseModel
 import uvicorn
@@ -21,8 +22,8 @@ from livekit.plugins.silero import VAD
 from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
 from src.utils import validate_env_vars
 
+# --- FastAPI and CORS setup (unchanged) ---
 app = FastAPI()
-
 origins = ["*"]
 app.add_middleware(
     CORSMiddleware,
@@ -44,12 +45,11 @@ async def send_agent_state(ctx: JobContext, state: str):
     try:
         msg = json.dumps({"type": "agent_state", "state": state})
         await ctx.room.local_participant.publish_data(msg)
-        print(f"Sent agent state: {state}")
+        print(f"DEBUG: Sent agent state: {state}")
     except Exception as e:
-        print(f"Error publishing agent state: {e}")
+        print(f"DEBUG: Error publishing agent state: {e}")
 
-# --- KEY FIX: Create a proper wrapper CLASS for the LLM ---
-# This class conforms to the interface that AgentSession expects.
+# --- KEY FIX 2: Correctly yield from the wrapped LLM chat method ---
 class LLMStateWrapper(LLM):
     def __init__(self, llm: LLM, ctx: JobContext):
         super().__init__()
@@ -58,50 +58,63 @@ class LLMStateWrapper(LLM):
 
     async def chat(self, history):
         await send_agent_state(self._ctx, "thinking")
-        res_stream = self._llm.chat(history)
-        await send_agent_state(self._ctx, "speaking")
-        return res_stream
+        # The `yield from` is crucial for streaming responses.
+        async for chunk in self._llm.chat(history):
+            yield chunk
+        await send_agent_state(self._ctx, "listening")  # Change state back to listening after speaking is done
+
 
-# --- Main agent session logic ---
 async def run_agent_session(room_name: str, agent_token: str):
+    # --- KEY FIX 1: Add aggressive "breadcrumb" logging ---
+    print(f"DEBUG: Starting run_agent_session for room: {room_name}")
     livekit_url = os.getenv("LIVEKIT_URL")
     ctx = JobContext(room_name=room_name, livekit_url=livekit_url, token=agent_token)
 
     try:
+        print("DEBUG: 1. Connecting to LiveKit context...")
         await ctx.connect()
+        print("DEBUG: 2. Context connected. Sending initial 'listening' state.")
         await send_agent_state(ctx, "listening")
 
-        # --- KEY FIX: Instantiate the wrapper class correctly ---
+        print("DEBUG: 3. Initializing plugins (VAD, STT, LLM, TTS)...")
        llm_state_wrapper = LLMStateWrapper(
            llm=GoogleLLM(model="gemini-1.5-flash", temperature=0.5),
            ctx=ctx
        )
-
        vad = VAD.load(min_speech_duration=0.1, min_silence_duration=0.5)
+        stt = StreamAdapter(stt=STT(model="whisper-large-v3-turbo", language="en"), vad=vad)
+        tts = TTS(voice=VoiceByName(name="Tiktok Fashion Influencer", provider=VoiceProvider.hume), instant_mode=True)
+        print("DEBUG: 4. Plugins initialized.")
+
+        print("DEBUG: 5. Creating AgentSession...")
        session = AgentSession(
            vad=vad,
-            stt=StreamAdapter(stt=STT(model="whisper-large-v3-turbo", language="en"), vad=vad),
-            llm=llm_state_wrapper,  # Use the class instance here
-            tts=TTS(voice=VoiceByName(name="Tiktok Fashion Influencer", provider=VoiceProvider.hume), instant_mode=True),
+            stt=stt,
+            llm=llm_state_wrapper,
+            tts=tts,
        )
-
-        print(f"Agent starting session in room: {room_name}")
+        print("DEBUG: 6. AgentSession created. Starting session now...")
        await session.start(agent=VoiceAssistant(), room=ctx.room)
 
+        print("DEBUG: 7. Session started. Generating initial greeting...")
        await send_agent_state(ctx, "speaking")
        await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
-        await send_agent_state(ctx, "listening")
+        # Note: The state is now set back to 'listening' inside the LLM wrapper
+
+        print("DEBUG: 8. Initial greeting complete. Agent is now fully operational.")
 
     except Exception as e:
-        print(f"An error occurred during the agent session: {e}")
+        # This will now print the FULL error to your Hugging Face logs
+        print(f"FATAL ERROR in agent session: {e}")
+        print(traceback.format_exc())
    finally:
-        print(f"Agent session ended for room: {room_name}. Cleaning up.")
+        print(f"DEBUG: Agent session for room {room_name} is ending. Cleaning up.")
        await ctx.disconnect()
 
 
 @app.post("/join-room")
 async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
-    print(f"Received request for agent to join room: {req.room_name}")
+    print(f"DEBUG: Received request for agent to join room: {req.room_name}")
     background_tasks.add_task(run_agent_session, req.room_name, req.agent_token)
     return {"status": "agent_joining"}
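Likewise, the /join-room endpoint only schedules run_agent_session as a background task and returns immediately. A minimal sketch of a caller, assuming the FastAPI app is reachable at http://localhost:8000 (hypothetical host/port) and that JoinRoomRequest exposes the two fields used in join_room; the agent_token must be a valid LiveKit access token minted elsewhere:

```python
import requests  # pip install requests

resp = requests.post(
    "http://localhost:8000/join-room",  # hypothetical host/port for the FastAPI app
    json={
        "room_name": "my-room",                   # room the agent should join
        "agent_token": "<livekit-access-token>",  # pre-minted token for the agent identity
    },
    timeout=10,
)
print(resp.json())  # expected: {"status": "agent_joining"}
```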