WebEssentz committed on
Commit
054b7cc
·
1 Parent(s): 9f77ab8

Avurna agent

Browse files
Files changed (3) hide show
  1. Dockerfile +7 -4
  2. requirements.txt +3 -1
  3. src/agent_session/main.py +61 -35
Dockerfile CHANGED
@@ -1,4 +1,4 @@
1
- # Dockerfile - Final Version
2
  # Generated with 💚 by Avurna AI (2025)
3
 
4
  # Use the official Python 3.11 slim image
@@ -11,11 +11,14 @@ WORKDIR /app
11
  COPY requirements.txt .
12
 
13
  # Install the Python dependencies GLOBALLY, not to a user directory.
14
- # This is the key fix.
15
  RUN pip install --no-cache-dir -r requirements.txt
16
 
17
  # Copy the rest of the application source code
18
  COPY . .
19
 
20
- # Define the command that will run when the container starts
21
- CMD ["python", "-m", "src.agent_session.main", "start"]
 
 
 
 
 
1
+ # Dockerfile - Phase 1 Version
2
  # Generated with 💚 by Avurna AI (2025)
3
 
4
  # Use the official Python 3.11 slim image
 
11
  COPY requirements.txt .
12
 
13
  # Install the Python dependencies GLOBALLY, not to a user directory.
 
14
  RUN pip install --no-cache-dir -r requirements.txt
15
 
16
  # Copy the rest of the application source code
17
  COPY . .
18
 
19
+ # --- KEY CHANGE ---
20
+ # Define the command that will run when the container starts.
21
+ # We no longer run the script directly. We tell uvicorn to run the 'app' object
22
+ # from the 'src.agent_session.main' module.
23
+ # Hugging Face Spaces exposes port 7860, so we must listen on it.
24
+ CMD ["uvicorn", "src.agent_session.main:app", "--host", "0.0.0.0", "--port", "7860"]
requirements.txt CHANGED
@@ -3,4 +3,6 @@ livekit-plugins-google
3
  livekit-plugins-groq
4
  livekit-plugins-hume
5
  livekit-plugins-silero
6
- flask
 
 
 
3
  livekit-plugins-groq
4
  livekit-plugins-hume
5
  livekit-plugins-silero
6
+ fastapi
7
+ uvicorn
8
+ livekit
src/agent_session/main.py CHANGED
@@ -1,12 +1,16 @@
1
  #!/usr/bin/env python3
2
  """
3
- Agent Session demo for Hume LiveKit Agents TTS plugin.
4
  """
5
- import sys
 
 
 
 
6
 
7
- from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
8
  from livekit.agents.stt.stream_adapter import StreamAdapter
9
- from livekit.plugins.google import LLM as GoogleLLM # CHANGED: Import Google's LLM
10
  from livekit.plugins.groq import STT
11
  from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
12
  from livekit.plugins.silero import VAD
@@ -14,66 +18,88 @@ from livekit.plugins.silero import VAD
14
  from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
15
  from src.utils import validate_env_vars
16
 
 
 
 
 
 
 
 
 
 
17
  class VoiceAssistant(Agent):
18
- """
19
- Agent using the voice-assistant prompt.
20
- """
21
  def __init__(self):
22
  super().__init__(instructions=SYSTEM_PROMPT)
23
 
24
-
25
- async def entrypoint(ctx: JobContext) -> None:
26
  """
27
- Configure and run STT, LLM, and TTS in a LiveKit session.
28
  """
29
- await ctx.connect()
30
 
31
- # Voice-activity detection + buffering for non-streaming STT
32
- vad = VAD.load(
33
- min_speech_duration=0.1,
34
- min_silence_duration=0.5
 
35
  )
 
 
36
 
 
37
  session = AgentSession(
38
  vad=vad,
39
  stt=StreamAdapter(
40
- stt=STT(
41
- model="whisper-large-v3-turbo",
42
- language="en",
43
- ),
44
  vad=vad,
45
  ),
46
- # CHANGED: Replaced Anthropic LLM with Google's Gemini LLM
47
- llm=GoogleLLM(
48
- model="gemini-2.5-flash", # A fast and capable model
49
- temperature=0.5,
50
- ),
51
  tts=TTS(
52
- voice=VoiceByName(
53
- name="Tiktok Fashion Influencer",
54
- provider=VoiceProvider.hume,
55
- ),
56
  instant_mode=True
57
  ),
58
  )
59
 
 
60
  await session.start(agent=VoiceAssistant(), room=ctx.room)
61
  await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
 
62
 
63
 
64
- if __name__ == "__main__":
 
 
65
  """
66
- Validate environment variables and run the agent as a non-interactive worker.
 
67
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  validate_env_vars([
69
  "HUME_API_KEY",
70
  "LIVEKIT_URL",
71
  "LIVEKIT_API_KEY",
72
  "LIVEKIT_API_SECRET",
73
  "GROQ_API_KEY",
74
- "GOOGLE_API_KEY",
75
  ])
76
-
77
- # Run as a simple worker, not an interactive CLI
78
- opts = WorkerOptions(entrypoint_fnc=entrypoint)
79
- cli.run_app(opts)
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ Agent Session for Avurna Flow, wrapped in a FastAPI server.
4
  """
5
+ import asyncio
6
+ import os
7
+ from fastapi import FastAPI, BackgroundTasks
8
+ from pydantic import BaseModel
9
+ import uvicorn
10
 
11
+ from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions
12
  from livekit.agents.stt.stream_adapter import StreamAdapter
13
+ from livekit.plugins.google import LLM as GoogleLLM
14
  from livekit.plugins.groq import STT
15
  from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
16
  from livekit.plugins.silero import VAD
 
18
  from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
19
  from src.utils import validate_env_vars
20
 
21
+ # --- FastAPI App Definition ---
22
+ app = FastAPI()
23
+
24
+ # --- Pydantic Model for the request body ---
25
+ class JoinRoomRequest(BaseModel):
26
+ room_name: str
27
+ agent_token: str
28
+
29
+ # --- The Core Agent Logic (mostly unchanged) ---
30
  class VoiceAssistant(Agent):
 
 
 
31
  def __init__(self):
32
  super().__init__(instructions=SYSTEM_PROMPT)
33
 
34
+ async def run_agent_session(room_name: str, agent_token: str):
 
35
  """
36
+ This function contains the core logic to connect and run the agent in a LiveKit room.
37
  """
38
+ livekit_url = os.getenv("LIVEKIT_URL")
39
 
40
+ # This context will be used by the agent to connect to the room
41
+ ctx = JobContext(
42
+ room_name=room_name,
43
+ livekit_url=livekit_url,
44
+ token=agent_token,
45
  )
46
+
47
+ await ctx.connect()
48
 
49
+ vad = VAD.load(min_speech_duration=0.1, min_silence_duration=0.5)
50
  session = AgentSession(
51
  vad=vad,
52
  stt=StreamAdapter(
53
+ stt=STT(model="whisper-large-v3-turbo", language="en"),
 
 
 
54
  vad=vad,
55
  ),
56
+ llm=GoogleLLM(model="gemini-1.5-flash", temperature=0.5), # Changed to 1.5-flash for more power
 
 
 
 
57
  tts=TTS(
58
+ voice=VoiceByName(name="Tiktok Fashion Influencer", provider=VoiceProvider.hume),
 
 
 
59
  instant_mode=True
60
  ),
61
  )
62
 
63
+ print(f"Agent starting session in room: {room_name}")
64
  await session.start(agent=VoiceAssistant(), room=ctx.room)
65
  await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
66
+ print(f"Agent session ended for room: {room_name}")
67
 
68
 
69
+ # --- FastAPI Endpoint ---
70
+ @app.post("/join-room")
71
+ async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
72
  """
73
+ This endpoint is called by the frontend to trigger the agent.
74
+ It immediately returns a success message and starts the agent in the background.
75
  """
76
+ print(f"Received request for agent to join room: {req.room_name}")
77
+
78
+ # Add the long-running agent session as a background task
79
+ background_tasks.add_task(run_agent_session, req.room_name, req.agent_token)
80
+
81
+ return {"status": "agent_joining"}
82
+
83
+
84
+ # --- Health Check Endpoint (good practice) ---
85
+ @app.get("/")
86
+ async def root():
87
+ return {"status": "avurna_agent_server_online"}
88
+
89
+
90
+ # --- Main execution block ---
91
+ if __name__ == "__main__":
92
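+ # Validate environment variables when this module is run directly as a script; this block is skipped when the server is started via `uvicorn src.agent_session.main:app`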
93
  validate_env_vars([
94
  "HUME_API_KEY",
95
  "LIVEKIT_URL",
96
  "LIVEKIT_API_KEY",
97
  "LIVEKIT_API_SECRET",
98
  "GROQ_API_KEY",
99
+ "GOOGLE_API_KEY", # Assuming you meant GOOGLE_API_KEY from your original file
100
  ])
101
+
102
+ # Run the FastAPI server using uvicorn
103
+ # Hugging Face Spaces requires the app to run on port 7860
104
+ # and host 0.0.0.0 to be accessible from the internet.
105
+ uvicorn.run(app, host="0.0.0.0", port=7860)