Spaces:

WebEssentz
/

gent

Build error

App Files Files Community

WebEssentz commited on Jul 13, 2025

Commit

054b7cc

1 Parent(s): 9f77ab8

Avurna agent

Browse files

Files changed (3) hide show

Dockerfile +7 -4
requirements.txt +3 -1
src/agent_session/main.py +61 -35

Dockerfile CHANGED Viewed

@@ -1,4 +1,4 @@
-# Dockerfile - Final Version
 # Generated with 💚 by Avurna AI (2025)
 # Use the official Python 3.11 slim image
@@ -11,11 +11,14 @@ WORKDIR /app
 COPY requirements.txt .
 # Install the Python dependencies GLOBALLY, not to a user directory.
-# This is the key fix.
 RUN pip install --no-cache-dir -r requirements.txt
 # Copy the rest of the application source code
 COPY . .
-# Define the command that will run when the container starts
-CMD ["python", "-m", "src.agent_session.main", "start"]

+# Dockerfile - Phase 1 Version
 # Generated with 💚 by Avurna AI (2025)
 # Use the official Python 3.11 slim image
 COPY requirements.txt .
 # Install the Python dependencies GLOBALLY, not to a user directory.
 RUN pip install --no-cache-dir -r requirements.txt
 # Copy the rest of the application source code
 COPY . .
+# --- KEY CHANGE ---
+# Define the command that will run when the container starts.
+# We no longer run the script directly. We tell uvicorn to run the 'app' object
+# from the 'src.agent_session.main' module.
+# Hugging Face Spaces exposes port 7860, so we must listen on it.
+CMD ["uvicorn", "src.agent_session.main:app", "--host", "0.0.0.0", "--port", "7860"]

requirements.txt CHANGED Viewed

@@ -3,4 +3,6 @@ livekit-plugins-google
 livekit-plugins-groq
 livekit-plugins-hume
 livekit-plugins-silero
-flask

 livekit-plugins-groq
 livekit-plugins-hume
 livekit-plugins-silero
+fastapi
+uvicorn
+livekit

src/agent_session/main.py CHANGED Viewed

@@ -1,12 +1,16 @@
 #!/usr/bin/env python3
 """
-Agent Session demo for Hume LiveKit Agents TTS plugin.
 """
-import sys
-from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
 from livekit.agents.stt.stream_adapter import StreamAdapter
-from livekit.plugins.google import LLM as GoogleLLM # CHANGED: Import Google's LLM
 from livekit.plugins.groq import STT
 from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
 from livekit.plugins.silero import VAD
@@ -14,66 +18,88 @@ from livekit.plugins.silero import VAD
 from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
 from src.utils import validate_env_vars
 class VoiceAssistant(Agent):
-    """
-    Agent using the voice-assistant prompt.
-    """
     def __init__(self):
         super().__init__(instructions=SYSTEM_PROMPT)
-async def entrypoint(ctx: JobContext) -> None:
     """
-    Configure and run STT, LLM, and TTS in a LiveKit session.
     """
-    await ctx.connect()
-    # Voice-activity detection + buffering for non-streaming STT
-    vad = VAD.load(
-        min_speech_duration=0.1,
-        min_silence_duration=0.5
     )
     session = AgentSession(
         vad=vad,
         stt=StreamAdapter(
-            stt=STT(
-                model="whisper-large-v3-turbo",
-                language="en",
-            ),
             vad=vad,
         ),
-        # CHANGED: Replaced Anthropic LLM with Google's Gemini LLM
-        llm=GoogleLLM(
-            model="gemini-2.5-flash", # A fast and capable model
-            temperature=0.5,
-        ),
         tts=TTS(
-            voice=VoiceByName(
-                name="Tiktok Fashion Influencer",
-                provider=VoiceProvider.hume,
-            ),
             instant_mode=True
         ),
     )
     await session.start(agent=VoiceAssistant(), room=ctx.room)
     await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
-if __name__ == "__main__":
     """
-    Validate environment variables and run the agent as a non-interactive worker.
     """
     validate_env_vars([
         "HUME_API_KEY",
         "LIVEKIT_URL",
         "LIVEKIT_API_KEY",
         "LIVEKIT_API_SECRET",
         "GROQ_API_KEY",
-        "GOOGLE_API_KEY",
     ])
-    # Run as a simple worker, not an interactive CLI
-    opts = WorkerOptions(entrypoint_fnc=entrypoint)
-    cli.run_app(opts)

 #!/usr/bin/env python3
 """
+Agent Session for Avurna Flow, wrapped in a FastAPI server.
 """
+import asyncio
+import os
+from fastapi import FastAPI, BackgroundTasks
+from pydantic import BaseModel
+import uvicorn
+from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions
 from livekit.agents.stt.stream_adapter import StreamAdapter
+from livekit.plugins.google import LLM as GoogleLLM
 from livekit.plugins.groq import STT
 from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
 from livekit.plugins.silero import VAD
 from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
 from src.utils import validate_env_vars
+# --- FastAPI App Definition ---
+app = FastAPI()
+# --- Pydantic Model for the request body ---
+class JoinRoomRequest(BaseModel):
+    room_name: str
+    agent_token: str
+# --- The Core Agent Logic (mostly unchanged) ---
 class VoiceAssistant(Agent):
     def __init__(self):
         super().__init__(instructions=SYSTEM_PROMPT)
+async def run_agent_session(room_name: str, agent_token: str):
     """
+    This function contains the core logic to connect and run the agent in a LiveKit room.
     """
+    livekit_url = os.getenv("LIVEKIT_URL")
+    # This context will be used by the agent to connect to the room
+    ctx = JobContext(
+        room_name=room_name,
+        livekit_url=livekit_url,
+        token=agent_token,
     )
+    await ctx.connect()
+    vad = VAD.load(min_speech_duration=0.1, min_silence_duration=0.5)
     session = AgentSession(
         vad=vad,
         stt=StreamAdapter(
+            stt=STT(model="whisper-large-v3-turbo", language="en"),
             vad=vad,
         ),
+        llm=GoogleLLM(model="gemini-1.5-flash", temperature=0.5), # Changed to 1.5-flash for more power
         tts=TTS(
+            voice=VoiceByName(name="Tiktok Fashion Influencer", provider=VoiceProvider.hume),
             instant_mode=True
         ),
     )
+    print(f"Agent starting session in room: {room_name}")
     await session.start(agent=VoiceAssistant(), room=ctx.room)
     await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
+    print(f"Agent session ended for room: {room_name}")
+# --- FastAPI Endpoint ---
+@app.post("/join-room")
+async def join_room(req: JoinRoomRequest, background_tasks: BackgroundTasks):
     """
+    This endpoint is called by the frontend to trigger the agent.
+    It immediately returns a success message and starts the agent in the background.
     """
+    print(f"Received request for agent to join room: {req.room_name}")
+    # Add the long-running agent session as a background task
+    background_tasks.add_task(run_agent_session, req.room_name, req.agent_token)
+    return {"status": "agent_joining"}
+# --- Health Check Endpoint (good practice) ---
+@app.get("/")
+async def root():
+    return {"status": "avurna_agent_server_online"}
+# --- Main execution block ---
+if __name__ == "__main__":
+    # Validate environment variables on startup
     validate_env_vars([
         "HUME_API_KEY",
         "LIVEKIT_URL",
         "LIVEKIT_API_KEY",
         "LIVEKIT_API_SECRET",
         "GROQ_API_KEY",
+        "GOOGLE_API_KEY", # Assuming you meant GOOGLE_API_KEY from your original file
     ])
+    # Run the FastAPI server using uvicorn
+    # Hugging Face Spaces requires the app to run on port 7860
+    # and host 0.0.0.0 to be accessible from the internet.
+    uvicorn.run(app, host="0.0.0.0", port=7860)