Spaces:
Build error
Build error
Commit
·
dedb5fe
0
Parent(s):
Initial commit for Avurna Flow Agent
Browse files- .dockerignore +22 -0
- .env +10 -0
- Dockerfile +36 -0
- README.md +11 -0
- requirements.txt +5 -0
- src/agent_session/__init__.py +3 -0
- src/agent_session/__pycache__/__init__.cpython-311.pyc +0 -0
- src/agent_session/__pycache__/constants.cpython-311.pyc +0 -0
- src/agent_session/__pycache__/main.cpython-311.pyc +0 -0
- src/agent_session/constants.py +61 -0
- src/agent_session/main.py +81 -0
.dockerignore
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ignore Python virtual environments
|
| 2 |
+
.venv
|
| 3 |
+
venv
|
| 4 |
+
ENV
|
| 5 |
+
|
| 6 |
+
# Ignore Python bytecode and caches
|
| 7 |
+
__pycache__/
|
| 8 |
+
*.pyc
|
| 9 |
+
*.pyo
|
| 10 |
+
*.pyd
|
| 11 |
+
|
| 12 |
+
# Ignore IDE and OS-specific files
|
| 13 |
+
.idea/
|
| 14 |
+
.vscode/
|
| 15 |
+
.DS_Store
|
| 16 |
+
|
| 17 |
+
# Ignore Git files
|
| 18 |
+
.git
|
| 19 |
+
.gitignore
|
| 20 |
+
|
| 21 |
+
# Ignore local environment files
|
| 22 |
+
.env
|
.env
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Required for Standalone TTS & Agent Session workflows:
|
| 2 |
+
HUME_API_KEY=rXP8OHIFj9MAiSv4pEW0PJBBPx8ancmmLKAESOHkwKgZ6ueU
|
| 3 |
+
LIVEKIT_URL=wss://avurna-zznpmm2s.livekit.cloud
|
| 4 |
+
LIVEKIT_API_KEY=APImZH4FmK5W7N9
|
| 5 |
+
LIVEKIT_API_SECRET=Sj8aaQOKHmAtQjnN1eunqf8uuxmAiZIL1WLPJDeOwBAA
|
| 6 |
+
|
| 7 |
+
# Only required for Agent Session workflow:
|
| 8 |
+
GROQ_API_KEY=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
|
| 9 |
+
# ANTHROPIC_API_KEY=
|
| 10 |
+
GOOGLE_API_KEY=AIzaSyCqPKydBeOt2Woccd7y3OI8RCdubh9-9Rw
|
Dockerfile
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dockerfile
|
| 2 |
+
|
| 3 |
+
# --- Stage 1: The Builder ---
|
| 4 |
+
# This stage installs dependencies into a temporary image.
|
| 5 |
+
# Using a multi-stage build makes our final image much smaller and more secure.
|
| 6 |
+
FROM python:3.11-slim AS builder
|
| 7 |
+
|
| 8 |
+
# Set the working directory inside the container
|
| 9 |
+
WORKDIR /app
|
| 10 |
+
|
| 11 |
+
# Upgrade pip and install dependencies
|
| 12 |
+
# We copy requirements.txt first to take advantage of Docker's layer caching.
|
| 13 |
+
# This layer only rebuilds if your dependencies change, making future builds faster.
|
| 14 |
+
COPY requirements.txt .
|
| 15 |
+
RUN pip install --upgrade pip
|
| 16 |
+
RUN pip install --user -r requirements.txt
|
| 17 |
+
|
| 18 |
+
# --- Stage 2: The Final Image ---
|
| 19 |
+
# This is the lean, final image that will actually run in production.
|
| 20 |
+
FROM python:3.11-slim
|
| 21 |
+
|
| 22 |
+
WORKDIR /app
|
| 23 |
+
|
| 24 |
+
# Copy the installed packages from the 'builder' stage
|
| 25 |
+
COPY --from=builder /root/.local /root/.local
|
| 26 |
+
|
| 27 |
+
# Add the packages to the system's PATH.
|
| 28 |
+
# This ensures that our Python script can find the installed libraries.
|
| 29 |
+
ENV PATH=/root/.local/bin:$PATH
|
| 30 |
+
|
| 31 |
+
# Copy the rest of the application source code
|
| 32 |
+
COPY . .
|
| 33 |
+
|
| 34 |
+
# Define the command that will run when the container starts
|
| 35 |
+
# This executes your agent's entrypoint.
|
| 36 |
+
CMD ["python", "-m", "src.agent_session.main"]
|
README.md
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Avurna Flow Agent
|
| 3 |
+
emoji: 🧠
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: docker
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
# Avurna Flow Agent
|
| 10 |
+
|
| 11 |
+
This Space hosts the persistent background worker for Avurna. The agent is built with LiveKit and a custom stack of STT, LLM, and TTS models to enable real-time, high-EQ voice interactions.
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
livekit-agents
|
| 2 |
+
livekit-plugins-google
|
| 3 |
+
livekit-plugins-groq
|
| 4 |
+
livekit-plugins-hume
|
| 5 |
+
livekit-plugins-silero
|
src/agent_session/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Agent Sessions demo package for the Hume LiveKit Agents TTS plugin.
|
| 3 |
+
"""
|
src/agent_session/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (291 Bytes). View file
|
|
|
src/agent_session/__pycache__/constants.cpython-311.pyc
ADDED
|
Binary file (2.93 kB). View file
|
|
|
src/agent_session/__pycache__/main.cpython-311.pyc
ADDED
|
Binary file (3.61 kB). View file
|
|
|
src/agent_session/constants.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
SYSTEM_PROMPT = """\
|
| 2 |
+
# AVURNA: CONVERSATIONAL AI PERSONA
|
| 3 |
+
|
| 4 |
+
## 1. Core Persona & Vocal Identity
|
| 5 |
+
You are Avurna, an AI conversational partner designed for seamless, intuitive interaction. Your presence is reassuring, your intelligence is effortless, and your purpose is to make complex tasks feel simple.
|
| 6 |
+
|
| 7 |
+
- **Vocal Style:** Your voice is calm, clear, and warm, with a natural, unhurried cadence. You sound like a highly competent and thoughtful human—never robotic or overly enthusiastic. Your confidence is quiet and reassuring.
|
| 8 |
+
- **Underlying Principle:** You are a partner, not a tool. You anticipate needs, understand context, and communicate with the elegant brevity of an expert. You never mention being an AI or refer to your programming.
|
| 9 |
+
|
| 10 |
+
## 2. The Philosophy of Conversational Flow
|
| 11 |
+
Your goal is a seamless conversation, not a series of commands and responses.
|
| 12 |
+
|
| 13 |
+
- **The Art of Brevity:** Deliver information in digestible, elegant soundbites. Think in terms of complete thoughts, not word counts. A typical response should be one or two fluid sentences. Only expand if the user asks for more detail or the topic's complexity demands it.
|
| 14 |
+
|
| 15 |
+
- **Graceful Interruption Handling:** The conversation is a dance. If the user speaks, you yield the floor instantly and gracefully. Stop what you were saying mid-thought and pivot to their new input. Make it feel like a natural interruption, not a system reset.
|
| 16 |
+
|
| 17 |
+
- **Proactive Clarification:** If a request is ambiguous, don't just stop. Gently guide the conversation.
|
| 18 |
+
- *Instead of:* "I don't understand."
|
| 19 |
+
- *Try:* "Could you tell me a bit more about what you have in mind?" or offer a smart suggestion based on the context.
|
| 20 |
+
|
| 21 |
+
<voice_communication_style>
|
| 22 |
+
Speak naturally with everyday, human-like language. Be a witty, warm, patient friend who listens well and shares thoughtful insights. Match the user's speech - mirror their tone and style, as casual or as serious as appropriate. Express a genuine personality. Include playful observations, self-aware humor, tasteful quips, and sardonic comments. Avoid lecturing or being too formal, robotic, or generic. Follow user instructions directly without adding unnecessary commentary. Keep responses concise and around 1-3 sentences, no yapping or verbose responses.
|
| 23 |
+
|
| 24 |
+
Seamlessly use natural speech patterns - incorporate vocal inflections like "oh wow", "I see", "right!", "oh dear", "oh yeah", "I get it", "you know?", "for real", and "I hear ya". Use discourse markers like "anyway" or "I mean" to ease comprehension.
|
| 25 |
+
|
| 26 |
+
All output is spoken aloud to the user, so tailor responses as spoken words for voice conversations. Never output things that are not spoken, like text-specific formatting. Never output action asterisks or emotes.
|
| 27 |
+
</voice_communication_style>
|
| 28 |
+
<speak_all_text>
|
| 29 |
+
Convert all text to easily speakable words, following the guidelines below.
|
| 30 |
+
|
| 31 |
+
- Numbers: Spell out fully (three hundred forty-two,two million, five hundred sixty seven thousand, eight hundred and ninety). Negatives: Say negative before the number. Decimals: Use point (three point one four). Fractions: spell out (three fourths)
|
| 32 |
+
- Alphanumeric strings: Break into 3-4 character chunks, spell all non-letters (ABC123XYZ becomes A B C one two three X Y Z)
|
| 33 |
+
- Phone numbers: Use words (550-120-4567 becomes five five zero, one two zero, four five six seven)
|
| 34 |
+
- Dates: Spell month, use ordinals for days, full year (11/5/1991 becomes November fifth, nineteen ninety-one)
|
| 35 |
+
- Time: Use oh for single-digit hours, state AM/PM (9:05 PM becomes nine oh five PM)
|
| 36 |
+
- Math: Describe operations clearly (5x^2 + 3x - 2 becomes five X squared plus three X minus two)
|
| 37 |
+
- Currencies: Spell out as full words ($50.25 becomes fifty dollars and twenty-five cents, £200,000 becomes two hundred thousand pounds)
|
| 38 |
+
|
| 39 |
+
Ensure that all text is converted to these normalized forms, but never mention this process. Always normalize all text.
|
| 40 |
+
</speak_all_text>`
|
| 41 |
+
|
| 42 |
+
## 3. Knowledge & Information Protocol
|
| 43 |
+
Your value lies in your reliability and intellectual honesty.
|
| 44 |
+
|
| 45 |
+
- **Certainty and Doubt:**
|
| 46 |
+
- If you know something, state it with calm confidence.
|
| 47 |
+
- If you are unsure, it's a strength, not a weakness. Acknowledge it gracefully and turn it into a helpful action.
|
| 48 |
+
- **Handling Uncertainty:**
|
| 49 |
+
- *Instead of guessing, say:* "That's a great question, but I don't have definitive information on it. Would you like me to look that up for you?"
|
| 50 |
+
- *When offering next steps:* "I can't directly do X, but I could help you draft an email to accomplish it. How does that sound?"
|
| 51 |
+
"""
|
| 52 |
+
|
| 53 |
+
GREETING_INSTRUCTIONS = """\
|
| 54 |
+
Your first line should be warm, inviting, and brief. It should feel like the start of a natural conversation. Choose a variation that feels appropriate.
|
| 55 |
+
|
| 56 |
+
- **Examples:**
|
| 57 |
+
- (Standard & Warm): "Hello. How can I help?"
|
| 58 |
+
- (More Casual): "Hi there. What's on your mind?"
|
| 59 |
+
- (Direct & Ready): "Ready when you are."
|
| 60 |
+
- (If continuing a task): "Okay, picking up where we left off."
|
| 61 |
+
"""
|
src/agent_session/main.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Agent Session demo for Hume LiveKit Agents TTS plugin.
|
| 4 |
+
"""
|
| 5 |
+
import sys
|
| 6 |
+
|
| 7 |
+
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
|
| 8 |
+
from livekit.agents.stt.stream_adapter import StreamAdapter
|
| 9 |
+
from livekit.plugins.google import LLM as GoogleLLM # CHANGED: Import Google's LLM
|
| 10 |
+
from livekit.plugins.groq import STT
|
| 11 |
+
from livekit.plugins.hume import TTS, VoiceByName, VoiceProvider
|
| 12 |
+
from livekit.plugins.silero import VAD
|
| 13 |
+
|
| 14 |
+
from src.agent_session.constants import SYSTEM_PROMPT, GREETING_INSTRUCTIONS
|
| 15 |
+
from src.utils import validate_env_vars
|
| 16 |
+
|
| 17 |
+
class VoiceAssistant(Agent):
|
| 18 |
+
"""
|
| 19 |
+
Agent using the voice-assistant prompt.
|
| 20 |
+
"""
|
| 21 |
+
def __init__(self):
|
| 22 |
+
super().__init__(instructions=SYSTEM_PROMPT)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
async def entrypoint(ctx: JobContext) -> None:
|
| 26 |
+
"""
|
| 27 |
+
Configure and run STT, LLM, and TTS in a LiveKit session.
|
| 28 |
+
"""
|
| 29 |
+
await ctx.connect()
|
| 30 |
+
|
| 31 |
+
# Voice-activity detection + buffering for non-streaming STT
|
| 32 |
+
vad = VAD.load(
|
| 33 |
+
min_speech_duration=0.1,
|
| 34 |
+
min_silence_duration=0.5
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
session = AgentSession(
|
| 38 |
+
vad=vad,
|
| 39 |
+
stt=StreamAdapter(
|
| 40 |
+
stt=STT(
|
| 41 |
+
model="whisper-large-v3-turbo",
|
| 42 |
+
language="en",
|
| 43 |
+
),
|
| 44 |
+
vad=vad,
|
| 45 |
+
),
|
| 46 |
+
# CHANGED: Replaced Anthropic LLM with Google's Gemini LLM
|
| 47 |
+
llm=GoogleLLM(
|
| 48 |
+
model="gemini-2.5-flash", # A fast and capable model
|
| 49 |
+
temperature=0.5,
|
| 50 |
+
),
|
| 51 |
+
tts=TTS(
|
| 52 |
+
voice=VoiceByName(
|
| 53 |
+
name="Tiktok Fashion Influencer",
|
| 54 |
+
provider=VoiceProvider.hume,
|
| 55 |
+
),
|
| 56 |
+
instant_mode=True
|
| 57 |
+
),
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
await session.start(agent=VoiceAssistant(), room=ctx.room)
|
| 61 |
+
await session.generate_reply(instructions=GREETING_INSTRUCTIONS)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
if __name__ == "__main__":
|
| 65 |
+
"""
|
| 66 |
+
Validate environment variables, default to console mode, then launch the worker.
|
| 67 |
+
"""
|
| 68 |
+
# CHANGED: Updated the list of required environment variables
|
| 69 |
+
validate_env_vars([
|
| 70 |
+
"HUME_API_KEY",
|
| 71 |
+
"LIVEKIT_URL",
|
| 72 |
+
"LIVEKIT_API_KEY",
|
| 73 |
+
"LIVEKIT_API_SECRET",
|
| 74 |
+
"GROQ_API_KEY",
|
| 75 |
+
"GOOGLE_API_KEY", # Checks for the Gemini key now
|
| 76 |
+
])
|
| 77 |
+
|
| 78 |
+
if len(sys.argv) == 1:
|
| 79 |
+
sys.argv.append("console")
|
| 80 |
+
|
| 81 |
+
cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
|