sanjaystarc committed on
Commit
3b5e44c
·
verified ·
1 Parent(s): 1706a47

Update voice_agent.py

Browse files
Files changed (1) hide show
  1. voice_agent.py +23 -12
voice_agent.py CHANGED
@@ -1,13 +1,13 @@
1
  import os
2
  from dotenv import load_dotenv
3
 
4
- # block OpenAI fallbacks
5
  os.environ["OPENAI_API_KEY"] = ""
6
  os.environ["ANTHROPIC_API_KEY"] = ""
7
  os.environ["COHERE_API_KEY"] = ""
8
 
9
  from crewai import Agent, Task, Crew
10
- from google import genai # new official SDK
11
 
12
  load_dotenv()
13
 
@@ -17,14 +17,14 @@ class CrewVoiceAgent:
17
  def __init__(self):
18
  self.agent = Agent(
19
  role="Voice Assistant",
20
- goal="Respond to user voice with clarity.",
21
- backstory="Friendly AI voice assistant.",
22
  llm=None
23
  )
24
 
25
  self.task = Task(
26
- description="Produce a spoken response to the user.",
27
- expected_output="Audio response",
28
  agent=self.agent,
29
  llm=None
30
  )
@@ -37,7 +37,8 @@ class CrewVoiceAgent:
37
 
38
  async def handle_audio(self, audio_bytes: bytes):
39
 
40
- # Model expects: contents = [{mime_type, data}]
 
41
  response = client.models.generate_content(
42
  model="gemini-2.5-flash-native-audio-dialog",
43
  contents=[
@@ -48,9 +49,19 @@ class CrewVoiceAgent:
48
  ]
49
  )
50
 
51
- # HF Spaces CANNOT stream audio chunks
52
- # so we use response.audio.data directly
53
- if hasattr(response, "audio") and hasattr(response.audio, "data"):
54
- return response.audio.data
 
 
 
 
 
 
 
 
 
55
 
56
- return b"" # fallback if no audio
 
 
1
  import os
2
  from dotenv import load_dotenv
3
 
4
+ # Blank the OpenAI, Anthropic and Cohere API keys so those providers are not used as fallbacks
5
  os.environ["OPENAI_API_KEY"] = ""
6
  os.environ["ANTHROPIC_API_KEY"] = ""
7
  os.environ["COHERE_API_KEY"] = ""
8
 
9
  from crewai import Agent, Task, Crew
10
+ from google import genai
11
 
12
  load_dotenv()
13
 
 
17
  def __init__(self):
18
  self.agent = Agent(
19
  role="Voice Assistant",
20
+ goal="Respond clearly.",
21
+ backstory="You are helpful.",
22
  llm=None
23
  )
24
 
25
  self.task = Task(
26
+ description="Respond to speech.",
27
+ expected_output="Audio",
28
  agent=self.agent,
29
  llm=None
30
  )
 
37
 
38
  async def handle_audio(self, audio_bytes: bytes):
39
 
40
+ print("Received audio bytes:", len(audio_bytes))
41
+
42
  response = client.models.generate_content(
43
  model="gemini-2.5-flash-native-audio-dialog",
44
  contents=[
 
49
  ]
50
  )
51
 
52
+ print("GEMINI RESPONSE FIELDS:", response)
53
+ print("HAS AUDIO:", hasattr(response, "audio"))
54
+
55
+ if hasattr(response, "audio"):
56
+ print("AUDIO OBJECT:", response.audio)
57
+
58
+ # Try direct
59
+ try:
60
+ if response.audio and response.audio.data:
61
+ print("Returning audio bytes:", len(response.audio.data))
62
+ return response.audio.data
63
+ except Exception as e:
64
+ print("Error reading audio:", e)
65
 
66
+ print("NO AUDIO RETURNED")
67
+ return b""