Spaces:

sanjaystarc
/

voice-agent

Sleeping

sanjaystarc committed on Dec 25, 2025

Commit

3b5e44c

verified ·

1 Parent(s): 1706a47

Update voice_agent.py

Files changed (1) hide show

voice_agent.py CHANGED Viewed

@@ -1,13 +1,13 @@
 import os
 from dotenv import load_dotenv
-# block OpenAI fallbacks
 os.environ["OPENAI_API_KEY"] = ""
 os.environ["ANTHROPIC_API_KEY"] = ""
 os.environ["COHERE_API_KEY"] = ""
 from crewai import Agent, Task, Crew
-from google import genai   # new official SDK
 load_dotenv()
@@ -17,14 +17,14 @@ class CrewVoiceAgent:
     def __init__(self):
         self.agent = Agent(
             role="Voice Assistant",
-            goal="Respond to user voice with clarity.",
-            backstory="Friendly AI voice assistant.",
             llm=None
         )
         self.task = Task(
-            description="Produce a spoken response to the user.",
-            expected_output="Audio response",
             agent=self.agent,
             llm=None
         )
@@ -37,7 +37,8 @@ class CrewVoiceAgent:
     async def handle_audio(self, audio_bytes: bytes):
-        # Model expects: contents = [{mime_type, data}]
         response = client.models.generate_content(
             model="gemini-2.5-flash-native-audio-dialog",
             contents=[
@@ -48,9 +49,19 @@ class CrewVoiceAgent:
             ]
         )
-        # HF Spaces CANNOT stream audio chunks
-        # so we use response.audio.data directly
-        if hasattr(response, "audio") and hasattr(response.audio, "data"):
-            return response.audio.data
-        return b""  # fallback if no audio

 import os
 from dotenv import load_dotenv
+# Disable OpenAI fallback
 os.environ["OPENAI_API_KEY"] = ""
 os.environ["ANTHROPIC_API_KEY"] = ""
 os.environ["COHERE_API_KEY"] = ""
 from crewai import Agent, Task, Crew
+from google import genai
 load_dotenv()
     def __init__(self):
         self.agent = Agent(
             role="Voice Assistant",
+            goal="Respond clearly.",
+            backstory="You are helpful.",
             llm=None
         )
         self.task = Task(
+            description="Respond to speech.",
+            expected_output="Audio",
             agent=self.agent,
             llm=None
         )
     async def handle_audio(self, audio_bytes: bytes):
+        print("Received audio bytes:", len(audio_bytes))
         response = client.models.generate_content(
             model="gemini-2.5-flash-native-audio-dialog",
             contents=[
             ]
         )
+        print("GEMINI RESPONSE FIELDS:", response)
+        print("HAS AUDIO:", hasattr(response, "audio"))
+        if hasattr(response, "audio"):
+            print("AUDIO OBJECT:", response.audio)
+        # Try direct
+        try:
+            if response.audio and response.audio.data:
+                print("Returning audio bytes:", len(response.audio.data))
+                return response.audio.data
+        except Exception as e:
+            print("Error reading audio:", e)
+        print("NO AUDIO RETURNED")
+        return b""