Spaces:

SreekarB
/

app_trial_current

Sleeping

App Files Files Community

SreekarB committed on May 19, 2025

Commit

891575e

verified ·

1 Parent(s): 377ef31

Upload 10 files

Browse files

Files changed (2) hide show

nova_sonic_tool_use.py +180 -68
requirements.txt +4 -2

nova_sonic_tool_use.py CHANGED Viewed

@@ -11,6 +11,14 @@ import hashlib
 import datetime
 import time
 import inspect
 from aws_sdk_bedrock_runtime.client import BedrockRuntimeClient, InvokeModelWithBidirectionalStreamOperationInput
 from aws_sdk_bedrock_runtime.models import InvokeModelWithBidirectionalStreamInputChunk, BidirectionalInputPayloadPart
 from aws_sdk_bedrock_runtime.config import Config, HTTPAuthSchemeResolver, SigV4AuthScheme
@@ -672,36 +680,73 @@ class AudioStreamer:
         self.stream_manager = stream_manager
         self.is_streaming = False
         self.loop = asyncio.get_event_loop()
-        # Initialize PyAudio
-        debug_print("AudioStreamer Initializing PyAudio...")
-        self.p = time_it("AudioStreamerInitPyAudio", pyaudio.PyAudio)
-        debug_print("AudioStreamer PyAudio initialized")
-        # Initialize separate streams for input and output
-        # Input stream with callback for microphone
-        debug_print("Opening input audio stream...")
-        self.input_stream = time_it("AudioStreamerOpenAudio", lambda  : self.p.open(
-            format=FORMAT,
-            channels=CHANNELS,
-            rate=INPUT_SAMPLE_RATE,
-            input=True,
-            frames_per_buffer=CHUNK_SIZE,
-            stream_callback=self.input_callback
-        ))
-        debug_print("input audio stream opened")
-        # Output stream for direct writing (no callback)
-        debug_print("Opening output audio stream...")
-        self.output_stream = time_it("AudioStreamerOpenAudio", lambda  : self.p.open(
-            format=FORMAT,
-            channels=CHANNELS,
-            rate=OUTPUT_SAMPLE_RATE,
-            output=True,
-            frames_per_buffer=CHUNK_SIZE
-        ))
-        debug_print("output audio stream opened")
     def input_callback(self, in_data, frame_count, time_info, status):
         """Callback function that schedules audio processing in the asyncio event loop"""
@@ -722,6 +767,32 @@ class AudioStreamer:
             if self.is_streaming:
                 print(f"Error processing input audio: {e}")
     async def play_output_audio(self):
         """Play audio responses from Nova Sonic"""
         while self.is_streaming:
@@ -746,26 +817,39 @@ class AudioStreamer:
                 )
                 if audio_data and self.is_streaming:
-                    # Write directly to the output stream in smaller chunks
-                    chunk_size = CHUNK_SIZE  # Use the same chunk size as the stream
-                    # Write the audio data in chunks to avoid blocking too long
-                    for i in range(0, len(audio_data), chunk_size):
-                        if not self.is_streaming:
-                            break
-                        end = min(i + chunk_size, len(audio_data))
-                        chunk = audio_data[i:end]
-                        # Create a new function that captures the chunk by value
-                        def write_chunk(data):
-                            return self.output_stream.write(data)
-                        # Pass the chunk to the function
-                        await asyncio.get_event_loop().run_in_executor(None, write_chunk, chunk)
-                        # Brief yield to allow other tasks to run
-                        await asyncio.sleep(0.001)
             except asyncio.TimeoutError:
                 # No data available within timeout, just continue
@@ -773,8 +857,9 @@ class AudioStreamer:
             except Exception as e:
                 if self.is_streaming:
                     print(f"Error playing output audio: {str(e)}")
-                    import traceback
-                    traceback.print_exc()
                 await asyncio.sleep(0.05)
     async def start_streaming(self):
@@ -782,7 +867,11 @@ class AudioStreamer:
         if self.is_streaming:
             return
-        print("Starting audio streaming. Speak into your microphone...")
         print("Press Enter to stop streaming...")
         # Send audio content start event
@@ -790,13 +879,21 @@ class AudioStreamer:
         self.is_streaming = True
-        # Start the input stream if not already started
-        if not self.input_stream.is_active():
-            self.input_stream.start_stream()
-        # Start processing tasks
-        #self.input_task = asyncio.create_task(self.process_input_audio())
         self.output_task = asyncio.create_task(self.play_output_audio())
         # Wait for user to press Enter to stop
         await asyncio.get_event_loop().run_in_executor(None, input)
@@ -821,18 +918,33 @@ class AudioStreamer:
             task.cancel()
         if tasks:
             await asyncio.gather(*tasks, return_exceptions=True)
-        # Stop and close the streams
-        if self.input_stream:
-            if self.input_stream.is_active():
-                self.input_stream.stop_stream()
-            self.input_stream.close()
-        if self.output_stream:
-            if self.output_stream.is_active():
-                self.output_stream.stop_stream()
-            self.output_stream.close()
-        if self.p:
-            self.p.terminate()
         await self.stream_manager.close()

 import datetime
 import time
 import inspect
+# Import dotenv for environment variables
+try:
+    from dotenv import load_dotenv
+    # Load environment variables from .env file if it exists
+    load_dotenv()
+except ImportError:
+    print("Warning: python-dotenv not installed, using environment variables directly")
+    pass
 from aws_sdk_bedrock_runtime.client import BedrockRuntimeClient, InvokeModelWithBidirectionalStreamOperationInput
 from aws_sdk_bedrock_runtime.models import InvokeModelWithBidirectionalStreamInputChunk, BidirectionalInputPayloadPart
 from aws_sdk_bedrock_runtime.config import Config, HTTPAuthSchemeResolver, SigV4AuthScheme
         self.stream_manager = stream_manager
         self.is_streaming = False
         self.loop = asyncio.get_event_loop()
+        self.input_stream = None
+        self.output_stream = None
+        self.p = None
+        self.use_audio_fallback = False
+        try:
+            # Initialize PyAudio
+            debug_print("AudioStreamer Initializing PyAudio...")
+            self.p = time_it("AudioStreamerInitPyAudio", pyaudio.PyAudio)
+            debug_print("AudioStreamer PyAudio initialized")
+            # Check for available audio devices
+            input_device_index = None
+            output_device_index = None
+            info = self.p.get_host_api_info_by_index(0)
+            num_devices = info.get('deviceCount')
+            # Find input and output devices
+            for i in range(num_devices):
+                device_info = self.p.get_device_info_by_index(i)
+                debug_print(f"Device {i}: {device_info['name']}")
+                if device_info.get('maxInputChannels') > 0 and input_device_index is None:
+                    input_device_index = i
+                    debug_print(f"Selected input device: {device_info['name']}")
+                if device_info.get('maxOutputChannels') > 0 and output_device_index is None:
+                    output_device_index = i
+                    debug_print(f"Selected output device: {device_info['name']}")
+            if input_device_index is None or output_device_index is None:
+                raise ValueError("No suitable audio devices found")
+            # Initialize separate streams for input and output
+            # Input stream with callback for microphone
+            debug_print("Opening input audio stream...")
+            self.input_stream = time_it("AudioStreamerOpenAudio", lambda: self.p.open(
+                format=FORMAT,
+                channels=CHANNELS,
+                rate=INPUT_SAMPLE_RATE,
+                input=True,
+                input_device_index=input_device_index,
+                frames_per_buffer=CHUNK_SIZE,
+                stream_callback=self.input_callback
+            ))
+            debug_print("input audio stream opened")
+            # Output stream for direct writing (no callback)
+            debug_print("Opening output audio stream...")
+            self.output_stream = time_it("AudioStreamerOpenAudio", lambda: self.p.open(
+                format=FORMAT,
+                channels=CHANNELS,
+                rate=OUTPUT_SAMPLE_RATE,
+                output=True,
+                output_device_index=output_device_index,
+                frames_per_buffer=CHUNK_SIZE
+            ))
+            debug_print("output audio stream opened")
+        except Exception as e:
+            print(f"Warning: Could not initialize audio devices: {e}")
+            print("Using fallback mode: Will simulate audio without using real devices")
+            if self.p:
+                self.p.terminate()
+            self.p = None
+            self.use_audio_fallback = True
     def input_callback(self, in_data, frame_count, time_info, status):
         """Callback function that schedules audio processing in the asyncio event loop"""
             if self.is_streaming:
                 print(f"Error processing input audio: {e}")
+    async def generate_simulated_input(self):
+        """Generate simulated audio input in fallback mode.
+
+        Runs for as long as ``self.is_streaming`` is true. Each iteration
+        builds one chunk of low-amplitude Gaussian noise, converts it to
+        16-bit integer bytes, hands it to
+        ``self.stream_manager.add_audio_chunk`` and then sleeps for 50 ms.
+        With 5% probability per chunk it additionally pauses for one second.
+        Any exception raised inside an iteration is reported (only while
+        still streaming) and followed by a 0.5 s back-off before retrying.
+
+        NOTE(review): this method uses the module-level names ``random``,
+        ``asyncio``, ``CHUNK_SIZE`` and ``CHANNELS``; their definitions are
+        not visible in this diff. In particular, confirm that ``random`` is
+        imported at the top of the file. If it is not, the resulting
+        ``NameError`` is caught by the ``except`` below and merely printed
+        on every iteration.
+        """
+        # Imported locally, so numpy is only loaded when fallback mode runs.
+        import numpy as np
+        while self.is_streaming:
+            try:
+                # Generate a dummy audio chunk with some basic noise
+                # (zero-mean normal samples, standard deviation 0.01).
+                # This stands in for someone speaking into the microphone.
+                samples = np.random.normal(0, 0.01, CHUNK_SIZE * CHANNELS).astype(np.float32)
+                # Scale the float samples by 32767 and cast to int16 bytes.
+                audio_data = (samples * 32767).astype(np.int16).tobytes()
+                # Hand the chunk to the stream manager (presumably forwarded
+                # to Bedrock; verify against add_audio_chunk)
+                self.stream_manager.add_audio_chunk(audio_data)
+                # Wait a bit between chunks
+                await asyncio.sleep(0.05)
+                # Occasionally "end" the simulated speech to get a response.
+                # Only a pause is inserted here; no explicit end-of-speech
+                # event is sent by this method.
+                if random.random() < 0.05:  # 5% chance to end speech
+                    print("Simulated speech ended, awaiting response...")
+                    await asyncio.sleep(1.0)  # Wait longer between "sentences"
+            except Exception as e:
+                # Suppress the message once streaming has been stopped.
+                if self.is_streaming:
+                    print(f"Error generating simulated audio: {e}")
+                # Back off before the next attempt, whether or not we printed.
+                await asyncio.sleep(0.5)
     async def play_output_audio(self):
         """Play audio responses from Nova Sonic"""
         while self.is_streaming:
                 )
                 if audio_data and self.is_streaming:
+                    if self.use_audio_fallback:
+                        # In fallback mode, just log that we received audio
+                        audio_size = len(audio_data)
+                        print(f"Received {audio_size} bytes of audio from Nova")
+                        # Store the audio for potential replay
+                        self.stream_manager.output_queue.put_nowait({
+                            "event": {
+                                "audioOutput": {
+                                    "content": "Audio would play here if audio devices were available"
+                                }
+                            }
+                        })
+                    else:
+                        # Write directly to the output stream in smaller chunks
+                        chunk_size = CHUNK_SIZE  # Use the same chunk size as the stream
+                        # Write the audio data in chunks to avoid blocking too long
+                        for i in range(0, len(audio_data), chunk_size):
+                            if not self.is_streaming:
+                                break
+                            end = min(i + chunk_size, len(audio_data))
+                            chunk = audio_data[i:end]
+                            # Create a new function that captures the chunk by value
+                            def write_chunk(data):
+                                return self.output_stream.write(data)
+                            # Pass the chunk to the function
+                            await asyncio.get_event_loop().run_in_executor(None, write_chunk, chunk)
+                            # Brief yield to allow other tasks to run
+                            await asyncio.sleep(0.001)
             except asyncio.TimeoutError:
                 # No data available within timeout, just continue
             except Exception as e:
                 if self.is_streaming:
                     print(f"Error playing output audio: {str(e)}")
+                    if DEBUG:
+                        import traceback
+                        traceback.print_exc()
                 await asyncio.sleep(0.05)
     async def start_streaming(self):
         if self.is_streaming:
             return
+        if self.use_audio_fallback:
+            print("Starting audio in fallback mode (no real audio devices)...")
+        else:
+            print("Starting audio streaming. Speak into your microphone...")
         print("Press Enter to stop streaming...")
         # Send audio content start event
         self.is_streaming = True
+        # Set up tasks based on mode
+        tasks = []
+        if self.use_audio_fallback:
+            # In fallback mode, simulate input
+            self.input_task = asyncio.create_task(self.generate_simulated_input())
+            tasks.append(self.input_task)
+        else:
+            # In normal mode, start the actual audio stream
+            if self.input_stream and not self.input_stream.is_active():
+                self.input_stream.start_stream()
+        # Always process output (even in fallback mode)
         self.output_task = asyncio.create_task(self.play_output_audio())
+        tasks.append(self.output_task)
         # Wait for user to press Enter to stop
         await asyncio.get_event_loop().run_in_executor(None, input)
             task.cancel()
         if tasks:
             await asyncio.gather(*tasks, return_exceptions=True)
+        # Clean up audio resources if not in fallback mode
+        if not self.use_audio_fallback:
+            # Stop and close the streams
+            if self.input_stream:
+                try:
+                    if self.input_stream.is_active():
+                        self.input_stream.stop_stream()
+                    self.input_stream.close()
+                except Exception as e:
+                    print(f"Error closing input stream: {e}")
+            if self.output_stream:
+                try:
+                    if self.output_stream.is_active():
+                        self.output_stream.stop_stream()
+                    self.output_stream.close()
+                except Exception as e:
+                    print(f"Error closing output stream: {e}")
+            if self.p:
+                try:
+                    self.p.terminate()
+                except Exception as e:
+                    print(f"Error terminating PyAudio: {e}")
+        # Always close the stream manager
         await self.stream_manager.close()

requirements.txt CHANGED Viewed

@@ -4,7 +4,9 @@ pyaudio>=0.2.13
 numpy>=1.24.0
 gradio>=3.50.2
 pytz>=2023.3
-aws-sdk-bedrock-runtime>=0.0.1
-smithy-aws-core==0.0.1
 sounddevice>=0.4.6
 soundfile>=0.12.1

 numpy>=1.24.0
 gradio>=3.50.2
 pytz>=2023.3
+aws-sdk-bedrock-runtime>=0.1.0
+smithy-aws-core>=0.1.0
 sounddevice>=0.4.6
 soundfile>=0.12.1
+# For environment variables
+python-dotenv>=1.0.0