SreekarB commited on
Commit
891575e
·
verified ·
1 Parent(s): 377ef31

Upload 10 files

Browse files
Files changed (2) hide show
  1. nova_sonic_tool_use.py +180 -68
  2. requirements.txt +4 -2
nova_sonic_tool_use.py CHANGED
@@ -11,6 +11,14 @@ import hashlib
11
  import datetime
12
  import time
13
  import inspect
 
 
 
 
 
 
 
 
14
  from aws_sdk_bedrock_runtime.client import BedrockRuntimeClient, InvokeModelWithBidirectionalStreamOperationInput
15
  from aws_sdk_bedrock_runtime.models import InvokeModelWithBidirectionalStreamInputChunk, BidirectionalInputPayloadPart
16
  from aws_sdk_bedrock_runtime.config import Config, HTTPAuthSchemeResolver, SigV4AuthScheme
@@ -672,36 +680,73 @@ class AudioStreamer:
672
  self.stream_manager = stream_manager
673
  self.is_streaming = False
674
  self.loop = asyncio.get_event_loop()
675
-
676
- # Initialize PyAudio
677
- debug_print("AudioStreamer Initializing PyAudio...")
678
- self.p = time_it("AudioStreamerInitPyAudio", pyaudio.PyAudio)
679
- debug_print("AudioStreamer PyAudio initialized")
680
-
681
- # Initialize separate streams for input and output
682
- # Input stream with callback for microphone
683
- debug_print("Opening input audio stream...")
684
- self.input_stream = time_it("AudioStreamerOpenAudio", lambda : self.p.open(
685
- format=FORMAT,
686
- channels=CHANNELS,
687
- rate=INPUT_SAMPLE_RATE,
688
- input=True,
689
- frames_per_buffer=CHUNK_SIZE,
690
- stream_callback=self.input_callback
691
- ))
692
- debug_print("input audio stream opened")
693
-
694
- # Output stream for direct writing (no callback)
695
- debug_print("Opening output audio stream...")
696
- self.output_stream = time_it("AudioStreamerOpenAudio", lambda : self.p.open(
697
- format=FORMAT,
698
- channels=CHANNELS,
699
- rate=OUTPUT_SAMPLE_RATE,
700
- output=True,
701
- frames_per_buffer=CHUNK_SIZE
702
- ))
703
-
704
- debug_print("output audio stream opened")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
705
 
706
  def input_callback(self, in_data, frame_count, time_info, status):
707
  """Callback function that schedules audio processing in the asyncio event loop"""
@@ -722,6 +767,32 @@ class AudioStreamer:
722
  if self.is_streaming:
723
  print(f"Error processing input audio: {e}")
724
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
725
  async def play_output_audio(self):
726
  """Play audio responses from Nova Sonic"""
727
  while self.is_streaming:
@@ -746,26 +817,39 @@ class AudioStreamer:
746
  )
747
 
748
  if audio_data and self.is_streaming:
749
- # Write directly to the output stream in smaller chunks
750
- chunk_size = CHUNK_SIZE # Use the same chunk size as the stream
751
-
752
- # Write the audio data in chunks to avoid blocking too long
753
- for i in range(0, len(audio_data), chunk_size):
754
- if not self.is_streaming:
755
- break
756
-
757
- end = min(i + chunk_size, len(audio_data))
758
- chunk = audio_data[i:end]
759
-
760
- # Create a new function that captures the chunk by value
761
- def write_chunk(data):
762
- return self.output_stream.write(data)
763
-
764
- # Pass the chunk to the function
765
- await asyncio.get_event_loop().run_in_executor(None, write_chunk, chunk)
766
 
767
- # Brief yield to allow other tasks to run
768
- await asyncio.sleep(0.001)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
769
 
770
  except asyncio.TimeoutError:
771
  # No data available within timeout, just continue
@@ -773,8 +857,9 @@ class AudioStreamer:
773
  except Exception as e:
774
  if self.is_streaming:
775
  print(f"Error playing output audio: {str(e)}")
776
- import traceback
777
- traceback.print_exc()
 
778
  await asyncio.sleep(0.05)
779
 
780
  async def start_streaming(self):
@@ -782,7 +867,11 @@ class AudioStreamer:
782
  if self.is_streaming:
783
  return
784
 
785
- print("Starting audio streaming. Speak into your microphone...")
 
 
 
 
786
  print("Press Enter to stop streaming...")
787
 
788
  # Send audio content start event
@@ -790,13 +879,21 @@ class AudioStreamer:
790
 
791
  self.is_streaming = True
792
 
793
- # Start the input stream if not already started
794
- if not self.input_stream.is_active():
795
- self.input_stream.start_stream()
796
 
797
- # Start processing tasks
798
- #self.input_task = asyncio.create_task(self.process_input_audio())
 
 
 
 
 
 
 
 
799
  self.output_task = asyncio.create_task(self.play_output_audio())
 
800
 
801
  # Wait for user to press Enter to stop
802
  await asyncio.get_event_loop().run_in_executor(None, input)
@@ -821,18 +918,33 @@ class AudioStreamer:
821
  task.cancel()
822
  if tasks:
823
  await asyncio.gather(*tasks, return_exceptions=True)
824
- # Stop and close the streams
825
- if self.input_stream:
826
- if self.input_stream.is_active():
827
- self.input_stream.stop_stream()
828
- self.input_stream.close()
829
- if self.output_stream:
830
- if self.output_stream.is_active():
831
- self.output_stream.stop_stream()
832
- self.output_stream.close()
833
- if self.p:
834
- self.p.terminate()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
835
 
 
836
  await self.stream_manager.close()
837
 
838
 
 
11
  import datetime
12
  import time
13
  import inspect
14
+ # Import dotenv for environment variables
15
+ try:
16
+ from dotenv import load_dotenv
17
+ # Load environment variables from .env file if it exists
18
+ load_dotenv()
19
+ except ImportError:
20
+ print("Warning: python-dotenv not installed, using environment variables directly")
21
+ pass
22
  from aws_sdk_bedrock_runtime.client import BedrockRuntimeClient, InvokeModelWithBidirectionalStreamOperationInput
23
  from aws_sdk_bedrock_runtime.models import InvokeModelWithBidirectionalStreamInputChunk, BidirectionalInputPayloadPart
24
  from aws_sdk_bedrock_runtime.config import Config, HTTPAuthSchemeResolver, SigV4AuthScheme
 
680
  self.stream_manager = stream_manager
681
  self.is_streaming = False
682
  self.loop = asyncio.get_event_loop()
683
+ self.input_stream = None
684
+ self.output_stream = None
685
+ self.p = None
686
+ self.use_audio_fallback = False
687
+
688
+ try:
689
+ # Initialize PyAudio
690
+ debug_print("AudioStreamer Initializing PyAudio...")
691
+ self.p = time_it("AudioStreamerInitPyAudio", pyaudio.PyAudio)
692
+ debug_print("AudioStreamer PyAudio initialized")
693
+
694
+ # Check for available audio devices
695
+ input_device_index = None
696
+ output_device_index = None
697
+
698
+ info = self.p.get_host_api_info_by_index(0)
699
+ num_devices = info.get('deviceCount')
700
+
701
+ # Find input and output devices
702
+ for i in range(num_devices):
703
+ device_info = self.p.get_device_info_by_index(i)
704
+ debug_print(f"Device {i}: {device_info['name']}")
705
+
706
+ if device_info.get('maxInputChannels') > 0 and input_device_index is None:
707
+ input_device_index = i
708
+ debug_print(f"Selected input device: {device_info['name']}")
709
+
710
+ if device_info.get('maxOutputChannels') > 0 and output_device_index is None:
711
+ output_device_index = i
712
+ debug_print(f"Selected output device: {device_info['name']}")
713
+
714
+ if input_device_index is None or output_device_index is None:
715
+ raise ValueError("No suitable audio devices found")
716
+
717
+ # Initialize separate streams for input and output
718
+ # Input stream with callback for microphone
719
+ debug_print("Opening input audio stream...")
720
+ self.input_stream = time_it("AudioStreamerOpenAudio", lambda: self.p.open(
721
+ format=FORMAT,
722
+ channels=CHANNELS,
723
+ rate=INPUT_SAMPLE_RATE,
724
+ input=True,
725
+ input_device_index=input_device_index,
726
+ frames_per_buffer=CHUNK_SIZE,
727
+ stream_callback=self.input_callback
728
+ ))
729
+ debug_print("input audio stream opened")
730
+
731
+ # Output stream for direct writing (no callback)
732
+ debug_print("Opening output audio stream...")
733
+ self.output_stream = time_it("AudioStreamerOpenAudio", lambda: self.p.open(
734
+ format=FORMAT,
735
+ channels=CHANNELS,
736
+ rate=OUTPUT_SAMPLE_RATE,
737
+ output=True,
738
+ output_device_index=output_device_index,
739
+ frames_per_buffer=CHUNK_SIZE
740
+ ))
741
+ debug_print("output audio stream opened")
742
+
743
+ except Exception as e:
744
+ print(f"Warning: Could not initialize audio devices: {e}")
745
+ print("Using fallback mode: Will simulate audio without using real devices")
746
+ if self.p:
747
+ self.p.terminate()
748
+ self.p = None
749
+ self.use_audio_fallback = True
750
 
751
  def input_callback(self, in_data, frame_count, time_info, status):
752
  """Callback function that schedules audio processing in the asyncio event loop"""
 
767
  if self.is_streaming:
768
  print(f"Error processing input audio: {e}")
769
 
770
+ async def generate_simulated_input(self):
771
+ """Generate simulated audio input in fallback mode"""
772
+ import numpy as np
773
+ while self.is_streaming:
774
+ try:
775
+ # Generate a dummy audio chunk with some basic noise
776
+ # This simulates someone speaking into the microphone
777
+ samples = np.random.normal(0, 0.01, CHUNK_SIZE * CHANNELS).astype(np.float32)
778
+ audio_data = (samples * 32767).astype(np.int16).tobytes()
779
+
780
+ # Send to Bedrock
781
+ self.stream_manager.add_audio_chunk(audio_data)
782
+
783
+ # Wait a bit between chunks
784
+ await asyncio.sleep(0.05)
785
+
786
+ # Occasionally "end" the simulated speech to get a response
787
+ if random.random() < 0.05: # 5% chance to end speech
788
+ print("Simulated speech ended, awaiting response...")
789
+ await asyncio.sleep(1.0) # Wait longer between "sentences"
790
+
791
+ except Exception as e:
792
+ if self.is_streaming:
793
+ print(f"Error generating simulated audio: {e}")
794
+ await asyncio.sleep(0.5)
795
+
796
  async def play_output_audio(self):
797
  """Play audio responses from Nova Sonic"""
798
  while self.is_streaming:
 
817
  )
818
 
819
  if audio_data and self.is_streaming:
820
+ if self.use_audio_fallback:
821
+ # In fallback mode, just log that we received audio
822
+ audio_size = len(audio_data)
823
+ print(f"Received {audio_size} bytes of audio from Nova")
824
+ # Store the audio for potential replay
825
+ self.stream_manager.output_queue.put_nowait({
826
+ "event": {
827
+ "audioOutput": {
828
+ "content": "Audio would play here if audio devices were available"
829
+ }
830
+ }
831
+ })
832
+ else:
833
+ # Write directly to the output stream in smaller chunks
834
+ chunk_size = CHUNK_SIZE # Use the same chunk size as the stream
 
 
835
 
836
+ # Write the audio data in chunks to avoid blocking too long
837
+ for i in range(0, len(audio_data), chunk_size):
838
+ if not self.is_streaming:
839
+ break
840
+
841
+ end = min(i + chunk_size, len(audio_data))
842
+ chunk = audio_data[i:end]
843
+
844
+ # Create a new function that captures the chunk by value
845
+ def write_chunk(data):
846
+ return self.output_stream.write(data)
847
+
848
+ # Pass the chunk to the function
849
+ await asyncio.get_event_loop().run_in_executor(None, write_chunk, chunk)
850
+
851
+ # Brief yield to allow other tasks to run
852
+ await asyncio.sleep(0.001)
853
 
854
  except asyncio.TimeoutError:
855
  # No data available within timeout, just continue
 
857
  except Exception as e:
858
  if self.is_streaming:
859
  print(f"Error playing output audio: {str(e)}")
860
+ if DEBUG:
861
+ import traceback
862
+ traceback.print_exc()
863
  await asyncio.sleep(0.05)
864
 
865
  async def start_streaming(self):
 
867
  if self.is_streaming:
868
  return
869
 
870
+ if self.use_audio_fallback:
871
+ print("Starting audio in fallback mode (no real audio devices)...")
872
+ else:
873
+ print("Starting audio streaming. Speak into your microphone...")
874
+
875
  print("Press Enter to stop streaming...")
876
 
877
  # Send audio content start event
 
879
 
880
  self.is_streaming = True
881
 
882
+ # Set up tasks based on mode
883
+ tasks = []
 
884
 
885
+ if self.use_audio_fallback:
886
+ # In fallback mode, simulate input
887
+ self.input_task = asyncio.create_task(self.generate_simulated_input())
888
+ tasks.append(self.input_task)
889
+ else:
890
+ # In normal mode, start the actual audio stream
891
+ if self.input_stream and not self.input_stream.is_active():
892
+ self.input_stream.start_stream()
893
+
894
+ # Always process output (even in fallback mode)
895
  self.output_task = asyncio.create_task(self.play_output_audio())
896
+ tasks.append(self.output_task)
897
 
898
  # Wait for user to press Enter to stop
899
  await asyncio.get_event_loop().run_in_executor(None, input)
 
918
  task.cancel()
919
  if tasks:
920
  await asyncio.gather(*tasks, return_exceptions=True)
921
+
922
+ # Clean up audio resources if not in fallback mode
923
+ if not self.use_audio_fallback:
924
+ # Stop and close the streams
925
+ if self.input_stream:
926
+ try:
927
+ if self.input_stream.is_active():
928
+ self.input_stream.stop_stream()
929
+ self.input_stream.close()
930
+ except Exception as e:
931
+ print(f"Error closing input stream: {e}")
932
+
933
+ if self.output_stream:
934
+ try:
935
+ if self.output_stream.is_active():
936
+ self.output_stream.stop_stream()
937
+ self.output_stream.close()
938
+ except Exception as e:
939
+ print(f"Error closing output stream: {e}")
940
+
941
+ if self.p:
942
+ try:
943
+ self.p.terminate()
944
+ except Exception as e:
945
+ print(f"Error terminating PyAudio: {e}")
946
 
947
+ # Always close the stream manager
948
  await self.stream_manager.close()
949
 
950
 
requirements.txt CHANGED
@@ -4,7 +4,9 @@ pyaudio>=0.2.13
4
  numpy>=1.24.0
5
  gradio>=3.50.2
6
  pytz>=2023.3
7
- aws-sdk-bedrock-runtime>=0.0.1
8
- smithy-aws-core==0.0.1
9
  sounddevice>=0.4.6
10
  soundfile>=0.12.1
 
 
 
4
  numpy>=1.24.0
5
  gradio>=3.50.2
6
  pytz>=2023.3
7
+ aws-sdk-bedrock-runtime>=0.1.0
8
+ smithy-aws-core>=0.1.0
9
  sounddevice>=0.4.6
10
  soundfile>=0.12.1
11
+ # For environment variables
12
+ python-dotenv>=1.0.0