Spaces:
Sleeping
Sleeping
Upload 10 files
Browse files- nova_sonic_tool_use.py +180 -68
- requirements.txt +4 -2
nova_sonic_tool_use.py
CHANGED
|
@@ -11,6 +11,14 @@ import hashlib
|
|
| 11 |
import datetime
|
| 12 |
import time
|
| 13 |
import inspect
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
from aws_sdk_bedrock_runtime.client import BedrockRuntimeClient, InvokeModelWithBidirectionalStreamOperationInput
|
| 15 |
from aws_sdk_bedrock_runtime.models import InvokeModelWithBidirectionalStreamInputChunk, BidirectionalInputPayloadPart
|
| 16 |
from aws_sdk_bedrock_runtime.config import Config, HTTPAuthSchemeResolver, SigV4AuthScheme
|
|
@@ -672,36 +680,73 @@ class AudioStreamer:
|
|
| 672 |
self.stream_manager = stream_manager
|
| 673 |
self.is_streaming = False
|
| 674 |
self.loop = asyncio.get_event_loop()
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
self.
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 705 |
|
| 706 |
def input_callback(self, in_data, frame_count, time_info, status):
|
| 707 |
"""Callback function that schedules audio processing in the asyncio event loop"""
|
|
@@ -722,6 +767,32 @@ class AudioStreamer:
|
|
| 722 |
if self.is_streaming:
|
| 723 |
print(f"Error processing input audio: {e}")
|
| 724 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 725 |
async def play_output_audio(self):
|
| 726 |
"""Play audio responses from Nova Sonic"""
|
| 727 |
while self.is_streaming:
|
|
@@ -746,26 +817,39 @@ class AudioStreamer:
|
|
| 746 |
)
|
| 747 |
|
| 748 |
if audio_data and self.is_streaming:
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
|
| 761 |
-
|
| 762 |
-
|
| 763 |
-
|
| 764 |
-
# Pass the chunk to the function
|
| 765 |
-
await asyncio.get_event_loop().run_in_executor(None, write_chunk, chunk)
|
| 766 |
|
| 767 |
-
#
|
| 768 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 769 |
|
| 770 |
except asyncio.TimeoutError:
|
| 771 |
# No data available within timeout, just continue
|
|
@@ -773,8 +857,9 @@ class AudioStreamer:
|
|
| 773 |
except Exception as e:
|
| 774 |
if self.is_streaming:
|
| 775 |
print(f"Error playing output audio: {str(e)}")
|
| 776 |
-
|
| 777 |
-
|
|
|
|
| 778 |
await asyncio.sleep(0.05)
|
| 779 |
|
| 780 |
async def start_streaming(self):
|
|
@@ -782,7 +867,11 @@ class AudioStreamer:
|
|
| 782 |
if self.is_streaming:
|
| 783 |
return
|
| 784 |
|
| 785 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 786 |
print("Press Enter to stop streaming...")
|
| 787 |
|
| 788 |
# Send audio content start event
|
|
@@ -790,13 +879,21 @@ class AudioStreamer:
|
|
| 790 |
|
| 791 |
self.is_streaming = True
|
| 792 |
|
| 793 |
-
#
|
| 794 |
-
|
| 795 |
-
self.input_stream.start_stream()
|
| 796 |
|
| 797 |
-
|
| 798 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 799 |
self.output_task = asyncio.create_task(self.play_output_audio())
|
|
|
|
| 800 |
|
| 801 |
# Wait for user to press Enter to stop
|
| 802 |
await asyncio.get_event_loop().run_in_executor(None, input)
|
|
@@ -821,18 +918,33 @@ class AudioStreamer:
|
|
| 821 |
task.cancel()
|
| 822 |
if tasks:
|
| 823 |
await asyncio.gather(*tasks, return_exceptions=True)
|
| 824 |
-
|
| 825 |
-
if
|
| 826 |
-
|
| 827 |
-
|
| 828 |
-
self.input_stream
|
| 829 |
-
|
| 830 |
-
|
| 831 |
-
|
| 832 |
-
|
| 833 |
-
|
| 834 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 835 |
|
|
|
|
| 836 |
await self.stream_manager.close()
|
| 837 |
|
| 838 |
|
|
|
|
| 11 |
import datetime
|
| 12 |
import time
|
| 13 |
import inspect
|
| 14 |
+
# Import dotenv for environment variables
|
| 15 |
+
try:
|
| 16 |
+
from dotenv import load_dotenv
|
| 17 |
+
# Load environment variables from .env file if it exists
|
| 18 |
+
load_dotenv()
|
| 19 |
+
except ImportError:
|
| 20 |
+
print("Warning: python-dotenv not installed, using environment variables directly")
|
| 21 |
+
pass
|
| 22 |
from aws_sdk_bedrock_runtime.client import BedrockRuntimeClient, InvokeModelWithBidirectionalStreamOperationInput
|
| 23 |
from aws_sdk_bedrock_runtime.models import InvokeModelWithBidirectionalStreamInputChunk, BidirectionalInputPayloadPart
|
| 24 |
from aws_sdk_bedrock_runtime.config import Config, HTTPAuthSchemeResolver, SigV4AuthScheme
|
|
|
|
| 680 |
self.stream_manager = stream_manager
|
| 681 |
self.is_streaming = False
|
| 682 |
self.loop = asyncio.get_event_loop()
|
| 683 |
+
self.input_stream = None
|
| 684 |
+
self.output_stream = None
|
| 685 |
+
self.p = None
|
| 686 |
+
self.use_audio_fallback = False
|
| 687 |
+
|
| 688 |
+
try:
|
| 689 |
+
# Initialize PyAudio
|
| 690 |
+
debug_print("AudioStreamer Initializing PyAudio...")
|
| 691 |
+
self.p = time_it("AudioStreamerInitPyAudio", pyaudio.PyAudio)
|
| 692 |
+
debug_print("AudioStreamer PyAudio initialized")
|
| 693 |
+
|
| 694 |
+
# Check for available audio devices
|
| 695 |
+
input_device_index = None
|
| 696 |
+
output_device_index = None
|
| 697 |
+
|
| 698 |
+
info = self.p.get_host_api_info_by_index(0)
|
| 699 |
+
num_devices = info.get('deviceCount')
|
| 700 |
+
|
| 701 |
+
# Find input and output devices
|
| 702 |
+
for i in range(num_devices):
|
| 703 |
+
device_info = self.p.get_device_info_by_index(i)
|
| 704 |
+
debug_print(f"Device {i}: {device_info['name']}")
|
| 705 |
+
|
| 706 |
+
if device_info.get('maxInputChannels') > 0 and input_device_index is None:
|
| 707 |
+
input_device_index = i
|
| 708 |
+
debug_print(f"Selected input device: {device_info['name']}")
|
| 709 |
+
|
| 710 |
+
if device_info.get('maxOutputChannels') > 0 and output_device_index is None:
|
| 711 |
+
output_device_index = i
|
| 712 |
+
debug_print(f"Selected output device: {device_info['name']}")
|
| 713 |
+
|
| 714 |
+
if input_device_index is None or output_device_index is None:
|
| 715 |
+
raise ValueError("No suitable audio devices found")
|
| 716 |
+
|
| 717 |
+
# Initialize separate streams for input and output
|
| 718 |
+
# Input stream with callback for microphone
|
| 719 |
+
debug_print("Opening input audio stream...")
|
| 720 |
+
self.input_stream = time_it("AudioStreamerOpenAudio", lambda: self.p.open(
|
| 721 |
+
format=FORMAT,
|
| 722 |
+
channels=CHANNELS,
|
| 723 |
+
rate=INPUT_SAMPLE_RATE,
|
| 724 |
+
input=True,
|
| 725 |
+
input_device_index=input_device_index,
|
| 726 |
+
frames_per_buffer=CHUNK_SIZE,
|
| 727 |
+
stream_callback=self.input_callback
|
| 728 |
+
))
|
| 729 |
+
debug_print("input audio stream opened")
|
| 730 |
+
|
| 731 |
+
# Output stream for direct writing (no callback)
|
| 732 |
+
debug_print("Opening output audio stream...")
|
| 733 |
+
self.output_stream = time_it("AudioStreamerOpenAudio", lambda: self.p.open(
|
| 734 |
+
format=FORMAT,
|
| 735 |
+
channels=CHANNELS,
|
| 736 |
+
rate=OUTPUT_SAMPLE_RATE,
|
| 737 |
+
output=True,
|
| 738 |
+
output_device_index=output_device_index,
|
| 739 |
+
frames_per_buffer=CHUNK_SIZE
|
| 740 |
+
))
|
| 741 |
+
debug_print("output audio stream opened")
|
| 742 |
+
|
| 743 |
+
except Exception as e:
|
| 744 |
+
print(f"Warning: Could not initialize audio devices: {e}")
|
| 745 |
+
print("Using fallback mode: Will simulate audio without using real devices")
|
| 746 |
+
if self.p:
|
| 747 |
+
self.p.terminate()
|
| 748 |
+
self.p = None
|
| 749 |
+
self.use_audio_fallback = True
|
| 750 |
|
| 751 |
def input_callback(self, in_data, frame_count, time_info, status):
|
| 752 |
"""Callback function that schedules audio processing in the asyncio event loop"""
|
|
|
|
| 767 |
if self.is_streaming:
|
| 768 |
print(f"Error processing input audio: {e}")
|
| 769 |
|
| 770 |
+
async def generate_simulated_input(self):
|
| 771 |
+
"""Generate simulated audio input in fallback mode"""
|
| 772 |
+
import numpy as np
|
| 773 |
+
while self.is_streaming:
|
| 774 |
+
try:
|
| 775 |
+
# Generate a dummy audio chunk with some basic noise
|
| 776 |
+
# This simulates someone speaking into the microphone
|
| 777 |
+
samples = np.random.normal(0, 0.01, CHUNK_SIZE * CHANNELS).astype(np.float32)
|
| 778 |
+
audio_data = (samples * 32767).astype(np.int16).tobytes()
|
| 779 |
+
|
| 780 |
+
# Send to Bedrock
|
| 781 |
+
self.stream_manager.add_audio_chunk(audio_data)
|
| 782 |
+
|
| 783 |
+
# Wait a bit between chunks
|
| 784 |
+
await asyncio.sleep(0.05)
|
| 785 |
+
|
| 786 |
+
# Occasionally "end" the simulated speech to get a response
|
| 787 |
+
if random.random() < 0.05: # 5% chance to end speech
|
| 788 |
+
print("Simulated speech ended, awaiting response...")
|
| 789 |
+
await asyncio.sleep(1.0) # Wait longer between "sentences"
|
| 790 |
+
|
| 791 |
+
except Exception as e:
|
| 792 |
+
if self.is_streaming:
|
| 793 |
+
print(f"Error generating simulated audio: {e}")
|
| 794 |
+
await asyncio.sleep(0.5)
|
| 795 |
+
|
| 796 |
async def play_output_audio(self):
|
| 797 |
"""Play audio responses from Nova Sonic"""
|
| 798 |
while self.is_streaming:
|
|
|
|
| 817 |
)
|
| 818 |
|
| 819 |
if audio_data and self.is_streaming:
|
| 820 |
+
if self.use_audio_fallback:
|
| 821 |
+
# In fallback mode, just log that we received audio
|
| 822 |
+
audio_size = len(audio_data)
|
| 823 |
+
print(f"Received {audio_size} bytes of audio from Nova")
|
| 824 |
+
# Store the audio for potential replay
|
| 825 |
+
self.stream_manager.output_queue.put_nowait({
|
| 826 |
+
"event": {
|
| 827 |
+
"audioOutput": {
|
| 828 |
+
"content": "Audio would play here if audio devices were available"
|
| 829 |
+
}
|
| 830 |
+
}
|
| 831 |
+
})
|
| 832 |
+
else:
|
| 833 |
+
# Write directly to the output stream in smaller chunks
|
| 834 |
+
chunk_size = CHUNK_SIZE # Use the same chunk size as the stream
|
|
|
|
|
|
|
| 835 |
|
| 836 |
+
# Write the audio data in chunks to avoid blocking too long
|
| 837 |
+
for i in range(0, len(audio_data), chunk_size):
|
| 838 |
+
if not self.is_streaming:
|
| 839 |
+
break
|
| 840 |
+
|
| 841 |
+
end = min(i + chunk_size, len(audio_data))
|
| 842 |
+
chunk = audio_data[i:end]
|
| 843 |
+
|
| 844 |
+
# Create a new function that captures the chunk by value
|
| 845 |
+
def write_chunk(data):
|
| 846 |
+
return self.output_stream.write(data)
|
| 847 |
+
|
| 848 |
+
# Pass the chunk to the function
|
| 849 |
+
await asyncio.get_event_loop().run_in_executor(None, write_chunk, chunk)
|
| 850 |
+
|
| 851 |
+
# Brief yield to allow other tasks to run
|
| 852 |
+
await asyncio.sleep(0.001)
|
| 853 |
|
| 854 |
except asyncio.TimeoutError:
|
| 855 |
# No data available within timeout, just continue
|
|
|
|
| 857 |
except Exception as e:
|
| 858 |
if self.is_streaming:
|
| 859 |
print(f"Error playing output audio: {str(e)}")
|
| 860 |
+
if DEBUG:
|
| 861 |
+
import traceback
|
| 862 |
+
traceback.print_exc()
|
| 863 |
await asyncio.sleep(0.05)
|
| 864 |
|
| 865 |
async def start_streaming(self):
|
|
|
|
| 867 |
if self.is_streaming:
|
| 868 |
return
|
| 869 |
|
| 870 |
+
if self.use_audio_fallback:
|
| 871 |
+
print("Starting audio in fallback mode (no real audio devices)...")
|
| 872 |
+
else:
|
| 873 |
+
print("Starting audio streaming. Speak into your microphone...")
|
| 874 |
+
|
| 875 |
print("Press Enter to stop streaming...")
|
| 876 |
|
| 877 |
# Send audio content start event
|
|
|
|
| 879 |
|
| 880 |
self.is_streaming = True
|
| 881 |
|
| 882 |
+
# Set up tasks based on mode
|
| 883 |
+
tasks = []
|
|
|
|
| 884 |
|
| 885 |
+
if self.use_audio_fallback:
|
| 886 |
+
# In fallback mode, simulate input
|
| 887 |
+
self.input_task = asyncio.create_task(self.generate_simulated_input())
|
| 888 |
+
tasks.append(self.input_task)
|
| 889 |
+
else:
|
| 890 |
+
# In normal mode, start the actual audio stream
|
| 891 |
+
if self.input_stream and not self.input_stream.is_active():
|
| 892 |
+
self.input_stream.start_stream()
|
| 893 |
+
|
| 894 |
+
# Always process output (even in fallback mode)
|
| 895 |
self.output_task = asyncio.create_task(self.play_output_audio())
|
| 896 |
+
tasks.append(self.output_task)
|
| 897 |
|
| 898 |
# Wait for user to press Enter to stop
|
| 899 |
await asyncio.get_event_loop().run_in_executor(None, input)
|
|
|
|
| 918 |
task.cancel()
|
| 919 |
if tasks:
|
| 920 |
await asyncio.gather(*tasks, return_exceptions=True)
|
| 921 |
+
|
| 922 |
+
# Clean up audio resources if not in fallback mode
|
| 923 |
+
if not self.use_audio_fallback:
|
| 924 |
+
# Stop and close the streams
|
| 925 |
+
if self.input_stream:
|
| 926 |
+
try:
|
| 927 |
+
if self.input_stream.is_active():
|
| 928 |
+
self.input_stream.stop_stream()
|
| 929 |
+
self.input_stream.close()
|
| 930 |
+
except Exception as e:
|
| 931 |
+
print(f"Error closing input stream: {e}")
|
| 932 |
+
|
| 933 |
+
if self.output_stream:
|
| 934 |
+
try:
|
| 935 |
+
if self.output_stream.is_active():
|
| 936 |
+
self.output_stream.stop_stream()
|
| 937 |
+
self.output_stream.close()
|
| 938 |
+
except Exception as e:
|
| 939 |
+
print(f"Error closing output stream: {e}")
|
| 940 |
+
|
| 941 |
+
if self.p:
|
| 942 |
+
try:
|
| 943 |
+
self.p.terminate()
|
| 944 |
+
except Exception as e:
|
| 945 |
+
print(f"Error terminating PyAudio: {e}")
|
| 946 |
|
| 947 |
+
# Always close the stream manager
|
| 948 |
await self.stream_manager.close()
|
| 949 |
|
| 950 |
|
requirements.txt
CHANGED
|
@@ -4,7 +4,9 @@ pyaudio>=0.2.13
|
|
| 4 |
numpy>=1.24.0
|
| 5 |
gradio>=3.50.2
|
| 6 |
pytz>=2023.3
|
| 7 |
-
aws-sdk-bedrock-runtime>=0.
|
| 8 |
-
smithy-aws-core=
|
| 9 |
sounddevice>=0.4.6
|
| 10 |
soundfile>=0.12.1
|
|
|
|
|
|
|
|
|
| 4 |
numpy>=1.24.0
|
| 5 |
gradio>=3.50.2
|
| 6 |
pytz>=2023.3
|
| 7 |
+
aws-sdk-bedrock-runtime>=0.1.0
|
| 8 |
+
smithy-aws-core>=0.1.0
|
| 9 |
sounddevice>=0.4.6
|
| 10 |
soundfile>=0.12.1
|
| 11 |
+
# For environment variables
|
| 12 |
+
python-dotenv>=1.0.0
|