Spaces:

SreekarB
/

app_trial_current

Sleeping

App Files Files Community

SreekarB committed on May 19, 2025

Commit

f7b85fd

verified ·

1 Parent(s): e94791f

Upload 10 files

Browse files

Files changed (3) hide show

app.py +46 -3
hf_audio_utils.py +208 -0
nova_sonic_tool_use.py +88 -16

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import threading
 import time
 import argparse
@@ -8,6 +9,26 @@ from session_manager import SessionManager
 from config import UI_TITLE, UI_SUBTITLE
 import gradio as gr
 class NovaConversationApp:
     def __init__(self, session_id=None):
         # Initialize core components
@@ -43,11 +64,33 @@ class NovaConversationApp:
         # Run initialization in the event loop
         try:
             # Initialize stream manager
-            self.stream_manager = BedrockStreamManager(model_id='amazon.nova-sonic-v1:0', region='us-east-1')
-            # Initialize audio streamer
-            self.audio_streamer = AudioStreamer(self.stream_manager)
             # Initialize the stream in the event loop
             self.loop.run_until_complete(self._initialize_streaming())

+import os
 import threading
 import time
 import argparse
 from config import UI_TITLE, UI_SUBTITLE
 import gradio as gr
+# Import dotenv for environment variables if available
+try:
+    from dotenv import load_dotenv
+    # Load environment variables from .env file if it exists
+    load_dotenv()
+except ImportError:
+    pass
+# Import HF-specific audio utils
+try:
+    from hf_audio_utils import HFAudioStreamer
+    HF_AUDIO_AVAILABLE = True
+except ImportError:
+    HF_AUDIO_AVAILABLE = False
+# Check if we're in HF Spaces
+def is_huggingface_spaces():
+    """Report whether the process appears to run inside a Hugging Face Space.
+
+    Returns:
+        True when the ``SPACE_ID`` environment variable is present, or when
+        ``SYSTEM`` is set to exactly ``"spaces"``; False otherwise.
+    """
+    # Presence of SPACE_ID alone is enough; its value is not inspected.
+    if "SPACE_ID" in os.environ:
+        return True
+    # A missing SYSTEM yields None here, which never equals "spaces".
+    return os.environ.get("SYSTEM") == "spaces"
 class NovaConversationApp:
     def __init__(self, session_id=None):
         # Initialize core components
         # Run initialization in the event loop
         try:
+            # Check for AWS credentials
+            if not os.environ.get("AWS_ACCESS_KEY_ID") or not os.environ.get("AWS_SECRET_ACCESS_KEY"):
+                missing = []
+                if not os.environ.get("AWS_ACCESS_KEY_ID"):
+                    missing.append("AWS_ACCESS_KEY_ID")
+                if not os.environ.get("AWS_SECRET_ACCESS_KEY"):
+                    missing.append("AWS_SECRET_ACCESS_KEY")
+                error_msg = f"Missing AWS credentials: {', '.join(missing)}"
+                # Check if running in Hugging Face Spaces
+                if "SPACE_ID" in os.environ or ("SYSTEM" in os.environ and os.environ.get("SYSTEM") == "spaces"):
+                    error_msg += "\nPlease add these as secrets in your Hugging Face Space settings."
+                else:
+                    error_msg += "\nPlease set these environment variables or add them to a .env file."
+                raise ValueError(error_msg)
             # Initialize stream manager
+            region = os.environ.get("AWS_DEFAULT_REGION", "us-east-1")
+            self.stream_manager = BedrockStreamManager(model_id='amazon.nova-sonic-v1:0', region=region)
+            # Initialize the appropriate audio streamer based on environment
+            if is_huggingface_spaces() and HF_AUDIO_AVAILABLE:
+                print("Using Hugging Face Spaces-optimized audio streamer")
+                self.audio_streamer = HFAudioStreamer(self.stream_manager)
+            else:
+                print("Using standard audio streamer")
+                self.audio_streamer = AudioStreamer(self.stream_manager)
             # Initialize the stream in the event loop
             self.loop.run_until_complete(self._initialize_streaming())

hf_audio_utils.py ADDED Viewed

	@@ -0,0 +1,208 @@

+"""
+Audio utilities for Hugging Face Spaces integration.
+This module provides audio streaming for Hugging Face Spaces environments.
+"""
+import os
+import asyncio
+import numpy as np
+import random
+import time
+import threading
+import base64
+# Try to import the Hugging Face-specific audio utilities
+try:
+    from transformers.pipelines.audio_utils import ffmpeg_microphone_live
+    HF_AUDIO_AVAILABLE = True
+except ImportError:
+    HF_AUDIO_AVAILABLE = False
+    print("Warning: transformers.pipelines.audio_utils not available, will use fallback audio simulation")
+class HFAudioStreamer:
+    """Audio streamer for Hugging Face Spaces that works with or without real audio devices.
+
+    Input audio comes from ``ffmpeg_microphone_live`` (consumed on a background
+    thread) when it could be imported, and from generated low-amplitude noise
+    otherwise or when the ffmpeg capture fails. Output audio taken from the
+    stream manager is not played back; only its size is logged.
+    """
+    def __init__(self, stream_manager):
+        """Initialize the HF Audio Streamer.
+
+        Args:
+            stream_manager: Object this streamer drives. The methods of this
+                class call its ``add_audio_chunk``,
+                ``send_audio_content_start_event`` and ``close`` and use its
+                ``audio_output_queue`` and ``output_queue`` attributes.
+        """
+        self.stream_manager = stream_manager
+        self.is_streaming = False
+        self.use_ffmpeg = HF_AUDIO_AVAILABLE
+        self.mic_stream = None
+        self.mic_thread = None
+        # Strong references to background tasks: the event loop itself only
+        # keeps weak references, so an unreferenced task may be collected.
+        self._tasks = []
+        # Best-effort capture only. start_streaming() rebinds this to the loop
+        # that is actually running, which is the one the mic thread must target.
+        try:
+            self.loop = asyncio.get_event_loop()
+        except RuntimeError:
+            self.loop = None
+        # Check if we're in HF Spaces
+        self.is_hf_spaces = "SPACE_ID" in os.environ or ("SYSTEM" in os.environ and os.environ.get("SYSTEM") == "spaces")
+        print(f"HF Audio Streamer initialized. Using ffmpeg: {self.use_ffmpeg}, In HF Spaces: {self.is_hf_spaces}")
+    @staticmethod
+    def _extract_samples(chunk):
+        """Return the sample array carried by a microphone chunk, or None.
+
+        ``ffmpeg_microphone_live`` yields dictionaries holding the samples
+        under the ``"raw"`` key; a bare ``np.ndarray`` is accepted as well.
+        Anything else yields None so the caller can skip it.
+        """
+        if isinstance(chunk, dict):
+            chunk = chunk.get("raw")
+        return chunk if isinstance(chunk, np.ndarray) else None
+    @staticmethod
+    def _float_to_pcm16(samples):
+        """Convert float samples to int16 PCM bytes.
+
+        Samples are clipped to [-1.0, 1.0] before scaling so that out-of-range
+        values saturate instead of wrapping around in the int16 cast.
+        """
+        clipped = np.clip(samples, -1.0, 1.0)
+        return (clipped * 32767).astype(np.int16).tobytes()
+    def _mic_thread_worker(self):
+        """Thread target: capture audio through ffmpeg and forward it to the stream manager.
+
+        Returns immediately when ffmpeg capture is disabled. On any exception
+        while streaming is still active, switches to simulated input scheduled
+        on ``self.loop``.
+        """
+        if not self.use_ffmpeg:
+            return
+        print("Starting microphone capture using ffmpeg")
+        try:
+            # Set up the mic stream with ffmpeg
+            sampling_rate = 16000  # 16kHz as required by Nova Sonic
+            chunk_length_s = 2.0   # Process 2 seconds at a time
+            stream_chunk_s = 0.25  # Stream in 0.25 second chunks
+            self.mic_stream = ffmpeg_microphone_live(
+                sampling_rate=sampling_rate,
+                chunk_length_s=chunk_length_s,
+                stream_chunk_s=stream_chunk_s,
+            )
+            # NOTE(review): with stream_chunk_s set, the generator may yield
+            # partial chunks that repeat earlier audio - confirm whether only
+            # non-partial chunks should be forwarded.
+            for audio_chunk in self.mic_stream:
+                if not self.is_streaming:
+                    break
+                samples = self._extract_samples(audio_chunk)
+                if samples is None:
+                    continue
+                # This runs off the event-loop thread, so hand the send over
+                # to the loop instead of calling the coroutine directly.
+                asyncio.run_coroutine_threadsafe(
+                    self._send_audio_chunk(self._float_to_pcm16(samples)),
+                    self.loop
+                )
+        except Exception as e:
+            print(f"Error in microphone thread: {e}")
+            if self.is_streaming:
+                # Fall back to simulated audio if ffmpeg fails
+                print("Falling back to simulated audio input")
+                self.use_ffmpeg = False
+                asyncio.run_coroutine_threadsafe(
+                    self.generate_simulated_input(),
+                    self.loop
+                )
+    async def _send_audio_chunk(self, audio_bytes):
+        """Send an audio chunk to the stream manager.
+
+        Does nothing when streaming is off, no stream manager is set, or
+        ``audio_bytes`` is empty.
+        """
+        if self.is_streaming and self.stream_manager and audio_bytes:
+            self.stream_manager.add_audio_chunk(audio_bytes)
+    async def generate_simulated_input(self):
+        """Generate simulated audio input until streaming stops.
+
+        Sends chunks of low-amplitude Gaussian noise roughly every 0.05 s and
+        occasionally pauses for a second to mimic the end of an utterance.
+        """
+        print("Generating simulated audio input...")
+        CHUNK_SIZE = 1024  # Standard audio chunk size
+        CHANNELS = 1       # Mono audio
+        while self.is_streaming:
+            try:
+                # Generate a dummy audio chunk with some basic noise
+                samples = np.random.normal(0, 0.01, CHUNK_SIZE * CHANNELS).astype(np.float32)
+                await self._send_audio_chunk(self._float_to_pcm16(samples))
+                # Wait a bit between chunks
+                await asyncio.sleep(0.05)
+                # Occasionally "end" the simulated speech to get a response
+                if random.random() < 0.05:  # 5% chance to end speech
+                    print("Simulated speech ended, awaiting response...")
+                    await asyncio.sleep(1.0)  # Wait longer between "sentences"
+            except Exception as e:
+                if self.is_streaming:
+                    print(f"Error generating simulated audio: {e}")
+                await asyncio.sleep(0.5)
+    async def play_output_audio(self):
+        """Handle audio output (in Hugging Face, we just log it).
+
+        Polls ``stream_manager.audio_output_queue`` with a 0.1 s timeout while
+        streaming is active, logs the size of each item received, and pushes a
+        placeholder event onto ``stream_manager.output_queue``.
+        """
+        while self.is_streaming:
+            try:
+                # Get audio data from the stream manager's queue
+                audio_data = await asyncio.wait_for(
+                    self.stream_manager.audio_output_queue.get(),
+                    timeout=0.1
+                )
+                if audio_data and self.is_streaming:
+                    # In HF Spaces, just log that we received audio
+                    audio_size = len(audio_data)
+                    print(f"Received {audio_size} bytes of audio from Nova")
+                    # Enqueue a placeholder event; the audio itself is not stored.
+                    self.stream_manager.output_queue.put_nowait({
+                        "event": {
+                            "audioOutput": {
+                                "content": "Audio would play here if audio devices were available"
+                            }
+                        }
+                    })
+            except asyncio.TimeoutError:
+                # No message received within timeout, continue
+                continue
+            except Exception as e:
+                if self.is_streaming:
+                    print(f"Error processing audio output: {str(e)}")
+                await asyncio.sleep(0.05)
+    async def start_streaming(self):
+        """Start streaming audio and block until streaming is stopped.
+
+        Starts the input source (ffmpeg thread or simulated input) and the
+        output handler, then waits for Enter on stdin. When stdin is closed
+        (``input`` raises ``EOFError``), waits instead until ``is_streaming``
+        is cleared by a ``stop_streaming`` call made elsewhere.
+        ``stop_streaming`` is always invoked on the way out.
+        """
+        if self.is_streaming:
+            return
+        print("Starting HF audio streaming...")
+        print("Press Enter to stop streaming...")
+        # The mic thread schedules coroutines onto self.loop, so it has to be
+        # the loop that is running this coroutine.
+        self.loop = asyncio.get_running_loop()
+        # Send audio content start event
+        await self.stream_manager.send_audio_content_start_event()
+        self.is_streaming = True
+        # Set up tasks based on mode
+        if self.use_ffmpeg:
+            # Start the ffmpeg microphone thread
+            self.mic_thread = threading.Thread(target=self._mic_thread_worker)
+            self.mic_thread.daemon = True
+            self.mic_thread.start()
+        else:
+            # Use simulated input
+            self._tasks.append(asyncio.create_task(self.generate_simulated_input()))
+        # Always process output
+        self._tasks.append(asyncio.create_task(self.play_output_audio()))
+        try:
+            # input() blocks, so run it on the default executor.
+            await self.loop.run_in_executor(None, input)
+        except EOFError:
+            # No usable stdin: keep streaming until someone else stops it.
+            while self.is_streaming:
+                await asyncio.sleep(0.1)
+        finally:
+            # Runs on Enter, on errors and on cancellation alike.
+            await self.stop_streaming()
+    async def stop_streaming(self):
+        """Stop streaming audio and close the stream manager.
+
+        Does nothing when streaming is not active. Otherwise cancels the
+        background tasks, closes the microphone stream (best effort), waits up
+        to two seconds for the microphone thread, and awaits
+        ``stream_manager.close()``.
+        """
+        if not self.is_streaming:
+            return
+        print("Stopping HF audio streaming...")
+        self.is_streaming = False
+        for task in self._tasks:
+            task.cancel()
+        self._tasks.clear()
+        # Stop the ffmpeg mic stream if it's active
+        mic_stream, self.mic_stream = self.mic_stream, None
+        if mic_stream is not None:
+            try:
+                mic_stream.close()
+            except Exception as e:
+                # Best effort: closing may fail while the mic thread is still
+                # iterating the stream; the thread exits on its own once it
+                # sees is_streaming cleared.
+                print(f"Could not close microphone stream cleanly: {e}")
+        # Wait for the thread to finish if it exists
+        mic_thread, self.mic_thread = self.mic_thread, None
+        if mic_thread is not None and mic_thread.is_alive():
+            # Join on the executor so the event loop is not blocked meanwhile.
+            await asyncio.get_running_loop().run_in_executor(None, mic_thread.join, 2.0)
+        # Always close the stream manager
+        await self.stream_manager.close()

nova_sonic_tool_use.py CHANGED Viewed

@@ -19,6 +19,36 @@ try:
 except ImportError:
     print("Warning: python-dotenv not installed, using environment variables directly")
     pass
 from aws_sdk_bedrock_runtime.client import BedrockRuntimeClient, InvokeModelWithBidirectionalStreamOperationInput
 from aws_sdk_bedrock_runtime.models import InvokeModelWithBidirectionalStreamInputChunk, BidirectionalInputPayloadPart
 from aws_sdk_bedrock_runtime.config import Config, HTTPAuthSchemeResolver, SigV4AuthScheme
@@ -296,14 +326,34 @@ class BedrockStreamManager:
     def _initialize_client(self):
         """Initialize the Bedrock client."""
-        config = Config(
-            endpoint_uri=f"https://bedrock-runtime.{self.region}.amazonaws.com",
-            region=self.region,
-            aws_credentials_identity_resolver=EnvironmentCredentialsResolver(),
-            http_auth_scheme_resolver=HTTPAuthSchemeResolver(),
-            http_auth_schemes={"aws.auth#sigv4": SigV4AuthScheme()}
-        )
-        self.bedrock_client = BedrockRuntimeClient(config=config)
     async def initialize_stream(self):
         """Initialize the bidirectional stream with Bedrock."""
@@ -953,24 +1003,46 @@ async def main(debug=False):
     global DEBUG
     DEBUG = debug
-    # Create stream manager
-    stream_manager = BedrockStreamManager(model_id='amazon.nova-sonic-v1:0', region='us-east-1')
-    # Create audio streamer
-    audio_streamer = AudioStreamer(stream_manager)
-    # Initialize the stream
-    await time_it_async("initialize_stream", stream_manager.initialize_stream)
-    try:
         # This will run until the user presses Enter
         await audio_streamer.start_streaming()
     except KeyboardInterrupt:
         print("Interrupted by user")
     finally:
         # Clean up
-        await audio_streamer.stop_streaming()
 if __name__ == "__main__":

 except ImportError:
     print("Warning: python-dotenv not installed, using environment variables directly")
     pass
+# Check for HuggingFace Spaces environment
+def is_huggingface_spaces():
+    """Report whether the process appears to run inside a Hugging Face Space.
+
+    Returns:
+        True when the ``SPACE_ID`` environment variable is present, or when
+        ``SYSTEM`` is set to exactly ``"spaces"``; False otherwise.
+    """
+    # Presence of SPACE_ID alone is enough; its value is not inspected.
+    if "SPACE_ID" in os.environ:
+        return True
+    # A missing SYSTEM yields None here, which never equals "spaces".
+    return os.environ.get("SYSTEM") == "spaces"
+# Handle HuggingFace Spaces secrets
+def setup_environment_variables():
+    """Populate the standard AWS environment variables on Hugging Face Spaces.
+
+    Outside a Space (as judged by ``is_huggingface_spaces``) this does
+    nothing. Inside one, a non-empty ``HF_AWS_ACCESS_KEY_ID`` or
+    ``HF_AWS_SECRET_ACCESS_KEY`` is copied to its un-prefixed name when that
+    name is unset or empty, and ``AWS_DEFAULT_REGION`` defaults to
+    ``us-east-1`` when unset or empty.
+    """
+    if not is_huggingface_spaces():
+        return
+    print("Detected HuggingFace Spaces environment, checking for secrets...")
+    # Existing AWS_* values always win; the HF_-prefixed secret is a fallback.
+    for aws_name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
+        hf_name = "HF_" + aws_name
+        hf_value = os.environ.get(hf_name)
+        if hf_value and not os.environ.get(aws_name):
+            os.environ[aws_name] = hf_value
+            print("Using " + hf_name)
+    # Set default region if not already set
+    if not os.environ.get("AWS_DEFAULT_REGION"):
+        os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
+        print("Set default AWS region to us-east-1")
+# Run once at import time so the credentials are in place before use
+setup_environment_variables()
 from aws_sdk_bedrock_runtime.client import BedrockRuntimeClient, InvokeModelWithBidirectionalStreamOperationInput
 from aws_sdk_bedrock_runtime.models import InvokeModelWithBidirectionalStreamInputChunk, BidirectionalInputPayloadPart
 from aws_sdk_bedrock_runtime.config import Config, HTTPAuthSchemeResolver, SigV4AuthScheme
     def _initialize_client(self):
         """Initialize the Bedrock client."""
+        # Builds a BedrockRuntimeClient for self.region and stores it on
+        # self.bedrock_client.
+        # Raises ValueError when AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY are
+        # unset or empty, or when building the config/client fails (the
+        # original exception is attached as the cause).
+        required = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
+        missing = [name for name in required if not os.environ.get(name)]
+        if missing:
+            # Fail early with an actionable message instead of a signing error later.
+            error_msg = f"Missing AWS credentials: {', '.join(missing)}"
+            if is_huggingface_spaces():
+                error_msg += "\nPlease add these as secrets in your Hugging Face Space settings."
+            else:
+                error_msg += "\nPlease set these environment variables or add them to a .env file."
+            raise ValueError(error_msg)
+        try:
+            config = Config(
+                endpoint_uri=f"https://bedrock-runtime.{self.region}.amazonaws.com",
+                region=self.region,
+                aws_credentials_identity_resolver=EnvironmentCredentialsResolver(),
+                http_auth_scheme_resolver=HTTPAuthSchemeResolver(),
+                http_auth_schemes={"aws.auth#sigv4": SigV4AuthScheme()}
+            )
+            self.bedrock_client = BedrockRuntimeClient(config=config)
+        except Exception as e:
+            error_msg = f"Failed to initialize AWS Bedrock client: {str(e)}"
+            print(error_msg)
+            # Chain explicitly so the traceback shows the underlying failure as the cause.
+            raise ValueError(error_msg) from e
     async def initialize_stream(self):
         """Initialize the bidirectional stream with Bedrock."""
     global DEBUG
     DEBUG = debug
+    # Check AWS credentials first
+    missing_creds = []
+    if not os.environ.get("AWS_ACCESS_KEY_ID"):
+        missing_creds.append("AWS_ACCESS_KEY_ID")
+    if not os.environ.get("AWS_SECRET_ACCESS_KEY"):
+        missing_creds.append("AWS_SECRET_ACCESS_KEY")
+    if missing_creds:
+        error_message = f"Missing AWS credentials: {', '.join(missing_creds)}"
+        if is_huggingface_spaces():
+            error_message += "\nPlease add these secrets in your Hugging Face Space's settings."
+        else:
+            error_message += "\nPlease set these environment variables or create a .env file."
+        print(error_message)
+        return
+    try:
+        # Create stream manager
+        stream_manager = BedrockStreamManager(model_id='amazon.nova-sonic-v1:0', region=os.environ.get("AWS_DEFAULT_REGION", "us-east-1"))
+        # Create audio streamer
+        audio_streamer = AudioStreamer(stream_manager)
+        # Initialize the stream
+        await time_it_async("initialize_stream", stream_manager.initialize_stream)
         # This will run until the user presses Enter
         await audio_streamer.start_streaming()
     except KeyboardInterrupt:
         print("Interrupted by user")
+    except Exception as e:
+        print(f"Error running application: {e}")
+        if DEBUG:
+            import traceback
+            traceback.print_exc()
     finally:
         # Clean up
+        if 'audio_streamer' in locals():
+            await audio_streamer.stop_streaming()
 if __name__ == "__main__":