Spaces:

SreekarB
/

app_trial_current

Sleeping

App Files Files Community

SreekarB committed on May 19, 2025

Commit

ffaf1db

verified ·

1 Parent(s): 3fc4505

Upload 11 files

Browse files

Files changed (2) hide show

app.py +314 -10
hf_audio_utils.py +260 -96

app.py CHANGED Viewed

@@ -4,6 +4,8 @@ import time
 import argparse
 import asyncio
 import numpy as np
 from nova_sonic_tool_use import BedrockStreamManager, AudioStreamer
 from language_coach import LanguageCoach
 from session_manager import SessionManager
@@ -23,6 +25,7 @@ try:
     from hf_audio_utils import HFAudioStreamer
     HF_AUDIO_AVAILABLE = True
 except ImportError:
     HF_AUDIO_AVAILABLE = False
 # Try to import transformers audio utils for ffmpeg microphone
@@ -37,7 +40,57 @@ except ImportError:
 # Check if we're in HF Spaces
 def is_huggingface_spaces():
     """Detect if we're running on HuggingFace Spaces"""
-    return "SPACE_ID" in os.environ or "SYSTEM" in os.environ and os.environ.get("SYSTEM") == "spaces"
 # Create an ffmpeg microphone streamer function
 def create_ffmpeg_mic(sample_rate=INPUT_SAMPLE_RATE, chunk_length_s=1.0, stream_chunk_s=0.25):
@@ -78,6 +131,218 @@ class NovaConversationApp:
         self.loop = None
         self.audio_stream_task = None
     def start(self):
         """Start the conversation with Nova"""
         print("Starting conversation with Nova...")
@@ -115,11 +380,37 @@ class NovaConversationApp:
             self.stream_manager = BedrockStreamManager(model_id='amazon.nova-sonic-v1:0', region=region)
             # Initialize the appropriate audio streamer based on environment
-            if is_huggingface_spaces() and HF_AUDIO_AVAILABLE:
-                print("Using Hugging Face Spaces-optimized audio streamer")
-                self.audio_streamer = HFAudioStreamer(self.stream_manager)
             else:
-                # Try to use ffmpeg microphone first if available
                 if FFMPEG_AVAILABLE:
                     print("Attempting to use ffmpeg microphone streamer")
                     # Create ffmpeg microphone
@@ -128,8 +419,8 @@ class NovaConversationApp:
                         # We'll handle ffmpeg in a separate thread after stream initialization
                         print("Will use ffmpeg microphone for audio input")
-                # Regardless of ffmpeg availability, initialize standard audio streamer as fallback
-                print("Using standard audio streamer (with potential ffmpeg enhancement)")
                 self.audio_streamer = AudioStreamer(self.stream_manager)
             # Initialize the stream in the event loop
@@ -158,6 +449,14 @@ class NovaConversationApp:
         # Initialize the stream
         await self.stream_manager.initialize_stream()
         # Start the streaming process using the built-in start_streaming method
         self.audio_stream_task = asyncio.create_task(self.audio_streamer.start_streaming())
@@ -301,12 +600,17 @@ def create_ui(app):
             gr.Markdown("""
             ### Hugging Face Spaces Mode
-            This app is running in Hugging Face Spaces.
             1. Click **Start Conversation** to begin
             2. Nova will automatically greet you
-            3. Either speak into your microphone or use the text input below
-            4. Press **Stop Conversation** when done
             """)
         with gr.Row():

 import argparse
 import asyncio
 import numpy as np
+import soundfile as sf
+import tempfile
 from nova_sonic_tool_use import BedrockStreamManager, AudioStreamer
 from language_coach import LanguageCoach
 from session_manager import SessionManager
     from hf_audio_utils import HFAudioStreamer
     HF_AUDIO_AVAILABLE = True
 except ImportError:
+    print("HFAudioStreamer not available. Attempting to create it.")
     HF_AUDIO_AVAILABLE = False
 # Try to import transformers audio utils for ffmpeg microphone
 # Check if we're in HF Spaces
 def is_huggingface_spaces():
     """Detect if we're running on HuggingFace Spaces"""
     # True when SPACE_ID is set, or when SYSTEM is set to exactly "spaces".
     # The parentheses only spell out the `and`-before-`or` precedence Python
     # already applies; they do not change the result.
     # The `"SYSTEM" in os.environ` test is redundant: os.environ.get("SYSTEM")
     # returns None when the variable is missing, which never equals "spaces".
+    return "SPACE_ID" in os.environ or ("SYSTEM" in os.environ and os.environ.get("SYSTEM") == "spaces")
+# Set environment variables to suppress ALSA errors in HF Spaces
 # This block runs at import time, and only when is_huggingface_spaces() is true.
+if is_huggingface_spaces():
     # NOTE(review): confirm which audio library actually reads AUDIODEV.
+    os.environ['AUDIODEV'] = 'null'
+    # Redirect stderr to suppress ALSA errors in output
+    try:
+        import sys
+        import io
         # NOTE(review): `io` is not referenced anywhere in this block.
+        if not hasattr(sys, '_alsa_error_redirected'):
             # The marker attribute set on `sys` further down keeps the filter from
             # being installed twice if this code runs again in the same interpreter.
+            # Save the original stderr
+            sys._original_stderr = sys.stderr
+            # Create a filter to capture ALSA errors but pass through other messages
+            class ALSAErrorFilter:
                 # Minimal stand-in for sys.stderr: only write/flush/isatty are
                 # provided; other file attributes (e.g. fileno, encoding) are not
                 # forwarded to the wrapped stream.
+                def __init__(self, original_stderr):
+                    self.original_stderr = original_stderr
                     # NOTE(review): self.buffer is never used by the methods below.
+                    self.buffer = ""
+                def write(self, text):
                     # The whole write is dropped when the text contains "ALSA" or
                     # "PCM" anywhere, so unrelated messages containing those
                     # substrings are discarded too. Returns None in both branches
                     # rather than a character count.
                     # NOTE(review): only text written through Python's sys.stderr
                     # passes through here; output that native libraries write
                     # straight to file descriptor 2 presumably bypasses it - confirm.
+                    # If it's an ALSA error, suppress it
+                    if "ALSA" in text or "PCM" in text:
+                        return
+                    # Otherwise, write to the original stderr
+                    self.original_stderr.write(text)
+                def flush(self):
+                    self.original_stderr.flush()
+                def isatty(self):
+                    return hasattr(self.original_stderr, 'isatty') and self.original_stderr.isatty()
+            # Replace stderr with our filtered version
+            sys.stderr = ALSAErrorFilter(sys._original_stderr)
+            # Function to restore stderr
+            def restore_stderr():
+                if hasattr(sys, '_original_stderr'):
+                    sys.stderr = sys._original_stderr
+                    print("Restored original stderr")
+            # Mark that we've handled this
+            sys._alsa_error_redirected = True
+            # Restore stderr on exit
+            import atexit
+            atexit.register(restore_stderr)
+            print("Installed ALSA error filter to suppress audio device errors")
     # Bare except: any failure while installing the filter (including
     # KeyboardInterrupt/SystemExit) is silently ignored, leaving stderr in
     # whatever state it had reached.
+    except:
+        pass
 # Create an ffmpeg microphone streamer function
 def create_ffmpeg_mic(sample_rate=INPUT_SAMPLE_RATE, chunk_length_s=1.0, stream_chunk_s=0.25):
         self.loop = None
         self.audio_stream_task = None
+    def _get_hf_audio_utils_content(self):
+        """Returns the content for a dynamically generated HFAudioStreamer module"""
         # The returned text is the source of a self-contained module defining
         # HFAudioStreamer. start() writes it to a temp file and imports it as
         # `dynamic_hf_audio` when HF_AUDIO_AVAILABLE is false on HF Spaces.
         # The embedded class only simulates input (random noise plus canned text
         # messages) and logs received audio; it imports ffmpeg_microphone_live
         # but never calls it, and it never writes WAV files.
         # NOTE(review): presumably this copy has to be kept in sync with
         # hf_audio_utils.py by hand - confirm.
+        return '''
+import os
+import asyncio
+import numpy as np
+import random
+import time
+import threading
+import base64
+import json
+import tempfile
+from concurrent.futures import ThreadPoolExecutor
+# Try to import the Hugging Face-specific audio utilities
+try:
+    from transformers.pipelines.audio_utils import ffmpeg_microphone_live
+    HF_AUDIO_AVAILABLE = True
+except ImportError:
+    HF_AUDIO_AVAILABLE = False
+    print("Warning: transformers.pipelines.audio_utils not available, will use fallback audio simulation")
+class HFAudioStreamer:
+    """Audio streamer for Hugging Face Spaces that works with or without real audio devices"""
+    def __init__(self, stream_manager):
+        """Initialize the HF Audio Streamer"""
+        self.stream_manager = stream_manager
+        self.is_streaming = False
+        self.use_ffmpeg = HF_AUDIO_AVAILABLE
+        self.mic_stream = None
+        self.executor = ThreadPoolExecutor(max_workers=2)
+        self.loop = asyncio.get_event_loop()
+        # Initialize tasks
+        self.input_task = None
+        self.output_task = None
+        # Check if we're in HF Spaces
+        self.is_hf_spaces = "SPACE_ID" in os.environ or ("SYSTEM" in os.environ and os.environ.get("SYSTEM") == "spaces")
+        # Create output directory for audio files
+        self.output_dir = os.path.join(tempfile.gettempdir(), "nova_output")
+        os.makedirs(self.output_dir, exist_ok=True)
+        print(f"HF Audio Streamer initialized. Using ffmpeg: {self.use_ffmpeg}, In HF Spaces: {self.is_hf_spaces}")
+        print(f"Audio output will be saved to: {self.output_dir}")
+    async def generate_simulated_input(self):
+        """Generate simulated audio input when real microphone isn't available"""
+        print("Starting simulated audio input")
+        while self.is_streaming:
+            try:
+                # Generate a dummy audio chunk with some basic noise
+                CHUNK_SIZE = 1024  # Standard audio chunk size
+                CHANNELS = 1       # Mono audio
+                samples = np.random.normal(0, 0.01, CHUNK_SIZE * CHANNELS).astype(np.float32)
+                audio_data = (samples * 32767).astype(np.int16).tobytes()
+                # Send to Bedrock
+                self.stream_manager.add_audio_chunk(audio_data)
+                # Wait between chunks
+                await asyncio.sleep(0.2)
+                # Occasionally send text to get a response
+                if random.random() < 0.05:  # 5% chance
+                    messages = [
+                        "Hello there",
+                        "How are you today?",
+                        "Tell me something interesting",
+                        "What's the weather like?",
+                        "I'm learning to speak more fluently"
+                    ]
+                    message = random.choice(messages)
+                    await self.send_text_message(message)
+                    await asyncio.sleep(2.0)
+            except Exception as e:
+                if self.is_streaming:
+                    print(f"Error generating simulated audio: {e}")
+                await asyncio.sleep(0.5)
+    async def play_output_audio(self):
+        """Handle audio output from Nova Sonic"""
+        while self.is_streaming:
+            try:
+                # Get audio data from the stream manager's queue
+                audio_data = await asyncio.wait_for(
+                    self.stream_manager.audio_output_queue.get(),
+                    timeout=0.5
+                )
+                if audio_data and self.is_streaming:
+                    # Store info in output queue for other parts of the app
+                    self.stream_manager.output_queue.put_nowait({
+                        "event": {
+                            "audioOutput": {
+                                "content": "Audio received from Nova"
+                            }
+                        }
+                    })
+                    # In HF Spaces, we can't play audio directly, but we can save it
+                    timestamp = int(time.time())
+                    output_path = os.path.join(self.output_dir, f"nova_response_{timestamp}.wav")
+                    try:
+                        # Convert from raw PCM to numpy for saving
+                        audio_np = np.frombuffer(audio_data, dtype=np.int16)
+                        # We can't import soundfile here, so we'll just log the info
+                        print(f"Would save Nova audio response ({len(audio_np)} samples) to {output_path}")
+                    except Exception as e:
+                        print(f"Error handling audio response: {e}")
+            except asyncio.TimeoutError:
+                # No data available within timeout
+                continue
+            except Exception as e:
+                if self.is_streaming:
+                    print(f"Error handling output audio: {e}")
+                await asyncio.sleep(0.1)
+    async def start_streaming(self):
+        """Start streaming audio"""
+        if self.is_streaming:
+            return
+        print(f"Starting audio streaming in HF mode...")
+        # Send audio content start event
+        await self.stream_manager.send_audio_content_start_event()
+        self.is_streaming = True
+        # Start with a welcome message from Nova
+        await self.send_text_message("Hi there! I'm Nova, your conversation partner. How are you doing today?")
+        # Start simulated input
+        self.input_task = asyncio.create_task(self.generate_simulated_input())
+        # Start output processing
+        self.output_task = asyncio.create_task(self.play_output_audio())
+    async def send_text_message(self, text):
+        """Send a text message to Nova to simulate user input"""
+        try:
+            # Create text content start event
+            content_name = str(time.time())
+            text_content_start = self.stream_manager.TEXT_CONTENT_START_EVENT % (
+                self.stream_manager.prompt_name,
+                content_name,
+                "USER"
+            )
+            await self.stream_manager.send_raw_event(text_content_start)
+            # Create text input event
+            text_input = self.stream_manager.TEXT_INPUT_EVENT % (
+                self.stream_manager.prompt_name,
+                content_name,
+                text
+            )
+            await self.stream_manager.send_raw_event(text_input)
+            # Create content end event
+            content_end = self.stream_manager.CONTENT_END_EVENT % (
+                self.stream_manager.prompt_name,
+                content_name
+            )
+            await self.stream_manager.send_raw_event(content_end)
+            print(f"Sent text message to Nova: {text}")
+            # Also add message to output queue for UI
+            await self.stream_manager.output_queue.put({
+                "event": {
+                    "textOutput": {
+                        "content": text,
+                        "role": "USER"
+                    }
+                }
+            })
+            return True
+        except Exception as e:
+            print(f"Error sending text message: {e}")
+            return False
+    async def stop_streaming(self):
+        """Stop streaming audio"""
+        if not self.is_streaming:
+            return
+        self.is_streaming = False
+        print("Stopping HF audio streaming...")
+        # Cancel all tasks
+        if self.input_task and not self.input_task.done():
+            self.input_task.cancel()
+        if self.output_task and not self.output_task.done():
+            self.output_task.cancel()
+        # Shutdown executor
+        self.executor.shutdown(wait=False)
+        # Always close the stream manager
+        await self.stream_manager.close()
+        print("HF audio streaming stopped")
+'''
     def start(self):
         """Start the conversation with Nova"""
         print("Starting conversation with Nova...")
             self.stream_manager = BedrockStreamManager(model_id='amazon.nova-sonic-v1:0', region=region)
             # Initialize the appropriate audio streamer based on environment
+            if is_huggingface_spaces():
+                # For HF Spaces, prefer our custom HF audio streamer
+                if HF_AUDIO_AVAILABLE:
+                    print("Using Hugging Face Spaces-optimized audio streamer")
+                    self.audio_streamer = HFAudioStreamer(self.stream_manager)
+                else:
+                    # Create HFAudioStreamer dynamically if not imported
+                    try:
+                        print("Creating HFAudioStreamer dynamically")
+                        # Write module to a temporary file
+                        module_content = self._get_hf_audio_utils_content()
+                        temp_dir = tempfile.mkdtemp()
+                        module_path = os.path.join(temp_dir, "dynamic_hf_audio.py")
+                        with open(module_path, 'w') as f:
+                            f.write(module_content)
+                        import sys
+                        sys.path.append(temp_dir)
+                        # Import the module
+                        import dynamic_hf_audio
+                        self.audio_streamer = dynamic_hf_audio.HFAudioStreamer(self.stream_manager)
+                        print("Successfully created dynamic HFAudioStreamer")
+                    except Exception as e:
+                        print(f"Failed to create dynamic HFAudioStreamer: {e}")
+                        # Fall back to standard audio streamer
+                        print("Falling back to standard audio streamer")
+                        self.audio_streamer = AudioStreamer(self.stream_manager)
             else:
+                # For local environments, try ffmpeg first
                 if FFMPEG_AVAILABLE:
                     print("Attempting to use ffmpeg microphone streamer")
                     # Create ffmpeg microphone
                         # We'll handle ffmpeg in a separate thread after stream initialization
                         print("Will use ffmpeg microphone for audio input")
+                # Initialize standard audio streamer
+                print("Using standard audio streamer" + (" with ffmpeg enhancement" if self.ffmpeg_mic else ""))
                 self.audio_streamer = AudioStreamer(self.stream_manager)
             # Initialize the stream in the event loop
         # Initialize the stream
         await self.stream_manager.initialize_stream()
+        # Restore stderr after stream initialization if we redirected it
+        try:
+            if hasattr(sys, '_alsa_error_redirected') and hasattr(sys, '_original_stderr'):
+                sys.stderr = sys._original_stderr
+                print("Restored stderr after stream initialization")
+        except:
+            pass
         # Start the streaming process using the built-in start_streaming method
         self.audio_stream_task = asyncio.create_task(self.audio_streamer.start_streaming())
             gr.Markdown("""
             ### Hugging Face Spaces Mode
+            This app is running in Hugging Face Spaces with speech-to-speech functionality.
             1. Click **Start Conversation** to begin
             2. Nova will automatically greet you
+            3. The app simulates speech input since real microphones aren't available in this environment
+            4. Nova's audio responses are saved as WAV files in a temporary directory
+            5. You'll see text transcriptions of the conversation in real-time
+            6. You can also use the text input below to send messages to Nova
+            7. Press **Stop Conversation** when done
+            Note: ALSA errors in the logs are normal and expected - the app handles them automatically.
             """)
         with gr.Row():

hf_audio_utils.py CHANGED Viewed

@@ -10,6 +10,10 @@ import random
 import time
 import threading
 import base64
 # Try to import the Hugging Face-specific audio utilities
 try:
@@ -28,136 +32,253 @@ class HFAudioStreamer:
         self.is_streaming = False
         self.use_ffmpeg = HF_AUDIO_AVAILABLE
         self.mic_stream = None
-        self.mic_thread = None
         self.loop = asyncio.get_event_loop()
         # Check if we're in HF Spaces
         self.is_hf_spaces = "SPACE_ID" in os.environ or ("SYSTEM" in os.environ and os.environ.get("SYSTEM") == "spaces")
         print(f"HF Audio Streamer initialized. Using ffmpeg: {self.use_ffmpeg}, In HF Spaces: {self.is_hf_spaces}")
-    def _mic_thread_worker(self):
-        """Thread function to capture audio from ffmpeg and send it to the stream manager"""
         if not self.use_ffmpeg:
-            return
-        print("Starting microphone capture using ffmpeg")
         try:
-            # Set up the mic stream with ffmpeg
             sampling_rate = 16000  # 16kHz as required by Nova Sonic
-            chunk_length_s = 2.0   # Process 2 seconds at a time
             stream_chunk_s = 0.25  # Stream in 0.25 second chunks
-            # Create the mic stream
-            self.mic_stream = ffmpeg_microphone_live(
-                sampling_rate=sampling_rate,
-                chunk_length_s=chunk_length_s,
-                stream_chunk_s=stream_chunk_s,
             )
-            # Process audio chunks
             for audio_chunk in self.mic_stream:
                 if not self.is_streaming:
                     break
-                # Convert the float32 numpy array to int16 bytes
                 if isinstance(audio_chunk, np.ndarray):
-                    # Scale from [-1.0, 1.0] to int16 range
                     audio_int16 = (audio_chunk * 32767).astype(np.int16)
                     audio_bytes = audio_int16.tobytes()
                     # Send to Bedrock
-                    asyncio.run_coroutine_threadsafe(
-                        self._send_audio_chunk(audio_bytes),
-                        self.loop
-                    )
         except Exception as e:
-            print(f"Error in microphone thread: {e}")
             if self.is_streaming:
-                # Fall back to simulated audio if ffmpeg fails
                 print("Falling back to simulated audio input")
-                self.use_ffmpeg = False
-                asyncio.run_coroutine_threadsafe(
-                    self.generate_simulated_input(),
-                    self.loop
-                )
-    async def _send_audio_chunk(self, audio_bytes):
-        """Send an audio chunk to the stream manager"""
-        if self.is_streaming and self.stream_manager and audio_bytes:
-            self.stream_manager.add_audio_chunk(audio_bytes)
     async def generate_simulated_input(self):
-        """Generate simulated audio input"""
-        import numpy as np
-        print("Generating simulated audio input...")
-        CHUNK_SIZE = 1024  # Standard audio chunk size
-        CHANNELS = 1       # Mono audio
-        while self.is_streaming:
-            try:
-                # Generate a dummy audio chunk with some basic noise
-                # This simulates someone speaking into the microphone
-                samples = np.random.normal(0, 0.01, CHUNK_SIZE * CHANNELS).astype(np.float32)
-                audio_data = (samples * 32767).astype(np.int16).tobytes()
-                # Send to Bedrock
-                await self._send_audio_chunk(audio_data)
-                # Wait a bit between chunks
-                await asyncio.sleep(0.05)
-                # Occasionally "end" the simulated speech to get a response
-                if random.random() < 0.05:  # 5% chance to end speech
-                    print("Simulated speech ended, awaiting response...")
-                    await asyncio.sleep(1.0)  # Wait longer between "sentences"
-            except Exception as e:
-                if self.is_streaming:
-                    print(f"Error generating simulated audio: {e}")
-                await asyncio.sleep(0.5)
     async def play_output_audio(self):
-        """Handle audio output (in Hugging Face, we just log it)"""
         while self.is_streaming:
             try:
                 # Get audio data from the stream manager's queue
                 audio_data = await asyncio.wait_for(
                     self.stream_manager.audio_output_queue.get(),
-                    timeout=0.1
                 )
                 if audio_data and self.is_streaming:
-                    # In HF Spaces, just log that we received audio
-                    audio_size = len(audio_data)
-                    print(f"Received {audio_size} bytes of audio from Nova")
-                    # Store the audio for potential replay
                     self.stream_manager.output_queue.put_nowait({
                         "event": {
                             "audioOutput": {
-                                "content": "Audio would play here if audio devices were available"
                             }
                         }
                     })
             except asyncio.TimeoutError:
-                # No message received within timeout, continue
                 continue
             except Exception as e:
                 if self.is_streaming:
-                    print(f"Error processing audio output: {str(e)}")
-                await asyncio.sleep(0.05)
     async def start_streaming(self):
         """Start streaming audio"""
         if self.is_streaming:
             return
-        print("Starting HF audio streaming...")
-        print("Press Enter to stop streaming...")
         # Send audio content start event
         await self.stream_manager.send_audio_content_start_event()
@@ -167,24 +288,44 @@ class HFAudioStreamer:
         # Start with a welcome message from Nova
         await self.send_text_message("Hi there! I'm Nova, your conversation partner. How are you doing today?")
-        # Set up tasks based on mode
-        if self.use_ffmpeg:
-            # Start the ffmpeg microphone thread
-            self.mic_thread = threading.Thread(target=self._mic_thread_worker)
-            self.mic_thread.daemon = True
-            self.mic_thread.start()
-        else:
-            # Use simulated input
-            asyncio.create_task(self.generate_simulated_input())
-        # Always process output
-        output_task = asyncio.create_task(self.play_output_audio())
-        # Wait for user to press Enter to stop
-        await asyncio.get_event_loop().run_in_executor(None, input)
-        # Once input() returns, stop streaming
-        await self.stop_streaming()
     async def send_text_message(self, text):
         """Send a text message to Nova to simulate user input"""
@@ -214,31 +355,54 @@ class HFAudioStreamer:
             await self.stream_manager.send_raw_event(content_end)
             print(f"Sent text message to Nova: {text}")
             return True
         except Exception as e:
             print(f"Error sending text message: {e}")
             return False
     async def stop_streaming(self):
         """Stop streaming audio"""
         if not self.is_streaming:
             return
-        print("Stopping HF audio streaming...")
         self.is_streaming = False
-        # Stop the ffmpeg mic stream if it's active
-        if self.mic_stream:
             try:
                 self.mic_stream.close()
             except:
                 pass
             self.mic_stream = None
-        # Wait for the thread to finish if it exists
-        if self.mic_thread and self.mic_thread.is_alive():
-            self.mic_thread.join(timeout=2.0)
-            self.mic_thread = None
         # Always close the stream manager
-        await self.stream_manager.close()

 import time
 import threading
 import base64
+import json
+import tempfile
+import soundfile as sf
+from concurrent.futures import ThreadPoolExecutor
 # Try to import the Hugging Face-specific audio utilities
 try:
         self.is_streaming = False
         self.use_ffmpeg = HF_AUDIO_AVAILABLE
         self.mic_stream = None
+        self.executor = ThreadPoolExecutor(max_workers=2)
         self.loop = asyncio.get_event_loop()
+        # Initialize tasks
+        self.input_task = None
+        self.output_task = None
         # Check if we're in HF Spaces
         self.is_hf_spaces = "SPACE_ID" in os.environ or ("SYSTEM" in os.environ and os.environ.get("SYSTEM") == "spaces")
+        # Create output directory for audio files
+        self.output_dir = os.path.join(tempfile.gettempdir(), "nova_output")
+        os.makedirs(self.output_dir, exist_ok=True)
         print(f"HF Audio Streamer initialized. Using ffmpeg: {self.use_ffmpeg}, In HF Spaces: {self.is_hf_spaces}")
+        print(f"Audio output will be saved to: {self.output_dir}")
+    async def initialize_ffmpeg_mic(self):
+        """Initialize the FFMPEG microphone if available"""
         # Returns True once a stream object has been stored in self.mic_stream.
         # Returns False when ffmpeg use is disabled or when creation raises; the
         # except path also clears self.use_ffmpeg.
         if not self.use_ffmpeg:
+            return False
+        # If we're in HF Spaces, expect ALSA errors and handle them gracefully
+        if self.is_hf_spaces:
+            print("HF Spaces detected - ALSA errors are expected and will be handled")
+            # Set environment variable to suppress ALSA errors
             # NOTE(review): confirm which audio library actually reads AUDIODEV.
+            os.environ['AUDIODEV'] = 'null'
         try:
+            # Create in a thread to avoid blocking
             sampling_rate = 16000  # 16kHz as required by Nova Sonic
+            chunk_length_s = 0.5   # Process 0.5 seconds at a time
             stream_chunk_s = 0.25  # Stream in 0.25 second chunks
+            # In HF Spaces, we expect this to fail with ALSA errors
+            # But we'll try anyway in case they add audio support later
             # NOTE(review): transformers documents ffmpeg_microphone_live as a
             # generator function. If so, this call only creates the generator and
             # ffmpeg/device errors would surface on first iteration, not inside
             # this try block - confirm.
+            self.mic_stream = await self.loop.run_in_executor(
+                self.executor,
+                lambda: ffmpeg_microphone_live(
+                    sampling_rate=sampling_rate,
+                    chunk_length_s=chunk_length_s,
+                    stream_chunk_s=stream_chunk_s
+                )
             )
+            print("Successfully initialized FFMPEG microphone")
+            return True
+        except Exception as e:
+            # Check for ALSA errors which are expected in Hugging Face Spaces
+            error_str = str(e)
             # Both substrings must be present to get the "expected" message; any
             # other failure is printed with its exception text.
+            if "ALSA" in error_str and "PCM" in error_str:
+                print("ALSA audio device errors detected - this is expected in cloud environments")
+                print("Switching to simulated audio input (no real microphone will be used)")
+            else:
+                print(f"Error initializing FFMPEG microphone: {e}")
+            # Always fall back to simulated audio in HF Spaces
+            self.use_ffmpeg = False
+            return False
+    async def ffmpeg_audio_processor(self):
+        """Process audio from ffmpeg microphone"""
         # Iterates self.mic_stream, forwards each ndarray chunk to the stream
         # manager as int16 bytes, and always closes and clears self.mic_stream
         # when the loop ends, whether it finished normally or failed.
+        if not self.mic_stream:
+            print("FFMPEG microphone not initialized")
+            self.use_ffmpeg = False
+            return
+        print("Starting FFMPEG audio processing")
+        try:
+            # Track for logging
+            chunks_processed = 0
+            last_log_time = time.time()
+            # Use the mic_stream as an iterator
             # NOTE(review): this is a synchronous `for` inside a coroutine. If
             # fetching the next chunk blocks, the event loop is stalled until it
             # arrives; the only yield point is the sleep at the end of each
             # iteration - confirm.
             for audio_chunk in self.mic_stream:
                 if not self.is_streaming:
                     break
+                # Process the chunk
                 # NOTE(review): transformers documents ffmpeg_microphone_live as
                 # yielding dicts with the samples under "raw". If so, this
                 # isinstance check never matches and no audio is sent - confirm.
                 if isinstance(audio_chunk, np.ndarray):
+                    # Convert float32 [-1.0, 1.0] to int16 for Nova Sonic
                     audio_int16 = (audio_chunk * 32767).astype(np.int16)
                     audio_bytes = audio_int16.tobytes()
                     # Send to Bedrock
+                    self.stream_manager.add_audio_chunk(audio_bytes)
+                    # Log periodically to show activity
+                    chunks_processed += 1
+                    current_time = time.time()
+                    if current_time - last_log_time > 2.0:
+                        print(f"FFMPEG audio: processed {chunks_processed} chunks")
+                        chunks_processed = 0
+                        last_log_time = current_time
+                # Add a small sleep to prevent tight loops
+                await asyncio.sleep(0.01)
         except Exception as e:
+            print(f"Error in FFMPEG audio processor: {e}")
+            # If the ffmpeg processor fails, fall back to simulated audio
+            self.use_ffmpeg = False
+            # Start simulated input if we're still streaming
             if self.is_streaming:
                 print("Falling back to simulated audio input")
                 # The task returned by create_task is not stored on self, so
                 # nothing here keeps a handle for cancelling it later.
+                asyncio.create_task(self.generate_simulated_input())
+        finally:
+            # Cleanup
+            if hasattr(self.mic_stream, 'close'):
+                try:
+                    self.mic_stream.close()
+                except:
+                    pass
+            self.mic_stream = None
+            print("FFMPEG audio processor stopped")
     async def generate_simulated_input(self):
         """Feed synthetic audio and canned text prompts to Nova while streaming.

         Used when a real microphone isn't available. Three one-second clips
         of low-level Gaussian noise (16 kHz, 16-bit PCM) are rendered once in
         memory; while ``self.is_streaming`` is true one randomly chosen clip
         is handed to ``self.stream_manager.add_audio_chunk`` every 0.2 s.
         After every 10 clips a random canned sentence is sent through
         ``self.send_text_message`` and the loop pauses 3 s for a reply.

         The clips are kept in memory rather than round-tripped through
         temporary WAV files, so nothing can be left behind on disk and the
         event loop is never blocked on file I/O.
         """
         print("Starting simulated audio input")

         sample_rate = 16000  # samples per one-second clip
         chunks_per_prompt = 10  # noise clips sent between two text prompts

         # Pre-render the noise clips once; each is raw int16 PCM bytes.
         noise_clips = []
         for noise_level in (0.01, 0.02, 0.03):
             samples = np.random.normal(0, noise_level, sample_rate)
             # Clip before scaling so an outlier can never wrap around in int16.
             pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
             noise_clips.append(pcm.tobytes())

         messages = [
             "Hello there",
             "How are you today?",
             "Tell me something interesting",
             "What's the weather like?",
             "I'm learning to speak more fluently"
         ]

         try:
             chunks_sent = 0
             while self.is_streaming:
                 clip = noise_clips[np.random.randint(len(noise_clips))]
                 try:
                     # Send to Bedrock
                     self.stream_manager.add_audio_chunk(clip)
                 except Exception as e:
                     print(f"Error processing simulated audio file: {e}")

                 # Wait between chunks
                 await asyncio.sleep(0.2)
                 chunks_sent += 1

                 # After a run of noise, send text to get a response
                 if chunks_sent >= chunks_per_prompt:
                     chunks_sent = 0
                     message = str(np.random.choice(messages))
                     # send_text_message already publishes the USER
                     # transcription to the output queue; publishing it here
                     # as well would show every prompt twice in the UI.
                     await self.send_text_message(message)
                     # Wait for Nova to respond
                     await asyncio.sleep(3.0)
         except Exception as e:
             print(f"Error in simulated audio generator: {e}")
             import traceback
             traceback.print_exc()
     async def play_output_audio(self):
         """Handle audio output from Nova Sonic.

         While ``self.is_streaming`` is true, polls the stream manager's
         ``audio_output_queue`` (0.5 s timeout per poll). For each non-empty
         chunk it posts an ``audioOutput`` notice on the stream manager's
         ``output_queue`` and saves the raw 16-bit PCM as a 24 kHz WAV file in
         ``self.output_dir``. Audio is saved rather than played because HF
         Spaces cannot play audio directly.

         File names carry both the wall-clock second and a per-run sequence
         number, so chunks arriving within the same second do not overwrite
         one another.
         """
         output_sample_rate = 24000  # Nova outputs at 24kHz
         chunk_index = 0

         while self.is_streaming:
             try:
                 # Get audio data from the stream manager's queue
                 audio_data = await asyncio.wait_for(
                     self.stream_manager.audio_output_queue.get(),
                     timeout=0.5
                 )

                 if audio_data and self.is_streaming:
                     # Store info in output queue for other parts of the app
                     self.stream_manager.output_queue.put_nowait({
                         "event": {
                             "audioOutput": {
                                 "content": "Audio received from Nova"
                             }
                         }
                     })

                     # A whole-second timestamp alone is not unique per chunk,
                     # so a sequence number is appended to it.
                     chunk_index += 1
                     timestamp = int(time.time())
                     output_path = os.path.join(
                         self.output_dir,
                         f"nova_response_{timestamp}_{chunk_index:05d}.wav"
                     )

                     try:
                         # Convert from raw PCM to numpy for soundfile
                         audio_np = np.frombuffer(audio_data, dtype=np.int16)
                         sf.write(output_path, audio_np, output_sample_rate)
                         print(f"Saved Nova audio response to {output_path}")
                     except Exception as e:
                         print(f"Error saving audio response: {e}")

             except asyncio.TimeoutError:
                 # No data available within timeout
                 continue
             except Exception as e:
                 if self.is_streaming:
                     print(f"Error handling output audio: {e}")
                     import traceback
                     traceback.print_exc()
                 # Brief pause so a persistent failure cannot spin the loop
                 await asyncio.sleep(0.1)
     async def start_streaming(self):
         """Start streaming audio.

         Does nothing if streaming is already active. Otherwise sends the
         audio-content-start event, marks the streamer as active, sends the
         welcome text message, and spawns two background tasks stored on
         ``self.input_task`` and ``self.output_task``:

         * input: ``ffmpeg_audio_processor`` when ``self.use_ffmpeg`` is set
           and ``initialize_ffmpeg_mic`` succeeds (never in HF Spaces),
           otherwise ``generate_simulated_input``;
         * output: ``play_output_audio``.

         The method returns right after scheduling the tasks; they keep
         running until ``stop_streaming`` is called.
         """
         if self.is_streaming:
             return

         print("Starting audio streaming in HF mode...")

         # For HF Spaces, we'll use our enhanced error handling
         if self.is_hf_spaces:
             # Set environment variables to help with audio issues
             os.environ['AUDIODEV'] = 'null'
             os.environ['SDL_AUDIODRIVER'] = 'dummy'

         # Send audio content start event
         await self.stream_manager.send_audio_content_start_event()

         # Mark the streamer active before spawning the workers: both worker
         # loops run `while self.is_streaming`, and stop_streaming() returns
         # early unless the flag is set.
         self.is_streaming = True

         # Start with a welcome message from Nova
         await self.send_text_message("Hi there! I'm Nova, your conversation partner. How are you doing today?")

         if self.is_hf_spaces:
             # In HF Spaces, go straight to simulated audio to avoid ALSA errors
             print("Running in Hugging Face Spaces - using simulated audio")
             self.use_ffmpeg = False
         elif self.use_ffmpeg:
             # For non-HF environments, try the ffmpeg microphone first
             if not await self.initialize_ffmpeg_mic():
                 self.use_ffmpeg = False

         # Fall back to simulated audio whenever ffmpeg is not in use
         if self.use_ffmpeg:
             self.input_task = asyncio.create_task(self.ffmpeg_audio_processor())
         else:
             self.input_task = asyncio.create_task(self.generate_simulated_input())

         # Start output processing
         self.output_task = asyncio.create_task(self.play_output_audio())

         if self.is_hf_spaces:
             # Let the user know what's happening
             print("Speech-to-speech functionality is active:")
             print("- Simulated audio is being sent to Nova Sonic")
             print("- Nova's responses will be saved as WAV files")
             print("- Conversation will be shown as text transcriptions")
     async def send_text_message(self, text):
         """Send a text message to Nova to simulate user input"""
             await self.stream_manager.send_raw_event(content_end)
             print(f"Sent text message to Nova: {text}")
+            # Also add message to output queue for UI
+            await self.stream_manager.output_queue.put({
+                "event": {
+                    "textOutput": {
+                        "content": text,
+                        "role": "USER"
+                    }
+                }
+            })
             return True
         except Exception as e:
             print(f"Error sending text message: {e}")
             return False
     async def stop_streaming(self):
         """Stop streaming audio and release the streamer's resources.

         Does nothing when the streamer is not currently streaming. Otherwise
         clears ``is_streaming``, cancels the input and output tasks and waits
         for them to finish, closes the microphone stream if one is open,
         shuts down the executor without waiting, and closes the stream
         manager. The stream manager is closed even when one of the earlier
         cleanup steps raises; that exception still propagates to the caller.
         """
         if not self.is_streaming:
             return

         self.is_streaming = False
         print("Stopping HF audio streaming...")

         try:
             # Cancel all tasks that are still running
             pending = []
             for task in (self.input_task, self.output_task):
                 if task and not task.done():
                     task.cancel()
                     pending.append(task)
             if pending:
                 # return_exceptions=True absorbs the CancelledError of each task
                 await asyncio.gather(*pending, return_exceptions=True)

             # Close ffmpeg mic if open
             if self.mic_stream and hasattr(self.mic_stream, 'close'):
                 try:
                     self.mic_stream.close()
                 except Exception as e:
                     # Best effort: a failing close must not abort shutdown
                     print(f"Error closing microphone stream: {e}")
                 self.mic_stream = None

             # Shutdown executor
             self.executor.shutdown(wait=False)
         finally:
             # Always close the stream manager
             await self.stream_manager.close()

         print("HF audio streaming stopped")