Spaces:

SreekarB
/

app_trial_current

Sleeping

App Files Files Community

SreekarB committed on May 17, 2025

Commit

e2cd21d

verified ·

1 Parent(s): c884291

Upload 9 files

Browse files

Files changed (1) hide show

audio_utils.py +52 -35

audio_utils.py CHANGED Viewed

@@ -1,39 +1,51 @@
-import pyaudio
-import wave
-import base64
 import numpy as np
 from config import INPUT_SAMPLE_RATE, OUTPUT_SAMPLE_RATE, BIT_DEPTH, CHANNELS, CHUNK_SIZE
 class AudioStreamer:
     def __init__(self):
         self.input_rate = INPUT_SAMPLE_RATE
         self.output_rate = OUTPUT_SAMPLE_RATE
-        self.chunk = CHUNK_SIZE
-        self.format = pyaudio.paInt16
         self.channels = CHANNELS
-        self.audio = pyaudio.PyAudio()
         self.last_audio = None
     def record_audio_stream(self):
         """Generator that yields audio chunks from microphone"""
-        stream = self.audio.open(
-            format=self.format,
-            channels=self.channels,
-            rate=self.input_rate,
-            input=True,
-            frames_per_buffer=self.chunk
-        )
         print("Recording... Speak now. (Press Ctrl+C to stop)")
         try:
-            while True:
-                audio_chunk = stream.read(self.chunk, exception_on_overflow=False)
-                yield audio_chunk
         except KeyboardInterrupt:
             print("Recording stopped.")
         finally:
-            stream.stop_stream()
-            stream.close()
     def play_audio_stream(self, audio_data):
         """Play back audio data (supports base64 encoded or raw bytes)"""
@@ -54,18 +66,17 @@ class AudioStreamer:
                 print(f"Error decoding base64 audio: {e}")
                 return
-        # Open a stream for playback
-        stream = self.audio.open(
-            format=self.format,
-            channels=self.channels,
-            rate=self.output_rate,  # Use output sample rate for Nova's audio
-            output=True
-        )
-        # Play the audio
-        stream.write(audio_data)
-        stream.stop_stream()
-        stream.close()
     def replay_last_audio(self):
         """Replay the last audio response"""
@@ -82,9 +93,15 @@ class AudioStreamer:
                 # Extract base64 content
                 b64_content = audio_data.split("b'", 1)[1].rsplit("'", 1)[0]
                 audio_data = base64.b64decode(b64_content)
-        # Convert to the correct format for Nova
-        # Nova expects 16-bit, mono, 16kHz LPCM
         return audio_data
     def decode_nova_audio(self, audio_data):
@@ -100,5 +117,5 @@ class AudioStreamer:
         return audio_data
     def cleanup(self):
-        """Clean up the PyAudio resource"""
-        self.audio.terminate()

+import sounddevice as sd
+import soundfile as sf
 import numpy as np
+import base64
+import io
+import time
+import threading
 from config import INPUT_SAMPLE_RATE, OUTPUT_SAMPLE_RATE, BIT_DEPTH, CHANNELS, CHUNK_SIZE
 class AudioStreamer:
     def __init__(self):
         # Copy the audio settings from config and reset the streamer's state.
         self.input_rate = INPUT_SAMPLE_RATE
         self.output_rate = OUTPUT_SAMPLE_RATE
         self.channels = CHANNELS
+        self.chunk_size = CHUNK_SIZE  # Samples per chunk
         # NOTE(review): presumably holds the most recent response for
         # replay_last_audio(); the assignment is not visible in this diff.
         self.last_audio = None
         # Loop flag: record_audio_stream() sets it to True and keeps yielding
         # while it stays True; cleanup() sets it back to False.
+        self.recording = False
         # Not referenced by any other line shown in this diff.
+        self.stream = None
     def record_audio_stream(self):
         """Generator that yields audio chunks from microphone"""
         # Each yielded item is the raw bytes of one int16 chunk of
         # self.chunk_size frames. The loop runs until self.recording is
         # cleared (e.g. by cleanup()) or a KeyboardInterrupt is raised.
+        self.recording = True
+        # Calculate chunk duration in seconds
         # NOTE(review): chunk_duration is not used anywhere else in this method.
+        chunk_duration = self.chunk_size / self.input_rate
         print("Recording... Speak now. (Press Ctrl+C to stop)")
         try:
+            while self.recording:
+                # Record a small chunk of audio
                 # sd.rec() returns immediately and fills the buffer in the
                 # background, hence the sd.wait() that follows.
+                audio_chunk = sd.rec(
+                    int(self.chunk_size),
+                    samplerate=self.input_rate,
+                    channels=self.channels,
+                    dtype='int16'
+                )
+                sd.wait()  # Wait until recording is finished
                 # NOTE(review): every iteration starts a fresh sd.rec() call, so
                 # audio arriving between two calls is presumably not captured;
                 # confirm whether gap-free streaming is required.
+                # Convert to bytes for streaming to AWS
+                audio_bytes = audio_chunk.tobytes()
+                yield audio_bytes
         except KeyboardInterrupt:
             print("Recording stopped.")
         finally:
             # Always clear the flag, however the generator ends.
+            self.recording = False
     def play_audio_stream(self, audio_data):
         """Play back audio data (supports base64 encoded or raw bytes)"""
                 print(f"Error decoding base64 audio: {e}")
                 return
+        # Convert bytes to numpy array
+        try:
+            # Convert bytes to numpy array (assuming 16-bit PCM format)
+            audio_array = np.frombuffer(audio_data, dtype=np.int16)
+            # Play the audio
+            sd.play(audio_array, self.output_rate)
+            sd.wait()  # Wait until audio is finished playing
+        except Exception as e:
+            print(f"Error playing audio: {e}")
     def replay_last_audio(self):
         """Replay the last audio response"""
                 # Extract base64 content
                 b64_content = audio_data.split("b'", 1)[1].rsplit("'", 1)[0]
                 audio_data = base64.b64decode(b64_content)
+        # If it's already bytes, return as is
+        if isinstance(audio_data, bytes):
+            return audio_data
+        # If it's a numpy array, convert to bytes
+        if isinstance(audio_data, np.ndarray):
+            return audio_data.tobytes()
         return audio_data
     def decode_nova_audio(self, audio_data):
         return audio_data
     def cleanup(self):
+        """Stop recording and clean up resources"""
         # Only the flag is cleared here: the `while self.recording` loop in
         # record_audio_stream() then stops before starting another chunk.
+        self.recording = False