SreekarB committed on
Commit
e2cd21d
·
verified ·
1 Parent(s): c884291

Upload 9 files

Browse files
Files changed (1) hide show
  1. audio_utils.py +52 -35
audio_utils.py CHANGED
@@ -1,39 +1,51 @@
1
- import pyaudio
2
- import wave
3
- import base64
4
  import numpy as np
 
 
 
 
5
  from config import INPUT_SAMPLE_RATE, OUTPUT_SAMPLE_RATE, BIT_DEPTH, CHANNELS, CHUNK_SIZE
6
 
7
  class AudioStreamer:
8
  def __init__(self):
9
  self.input_rate = INPUT_SAMPLE_RATE
10
  self.output_rate = OUTPUT_SAMPLE_RATE
11
- self.chunk = CHUNK_SIZE
12
- self.format = pyaudio.paInt16
13
  self.channels = CHANNELS
14
- self.audio = pyaudio.PyAudio()
15
  self.last_audio = None
 
 
16
 
17
  def record_audio_stream(self):
18
  """Generator that yields audio chunks from microphone"""
19
- stream = self.audio.open(
20
- format=self.format,
21
- channels=self.channels,
22
- rate=self.input_rate,
23
- input=True,
24
- frames_per_buffer=self.chunk
25
- )
26
 
27
  print("Recording... Speak now. (Press Ctrl+C to stop)")
 
28
  try:
29
- while True:
30
- audio_chunk = stream.read(self.chunk, exception_on_overflow=False)
31
- yield audio_chunk
 
 
 
 
 
 
 
 
 
 
 
 
32
  except KeyboardInterrupt:
33
  print("Recording stopped.")
34
  finally:
35
- stream.stop_stream()
36
- stream.close()
37
 
38
  def play_audio_stream(self, audio_data):
39
  """Play back audio data (supports base64 encoded or raw bytes)"""
@@ -54,18 +66,17 @@ class AudioStreamer:
54
  print(f"Error decoding base64 audio: {e}")
55
  return
56
 
57
- # Open a stream for playback
58
- stream = self.audio.open(
59
- format=self.format,
60
- channels=self.channels,
61
- rate=self.output_rate, # Use output sample rate for Nova's audio
62
- output=True
63
- )
64
-
65
- # Play the audio
66
- stream.write(audio_data)
67
- stream.stop_stream()
68
- stream.close()
69
 
70
  def replay_last_audio(self):
71
  """Replay the last audio response"""
@@ -82,9 +93,15 @@ class AudioStreamer:
82
  # Extract base64 content
83
  b64_content = audio_data.split("b'", 1)[1].rsplit("'", 1)[0]
84
  audio_data = base64.b64decode(b64_content)
85
-
86
- # Convert to the correct format for Nova
87
- # Nova expects 16-bit, mono, 16kHz LPCM
 
 
 
 
 
 
88
  return audio_data
89
 
90
  def decode_nova_audio(self, audio_data):
@@ -100,5 +117,5 @@ class AudioStreamer:
100
  return audio_data
101
 
102
  def cleanup(self):
103
- """Clean up the PyAudio resource"""
104
- self.audio.terminate()
 
1
+ import sounddevice as sd
2
+ import soundfile as sf
 
3
  import numpy as np
4
+ import base64
5
+ import io
6
+ import time
7
+ import threading
8
  from config import INPUT_SAMPLE_RATE, OUTPUT_SAMPLE_RATE, BIT_DEPTH, CHANNELS, CHUNK_SIZE
9
 
10
  class AudioStreamer:
11
  def __init__(self):
12
  self.input_rate = INPUT_SAMPLE_RATE
13
  self.output_rate = OUTPUT_SAMPLE_RATE
 
 
14
  self.channels = CHANNELS
15
+ self.chunk_size = CHUNK_SIZE # Samples per chunk
16
  self.last_audio = None
17
+ self.recording = False
18
+ self.stream = None
19
 
20
  def record_audio_stream(self):
21
  """Generator that yields audio chunks from microphone"""
22
+ self.recording = True
23
+
24
+ # Calculate chunk duration in seconds
25
+ chunk_duration = self.chunk_size / self.input_rate
 
 
 
26
 
27
  print("Recording... Speak now. (Press Ctrl+C to stop)")
28
+
29
  try:
30
+ while self.recording:
31
+ # Record a small chunk of audio
32
+ audio_chunk = sd.rec(
33
+ int(self.chunk_size),
34
+ samplerate=self.input_rate,
35
+ channels=self.channels,
36
+ dtype='int16'
37
+ )
38
+ sd.wait() # Wait until recording is finished
39
+
40
+ # Convert to bytes for streaming to AWS
41
+ audio_bytes = audio_chunk.tobytes()
42
+
43
+ yield audio_bytes
44
+
45
  except KeyboardInterrupt:
46
  print("Recording stopped.")
47
  finally:
48
+ self.recording = False
 
49
 
50
  def play_audio_stream(self, audio_data):
51
  """Play back audio data (supports base64 encoded or raw bytes)"""
 
66
  print(f"Error decoding base64 audio: {e}")
67
  return
68
 
69
+ # Convert bytes to numpy array
70
+ try:
71
+ # Convert bytes to numpy array (assuming 16-bit PCM format)
72
+ audio_array = np.frombuffer(audio_data, dtype=np.int16)
73
+
74
+ # Play the audio
75
+ sd.play(audio_array, self.output_rate)
76
+ sd.wait() # Wait until audio is finished playing
77
+
78
+ except Exception as e:
79
+ print(f"Error playing audio: {e}")
 
80
 
81
  def replay_last_audio(self):
82
  """Replay the last audio response"""
 
93
  # Extract base64 content
94
  b64_content = audio_data.split("b'", 1)[1].rsplit("'", 1)[0]
95
  audio_data = base64.b64decode(b64_content)
96
+
97
+ # If it's already bytes, return as is
98
+ if isinstance(audio_data, bytes):
99
+ return audio_data
100
+
101
+ # If it's a numpy array, convert to bytes
102
+ if isinstance(audio_data, np.ndarray):
103
+ return audio_data.tobytes()
104
+
105
  return audio_data
106
 
107
  def decode_nova_audio(self, audio_data):
 
117
  return audio_data
118
 
119
  def cleanup(self):
120
+ """Stop recording and clean up resources"""
121
+ self.recording = False