Create audio_utils.py
Browse files- audio_utils.py +24 -0
audio_utils.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# rentbot/audio_utils.py
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
def ulaw_to_pcm16(ulaw_data):
    """Convert 8-bit mu-law (G.711) encoded audio to 16-bit linear PCM.

    Each input byte is expanded with the standard G.711 mu-law rule: the
    code is bit-inverted, then split into a sign bit (bit 7), a 3-bit
    exponent (bits 4-6) and a 4-bit mantissa (bits 0-3).

    Args:
        ulaw_data: Mu-law samples, one per byte. May be a bytes-like object
            (``bytes``, ``bytearray``, ``memoryview``) or any sequence /
            array of integers in the range 0..255.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype ``int16`` with one PCM sample per
        input byte (empty when the input is empty). Values lie in
        [-32124, 32124].
    """
    # Bias added by the mu-law encoder before taking the logarithm; it has
    # to be removed again after the exponent shift.
    bias = 0x84

    if isinstance(ulaw_data, (bytes, bytearray, memoryview)):
        codes = np.frombuffer(ulaw_data, dtype=np.uint8)
    else:
        codes = np.asarray(ulaw_data, dtype=np.uint8)

    # Mu-law bytes are stored bit-inverted. Widen to int32 so the shifted
    # intermediate (up to 32256) cannot overflow.
    inverted = np.invert(codes).astype(np.int32)

    exponent = (inverted >> 4) & 0x07
    mantissa = inverted & 0x0F

    magnitude = (((mantissa << 3) + bias) << exponent) - bias
    is_negative = (inverted & 0x80) != 0
    samples = np.where(is_negative, -magnitude, magnitude)

    return samples.astype(np.int16)
|