Spaces:
Sleeping
Sleeping
petergits commited on
Commit ·
c08e928
1
Parent(s): 56ce875
2nd checking push speech2text
Browse files- Dockerfile +10 -1
- create_test_audio.py +149 -0
- generate_test_audio.py +136 -0
- mcp_speech_client.py +125 -0
- mcp_speech_service.py +408 -0
- requirements.txt +8 -3
- startup_script.py +212 -0
- testClient.py +141 -0
- whisper_http_wrapper.py +304 -0
Dockerfile
CHANGED
|
@@ -10,8 +10,17 @@ ENV PATH="/home/user/.local/bin:$PATH"
|
|
| 10 |
WORKDIR /app
|
| 11 |
|
| 12 |
COPY --chown=user ./requirements.txt requirements.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 14 |
|
| 15 |
COPY --chown=user . /app
|
| 16 |
-
CMD ["
|
|
|
|
| 17 |
|
|
|
|
| 10 |
WORKDIR /app
|
| 11 |
|
| 12 |
COPY --chown=user ./requirements.txt requirements.txt
|
| 13 |
+
COPY create_test_audio.py .
|
| 14 |
+
COPY generate_test_audio.py .
|
| 15 |
+
COPY mcp_speech_client.py .
|
| 16 |
+
COPY mcp_speech_service.py .
|
| 17 |
+
COPY requirements.txt .
|
| 18 |
+
COPY startup_script.py .
|
| 19 |
+
COPY testClient.py .
|
| 20 |
+
COPY whisper_http_wrapper.py .
|
| 21 |
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 22 |
|
| 23 |
COPY --chown=user . /app
|
| 24 |
+
CMD ["python", "startup_script.py"]
|
| 25 |
+
#CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
| 26 |
|
create_test_audio.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Create a test audio file for testing the MPC Speech service
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import soundfile as sf
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
def create_test_audio(filename="test_audio.wav", duration=3.0, sample_rate=16000):
    """
    Create a simple test audio file containing a decaying three-note chord.

    Args:
        filename: Output WAV filename.
        duration: Duration in seconds.
        sample_rate: Sample rate in Hz.

    Returns:
        The filename that was written.
    """
    # Fix: original message was garbled to the literal "(unknown)"; print the filename.
    print(f"🎵 Creating test audio file: {filename}")

    # Time axis; endpoint excluded so consecutive chunks would concatenate cleanly.
    t = np.linspace(0, duration, int(sample_rate * duration), False)

    frequency1 = 440  # A note
    frequency2 = 523  # C note

    # Chord-like mix of three sine partials (A, C, E).
    audio = (
        0.3 * np.sin(2 * np.pi * frequency1 * t) +  # A note
        0.2 * np.sin(2 * np.pi * frequency2 * t) +  # C note
        0.1 * np.sin(2 * np.pi * 660 * t)           # E note
    )

    # Exponential decay envelope so the tone sounds more natural.
    envelope = np.exp(-t * 0.5)
    audio = audio * envelope

    # Normalize to 80% full scale to prevent clipping.
    audio = audio / np.max(np.abs(audio)) * 0.8

    sf.write(filename, audio, sample_rate)

    file_size = Path(filename).stat().st_size
    print(f"✅ Created {filename}")
    print(f"   Duration: {duration}s")
    print(f"   Sample rate: {sample_rate} Hz")
    print(f"   File size: {file_size:,} bytes")

    return filename
|
| 52 |
+
|
| 53 |
+
def create_silent_audio(filename="silent_audio.wav", duration=2.0, sample_rate=16000):
    """Create an all-zero (silent) WAV file for edge-case testing.

    Args:
        filename: Output WAV filename.
        duration: Duration in seconds.
        sample_rate: Sample rate in Hz.

    Returns:
        The filename that was written.
    """
    # Fix: original message was garbled to the literal "(unknown)"; print the filename.
    print(f"🔇 Creating silent audio file: {filename}")

    # Silence is just zeros at the requested length.
    audio = np.zeros(int(sample_rate * duration))

    sf.write(filename, audio, sample_rate)

    file_size = Path(filename).stat().st_size
    print(f"✅ Created {filename}")
    print(f"   Duration: {duration}s (silent)")
    print(f"   Sample rate: {sample_rate} Hz")
    print(f"   File size: {file_size:,} bytes")

    return filename
|
| 70 |
+
|
| 71 |
+
def create_speech_like_audio(filename="speech_test.wav", duration=5.0, sample_rate=16000):
    """Create a more speech-like test WAV with formant-style frequency content.

    Args:
        filename: Output WAV filename.
        duration: Duration in seconds.
        sample_rate: Sample rate in Hz.

    Returns:
        The filename that was written.
    """
    # Fix: original message was garbled to the literal "(unknown)"; print the filename.
    print(f"🎤 Creating speech-like audio file: {filename}")

    t = np.linspace(0, duration, int(sample_rate * duration), False)

    # Human speech typically has formants around 500 Hz, 1500 Hz, 2500 Hz.
    formant1 = 500
    formant2 = 1500
    formant3 = 2500

    # Amplitude-modulated mixture of the three formant carriers.
    audio = (
        0.4 * np.sin(2 * np.pi * formant1 * t) * (1 + 0.3 * np.sin(2 * np.pi * 3 * t)) +
        0.3 * np.sin(2 * np.pi * formant2 * t) * (1 + 0.2 * np.sin(2 * np.pi * 5 * t)) +
        0.2 * np.sin(2 * np.pi * formant3 * t) * (1 + 0.1 * np.sin(2 * np.pi * 7 * t))
    )

    # Low-level Gaussian noise makes the signal less purely tonal.
    audio = audio + 0.05 * np.random.normal(0, 1, len(audio))

    # Carve the stream into "word" segments separated by quiet "pauses".
    segment_samples = int(0.8 * sample_rate)   # 0.8 s "word"
    pause_samples = int(0.2 * sample_rate)     # 0.2 s "pause"
    for i in range(0, len(audio), segment_samples + pause_samples):
        pause_start = min(i + segment_samples, len(audio))
        pause_end = min(pause_start + pause_samples, len(audio))
        if pause_end > pause_start:
            audio[pause_start:pause_end] *= 0.1  # attenuate the pause region

    # Normalize to 70% full scale.
    audio = audio / np.max(np.abs(audio)) * 0.7

    sf.write(filename, audio, sample_rate)

    file_size = Path(filename).stat().st_size
    print(f"✅ Created {filename}")
    print(f"   Duration: {duration}s")
    print(f"   Sample rate: {sample_rate} Hz")
    print(f"   File size: {file_size:,} bytes")
    print(f"   Note: This is synthetic audio for testing")

    return filename
|
| 125 |
+
|
| 126 |
+
def main():
    """Create the standard set of test audio files in the current directory."""
    print("🎧 Creating test audio files for MPC Speech service")
    print("=" * 50)

    try:
        # One file per test scenario: tone, silence, pseudo-speech.
        create_test_audio("audio.wav", duration=3.0)
        create_silent_audio("silent_test.wav", duration=2.0)
        create_speech_like_audio("speech_test.wav", duration=4.0)
    except Exception as exc:
        print(f"❌ Error creating test audio files: {exc}")
        print("💡 Make sure you have soundfile installed: pip install soundfile")
    else:
        print("\n✅ All test audio files created successfully!")
        print("📁 Files created in current directory:")
        print("   • audio.wav - Simple tone (for basic testing)")
        print("   • silent_test.wav - Silent audio (edge case testing)")
        print("   • speech_test.wav - Speech-like audio (more realistic)")
        print("\n🧪 You can now test with: python testClient.py")

if __name__ == "__main__":
    main()
|
generate_test_audio.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Generate test audio files for testing the MPC Speech service
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import soundfile as sf
|
| 8 |
+
from scipy import signal
|
| 9 |
+
import math
|
| 10 |
+
|
| 11 |
+
def generate_sine_wave(frequency=440, duration=3.0, sample_rate=16000, amplitude=0.3):
    """Return a float32 sine tone at the given frequency, duration and amplitude."""
    num_samples = int(sample_rate * duration)
    t = np.linspace(0, duration, num_samples, False)
    return (amplitude * np.sin(2 * np.pi * frequency * t)).astype(np.float32)
|
| 16 |
+
|
| 17 |
+
def generate_chirp(duration=3.0, sample_rate=16000, f0=200, f1=2000, amplitude=0.3):
    """Return a float32 linear frequency sweep from f0 to f1 Hz."""
    t = np.linspace(0, duration, int(sample_rate * duration), False)
    sweep = signal.chirp(t, f0, duration, f1)
    return (amplitude * sweep).astype(np.float32)
|
| 22 |
+
|
| 23 |
+
def generate_white_noise(duration=3.0, sample_rate=16000, amplitude=0.1):
    """Return float32 Gaussian white noise scaled by amplitude."""
    count = int(sample_rate * duration)
    noise = np.random.normal(0, 1, count)
    return (amplitude * noise).astype(np.float32)
|
| 28 |
+
|
| 29 |
+
def generate_speech_like_signal(duration=5.0, sample_rate=16000):
    """Return a float32 speech-like signal: harmonics of a wavering pitch,
    band-limited to the typical speech band and amplitude-modulated."""
    t = np.linspace(0, duration, int(sample_rate * duration), False)

    # Fundamental wobbles around 120 Hz like a voiced pitch contour.
    f0 = 120 + 30 * np.sin(2 * np.pi * 0.5 * t)

    # Sum the first five harmonics with 1/k amplitude roll-off.
    signal_wave = np.zeros_like(t)
    for harmonic in range(1, 6):
        signal_wave += (0.3 / harmonic) * np.sin(2 * np.pi * harmonic * f0 * t)

    # Band-pass 300-3000 Hz to mimic formant energy concentration.
    b, a = signal.butter(4, [300, 3000], btype='band', fs=sample_rate)
    signal_wave = signal.filtfilt(b, a, signal_wave)

    # Slow decay plus a 2 Hz tremolo envelope for a speech-like dynamic.
    envelope = np.exp(-0.5 * t) * (1 + 0.5 * np.sin(2 * np.pi * 2 * t))
    signal_wave *= envelope

    return signal_wave.astype(np.float32)
|
| 52 |
+
|
| 53 |
+
def create_test_audio_files():
    """Write a suite of synthetic WAV files covering common test scenarios."""
    sample_rate = 16000  # common sample rate for speech recognition

    # 1. Simple sine wave (440 Hz - A note)
    sf.write('test_sine_440hz.wav', generate_sine_wave(440, 3.0, sample_rate), sample_rate)
    print("Created: test_sine_440hz.wav (3 seconds, 440 Hz sine wave)")

    # 2. Frequency sweep
    sf.write('test_chirp.wav', generate_chirp(3.0, sample_rate), sample_rate)
    print("Created: test_chirp.wav (3 seconds, frequency sweep 200-2000 Hz)")

    # 3. White noise (for testing noise handling)
    sf.write('test_noise.wav', generate_white_noise(2.0, sample_rate), sample_rate)
    print("Created: test_noise.wav (2 seconds, white noise)")

    # 4. Speech-like signal
    sf.write('test_speech_like.wav', generate_speech_like_signal(5.0, sample_rate), sample_rate)
    print("Created: test_speech_like.wav (5 seconds, speech-like signal)")

    # 5. Mixed signal (sine + noise)
    mixed_duration = 4.0
    mixed_signal = (generate_sine_wave(300, mixed_duration, sample_rate, 0.4)
                    + generate_white_noise(mixed_duration, sample_rate, 0.1))
    sf.write('test_mixed.wav', mixed_signal, sample_rate)
    print("Created: test_mixed.wav (4 seconds, 300 Hz sine + noise)")

    # 6. Multi-tone signal: C major triad, averaged to stay in range.
    chord = (
        0.3 * generate_sine_wave(261.63, 3.0, sample_rate, 1.0) +    # C4
        0.2 * generate_sine_wave(329.63, 3.0, sample_rate, 1.0) +    # E4
        0.2 * generate_sine_wave(392.00, 3.0, sample_rate, 1.0)      # G4
    ) / 3
    sf.write('test_chord.wav', chord, sample_rate)
    print("Created: test_chord.wav (3 seconds, C major chord)")
|
| 93 |
+
|
| 94 |
+
def create_simple_test_audio():
    """Write 'audio.wav': a 3-second five-note melody used by the examples."""
    sample_rate = 16000
    duration = 3.0

    # Five ascending notes: C, D, E, F, G — each gets an equal slice of time.
    scale = [261.63, 293.66, 329.63, 349.23, 392.00]
    note_len = duration / len(scale)

    melody = np.array([])
    for freq in scale:
        note = generate_sine_wave(freq, note_len, sample_rate, 0.3)
        melody = np.concatenate([melody, note])

    sf.write('audio.wav', melody, sample_rate)
    print("Created: audio.wav (3 seconds, simple melody for testing)")
|
| 111 |
+
|
| 112 |
+
if __name__ == '__main__':
    print("Generating test audio files...")
    print("Sample rate: 16000 Hz (common for speech recognition)")
    print("-" * 50)

    # Best-effort convenience: install scipy on the fly if it is missing.
    try:
        from scipy import signal
    except ImportError:
        print("Installing scipy for signal processing...")
        import subprocess
        import sys
        # Fix: invoke pip via the running interpreter so the package lands in
        # the correct environment (a bare 'pip' may target a different Python).
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'scipy'])
        from scipy import signal

    # Create the simple test file first
    create_simple_test_audio()
    print("-" * 50)

    # Create various test files
    create_test_audio_files()
    print("-" * 50)
    print("All test audio files created successfully!")
    print("\nYou can now use these files to test your MPC Speech service:")
    print("- audio.wav (for basic examples)")
    print("- test_*.wav files (for various test scenarios)")
|
mcp_speech_client.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Client library for MCP Speech-to-Text service
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import asyncio
|
| 7 |
+
import aiohttp
|
| 8 |
+
import json
|
| 9 |
+
import base64
|
| 10 |
+
import logging
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from typing import Dict, Any, Optional
|
| 13 |
+
|
| 14 |
+
# Configure logging
|
| 15 |
+
logging.basicConfig(level=logging.INFO)
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
class MCPSpeechClient:
    """Async client for the MCP Speech-to-Text HTTP service.

    Usable as an async context manager; a session is also lazily created by
    the individual calls when the context manager is not used.
    """

    def __init__(self, service_url: str = "http://localhost:8081"):
        # Strip a trailing slash so path joins never produce '//'.
        self.service_url = service_url.rstrip('/')
        self.token = None            # bearer token from /auth
        self.encryption_key = None   # key returned by /auth
        self.client_session = None   # aiohttp.ClientSession, lazily created

    async def __aenter__(self):
        self.client_session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.client_session:
            await self.client_session.close()

    async def authenticate(self, client_id: str = "test_client") -> bool:
        """POST /auth; store the returned token and encryption key."""
        try:
            if not self.client_session:
                self.client_session = aiohttp.ClientSession()

            async with self.client_session.post(
                f"{self.service_url}/auth",
                json={"client_id": client_id}
            ) as response:
                if response.status != 200:
                    detail = await response.text()
                    logger.error(f"Authentication failed: {detail}")
                    return False
                payload = await response.json()
                self.token = payload.get("token")
                self.encryption_key = payload.get("encryption_key")
                logger.info("Authentication successful")
                return True
        except Exception as e:
            logger.error(f"Authentication error: {e}")
            return False

    async def transcribe_file(self, audio_file_path: str, session_id: Optional[str] = None) -> Dict[str, Any]:
        """Upload an audio file to /transcribe and return the service's JSON reply.

        Authenticates first if no token is held yet; errors are returned as
        {"status": "error", "error": ...} rather than raised.
        """
        if not self.token:
            if not await self.authenticate():
                return {"status": "error", "error": "Authentication failed"}

        try:
            if not Path(audio_file_path).exists():
                raise FileNotFoundError(f"Audio file not found: {audio_file_path}")

            headers = {"Authorization": f"Bearer {self.token}"}
            if session_id:
                headers["X-Session-ID"] = session_id

            with open(audio_file_path, 'rb') as audio_fh:
                upload = aiohttp.FormData()
                upload.add_field('audio', audio_fh, filename=Path(audio_file_path).name)

                async with self.client_session.post(
                    f"{self.service_url}/transcribe",
                    data=upload,
                    headers=headers
                ) as response:
                    if response.status == 200:
                        return await response.json()
                    detail = await response.text()
                    return {
                        "status": "error",
                        "error": f"HTTP {response.status}: {detail}"
                    }
        except Exception as e:
            logger.error(f"Transcription error: {e}")
            return {"status": "error", "error": str(e)}

    async def get_status(self) -> Dict[str, Any]:
        """GET /status and return the service's JSON status payload."""
        try:
            if not self.client_session:
                self.client_session = aiohttp.ClientSession()

            async with self.client_session.get(f"{self.service_url}/status") as response:
                if response.status == 200:
                    return await response.json()
                return {
                    "status": "error",
                    "error": f"HTTP {response.status}: {await response.text()}"
                }
        except Exception as e:
            logger.error(f"Status check error: {e}")
            return {"status": "error", "error": str(e)}
|
| 114 |
+
|
| 115 |
+
# Convenience functions for backwards compatibility
|
| 116 |
+
async def check_service_status(service_url: str = "http://localhost:8081") -> Dict[str, Any]:
    """Convenience wrapper: open a throwaway client and fetch /status."""
    async with MCPSpeechClient(service_url) as mcp:
        return await mcp.get_status()
|
| 120 |
+
|
| 121 |
+
async def transcribe_audio_file(audio_file_path: str, service_url: str = "http://localhost:8081") -> Dict[str, Any]:
    """Convenience wrapper: transcribe one file with a throwaway client."""
    async with MCPSpeechClient(service_url) as mcp:
        return await mcp.transcribe_file(audio_file_path, session_id="single_request")
|
mcp_speech_service.py
ADDED
|
@@ -0,0 +1,408 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
MCP Speech-to-Text Service
|
| 4 |
+
Exposes realtime-whisper-macbook service through a secure MCP interface
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import asyncio
|
| 8 |
+
import json
|
| 9 |
+
import logging
|
| 10 |
+
import os
|
| 11 |
+
import subprocess
|
| 12 |
+
import tempfile
|
| 13 |
+
import time
|
| 14 |
+
from datetime import datetime
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from typing import Dict, List, Optional, Any
|
| 17 |
+
import hashlib
|
| 18 |
+
import hmac
|
| 19 |
+
import base64
|
| 20 |
+
|
| 21 |
+
import aiohttp
|
| 22 |
+
from aiohttp import web, WSMsgType
|
| 23 |
+
import aiofiles
|
| 24 |
+
import numpy as np
|
| 25 |
+
import soundfile as sf
|
| 26 |
+
from cryptography.fernet import Fernet
|
| 27 |
+
|
| 28 |
+
# Configure logging
|
| 29 |
+
logging.basicConfig(level=logging.INFO)
|
| 30 |
+
logger = logging.getLogger(__name__)
|
| 31 |
+
|
| 32 |
+
class MCPSpeechService:
|
| 33 |
+
"""MCP Service for Speech-to-Text processing"""
|
| 34 |
+
|
| 35 |
+
def __init__(self, whisper_service_url: str = "http://localhost:8000",
|
| 36 |
+
encryption_key: Optional[bytes] = None):
|
| 37 |
+
self.whisper_service_url = whisper_service_url
|
| 38 |
+
self.encryption_key = encryption_key or Fernet.generate_key()
|
| 39 |
+
self.cipher = Fernet(self.encryption_key)
|
| 40 |
+
self.active_sessions: Dict[str, Dict] = {}
|
| 41 |
+
self.auth_tokens: Dict[str, Dict] = {}
|
| 42 |
+
|
| 43 |
+
def generate_session_id(self) -> str:
    """Return a unique 16-hex-char session identifier."""
    # SHA-256 over wall-clock time plus 16 random bytes, truncated.
    entropy = f"{time.time()}{os.urandom(16)}".encode()
    return hashlib.sha256(entropy).hexdigest()[:16]
|
| 46 |
+
|
| 47 |
+
def generate_auth_token(self, client_id: str) -> str:
    """Issue a signed, JWT-like bearer token valid for one hour.

    Token format: base64(json payload) + "." + hex HMAC-SHA256 signature,
    keyed with the service's encryption key. The payload is also cached in
    self.auth_tokens.
    """
    payload = {
        "client_id": client_id,
        "issued_at": int(time.time()),
        "expires_at": int(time.time()) + 3600  # 1 hour expiry
    }
    raw = json.dumps(payload).encode()
    sig = hmac.new(self.encryption_key, raw, hashlib.sha256).hexdigest()
    token = f"{base64.b64encode(raw).decode()}.{sig}"
    self.auth_tokens[token] = payload
    return token
|
| 59 |
+
|
| 60 |
+
def verify_auth_token(self, token: str) -> Optional[Dict]:
    """Validate a token produced by generate_auth_token.

    Returns the decoded payload dict, or None when the token is malformed,
    carries a bad signature, or has expired.
    """
    try:
        if "." not in token:
            return None

        encoded_payload, signature = token.rsplit(".", 1)
        raw_payload = base64.b64decode(encoded_payload.encode())

        # Constant-time comparison guards against timing attacks.
        expected = hmac.new(self.encryption_key, raw_payload, hashlib.sha256).hexdigest()
        if not hmac.compare_digest(signature, expected):
            return None

        payload = json.loads(raw_payload.decode())

        # Reject expired tokens.
        if payload.get("expires_at", 0) < int(time.time()):
            return None

        return payload
    except Exception as e:
        logger.error(f"Token verification failed: {e}")
        return None
|
| 84 |
+
|
| 85 |
+
async def encrypt_data(self, data: bytes) -> bytes:
    """Encrypt sensitive bytes with the service's Fernet cipher."""
    ciphertext = self.cipher.encrypt(data)
    return ciphertext
|
| 88 |
+
|
| 89 |
+
async def decrypt_data(self, encrypted_data: bytes) -> bytes:
    """Decrypt bytes previously produced by encrypt_data."""
    plaintext = self.cipher.decrypt(encrypted_data)
    return plaintext
|
| 92 |
+
|
| 93 |
+
async def process_audio_chunk(self, audio_data: bytes, session_id: str,
                              format: str = "wav") -> Dict[str, Any]:
    """Forward one audio chunk to the whisper backend and wrap its reply.

    Returns a dict with session_id, timestamp, and either the transcription
    payload ("status": "success") or an error description ("status": "error").
    """
    try:
        # Spool the raw bytes to a temp file so it can be re-read for upload.
        with tempfile.NamedTemporaryFile(suffix=f".{format}", delete=False) as spool:
            spool.write(audio_data)
            spool_path = spool.name

        try:
            async with aiohttp.ClientSession() as session:
                with open(spool_path, 'rb') as fh:
                    upload = aiohttp.FormData()
                    upload.add_field('audio', fh, filename=f'audio.{format}')

                    async with session.post(
                        f"{self.whisper_service_url}/transcribe",
                        data=upload
                    ) as response:
                        if response.status == 200:
                            payload = await response.json()
                            return {
                                "session_id": session_id,
                                "timestamp": datetime.utcnow().isoformat(),
                                "transcription": payload,
                                "status": "success"
                            }
                        error_text = await response.text()
                        logger.error(f"Whisper service error: {error_text}")
                        return {
                            "session_id": session_id,
                            "timestamp": datetime.utcnow().isoformat(),
                            "error": f"Service error: {response.status}",
                            "status": "error"
                        }
        finally:
            # Always remove the spooled temp file.
            os.unlink(spool_path)

    except Exception as e:
        logger.error(f"Audio processing error: {e}")
        return {
            "session_id": session_id,
            "timestamp": datetime.utcnow().isoformat(),
            "error": str(e),
            "status": "error"
        }
|
| 142 |
+
|
| 143 |
+
async def handle_auth(self, request):
    """POST /auth: issue a bearer token for the supplied client_id."""
    try:
        body = await request.json()
        client_id = body.get('client_id')

        if not client_id:
            return web.Response(
                text=json.dumps({"error": "client_id required"}),
                status=400,
                content_type='application/json'
            )

        token = self.generate_auth_token(client_id)

        # NOTE(review): this hands the server's signing/encryption key to any
        # caller that presents a client_id, which would let clients forge
        # tokens — confirm whether this disclosure is intentional.
        return web.Response(
            text=json.dumps({
                "token": token,
                "encryption_key": base64.b64encode(self.encryption_key).decode(),
                "expires_in": 3600
            }),
            content_type='application/json'
        )

    except Exception as e:
        logger.error(f"Auth error: {e}")
        return web.Response(
            text=json.dumps({"error": "Authentication failed"}),
            status=500,
            content_type='application/json'
        )
|
| 174 |
+
|
| 175 |
+
async def handle_transcribe(self, request):
    """POST /transcribe: authenticated multipart audio upload -> transcription."""
    try:
        # --- Authentication ---------------------------------------------
        auth_header = request.headers.get('Authorization', '')
        if not auth_header.startswith('Bearer '):
            return web.Response(
                text=json.dumps({"error": "Missing or invalid authorization"}),
                status=401,
                content_type='application/json'
            )

        bearer = auth_header[7:]  # strip the 'Bearer ' prefix
        auth_payload = self.verify_auth_token(bearer)
        if not auth_payload:
            return web.Response(
                text=json.dumps({"error": "Invalid or expired token"}),
                status=401,
                content_type='application/json'
            )

        # --- Session id: caller-supplied or freshly generated ------------
        session_id = request.headers.get('X-Session-ID')
        if not session_id:
            session_id = self.generate_session_id()

        # --- Pull the audio part out of the multipart body ---------------
        reader = await request.multipart()
        audio_data = None
        async for part in reader:
            if part.name == 'audio':
                audio_data = await part.read()
                break

        if not audio_data:
            return web.Response(
                text=json.dumps({"error": "No audio data provided"}),
                status=400,
                content_type='application/json'
            )

        # --- Transcribe ---------------------------------------------------
        result = await self.process_audio_chunk(audio_data, session_id)

        # Optionally encrypt the response body for the client.
        if request.headers.get('X-Encrypt-Response') == 'true':
            sealed = await self.encrypt_data(json.dumps(result).encode())
            return web.Response(
                body=sealed,
                content_type='application/octet-stream',
                headers={'X-Encrypted': 'true'}
            )

        return web.Response(
            text=json.dumps(result),
            content_type='application/json'
        )

    except Exception as e:
        logger.error(f"Transcription error: {e}")
        return web.Response(
            text=json.dumps({"error": "Transcription failed"}),
            status=500,
            content_type='application/json'
        )
|
| 241 |
+
|
| 242 |
+
async def handle_websocket(self, request):
    """Handle WebSocket connections for real-time transcription.

    Protocol: the client first sends {"type": "auth", "token": ...}; once
    authenticated it may stream audio either as {"type": "audio_chunk",
    "audio_data": <base64>} text frames or as raw BINARY frames. Each chunk
    is transcribed and answered with a 'transcription_result' message.

    BUG FIX: the original called ws.send_text(), which does not exist on
    aiohttp's web.WebSocketResponse (the API is send_str / send_bytes /
    send_json), so every send raised AttributeError at runtime.
    """
    ws = web.WebSocketResponse()
    await ws.prepare(request)

    session_id = None    # set after successful auth
    auth_payload = None  # decoded token payload after successful auth

    try:
        async for msg in ws:
            if msg.type == WSMsgType.TEXT:
                try:
                    data = json.loads(msg.data)

                    if data.get('type') == 'auth':
                        # Handle authentication
                        token = data.get('token')
                        auth_payload = self.verify_auth_token(token)
                        if auth_payload:
                            session_id = self.generate_session_id()
                            self.active_sessions[session_id] = {
                                'client_id': auth_payload['client_id'],
                                'connected_at': time.time(),
                                'ws': ws
                            }
                            await ws.send_str(json.dumps({
                                'type': 'auth_success',
                                'session_id': session_id
                            }))
                        else:
                            await ws.send_str(json.dumps({
                                'type': 'auth_error',
                                'message': 'Invalid token'
                            }))

                    elif data.get('type') == 'audio_chunk':
                        # Reject audio before authentication.
                        if not auth_payload or not session_id:
                            await ws.send_str(json.dumps({
                                'type': 'error',
                                'message': 'Not authenticated'
                            }))
                            continue

                        # Decode base64 audio data and transcribe it.
                        audio_b64 = data.get('audio_data', '')
                        audio_data = base64.b64decode(audio_b64)
                        result = await self.process_audio_chunk(audio_data, session_id)

                        await ws.send_str(json.dumps({
                            'type': 'transcription_result',
                            'data': result
                        }))

                except json.JSONDecodeError:
                    await ws.send_str(json.dumps({
                        'type': 'error',
                        'message': 'Invalid JSON'
                    }))

            elif msg.type == WSMsgType.BINARY:
                # Raw binary audio frames (no base64 wrapping).
                if not auth_payload or not session_id:
                    await ws.send_str(json.dumps({
                        'type': 'error',
                        'message': 'Not authenticated'
                    }))
                    continue

                result = await self.process_audio_chunk(msg.data, session_id)

                await ws.send_str(json.dumps({
                    'type': 'transcription_result',
                    'data': result
                }))

            elif msg.type == WSMsgType.ERROR:
                logger.error(f'WebSocket error: {ws.exception()}')
                break

    except Exception as e:
        logger.error(f"WebSocket error: {e}")
    finally:
        # Clean up session state for this connection.
        if session_id and session_id in self.active_sessions:
            del self.active_sessions[session_id]

    return ws
|
| 333 |
+
|
| 334 |
+
async def handle_status(self, request):
    """Handle status endpoint — report service health as JSON."""
    # NOTE(review): datetime.utcnow() yields a naive timestamp; presumably
    # intentional here, but worth confirming against consumers.
    payload = {
        "service": "MCP Speech-to-Text",
        "status": "running",
        "active_sessions": len(self.active_sessions),
        "whisper_service": self.whisper_service_url,
        "timestamp": datetime.utcnow().isoformat(),
    }
    return web.Response(
        text=json.dumps(payload),
        content_type='application/json'
    )
|
| 346 |
+
|
| 347 |
+
def create_app(self) -> web.Application:
    """Create the web application, wiring routes and CORS handling."""
    app = web.Application()

    # Endpoint routing.
    app.router.add_post('/auth', self.handle_auth)
    app.router.add_post('/transcribe', self.handle_transcribe)
    app.router.add_get('/ws', self.handle_websocket)
    app.router.add_get('/status', self.handle_status)

    # CORS middleware: answer OPTIONS preflights with an empty 200 and
    # attach permissive CORS headers to every response.
    @web.middleware
    async def cors_middleware(request, handler):
        response = (web.Response()
                    if request.method == 'OPTIONS'
                    else await handler(request))

        response.headers['Access-Control-Allow-Origin'] = '*'
        response.headers['Access-Control-Allow-Methods'] = 'GET, POST, OPTIONS'
        response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization, X-Session-ID, X-Encrypt-Response'
        return response

    app.middlewares.append(cors_middleware)

    return app
|
| 374 |
+
|
| 375 |
+
async def main():
    """Entry point: configure, start, and keep the MCP service alive."""
    # Configuration comes from the environment with local defaults.
    host = os.getenv('MCP_HOST', '0.0.0.0')
    port = int(os.getenv('MCP_PORT', '8081'))
    whisper_url = os.getenv('WHISPER_SERVICE_URL', 'http://localhost:8000')

    # Build the service and its aiohttp application.
    service = MCPSpeechService(whisper_service_url=whisper_url)
    app = service.create_app()

    logger.info(f"Starting MCP Speech-to-Text service on {host}:{port}")
    logger.info(f"Whisper service URL: {whisper_url}")
    logger.info(f"Encryption key: {base64.b64encode(service.encryption_key).decode()}")

    # Bring the HTTP server up.
    runner = web.AppRunner(app)
    await runner.setup()
    site = web.TCPSite(runner, host, port)
    await site.start()

    logger.info("MCP Service is running...")

    try:
        # Idle loop keeps the event loop (and thus the server) alive.
        while True:
            await asyncio.sleep(1)
    except KeyboardInterrupt:
        logger.info("Shutting down MCP service...")
    finally:
        await runner.cleanup()

if __name__ == '__main__':
    asyncio.run(main())
|
requirements.txt
CHANGED
|
@@ -1,3 +1,8 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiohttp==3.8.5
|
| 2 |
+
aiofiles==23.2.0
|
| 3 |
+
numpy==1.24.3
|
| 4 |
+
soundfile==0.12.1
|
| 5 |
+
cryptography==41.0.3
|
| 6 |
+
openai-whisper==20231117
|
| 7 |
+
torch>=1.13.0
|
| 8 |
+
torchaudio>=0.13.0
|
startup_script.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Startup script to run both Whisper HTTP service and MCP service
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import asyncio
|
| 7 |
+
import subprocess
|
| 8 |
+
import sys
|
| 9 |
+
import time
|
| 10 |
+
import signal
|
| 11 |
+
import os
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
class ServiceManager:
    """Launches and supervises the Whisper HTTP service and the MCP service.

    Each service runs as a child Python process. ``run`` starts both, then
    polls their HTTP endpoints every 10 seconds and restarts whichever one
    stops responding. SIGINT/SIGTERM terminate both children cleanly.
    """

    def __init__(self):
        self.whisper_process = None  # subprocess.Popen for whisper_http_wrapper.py
        self.mcp_process = None      # subprocess.Popen for mcp_speech_service.py
        self.running = False         # True while the monitor loop should keep going

    async def start_whisper_service(self):
        """Start the Whisper HTTP service"""
        print("🔄 Starting Whisper HTTP service...")

        # Check if whisper_http_wrapper.py exists
        if not Path("whisper_http_wrapper.py").exists():
            print("❌ whisper_http_wrapper.py not found in current directory")
            return False

        try:
            self.whisper_process = subprocess.Popen([
                sys.executable, "whisper_http_wrapper.py"
            ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

            # Give the process a moment to either come up or crash.
            await asyncio.sleep(3)

            if self.whisper_process.poll() is None:
                print("✅ Whisper HTTP service started successfully")
                return True
            else:
                stdout, stderr = self.whisper_process.communicate()
                print(f"❌ Whisper HTTP service failed to start:")
                print(f"STDOUT: {stdout}")
                print(f"STDERR: {stderr}")
                return False

        except Exception as e:
            print(f"❌ Error starting Whisper HTTP service: {e}")
            return False

    async def start_mcp_service(self):
        """Start the MCP service"""
        print("🔄 Starting MCP Speech service...")

        # Check if mcp_speech_service.py exists
        if not Path("mcp_speech_service.py").exists():
            print("❌ mcp_speech_service.py not found in current directory")
            return False

        try:
            self.mcp_process = subprocess.Popen([
                sys.executable, "mcp_speech_service.py"
            ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

            # Give the process a moment to either come up or crash.
            await asyncio.sleep(2)

            if self.mcp_process.poll() is None:
                print("✅ MCP Speech service started successfully")
                return True
            else:
                # BUG FIX: was `self.mc_process` (typo) — raised AttributeError
                # exactly on the failure-diagnostic path it was meant to serve.
                stdout, stderr = self.mcp_process.communicate()
                print(f"❌ MCP Speech service failed to start:")
                print(f"STDOUT: {stdout}")
                print(f"STDERR: {stderr}")
                return False

        except Exception as e:
            print(f"❌ Error starting MCP Speech service: {e}")
            return False

    async def check_services(self):
        """Probe both services' HTTP endpoints; return (whisper_ok, mcp_ok)."""
        import aiohttp

        # Check Whisper service
        whisper_ok = False
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get("http://localhost:8000/health", timeout=5) as response:
                    if response.status == 200:
                        whisper_ok = True
        except Exception:
            # Best-effort probe: any failure just means "not healthy".
            pass

        # Check MCP service
        mcp_ok = False
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get("http://localhost:8081/status", timeout=5) as response:
                    if response.status == 200:
                        mcp_ok = True
        except Exception:
            pass

        return whisper_ok, mcp_ok

    def stop_services(self):
        """Stop all services (terminate, then kill after a 5 s grace period)."""
        print("\n🛑 Stopping services...")

        if self.mcp_process and self.mcp_process.poll() is None:
            print(" Stopping MCP service...")
            self.mcp_process.terminate()
            try:
                self.mcp_process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                self.mcp_process.kill()

        if self.whisper_process and self.whisper_process.poll() is None:
            print(" Stopping Whisper service...")
            self.whisper_process.terminate()
            try:
                self.whisper_process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                self.whisper_process.kill()

        print("✅ Services stopped")

    def signal_handler(self, signum, frame):
        """Handle Ctrl+C / SIGTERM: stop children and exit."""
        print(f"\n📡 Received signal {signum}")
        self.running = False
        self.stop_services()
        sys.exit(0)

    async def run(self):
        """Run both services and supervise them until interrupted."""
        # Route Ctrl+C / SIGTERM through our cleanup path.
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)

        print("🚀 Starting MCP Speech-to-Text Services")
        print("=" * 50)

        # Whisper must be up before the MCP service that depends on it.
        if not await self.start_whisper_service():
            print("❌ Failed to start Whisper service. Exiting.")
            return

        if not await self.start_mcp_service():
            print("❌ Failed to start MCP service. Stopping Whisper service.")
            self.stop_services()
            return

        print("\n✅ Both services are running!")
        print("📋 Service URLs:")
        print(" • Whisper HTTP: http://localhost:8000")
        print(" • MCP Service: http://localhost:8081")
        print("\n🧪 You can now run: python testClient.py")
        print("⏹️ Press Ctrl+C to stop all services")

        self.running = True

        # Monitor services and restart any that go down.
        try:
            while self.running:
                await asyncio.sleep(10)  # Check every 10 seconds

                whisper_ok, mcp_ok = await self.check_services()

                if not whisper_ok:
                    print("⚠️ Whisper service appears to be down")
                if not mcp_ok:
                    print("⚠️ MCP service appears to be down")

                if not whisper_ok or not mcp_ok:
                    print("🔄 Attempting to restart services...")
                    self.stop_services()
                    await asyncio.sleep(2)

                    if not whisper_ok:
                        await self.start_whisper_service()
                    if not mcp_ok:
                        await self.start_mcp_service()

        except KeyboardInterrupt:
            pass
        finally:
            self.stop_services()
|
| 192 |
+
|
| 193 |
+
async def main():
    """Print help for --help, otherwise run the service manager forever.

    BUG FIX: the help text said `python startup.py`, but this file is
    startup_script.py (it is also what the Dockerfile CMD invokes).
    """
    if len(sys.argv) > 1 and sys.argv[1] == "--help":
        print("MCP Speech-to-Text Service Manager")
        print("Usage: python startup_script.py")
        print("\nThis script will:")
        print("1. Start the Whisper HTTP service on port 8000")
        print("2. Start the MCP Speech service on port 8081")
        print("3. Monitor both services and restart if needed")
        print("4. Stop all services when you press Ctrl+C")
        return

    manager = ServiceManager()
    await manager.run()

if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n👋 Goodbye!")
|
testClient.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Test client for MCP Speech-to-Text service
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import asyncio
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from mcp_speech_client import transcribe_audio_file, check_service_status
|
| 10 |
+
|
| 11 |
+
async def main():
    """Main test function.

    End-to-end smoke test: check the MCP service status endpoint, find a
    local test WAV, send it for transcription, and print the result.

    BUG FIX: user-facing messages referenced files that do not exist in
    this repo (`startup.py`, `mpc_speech_service.py`) and used the stray
    spelling "MPC" — corrected to startup_script.py / mcp_speech_service.py
    / "MCP" so the printed instructions actually work.
    """
    print("🧪 Testing MCP Speech-to-Text Client")
    print("=" * 50)

    # First, check if the services are running
    print("1️⃣ Checking service status...")
    status = await check_service_status()
    print(f" Service status: {status.get('status', 'unknown')}")

    if status.get("status") == "error":
        print("\n⚠️ Service appears to be down!")
        print("💡 To start the services, run:")
        print(" python startup_script.py")
        print("\n Or start them manually:")
        print(" 1. Start Whisper HTTP service: python whisper_http_wrapper.py")
        print(" 2. Start MCP service: python mcp_speech_service.py")
        return

    print("✅ Services are running!")
    print(f" Active sessions: {status.get('active_sessions', 0)}")

    print("\n2️⃣ Testing audio file transcription...")

    # Look for available audio files
    test_files = [
        "audio.wav",
        "speech_test.wav",
        "test_audio.wav",
        "silent_test.wav"
    ]

    audio_file = None
    for file in test_files:
        if Path(file).exists():
            audio_file = file
            break

    if not audio_file:
        print("❌ No test audio files found!")
        print("💡 Create test audio files by running:")
        print(" python create_test_audio.py")
        print("\n Or provide your own audio file and update the test_files list.")
        return

    print(f"🎵 Using audio file: {audio_file}")

    try:
        # Test transcription
        print(" Sending transcription request...")
        result = await transcribe_audio_file(audio_file)

        print(f"\n📋 Transcription result:")
        print(f" Status: {result.get('status')}")
        print(f" Session ID: {result.get('session_id')}")

        if result.get("status") == "success":
            transcription = result.get("transcription", {})

            if isinstance(transcription, dict):
                # Expected shape: {"result": {"text", "processing_time", "model_info"}}
                text = transcription.get("result", {}).get("text", "No text found")
                processing_time = transcription.get("result", {}).get("processing_time", 0)
                model_info = transcription.get("result", {}).get("model_info", {})

                print(f"✅ Transcription successful!")
                print(f" Text: '{text}'")
                print(f" Processing time: {processing_time:.2f}s")
                print(f" Model: {model_info.get('model', 'unknown')}")
                print(f" Device: {model_info.get('device', 'unknown')}")

                # If it's synthetic audio, explain the result
                if "test" in audio_file.lower() or "speech" in audio_file.lower():
                    print(f"\n💡 Note: This was synthetic test audio, so the transcription")
                    print(f" result may not be meaningful. Try with real speech audio")
                    print(f" for better results.")
            else:
                print(f" Raw transcription: {transcription}")

        else:
            error_msg = result.get('error', 'Unknown error')
            print(f"❌ Transcription failed: {error_msg}")

            # Provide specific help for common errors
            if "Cannot connect" in error_msg and "8000" in error_msg:
                print("\n💡 The Whisper HTTP service (port 8000) is not running.")
                print(" Start it with: python whisper_http_wrapper.py")
            elif "Authentication" in error_msg:
                print("\n💡 Authentication issue with the MCP service.")
                print(" Check if the MCP service is running properly.")

    except FileNotFoundError:
        print(f"❌ Audio file '{audio_file}' not found.")
    except Exception as e:
        print(f"❌ Unexpected error during transcription: {e}")

    print(f"\n🏁 Test completed!")
|
| 107 |
+
|
| 108 |
+
def check_dependencies():
    """Check if required dependencies are available.

    Returns True when both aiohttp and soundfile import cleanly; otherwise
    prints what is missing (with an install hint) and returns False.
    """
    missing_deps = []

    # Probe each required package by attempting the import.
    for module_name in ("aiohttp", "soundfile"):
        try:
            __import__(module_name)
        except ImportError:
            missing_deps.append(module_name)

    if missing_deps:
        print("❌ Missing dependencies:")
        for dep in missing_deps:
            print(f" • {dep}")
        print(f"\n💡 Install them with: pip install {' '.join(missing_deps)}")
        return False

    return True
|
| 130 |
+
|
| 131 |
+
if __name__ == "__main__":
    # Bail out early if the environment is missing required packages.
    if not check_dependencies():
        sys.exit(1)

    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n👋 Test interrupted by user")
    except Exception as e:
        # Any other failure is reported and mapped to a non-zero exit code.
        print(f"\n❌ Test failed with error: {e}")
        sys.exit(1)
|
whisper_http_wrapper.py
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
HTTP API Wrapper for realtime-whisper-macbook
|
| 4 |
+
This adds HTTP endpoints to the existing whisper functionality
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import asyncio
|
| 8 |
+
import json
|
| 9 |
+
import logging
|
| 10 |
+
import os
|
| 11 |
+
import tempfile
|
| 12 |
+
import threading
|
| 13 |
+
import time
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
from typing import Dict, Any, Optional
|
| 16 |
+
|
| 17 |
+
import numpy as np
|
| 18 |
+
import soundfile as sf
|
| 19 |
+
import torch
|
| 20 |
+
import whisper
|
| 21 |
+
from aiohttp import web
|
| 22 |
+
import aiofiles
|
| 23 |
+
|
| 24 |
+
# Configure logging
|
| 25 |
+
logging.basicConfig(level=logging.INFO)
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
class WhisperHTTPService:
|
| 29 |
+
"""HTTP wrapper for Whisper transcription service"""
|
| 30 |
+
|
| 31 |
+
def __init__(self, model_name: str = "base", device: str = "auto"):
    """
    Initialize the Whisper HTTP service.

    Args:
        model_name: Whisper model to use (tiny, base, small, medium, large)
        device: Device to run on (cpu, cuda, mps, auto)
    """
    self.model_name = model_name

    # Resolve "auto" to a concrete device. (MPS is deliberately not
    # auto-selected — the original code had it commented out.)
    self.device = device if device != "auto" else (
        "cuda" if torch.cuda.is_available() else "cpu"
    )

    logger.info(f"Using device: {self.device}")

    # Load the Whisper model up front so requests don't pay the cost.
    logger.info(f"Loading Whisper model: {model_name}")
    self.model = whisper.load_model(model_name, device=self.device)
    logger.info("Whisper model loaded successfully")

    # Running statistics, reported by the /health endpoint.
    self.stats = {
        "requests_processed": 0,
        "total_audio_duration": 0.0,
        "average_processing_time": 0.0,
        "start_time": time.time(),
    }
|
| 66 |
+
|
| 67 |
+
def transcribe_audio_file(self, audio_file_path: str, **kwargs) -> Dict[str, Any]:
    """
    Transcribe audio file using Whisper.

    Args:
        audio_file_path: Path to audio file
        **kwargs: Additional Whisper parameters (language, task, temperature, ...)

    Returns:
        Dict with "success"; on success also "result", "processing_time",
        and "model_info"; on failure "error" and "model_info".
    """
    try:
        t0 = time.time()

        # Assemble Whisper decoding options, falling back to defaults.
        opts = {
            "language": kwargs.get("language"),  # None -> auto-detection
            "task": kwargs.get("task", "transcribe"),  # transcribe or translate
            "temperature": kwargs.get("temperature", 0.0),
            "best_of": kwargs.get("best_of", 5),
            "beam_size": kwargs.get("beam_size", 5),
            "patience": kwargs.get("patience", 1.0),
            "length_penalty": kwargs.get("length_penalty", 1.0),
            "suppress_tokens": kwargs.get("suppress_tokens", "-1"),
            "initial_prompt": kwargs.get("initial_prompt"),
            "condition_on_previous_text": kwargs.get("condition_on_previous_text", True),
            "fp16": kwargs.get("fp16", self.device == "cuda"),  # fp16 only on CUDA
            "compression_ratio_threshold": kwargs.get("compression_ratio_threshold", 2.4),
            "logprob_threshold": kwargs.get("logprob_threshold", -1.0),
            "no_speech_threshold": kwargs.get("no_speech_threshold", 0.6),
        }
        # Whisper rejects explicit None for some options — drop them.
        opts = {key: val for key, val in opts.items() if val is not None}

        result = self.model.transcribe(audio_file_path, **opts)

        processing_time = time.time() - t0

        # Update running statistics for /health.
        self.stats["requests_processed"] += 1
        if "segments" in result:
            # Audio duration approximated by the last segment's end time.
            audio_duration = max((seg["end"] for seg in result["segments"]), default=0)
            self.stats["total_audio_duration"] += audio_duration

        total_requests = self.stats["requests_processed"]
        self.stats["average_processing_time"] = (
            (self.stats["average_processing_time"] * (total_requests - 1) + processing_time)
            / total_requests
        )

        # Attach metadata to the raw Whisper result.
        result["processing_time"] = processing_time
        result["model"] = self.model_name
        result["device"] = self.device

        logger.info(f"Transcribed audio in {processing_time:.2f}s: '{result['text'][:100]}...'")

        return {
            "success": True,
            "result": result,
            "processing_time": processing_time,
            "model_info": {
                "model": self.model_name,
                "device": self.device
            }
        }

    except Exception as e:
        logger.error(f"Transcription error: {e}")
        return {
            "success": False,
            "error": str(e),
            "model_info": {
                "model": self.model_name,
                "device": self.device
            }
        }
|
| 146 |
+
|
| 147 |
+
async def handle_transcribe(self, request):
    """Handle transcription HTTP requests.

    Accepts multipart form data: an 'audio' part (required), plus either a
    JSON 'options' part or individual option fields. The audio is spilled
    to a temp file (Whisper wants a path), transcribed, and the result
    returned as JSON.
    """
    try:
        # Collect audio payload and options from the multipart body.
        reader = await request.multipart()
        audio_data = None
        options = {}

        async for part in reader:
            if part.name == 'audio':
                audio_data = await part.read()
            elif part.name == 'options':
                options_text = await part.text()
                try:
                    options = json.loads(options_text)
                except json.JSONDecodeError:
                    # Malformed options are silently ignored (best effort).
                    pass
            elif part.name in ['language', 'task', 'temperature', 'beam_size']:
                # Individual option parameters as separate form fields.
                options[part.name] = await part.text()

        if not audio_data:
            return web.Response(
                text=json.dumps({"error": "No audio data provided"}),
                status=400,
                content_type='application/json'
            )

        # Spill the upload to a temp file for Whisper.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
            temp_file.write(audio_data)
            temp_file_path = temp_file.name

        try:
            # Form fields arrive as strings; coerce numeric options.
            if 'temperature' in options:
                options['temperature'] = float(options['temperature'])
            if 'beam_size' in options:
                options['beam_size'] = int(options['beam_size'])

            result = self.transcribe_audio_file(temp_file_path, **options)

            return web.Response(
                text=json.dumps(result),
                content_type='application/json'
            )
        finally:
            # Always remove the temp file, even on failure.
            try:
                os.unlink(temp_file_path)
            except OSError:
                pass

    except Exception as e:
        logger.error(f"Request handling error: {e}")
        return web.Response(
            text=json.dumps({"error": f"Request processing failed: {str(e)}"}),
            status=500,
            content_type='application/json'
        )
|
| 209 |
+
|
| 210 |
+
async def handle_health(self, request):
    """Liveness endpoint: report model/device config, uptime, and counters."""
    # Everything the caller needs to judge service health in one payload.
    health_info = {
        "status": "healthy",
        "model": self.model_name,
        "device": self.device,
        "uptime_seconds": time.time() - self.stats["start_time"],
        # Copy so the snapshot can't be mutated by concurrent requests.
        "statistics": self.stats.copy(),
    }
    return web.Response(
        text=json.dumps(health_info),
        content_type='application/json',
    )
|
| 226 |
+
|
| 227 |
+
async def handle_models(self, request):
    """List the Whisper model sizes this service knows about, plus the active one."""
    payload = {
        "available_models": [
            "tiny", "base", "small", "medium",
            "large", "large-v2", "large-v3",
        ],
        "current_model": self.model_name,
        "device": self.device,
    }
    return web.Response(
        text=json.dumps(payload),
        content_type='application/json',
    )
|
| 239 |
+
|
| 240 |
+
def create_app(self) -> web.Application:
    """Build the aiohttp application for this service.

    Returns:
        web.Application: app exposing POST /transcribe, GET /health and
        GET /models, with permissive CORS headers added to every response.
    """
    app = web.Application(client_max_size=50*1024*1024)  # 50MB max file size

    # Add routes
    app.router.add_post('/transcribe', self.handle_transcribe)
    app.router.add_get('/health', self.handle_health)
    app.router.add_get('/models', self.handle_models)

    # Add CORS middleware.
    # FIX: new-style aiohttp middlewares must be marked with @web.middleware;
    # appending a bare (request, handler) coroutine makes aiohttp treat it as
    # a legacy (app, handler) middleware factory, which fails at request time.
    @web.middleware
    async def cors_middleware(request, handler):
        if request.method == 'OPTIONS':
            # Handle preflight requests without hitting a route handler.
            response = web.Response()
        else:
            response = await handler(request)

        response.headers['Access-Control-Allow-Origin'] = '*'
        response.headers['Access-Control-Allow-Methods'] = 'GET, POST, OPTIONS'
        response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization'
        return response

    app.middlewares.append(cors_middleware)

    return app
|
| 265 |
+
|
| 266 |
+
async def main():
    """Configure, start, and run the Whisper HTTP service until interrupted."""
    # Pull configuration from the environment, with local-dev defaults.
    bind_host = os.getenv('WHISPER_HOST', '127.0.0.1')
    bind_port = int(os.getenv('WHISPER_PORT', '8000'))
    whisper_model = os.getenv('WHISPER_MODEL', 'base')
    whisper_device = os.getenv('WHISPER_DEVICE', 'auto')

    # Build the service and its web application.
    logger.info("Initializing Whisper HTTP service...")
    service = WhisperHTTPService(model_name=whisper_model, device=whisper_device)
    app = service.create_app()

    logger.info(f"Starting Whisper HTTP service on {bind_host}:{bind_port}")
    logger.info(f"Model: {whisper_model}, Device: {whisper_device}")

    # Bring the aiohttp server up.
    runner = web.AppRunner(app)
    await runner.setup()
    site = web.TCPSite(runner, bind_host, bind_port)
    await site.start()

    logger.info("Whisper HTTP service is running!")
    logger.info(f"Endpoints available:")
    logger.info(f" POST http://{bind_host}:{bind_port}/transcribe - Transcribe audio")
    logger.info(f" GET http://{bind_host}:{bind_port}/health - Health check")
    logger.info(f" GET http://{bind_host}:{bind_port}/models - List models")

    try:
        # Park this coroutine so the server keeps serving.
        while True:
            await asyncio.sleep(1)
    except KeyboardInterrupt:
        logger.info("Shutting down Whisper HTTP service...")
    finally:
        # Always release sockets and pending handlers on the way out.
        await runner.cleanup()
|
| 302 |
+
|
| 303 |
+
if __name__ == '__main__':
    # Script entry point: run the async main() on a fresh event loop.
    asyncio.run(main())
|