Spaces:
Sleeping
Sleeping
| from faster_whisper import WhisperModel | |
| import numpy as np | |
| import scipy.signal | |
| import soundfile as sf | |
# Identifier of the Whisper checkpoint handed to faster-whisper.
model_size = "base.en"

# One shared model instance, built once when this module is imported and
# reused by every transcription call. Runs on the CPU with float32 compute.
model = WhisperModel(
    model_size,
    device="cpu",
    compute_type="float32",
)
def process_audio(audio_file, target_rate=16000):
    """Convert a ``(sample_rate, samples)`` pair to mono float32 audio.

    The samples are down-mixed to one channel, scaled to floating point and
    resampled to ``target_rate``.

    Args:
        audio_file: Two-item sequence ``(sample_rate, audio_data)``.
            ``audio_data`` is a 1-D array of samples or a 2-D array whose
            second axis holds the channels (the channels are averaged).
            Signed-integer PCM is scaled by the full-scale value of its
            dtype (32768 for int16); floating-point input is passed through
            unscaled -- assumes it is already in [-1.0, 1.0], TODO confirm
            against the caller.
        target_rate: Output sample rate in Hz. Defaults to 16000.

    Returns:
        1-D ``float32`` NumPy array sampled at ``target_rate``.
    """
    sample_rate, audio_data = audio_file
    audio_data = np.asarray(audio_data)

    # Debug output. ndarray.max() reduces over every axis; the builtin max()
    # raises ValueError on 2-D (multi-channel) input because it tries to
    # compare whole rows with each other.
    print(sample_rate)
    print(audio_data.max())

    # Pick the normalisation factor from the ORIGINAL dtype: averaging the
    # channels below promotes integer data to float64 and would hide it.
    source_dtype = audio_data.dtype
    if np.issubdtype(source_dtype, np.floating):
        full_scale = 1.0
    elif np.issubdtype(source_dtype, np.signedinteger):
        full_scale = float(np.iinfo(source_dtype).max) + 1.0
    else:
        # Unknown sample format: keep the historical int16 scaling.
        full_scale = 32768.0

    # Mix all channels by averaging them. This also flattens a
    # single-channel (frames, 1) array to 1-D.
    if audio_data.ndim > 1:
        audio_data = audio_data.mean(axis=1)

    # Normalise audio data.
    mono = audio_data.astype(np.float32) / full_scale

    # Already at the requested rate: skip the FFT round-trip.
    if sample_rate == target_rate:
        return mono

    target_len = int(len(mono) * target_rate / sample_rate)
    resampled = scipy.signal.resample(mono, target_len)
    # Guarantee float32 regardless of what the FFT backend returned.
    return resampled.astype(np.float32, copy=False)
def transcribe(audio):
    """Transcribe an audio clip to English text with the module-level model.

    Args:
        audio: ``(sample_rate, samples)`` pair as accepted by
            ``process_audio``, or ``None`` when no audio was supplied.

    Returns:
        The text of all recognised segments concatenated without a
        separator, or an empty string when ``audio`` is ``None``.
    """
    # Nothing to transcribe; avoid a TypeError from unpacking None.
    if audio is None:
        return ""

    # The second return value (transcription info) is not used here.
    segments, _info = model.transcribe(
        process_audio(audio),
        beam_size=5,
        language='en',
    )
    return "".join(segment.text for segment in segments)