Spaces:
Configuration error
Configuration error
| import io | |
| from fastrtc.utils import audio_to_int16 | |
| from pydub import AudioSegment | |
| import wave | |
| import torch | |
| import torchaudio | |
| import numpy as np | |
def audio_to_bytes(audio_tuple, sample_rate=24000) -> io.BytesIO:
    """Encode a ``(sample_rate, samples)`` audio tuple as an in-memory WAV file.

    Args:
        audio_tuple: Two-item tuple whose first item is the sample rate in Hz.
            The whole tuple is passed to ``audio_to_int16`` to obtain the
            16-bit samples that are written out.
        sample_rate: Unused. Kept only so existing callers that pass it keep
            working; the rate written to the WAV header always comes from
            ``audio_tuple``.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 that holds a mono, 16-bit PCM
        WAV file, with a ``name`` attribute of ``"audio.wav"``.
    """
    sr, _ = audio_tuple
    audio_int16 = audio_to_int16(audio_tuple)

    buffer = io.BytesIO()
    # wave only closes files it opened itself, so the BytesIO stays usable
    # after the ``with`` block finalises the header.
    with wave.open(buffer, "wb") as wf:
        wf.setnchannels(1)  # output is always declared as mono
        wf.setsampwidth(2)  # 2 bytes per sample == int16
        wf.setframerate(sr)
        wf.writeframes(audio_int16.tobytes())

    buffer.seek(0)
    # NOTE(review): presumably consumers use the name to infer the file
    # type (e.g. multipart uploads) -- confirm against callers.
    buffer.name = "audio.wav"
    return buffer
def resample_audio(
    audio_buffer: io.BytesIO,
    *,
    target_sample_rate: int = 24000,
    audio_format: str = "mp3",
) -> np.ndarray:
    """Decode an encoded audio buffer and resample it to mono float32.

    Args:
        audio_buffer: Seekable binary buffer holding the encoded audio. It is
            rewound to offset 0 before decoding.
        target_sample_rate: Sample rate in Hz of the returned signal.
        audio_format: Container/codec name passed to
            ``AudioSegment.from_file`` as ``format``.

    Returns:
        One-dimensional float32 NumPy array at ``target_sample_rate``, with
        integer samples scaled by the decoded sample width's full-scale value
        (nominally in ``[-1.0, 1.0)``).
    """
    audio_buffer.seek(0)
    audio_segment = AudioSegment.from_file(audio_buffer, format=audio_format)

    # Scale by the full-scale value of the actual sample width (in bytes)
    # instead of assuming 16-bit audio; for 16-bit this is still 2 ** 15.
    full_scale = np.float32(2 ** (8 * audio_segment.sample_width - 1))
    samples = (
        np.array(audio_segment.get_array_of_samples()).astype(np.float32)
        / full_scale
    )

    # Samples are interleaved per frame; average across channels to downmix
    # any multi-channel layout (not just stereo) to mono.
    channels = audio_segment.channels
    if channels > 1:
        samples = samples.reshape((-1, channels)).mean(axis=1)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Add a leading channel dimension: (time,) -> (1, time).
    audio_tensor = torch.from_numpy(samples).unsqueeze(0).to(device)

    resampler = torchaudio.transforms.Resample(
        orig_freq=audio_segment.frame_rate,
        new_freq=target_sample_rate,
    ).to(device)
    resampled_tensor = resampler(audio_tensor)

    # Drop the channel dimension and move back to host memory for NumPy.
    return resampled_tensor.squeeze(0).cpu().numpy()