Spaces:
Build error
Build error
| import subprocess | |
| import sys | |
| import os | |
| from pydub import AudioSegment | |
| import io | |
| import wave | |
def process_audio(wav_file, model_name, lang):
    """
    Run the ``./main`` binary on a WAV file and return its cleaned text output.

    The audio is loaded with pydub, resampled to 16 kHz when its frame rate
    differs, written to a private temporary WAV file, and passed to
    ``./main -m ggml-<model_name>.bin -f <tmp.wav> -l <lang> -np -nt``.

    :param wav_file: Path to the WAV file
    :param model_name: Name of the model to use (resolved to ``ggml-<model_name>.bin``
        relative to the current working directory)
    :param lang: Language value passed to the binary's ``-l`` option
    :return: The binary's stdout, stripped, with ``[BLANK_AUDIO]`` markers removed
    :raises FileNotFoundError: if the model file or the WAV file does not exist
    :raises RuntimeError: if the binary exits with a non-zero status or writes
        anything to stderr
    """
    # Function-scope import so this block stays self-contained.
    import tempfile

    model = f"ggml-{model_name}.bin"

    # Fail early with actionable messages before touching the audio.
    if not os.path.exists(model):
        raise FileNotFoundError(f"Model file not found: {model} \n\nDownload a model with this command:\n\n> bash ./models/download-ggml-model.sh {model_name}\n\n")
    if not os.path.exists(wav_file):
        raise FileNotFoundError(f"WAV file not found: {wav_file}")

    # Load and resample the audio to 16 kHz if necessary.
    audio = AudioSegment.from_wav(wav_file)
    if audio.frame_rate != 16000:
        print(f"Resampling {wav_file} to 16 kHz...")
        audio = audio.set_frame_rate(16000)

    # Encode once into memory; the bytes are then written to disk verbatim.
    audio_buffer = io.BytesIO()
    audio.export(audio_buffer, format="wav")

    # A per-call temporary directory avoids collisions between concurrent
    # calls (the old fixed /tmp path was shared) and is removed automatically.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_wav_path = os.path.join(temp_dir, "temp_audio.wav")
        with open(temp_wav_path, "wb") as f:
            f.write(audio_buffer.getvalue())

        # Argument list + no shell: paths and the language value are passed
        # as-is and cannot be interpreted as shell syntax.
        command = ["./main", "-m", model, "-f", temp_wav_path, "-l", lang, "-np", "-nt"]
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )

    error_text = result.stderr.decode("utf-8", errors="replace")
    # Any stderr output is still treated as a failure (as before); in
    # addition, a non-zero exit status is no longer silently ignored.
    if result.returncode != 0 or error_text:
        detail = error_text or f"process exited with status {result.returncode}"
        raise RuntimeError(f"Error processing audio: {detail}")

    # Process and return the output string.
    decoded_str = result.stdout.decode("utf-8").strip()
    return decoded_str.replace("[BLANK_AUDIO]", "").strip()