Spaces:
Sleeping
Sleeping
| # | |
| # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org> | |
| # SPDX-License-Identifier: Apache-2.0 | |
| # | |
| import os | |
| import time | |
| import tempfile | |
| import numpy as np | |
| import scipy.io.wavfile | |
| from ..core.state import temporary_files_registry, temporary_files_lock | |
| from ..core.memory import trigger_background_cleanup_check | |
def convert_audio_data_to_pcm_int16(audio_data):
    """Convert a NumPy array of audio samples to 16-bit signed PCM.

    Args:
        audio_data: NumPy array of samples. Floating-point input is treated
            as normalized to [-1.0, 1.0]; integer input is rescaled according
            to its width.

    Returns:
        A NumPy array of dtype ``int16``. Input that is already ``int16`` is
        returned unchanged (same object, no copy).
    """
    sample_dtype = audio_data.dtype

    # Any floating-point width is treated as normalized audio, not only
    # float32/float64, so float16 input is scaled instead of truncated to 0.
    if np.issubdtype(sample_dtype, np.floating):
        # Half precision cannot represent 32767 exactly (it rounds to 32768,
        # which overflows int16), so those samples are scaled in float32.
        if sample_dtype.itemsize < 4:
            audio_data = audio_data.astype(np.float32)
        audio_data_clipped = np.clip(audio_data, -1.0, 1.0)
        return (audio_data_clipped * 32767).astype(np.int16)

    if sample_dtype == np.int16:
        return audio_data

    if sample_dtype == np.int32:
        # Keep the 16 most significant bits.
        return (audio_data >> 16).astype(np.int16)

    if sample_dtype == np.int64:
        # Keep the 16 most significant bits.
        return (audio_data >> 48).astype(np.int16)

    if sample_dtype == np.uint8:
        # Unsigned 8-bit audio is centred on 128; re-centre on 0, then widen.
        return ((audio_data.astype(np.int16) - 128) * 256).astype(np.int16)

    # Any other dtype is cast directly, as before.
    return audio_data.astype(np.int16)
def convert_stereo_to_mono(audio_data):
    """Down-mix a two-dimensional audio array to a single channel.

    Arrays that are not two-dimensional are returned unchanged. For a
    two-dimensional array the longer axis is taken to be the sample axis;
    the channels are then averaged and cast back to the input dtype.

    Args:
        audio_data: NumPy array of audio samples.

    Returns:
        A one-dimensional array for two-dimensional input, otherwise the
        input array itself.
    """
    if len(audio_data.shape) != 2:
        return audio_data

    row_count, column_count = audio_data.shape
    # Orient the data as (channels, samples): the longer axis holds samples.
    channel_major = audio_data.T if row_count > column_count else audio_data

    if channel_major.shape[0] <= 1:
        # Only one channel is present, so there is nothing to average.
        return channel_major[0]

    averaged = np.mean(channel_major, axis=0)
    return averaged.astype(channel_major.dtype)
def register_temporary_file(file_path):
    """Record a temporary file in the shared registry and request a cleanup check.

    The path is stored with the current time as its value, under the
    registry lock.

    Args:
        file_path: Path of the temporary file to track.
    """
    with temporary_files_lock:
        registered_at = time.time()
        temporary_files_registry[file_path] = registered_at
    # NOTE(review): the original indentation was lost in extraction; the
    # cleanup check is assumed to run after the lock is released - confirm.
    trigger_background_cleanup_check()
def convert_wav_file_to_pcm_format(input_path):
    """Rewrite a WAV file as mono 16-bit PCM in a new temporary file.

    The input is read with ``scipy.io.wavfile``, multi-dimensional data is
    down-mixed, the samples are converted to ``int16`` and written to a
    freshly created temporary file, which is then registered for cleanup.

    Args:
        input_path: Path of the WAV file to convert.

    Returns:
        ``(output_path, None)`` on success, or ``(None, error_message)`` if
        any step raises.
    """
    output_path = None
    try:
        sample_rate, audio_data = scipy.io.wavfile.read(input_path)
        if len(audio_data.shape) > 1:
            audio_data = convert_stereo_to_mono(audio_data)
        audio_data_pcm = convert_audio_data_to_pcm_int16(audio_data)

        # Only the unique name is needed; close the handle straight away so
        # no descriptor leaks and the path can be reopened on every platform.
        with tempfile.NamedTemporaryFile(suffix="_pcm_converted.wav", delete=False) as output_file:
            output_path = output_file.name

        scipy.io.wavfile.write(output_path, sample_rate, audio_data_pcm)
        register_temporary_file(output_path)
        return output_path, None
    except Exception as conversion_error:
        if output_path is not None:
            # Best-effort removal of the partially written file, which would
            # otherwise be left on disk.
            try:
                os.remove(output_path)
            except OSError:
                pass
        return None, f"Failed to convert WAV to PCM format: {str(conversion_error)}"
def convert_audio_using_pydub(input_path, target_sample_rate=None):
    """Convert an audio file to a mono 16-bit WAV using pydub.

    Args:
        input_path: Path of the audio file to convert.
        target_sample_rate: Optional frame rate to resample to; when ``None``
            the frame rate is left as loaded.

    Returns:
        ``(output_path, None)`` on success. On failure ``(None, reason)``,
        where ``reason`` is ``"pydub_library_not_available"`` if the import
        fails, ``"ffmpeg_not_available"`` if the error text mentions ffmpeg
        or ffprobe, or a descriptive message otherwise.
    """
    output_path = None
    try:
        from pydub import AudioSegment

        audio_segment = AudioSegment.from_file(input_path)
        audio_segment = audio_segment.set_channels(1)
        audio_segment = audio_segment.set_sample_width(2)
        if target_sample_rate is not None:
            audio_segment = audio_segment.set_frame_rate(target_sample_rate)

        # Only the unique name is needed; close the handle straight away so
        # no descriptor leaks and the path can be reopened on every platform.
        with tempfile.NamedTemporaryFile(suffix="_pydub_converted.wav", delete=False) as output_file:
            output_path = output_file.name

        # export() hands back the file object it wrote to; close it rather
        # than leaving that to garbage collection.
        exported_handle = audio_segment.export(output_path, format="wav")
        if hasattr(exported_handle, "close"):
            exported_handle.close()

        register_temporary_file(output_path)
        return output_path, None
    except ImportError:
        return None, "pydub_library_not_available"
    except Exception as conversion_error:
        if output_path is not None:
            # Best-effort removal of the partially written file, which would
            # otherwise be left on disk.
            try:
                os.remove(output_path)
            except OSError:
                pass
        error_message = str(conversion_error)
        lowered_message = error_message.lower()
        if "ffmpeg" in lowered_message or "ffprobe" in lowered_message:
            return None, "ffmpeg_not_available"
        return None, f"Failed to convert audio using pydub: {error_message}"
def convert_audio_using_soundfile(input_path):
    """Convert an audio file to a mono 16-bit WAV using the soundfile library.

    The file is read as ``float32``; multi-dimensional data is averaged
    along axis 1, converted to ``int16`` and written to a new temporary WAV
    file, which is then registered for cleanup.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        ``(output_path, None)`` on success. On failure ``(None, reason)``,
        where ``reason`` is ``"soundfile_library_not_available"`` if the
        import fails, or a descriptive message otherwise.
    """
    output_path = None
    try:
        import soundfile

        audio_data, sample_rate = soundfile.read(input_path, dtype='float32')
        if len(audio_data.shape) > 1:
            audio_data = np.mean(audio_data, axis=1)
        audio_data_pcm = convert_audio_data_to_pcm_int16(audio_data)

        # Only the unique name is needed; close the handle straight away so
        # no descriptor leaks and the path can be reopened on every platform.
        with tempfile.NamedTemporaryFile(suffix="_soundfile_converted.wav", delete=False) as output_file:
            output_path = output_file.name

        scipy.io.wavfile.write(output_path, sample_rate, audio_data_pcm)
        register_temporary_file(output_path)
        return output_path, None
    except ImportError:
        return None, "soundfile_library_not_available"
    except Exception as conversion_error:
        if output_path is not None:
            # Best-effort removal of the partially written file, which would
            # otherwise be left on disk.
            try:
                os.remove(output_path)
            except OSError:
                pass
        return None, f"Failed to convert audio using soundfile: {str(conversion_error)}"
def convert_audio_using_librosa(input_path):
    """Convert an audio file to a mono 16-bit WAV using librosa.

    The file is loaded as mono with ``sr=None``, converted to ``int16`` and
    written to a new temporary WAV file, which is then registered for
    cleanup.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        ``(output_path, None)`` on success. On failure ``(None, reason)``,
        where ``reason`` is ``"librosa_library_not_available"`` if the
        import fails, or a descriptive message otherwise.
    """
    output_path = None
    try:
        import librosa

        audio_data, sample_rate = librosa.load(input_path, sr=None, mono=True)
        audio_data_pcm = convert_audio_data_to_pcm_int16(audio_data)

        # Only the unique name is needed; close the handle straight away so
        # no descriptor leaks and the path can be reopened on every platform.
        with tempfile.NamedTemporaryFile(suffix="_librosa_converted.wav", delete=False) as output_file:
            output_path = output_file.name

        scipy.io.wavfile.write(output_path, sample_rate, audio_data_pcm)
        register_temporary_file(output_path)
        return output_path, None
    except ImportError:
        return None, "librosa_library_not_available"
    except Exception as conversion_error:
        if output_path is not None:
            # Best-effort removal of the partially written file, which would
            # otherwise be left on disk.
            try:
                os.remove(output_path)
            except OSError:
                pass
        return None, f"Failed to convert audio using librosa: {str(conversion_error)}"
def convert_non_wav_audio_to_wav(input_path):
    """Convert a non-WAV audio file to WAV, trying each backend in turn.

    Backends are attempted in the order pydub, soundfile, librosa; the first
    one that yields a path wins.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        ``(converted_path, None, method_name)`` on success, or
        ``(None, error_message, None)`` when every backend fails.
    """
    # (method name, converter, error values meaning "backend not usable")
    conversion_attempts = (
        ("pydub", convert_audio_using_pydub,
         ("pydub_library_not_available", "ffmpeg_not_available")),
        ("soundfile", convert_audio_using_soundfile,
         ("soundfile_library_not_available",)),
        ("librosa", convert_audio_using_librosa,
         ("librosa_library_not_available",)),
    )

    failures = []
    for method_name, converter, unavailable_markers in conversion_attempts:
        candidate_path, failure = converter(input_path)
        if candidate_path is not None:
            return candidate_path, None, method_name
        failures.append((method_name, failure, failure in unavailable_markers))

    if all(is_unavailable for _, _, is_unavailable in failures):
        return None, "No audio conversion library is available on the server. Please upload a WAV file directly.", None

    # Only backends that actually ran and failed contribute to the report.
    all_errors = [
        f"{method_name}: {failure}"
        for method_name, failure, is_unavailable in failures
        if not is_unavailable and failure
    ]
    if all_errors:
        combined_error = " | ".join(all_errors)
        return None, f"Audio conversion failed with all available methods. {combined_error}", None

    return None, "Audio conversion failed. Please try uploading a different audio file or use WAV format.", None
def prepare_audio_file_for_voice_cloning(input_path):
    """Validate an audio file and convert it to PCM WAV.

    WAV input is converted to PCM directly. Other formats are first converted
    to WAV and then passed through the PCM step; if that second step fails,
    the intermediate WAV is returned instead.

    Args:
        input_path: Path of the uploaded audio file.

    Returns:
        A 4-tuple ``(path, error, was_converted, detected_format)``. ``path``
        is ``None`` on failure, in which case ``error`` carries the message.
        ``was_converted`` is ``True`` only on the non-WAV path.
    """
    from .validator import perform_comprehensive_audio_validation, get_format_display_name

    validation_result = perform_comprehensive_audio_validation(input_path)
    is_valid, is_wav_format, detected_format, validation_error = validation_result
    if not is_valid:
        return None, validation_error, False, detected_format

    if is_wav_format:
        pcm_path, pcm_failure = convert_wav_file_to_pcm_format(input_path)
        if pcm_path is None:
            return None, pcm_failure, False, 'wav'
        return pcm_path, None, False, 'wav'

    # The display name is not used below; the call is kept so behaviour
    # matches the previous implementation exactly.
    get_format_display_name(detected_format)

    intermediate_path, conversion_error, _ = convert_non_wav_audio_to_wav(input_path)
    if intermediate_path is None:
        return None, conversion_error, True, detected_format

    # Normalise the intermediate WAV to PCM; fall back to the intermediate
    # file when that step fails.
    normalized_path, _ = convert_wav_file_to_pcm_format(intermediate_path)
    chosen_path = intermediate_path if normalized_path is None else normalized_path
    return chosen_path, None, True, detected_format
def convert_audio_to_pcm_wav(input_path):
    """Return a PCM WAV version of an audio file, or the original path on failure.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        The converted file's path when preparation succeeds; otherwise
        ``input_path`` unchanged, after printing a warning if an error
        message was reported.
    """
    prepared_path, failure_reason, _, _ = prepare_audio_file_for_voice_cloning(input_path)
    if prepared_path is None:
        if failure_reason:
            print(f"Warning: Audio conversion failed - {failure_reason}")
        return input_path
    return prepared_path