|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
import wave |
|
|
from config import ( |
|
|
SUPPORTED_AUDIO_EXTENSIONS, |
|
|
AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES, |
|
|
MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES |
|
|
) |
|
|
|
|
|
def build_format_display_names_from_supported_extensions():
    """Build the lookup table from format code to human-readable name.

    Each entry of ``SUPPORTED_AUDIO_EXTENSIONS`` has its leading dots
    stripped to form the format code. The display name is the entry in
    ``AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES`` when one exists, otherwise the
    upper-cased format code. An ``"unknown"`` -> ``"Unknown"`` entry is
    always added last.

    Returns:
        dict: Mapping of format code to display name.
    """
    format_codes = (suffix.lstrip(".") for suffix in SUPPORTED_AUDIO_EXTENSIONS)
    display_names = {
        code: (
            AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES[code]
            if code in AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES
            else code.upper()
        )
        for code in format_codes
    }
    display_names["unknown"] = "Unknown"
    return display_names


# Computed once at import time; read by get_format_display_name().
FORMAT_DISPLAY_NAMES = build_format_display_names_from_supported_extensions()
|
|
|
|
|
def get_audio_file_extension(file_path):
    """Return the lower-cased extension of *file_path*, including the dot.

    Args:
        file_path: Path to inspect. Any falsy value (``None``, ``""``)
            is treated as "no path".

    Returns:
        The extension as produced by ``os.path.splitext`` converted to
        lower case (``""`` when the path has no extension), or ``None``
        when *file_path* is falsy.
    """
    if file_path:
        return os.path.splitext(file_path)[1].lower()
    return None
|
|
|
|
|
def is_supported_audio_extension(file_path):
    """Tell whether *file_path* ends in one of the supported extensions.

    Args:
        file_path: Path to inspect; may be ``None`` or empty.

    Returns:
        bool: ``True`` when the lower-cased extension of *file_path* is a
        member of ``SUPPORTED_AUDIO_EXTENSIONS``; ``False`` otherwise,
        including when no path was given.
    """
    suffix = get_audio_file_extension(file_path)
    return suffix is not None and suffix in SUPPORTED_AUDIO_EXTENSIONS
|
|
|
|
|
def format_file_size_for_display(size_bytes):
    """Render a byte count as a short human-readable string.

    Sizes below 1024 are shown as a plain byte count, sizes below
    1024 * 1024 as kilobytes with one decimal place, and anything larger
    as megabytes with two decimal places. Units are binary multiples
    (1 KB = 1024 bytes).

    Args:
        size_bytes: Size in bytes.

    Returns:
        str: For example ``"1 byte"``, ``"512 bytes"``, ``"1.5 KB"`` or
        ``"2.00 MB"``.
    """
    bytes_per_kb = 1024
    bytes_per_mb = bytes_per_kb * 1024

    if size_bytes < bytes_per_kb:
        # Use the singular unit for exactly one byte (previously "1 bytes").
        unit = "byte" if size_bytes == 1 else "bytes"
        return f"{size_bytes} {unit}"

    if size_bytes < bytes_per_mb:
        return f"{size_bytes / bytes_per_kb:.1f} KB"

    return f"{size_bytes / bytes_per_mb:.2f} MB"
|
|
|
|
|
def validate_file_size_for_voice_cloning(file_path):
    """Check that the file does not exceed the voice-clone size limit.

    Args:
        file_path: Path of the uploaded audio file; may be ``None`` or
            empty.

    Returns:
        tuple: ``(True, None)`` when the size is at most
        ``MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES``; otherwise
        ``(False, message)`` describing the missing path, the failure to
        read the size, or the exceeded limit.
    """
    if not file_path:
        return False, "No audio file provided."

    try:
        size_on_disk = os.path.getsize(file_path)
    except OSError as size_error:
        return False, f"Cannot read file size: {size_error}"

    if size_on_disk <= MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES:
        return True, None

    actual_text = format_file_size_for_display(size_on_disk)
    limit_text = format_file_size_for_display(MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES)
    return False, (
        f"Audio file size ({actual_text}) exceeds the maximum allowed size of {limit_text}. "
        "Please upload a smaller audio file."
    )
|
|
|
|
|
def validate_file_exists_and_readable(file_path):
    """Run basic filesystem sanity checks on an audio file path.

    The checks run in this order and stop at the first failure: a path
    was given, it exists, it is a regular file, its size can be read, it
    is not empty, it is at least 44 bytes long, and one byte can be read
    from it.

    Args:
        file_path: Path of the audio file; may be ``None`` or empty.

    Returns:
        tuple: ``(True, None)`` when every check passes, otherwise
        ``(False, message)`` for the first check that failed.
    """
    # NOTE(review): 44 is presumably the size of a canonical WAV header;
    # confirm before changing.
    smallest_plausible_size = 44

    if not file_path:
        return False, "No audio file provided."

    path_checks = (
        (os.path.exists, "Audio file does not exist."),
        (os.path.isfile, "The provided path is not a valid file."),
    )
    for passes, complaint in path_checks:
        if not passes(file_path):
            return False, complaint

    try:
        byte_count = os.path.getsize(file_path)
    except OSError as size_error:
        return False, f"Cannot read file size: {size_error}"

    if byte_count == 0:
        return False, "Audio file is empty (0 bytes)."
    if byte_count < smallest_plausible_size:
        return False, "Audio file is too small to be a valid audio file."

    # Reading a single byte proves the file can actually be opened and read.
    try:
        with open(file_path, "rb") as probe:
            probe.read(1)
    except OSError as read_error:  # IOError is an alias of OSError in Python 3
        return False, f"Audio file is not readable: {read_error}"

    return True, None
|
|
|
|
|
def detect_audio_format_from_header(file_path):
    """Guess an audio file's format from the first bytes of the file.

    Up to 32 bytes are read and compared against a fixed, ordered list of
    signatures; the first match wins. When no signature matches, the file
    extension is used as a fallback if it is listed in
    ``SUPPORTED_AUDIO_EXTENSIONS``.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        tuple: ``(format_code, error_message)`` where

        * ``(code, None)`` - a signature matched; ``code`` is one of
          ``"wav"``, ``"mp3"``, ``"flac"``, ``"ogg"``, ``"aiff"``,
          ``"m4a"`` or ``"webm"``;
        * ``(extension_without_dot, None)`` - nothing matched but the
          extension is supported;
        * ``("unknown", message)`` - nothing matched and the extension is
          not supported;
        * ``(None, message)`` - fewer than four bytes could be read, or
          reading/detection raised an exception.
    """
    try:
        with open(file_path, "rb") as stream:
            head = stream.read(32)

        if len(head) < 4:
            return None, "File is too small to determine audio format."

        # Slicing never raises on short input: a too-short slice simply
        # fails the equality test, so no further length guards are needed.
        magic = head[:4]
        box_type = head[4:8]
        form_type = head[8:12]

        mp3_frame_sync_prefixes = (
            b"\xff\xfb",
            b"\xff\xfa",
            b"\xff\xf3",
            b"\xff\xf2",
            b"\xff\xe0",
            b"\xff\xe2",
            b"\xff\xe3",
        )

        # Evaluated top to bottom; keep the order, the first hit is returned.
        signature_probes = (
            (magic == b"RIFF" and form_type == b"WAVE", "wav"),
            (head[:3] == b"ID3", "mp3"),
            (head[:2] in mp3_frame_sync_prefixes, "mp3"),
            (magic == b"fLaC", "flac"),
            (magic == b"OggS", "ogg"),
            (magic == b"FORM" and form_type in (b"AIFF", b"AIFC"), "aiff"),
            (box_type == b"ftyp", "m4a"),
            (magic == b"\x1aE\xdf\xa3", "webm"),
            (box_type in (b"mdat", b"moov", b"free", b"skip", b"wide"), "m4a"),
        )
        for matched, format_code in signature_probes:
            if matched:
                return format_code, None

        # No known signature: trust the extension if it is a supported one.
        suffix = get_audio_file_extension(file_path)
        if suffix and suffix in SUPPORTED_AUDIO_EXTENSIONS:
            return suffix.lstrip("."), None

        return "unknown", "Could not determine audio format from file header. The file may be corrupted or in an unsupported format."

    except OSError as io_error:  # IOError is an alias of OSError in Python 3
        return None, f"Error reading file header: {io_error}"
    except Exception as detection_error:
        return None, f"Unexpected error detecting audio format: {detection_error}"
|
|
|
|
|
def validate_wav_file_structure(file_path):
    """Open a WAV file with the ``wave`` module and sanity-check its header.

    The following limits are enforced, in this order: 1-16 channels,
    1-4 bytes per sample, 100-384000 Hz sample rate, at least one frame,
    and a duration between 0.1 and 60 seconds.

    Args:
        file_path: Path of the WAV file to inspect.

    Returns:
        tuple: ``(True, None)`` when the file passes every check,
        otherwise ``(False, message)`` describing the first violated
        limit or the error raised while opening or parsing the file.
    """
    try:
        with wave.open(file_path, "rb") as reader:
            channels, sample_width, rate, frame_count = reader.getparams()[:4]

        problem = None
        if channels < 1:
            problem = "WAV file has no audio channels."
        elif channels > 16:
            problem = f"WAV file has too many channels ({channels}). Maximum supported is 16."
        elif sample_width < 1:
            problem = "WAV file has invalid sample width (less than 1 byte)."
        elif sample_width > 4:
            problem = f"WAV file has unsupported sample width ({sample_width} bytes). Maximum supported is 4 bytes (32-bit)."
        elif rate < 100:
            problem = f"WAV file has invalid sample rate ({rate} Hz). Minimum supported is 100 Hz."
        elif rate > 384000:
            problem = f"WAV file has unsupported sample rate ({rate} Hz). Maximum supported is 384000 Hz."
        elif frame_count < 1:
            problem = "WAV file contains no audio frames."
        else:
            # Safe division: the rate is known to be at least 100 here.
            duration = frame_count / rate
            if duration < 0.1:
                problem = f"Audio is too short ({duration:.2f} seconds). Minimum duration is 0.1 seconds."
            elif duration > 60:
                problem = f"Audio is too long ({duration:.0f} seconds). Maximum duration is 1 minute."

        return problem is None, problem

    except wave.Error as wav_error:
        detail = str(wav_error)
        # Translate the wave module's messages that are matched below into
        # friendlier text; anything else is passed through verbatim.
        if "file does not start with RIFF id" in detail:
            friendly = "File has .wav extension but is not a valid WAV file. It may be a different audio format renamed to .wav."
        elif "unknown format" in detail.lower():
            friendly = "WAV file uses an unsupported audio encoding format."
        else:
            friendly = f"Invalid WAV file structure: {detail}"
        return False, friendly

    except EOFError:
        return False, "WAV file is truncated or corrupted (unexpected end of file)."

    except Exception as validation_error:
        return False, f"Error validating WAV file: {validation_error}"
|
|
|
|
|
def perform_comprehensive_audio_validation(file_path):
    """Run the full validation pipeline on an audio file.

    Steps, stopping at the first failure: filesystem checks, extension
    check against ``SUPPORTED_AUDIO_EXTENSIONS``, header-based format
    detection, and - only when the detected format is ``"wav"`` - WAV
    structure validation.

    Args:
        file_path: Path of the audio file; may be ``None`` or empty.

    Returns:
        tuple: ``(is_valid, is_wav_format, detected_format, error_message)``.
        On success ``error_message`` is ``None``. On failure before or
        during format detection the middle two items are ``False`` and
        ``None``; on a WAV structure failure they are ``True`` and
        ``"wav"``.
    """
    readable, readable_problem = validate_file_exists_and_readable(file_path)
    if not readable:
        return False, False, None, readable_problem

    if not is_supported_audio_extension(file_path):
        extension = get_audio_file_extension(file_path)
        supported = ", ".join(SUPPORTED_AUDIO_EXTENSIONS)
        return False, False, None, f"Unsupported file format '{extension}'. Supported formats are: {supported}"

    detected_format, detection_problem = detect_audio_format_from_header(file_path)
    if detected_format is None:
        return False, False, None, detection_problem

    # Only WAV files get a structural check; every other format is
    # accepted once its header or extension has been recognised.
    if detected_format != "wav":
        return True, False, detected_format, None

    wav_ok, wav_problem = validate_wav_file_structure(file_path)
    if wav_ok:
        return True, True, detected_format, None
    return False, True, "wav", wav_problem
|
|
|
|
|
def perform_voice_clone_file_validation(file_path):
    """Validate an audio file intended for voice cloning.

    The voice-clone size limit is checked first; only files that pass it
    go through the comprehensive audio validation.

    Args:
        file_path: Path of the audio file; may be ``None`` or empty.

    Returns:
        tuple: ``(is_valid, is_wav_format, detected_format, error_message)``
        - the result of ``perform_comprehensive_audio_validation`` when
        the size check passes, otherwise ``(False, False, None, message)``
        carrying the size-check error.
    """
    size_ok, size_problem = validate_file_size_for_voice_cloning(file_path)
    if size_ok:
        return perform_comprehensive_audio_validation(file_path)
    return False, False, None, size_problem
|
|
|
|
|
def get_format_display_name(format_code):
    """Return the human-readable name for an audio format code.

    Args:
        format_code: Format code such as ``"wav"``, or ``None``.

    Returns:
        str: ``"Unknown"`` for ``None``; the entry from
        ``FORMAT_DISPLAY_NAMES`` when the code is listed there; otherwise
        the upper-cased code itself.
    """
    if format_code is None:
        return "Unknown"

    return (
        FORMAT_DISPLAY_NAMES[format_code]
        if format_code in FORMAT_DISPLAY_NAMES
        else format_code.upper()
    )