File size: 9,294 Bytes
dae9fa5 02b5975 dae9fa5 02b5975 dae9fa5 02b5975 dae9fa5 02b5975 dae9fa5 02b5975 dae9fa5 02b5975 dae9fa5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 |
#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#
import os
import wave
from config import (
SUPPORTED_AUDIO_EXTENSIONS,
AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES,
MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES
)
def build_format_display_names_from_supported_extensions():
    """Build the mapping from audio format code to human-readable name.

    Every entry of ``SUPPORTED_AUDIO_EXTENSIONS`` has its leading dot(s)
    stripped to obtain the format code. The display name is taken from
    ``AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES`` when the code is listed there,
    otherwise it is the upper-cased code. An ``"unknown"`` entry mapping to
    ``"Unknown"`` is always set last.

    Returns:
        dict: format code -> display name.
    """
    format_display_names = {}
    for extension in SUPPORTED_AUDIO_EXTENSIONS:
        format_code = extension.lstrip(".")
        if format_code in AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES:
            display_name = AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES[format_code]
        else:
            display_name = format_code.upper()
        format_display_names[format_code] = display_name
    # Assigned after the loop so it overrides any extension-derived entry.
    format_display_names["unknown"] = "Unknown"
    return format_display_names
# Module-level lookup table (format code -> display name), built when the
# module is imported; read by get_format_display_name().
FORMAT_DISPLAY_NAMES = build_format_display_names_from_supported_extensions()
def get_audio_file_extension(file_path):
    """Return the lower-cased extension of *file_path*, including the dot.

    Args:
        file_path: Path to inspect; may be ``None`` or empty.

    Returns:
        ``None`` when *file_path* is falsy; otherwise the lower-cased result
        of ``os.path.splitext`` (an empty string when the path has no
        extension).
    """
    if not file_path:
        return None
    _, extension = os.path.splitext(file_path)
    return extension.lower()
def is_supported_audio_extension(file_path):
    """Tell whether the extension of *file_path* is a supported audio one.

    Args:
        file_path: Path to inspect; may be ``None`` or empty.

    Returns:
        bool: ``False`` when no path is given, otherwise whether the
        lower-cased extension is a member of ``SUPPORTED_AUDIO_EXTENSIONS``.
    """
    extension = get_audio_file_extension(file_path)
    if extension is None:
        return False
    return extension in SUPPORTED_AUDIO_EXTENSIONS
def format_file_size_for_display(size_bytes):
    """Format a byte count as a short human-readable string.

    Args:
        size_bytes: Size in bytes.

    Returns:
        str: ``"<n> bytes"`` below 1024, ``"<x.x> KB"`` below 1024 * 1024,
        and ``"<x.xx> MB"`` otherwise (binary multiples of 1024).
    """
    bytes_per_kilobyte = 1024
    bytes_per_megabyte = bytes_per_kilobyte * 1024
    if size_bytes < bytes_per_kilobyte:
        return f"{size_bytes} bytes"
    if size_bytes < bytes_per_megabyte:
        return f"{size_bytes / bytes_per_kilobyte:.1f} KB"
    return f"{size_bytes / bytes_per_megabyte:.2f} MB"
def validate_file_size_for_voice_cloning(file_path):
    """Check that a file does not exceed the voice-clone size limit.

    Args:
        file_path: Path of the uploaded audio file; may be ``None`` or empty.

    Returns:
        tuple: ``(True, None)`` when the size is at most
        ``MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES``; otherwise
        ``(False, message)`` where *message* explains the failure (no path,
        size could not be read, or file too large).
    """
    if not file_path:
        return False, "No audio file provided."
    try:
        file_size = os.path.getsize(file_path)
    except OSError as size_error:
        return False, f"Cannot read file size: {size_error}"
    if file_size > MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES:
        max_size_display = format_file_size_for_display(MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES)
        actual_size_display = format_file_size_for_display(file_size)
        return False, (
            f"Audio file size ({actual_size_display}) exceeds the maximum allowed size of "
            f"{max_size_display}. Please upload a smaller audio file."
        )
    return True, None
def validate_file_exists_and_readable(file_path):
    """Check that *file_path* names a non-trivial, readable regular file.

    The checks run in order: a path was given, it exists, it is a regular
    file, its size can be read, it is not empty, it is at least 44 bytes,
    and one byte can actually be read from it.

    Args:
        file_path: Path of the audio file; may be ``None`` or empty.

    Returns:
        tuple: ``(True, None)`` when every check passes, otherwise
        ``(False, message)`` describing the first failing check.
    """
    if not file_path:
        return False, "No audio file provided."
    if not os.path.exists(file_path):
        return False, "Audio file does not exist."
    if not os.path.isfile(file_path):
        return False, "The provided path is not a valid file."
    try:
        file_size = os.path.getsize(file_path)
    except OSError as size_error:
        return False, f"Cannot read file size: {size_error}"
    if file_size == 0:
        return False, "Audio file is empty (0 bytes)."
    if file_size < 44:
        return False, "Audio file is too small to be a valid audio file."
    # Reading a single byte proves the file can be opened with the current
    # permissions without loading its contents.
    try:
        with open(file_path, "rb") as test_file:
            test_file.read(1)
    except OSError as read_error:
        return False, f"Audio file is not readable: {read_error}"
    return True, None
def detect_audio_format_from_header(file_path):
    """Guess the audio container format from the first bytes of a file.

    Up to 32 bytes are read and compared, in this order, against:
    ``RIFF....WAVE`` (wav), ``ID3`` (mp3), a fixed list of two-byte MP3 sync
    prefixes (mp3), ``fLaC`` (flac), ``OggS`` (ogg), ``FORM....AIFF/AIFC``
    (aiff), ``ftyp`` at offset 4 (m4a), ``1A 45 DF A3`` (webm) and a set of
    other box names at offset 4 (m4a). When nothing matches, the file
    extension is used if it is in ``SUPPORTED_AUDIO_EXTENSIONS``.

    Args:
        file_path: Path of the audio file to inspect.

    Returns:
        tuple: ``(format_code, None)`` on success;
        ``("unknown", message)`` when neither header nor extension is
        recognised; ``(None, message)`` when the file has fewer than 4 bytes
        or could not be read.
    """
    mp3_sync_bytes = (
        b"\xff\xfb",
        b"\xff\xfa",
        b"\xff\xf3",
        b"\xff\xf2",
        b"\xff\xe0",
        b"\xff\xe2",
        b"\xff\xe3",
    )
    mp4_box_types = (b"mdat", b"moov", b"free", b"skip", b"wide")
    try:
        with open(file_path, "rb") as audio_file:
            header_bytes = audio_file.read(32)
        if len(header_bytes) < 4:
            return None, "File is too small to determine audio format."

        # Slices of a short header are simply shorter, so the equality and
        # membership tests below fail safely without explicit length guards.
        magic = header_bytes[:4]
        box_type = header_bytes[4:8]
        form_type = header_bytes[8:12]

        if magic == b"RIFF" and form_type == b"WAVE":
            return "wav", None
        if header_bytes[:3] == b"ID3":
            return "mp3", None
        if header_bytes[:2] in mp3_sync_bytes:
            return "mp3", None
        if magic == b"fLaC":
            return "flac", None
        if magic == b"OggS":
            return "ogg", None
        if magic == b"FORM" and form_type in (b"AIFF", b"AIFC"):
            return "aiff", None
        if box_type == b"ftyp":
            return "m4a", None
        if magic == b"\x1aE\xdf\xa3":
            return "webm", None
        if box_type in mp4_box_types:
            return "m4a", None

        # No signature matched: trust the extension if it is a supported one.
        file_extension = get_audio_file_extension(file_path)
        if file_extension and file_extension in SUPPORTED_AUDIO_EXTENSIONS:
            return file_extension.lstrip("."), None
        return "unknown", "Could not determine audio format from file header. The file may be corrupted or in an unsupported format."
    except OSError as io_error:
        return None, f"Error reading file header: {io_error}"
    except Exception as detection_error:
        # Callers expect a (format, error) tuple rather than an exception.
        return None, f"Unexpected error detecting audio format: {detection_error}"
def validate_wav_file_structure(file_path):
    """Validate a WAV file's header parameters and duration.

    The file is opened with the standard ``wave`` module and its channel
    count (1-16), sample width (1-4 bytes), sample rate (100-384000 Hz),
    frame count (at least 1) and duration (0.1-60 seconds) are checked in
    that order.

    Args:
        file_path: Path of the WAV file.

    Returns:
        tuple: ``(True, None)`` when every check passes, otherwise
        ``(False, message)`` describing the first failing check or the
        error raised while parsing the file.
    """
    try:
        with wave.open(file_path, "rb") as wav_file:
            number_of_channels = wav_file.getnchannels()
            sample_width_bytes = wav_file.getsampwidth()
            sample_rate = wav_file.getframerate()
            number_of_frames = wav_file.getnframes()

        if number_of_channels < 1:
            return False, "WAV file has no audio channels."
        if number_of_channels > 16:
            return False, f"WAV file has too many channels ({number_of_channels}). Maximum supported is 16."
        if sample_width_bytes < 1:
            return False, "WAV file has invalid sample width (less than 1 byte)."
        if sample_width_bytes > 4:
            return False, f"WAV file has unsupported sample width ({sample_width_bytes} bytes). Maximum supported is 4 bytes (32-bit)."
        if sample_rate < 100:
            return False, f"WAV file has invalid sample rate ({sample_rate} Hz). Minimum supported is 100 Hz."
        if sample_rate > 384000:
            return False, f"WAV file has unsupported sample rate ({sample_rate} Hz). Maximum supported is 384000 Hz."
        if number_of_frames < 1:
            return False, "WAV file contains no audio frames."

        # sample_rate is at least 100 here, so the division is safe.
        audio_duration_seconds = number_of_frames / sample_rate
        if audio_duration_seconds < 0.1:
            return False, f"Audio is too short ({audio_duration_seconds:.2f} seconds). Minimum duration is 0.1 seconds."
        if audio_duration_seconds > 60:
            return False, f"Audio is too long ({audio_duration_seconds:.0f} seconds). Maximum duration is 1 minute."
        return True, None
    except wave.Error as wav_error:
        error_message = str(wav_error)
        # Map the wave module's own messages to friendlier explanations.
        if "file does not start with RIFF id" in error_message:
            return False, "File has .wav extension but is not a valid WAV file. It may be a different audio format renamed to .wav."
        if "unknown format" in error_message.lower():
            return False, "WAV file uses an unsupported audio encoding format."
        return False, f"Invalid WAV file structure: {error_message}"
    except EOFError:
        return False, "WAV file is truncated or corrupted (unexpected end of file)."
    except Exception as validation_error:
        # Callers expect a (valid, error) tuple rather than an exception.
        return False, f"Error validating WAV file: {validation_error}"
def perform_comprehensive_audio_validation(file_path):
    """Run existence, extension, header and (for WAV) structure checks.

    Args:
        file_path: Path of the audio file to validate.

    Returns:
        tuple: ``(is_valid, is_wav_format, detected_format, error_message)``.
        On success *error_message* is ``None``. On failure *is_valid* is
        ``False`` and *error_message* describes the first failing check;
        *is_wav_format* and *detected_format* are ``False``/``None`` except
        when the WAV structure check fails, where they are ``True``/``"wav"``.
    """
    file_exists_valid, file_exists_error = validate_file_exists_and_readable(file_path)
    if not file_exists_valid:
        return False, False, None, file_exists_error

    file_extension = get_audio_file_extension(file_path)
    if not is_supported_audio_extension(file_path):
        supported_formats_list = ", ".join(SUPPORTED_AUDIO_EXTENSIONS)
        return False, False, None, f"Unsupported file format '{file_extension}'. Supported formats are: {supported_formats_list}"

    detected_format, detection_error = detect_audio_format_from_header(file_path)
    if detected_format is None:
        return False, False, None, detection_error

    is_wav_format = detected_format == "wav"
    if is_wav_format:
        wav_structure_valid, wav_structure_error = validate_wav_file_structure(file_path)
        if not wav_structure_valid:
            return False, True, "wav", wav_structure_error
    return True, is_wav_format, detected_format, None
def perform_voice_clone_file_validation(file_path):
    """Validate an audio file intended for voice cloning.

    The voice-clone size limit is checked first; when it passes, the result
    of ``perform_comprehensive_audio_validation`` is returned unchanged.

    Args:
        file_path: Path of the uploaded audio file.

    Returns:
        tuple: ``(is_valid, is_wav_format, detected_format, error_message)``;
        ``(False, False, None, message)`` when the size check fails.
    """
    file_size_valid, file_size_error = validate_file_size_for_voice_cloning(file_path)
    if not file_size_valid:
        return False, False, None, file_size_error
    return perform_comprehensive_audio_validation(file_path)
def get_format_display_name(format_code):
    """Return the display name for an audio format code.

    Args:
        format_code: Format code such as ``"wav"``, or ``None``.

    Returns:
        str: ``"Unknown"`` for ``None``; the ``FORMAT_DISPLAY_NAMES`` entry
        when the code is listed there; otherwise the upper-cased code.
    """
    if format_code is None:
        return "Unknown"
    if format_code in FORMAT_DISPLAY_NAMES:
        return FORMAT_DISPLAY_NAMES[format_code]
    return format_code.upper()