tts

Sleeping

App Files Files Community

tts/src/audio/validator.py

hadadrjt

Pocket TTS: Implement safe and efficient processing mechanisms.

02b5975 25 days ago

raw

history blame contribute delete

9.29 kB

	#
	# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
	# SPDX-License-Identifier: Apache-2.0
	#

	import os
	import wave
	from config import (
	SUPPORTED_AUDIO_EXTENSIONS,
	AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES,
	MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES
	)

	def build_format_display_names_from_supported_extensions():
	    """Build the table that maps audio format codes to display names.

	    Every entry of ``SUPPORTED_AUDIO_EXTENSIONS`` is turned into a format
	    code by stripping its leading dots. A code listed in
	    ``AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES`` uses the override; any other
	    code is displayed in upper case. An ``"unknown"`` -> ``"Unknown"``
	    entry is always written last.

	    Returns:
	        dict: Format code -> display name.
	    """
	    codes = (suffix.lstrip(".") for suffix in SUPPORTED_AUDIO_EXTENSIONS)

	    display_names = {
	        code: (
	            AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES[code]
	            if code in AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES
	            else code.upper()
	        )
	        for code in codes
	    }

	    # Fallback label for files whose format could not be identified.
	    display_names["unknown"] = "Unknown"
	    return display_names

	# Module-level lookup table (format code -> display name), built once when
	# this module is imported.
	FORMAT_DISPLAY_NAMES = build_format_display_names_from_supported_extensions()

	def get_audio_file_extension(file_path):
	    """Return the lower-cased extension of ``file_path``, dot included.

	    Args:
	        file_path: Path to inspect. Any falsy value (``None``, ``""``) is
	            treated as "no path given".

	    Returns:
	        ``None`` for a falsy path; otherwise the extension reported by
	        ``os.path.splitext`` in lower case (``""`` when the path has none).
	    """
	    if file_path:
	        return os.path.splitext(file_path)[1].lower()
	    return None

	def is_supported_audio_extension(file_path):
	    """Tell whether the extension of ``file_path`` is a supported one.

	    Args:
	        file_path: Path whose extension is checked.

	    Returns:
	        bool: ``True`` when the lower-cased extension is contained in
	        ``SUPPORTED_AUDIO_EXTENSIONS``; ``False`` otherwise, including when
	        no path was given.
	    """
	    suffix = get_audio_file_extension(file_path)
	    return suffix is not None and suffix in SUPPORTED_AUDIO_EXTENSIONS

	def format_file_size_for_display(size_bytes):
	    """Render a byte count as a short human-readable string.

	    Units are binary multiples (1 KB = 1024 bytes, 1 MB = 1024 * 1024).

	    Args:
	        size_bytes: Size in bytes.

	    Returns:
	        str: ``"<n> bytes"`` below 1 KB, ``"<x.x> KB"`` (one decimal) below
	        1 MB, and ``"<x.xx> MB"`` (two decimals) from 1 MB upwards.
	    """
	    one_kb = 1024
	    one_mb = one_kb * one_kb

	    if size_bytes < one_kb:
	        return f"{size_bytes} bytes"
	    if size_bytes < one_mb:
	        return f"{size_bytes / one_kb:.1f} KB"
	    return f"{size_bytes / one_mb:.2f} MB"

	def validate_file_size_for_voice_cloning(file_path):
	    """Check that a file does not exceed the voice-cloning size limit.

	    Args:
	        file_path: Path of the audio file to check.

	    Returns:
	        tuple: ``(True, None)`` when the file size is at most
	        ``MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES``; otherwise
	        ``(False, message)`` where ``message`` explains the failure (no path
	        given, size could not be read, or file too large).
	    """
	    if not file_path:
	        return False, "No audio file provided."

	    try:
	        size_in_bytes = os.path.getsize(file_path)
	    except OSError as stat_failure:
	        return False, f"Cannot read file size: {stat_failure}"

	    if size_in_bytes <= MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES:
	        return True, None

	    # Both figures are shown so the user can see how far over the limit
	    # the upload is.
	    limit_text = format_file_size_for_display(MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES)
	    actual_text = format_file_size_for_display(size_in_bytes)
	    return False, (
	        f"Audio file size ({actual_text}) exceeds the maximum allowed size of "
	        f"{limit_text}. Please upload a smaller audio file."
	    )

	def validate_file_exists_and_readable(file_path):
	    """Check that ``file_path`` names an existing, non-trivial, readable file.

	    The checks run in this order: a path was given, the path exists, it is
	    a regular file, its size can be read, it is not empty, it holds at
	    least 44 bytes, and one byte can actually be read from it.

	    Args:
	        file_path: Path of the audio file to check.

	    Returns:
	        tuple: ``(True, None)`` when every check passes, otherwise
	        ``(False, message)`` describing the first check that failed.
	    """
	    if not file_path:
	        return False, "No audio file provided."

	    path_checks = (
	        (os.path.exists, "Audio file does not exist."),
	        (os.path.isfile, "The provided path is not a valid file."),
	    )
	    for check_passes, failure_message in path_checks:
	        if not check_passes(file_path):
	            return False, failure_message

	    try:
	        byte_count = os.path.getsize(file_path)
	    except OSError as stat_failure:
	        return False, f"Cannot read file size: {stat_failure}"

	    if byte_count == 0:
	        return False, "Audio file is empty (0 bytes)."

	    # NOTE(review): 44 is presumably the size of a minimal WAV header;
	    # confirm the intent, since the limit applies to every format.
	    minimum_plausible_size = 44
	    if byte_count < minimum_plausible_size:
	        return False, "Audio file is too small to be a valid audio file."

	    # Reading a single byte proves the file can really be opened and read.
	    try:
	        with open(file_path, "rb") as probe:
	            probe.read(1)
	    except IOError as read_failure:
	        return False, f"Audio file is not readable: {read_failure}"

	    return True, None

	def detect_audio_format_from_header(file_path):
	    """Guess the audio format from the first bytes of the file.

	    Up to 32 bytes are read and compared against a fixed list of known
	    signatures. When none matches, the file extension is used as a fallback
	    if it is listed in ``SUPPORTED_AUDIO_EXTENSIONS``.

	    Args:
	        file_path: Path of the audio file to inspect.

	    Returns:
	        tuple: ``(format_code, None)`` when a signature or the extension
	        fallback identifies the format; ``("unknown", message)`` when
	        neither does; ``(None, message)`` when fewer than 4 bytes could be
	        read or the file could not be read at all.
	    """
	    try:
	        with open(file_path, "rb") as stream:
	            head = stream.read(32)

	        if len(head) < 4:
	            return None, "File is too small to determine audio format."

	        # Slices of a short header are simply shorter, so they can never
	        # equal the 4-byte tags below; no extra length checks are needed.
	        magic = head[:4]
	        box_type = head[4:8]
	        form_type = head[8:12]

	        if magic == b"RIFF" and form_type == b"WAVE":
	            detected = "wav"
	        elif head[:3] == b"ID3":
	            detected = "mp3"
	        elif head[:2] in (
	            # Two-byte prefixes accepted as MP3 sync bytes.
	            b"\xff\xfb",
	            b"\xff\xfa",
	            b"\xff\xf3",
	            b"\xff\xf2",
	            b"\xff\xe0",
	            b"\xff\xe2",
	            b"\xff\xe3",
	        ):
	            detected = "mp3"
	        elif magic == b"fLaC":
	            detected = "flac"
	        elif magic == b"OggS":
	            detected = "ogg"
	        elif magic == b"FORM" and form_type in (b"AIFF", b"AIFC"):
	            detected = "aiff"
	        elif box_type == b"ftyp":
	            detected = "m4a"
	        elif magic == b"\x1aE\xdf\xa3":
	            detected = "webm"
	        elif box_type in (b"mdat", b"moov", b"free", b"skip", b"wide"):
	            detected = "m4a"
	        else:
	            detected = None

	        if detected is not None:
	            return detected, None

	        # No signature matched: trust the extension if it is a supported one.
	        suffix = get_audio_file_extension(file_path)
	        if suffix and suffix in SUPPORTED_AUDIO_EXTENSIONS:
	            return suffix.lstrip("."), None

	        return "unknown", (
	            "Could not determine audio format from file header. "
	            "The file may be corrupted or in an unsupported format."
	        )

	    except IOError as io_error:
	        return None, f"Error reading file header: {io_error}"

	    except Exception as detection_error:
	        return None, f"Unexpected error detecting audio format: {detection_error}"

	def validate_wav_file_structure(file_path):
	    """Open a WAV file with the ``wave`` module and sanity-check its header.

	    Accepted ranges: 1-16 channels, sample width of 1-4 bytes, sample rate
	    of 100-384000 Hz, at least one frame, and a duration between 0.1 and
	    60 seconds (frames divided by sample rate).

	    Args:
	        file_path: Path of the WAV file to validate.

	    Returns:
	        tuple: ``(True, None)`` when the file passes every check, otherwise
	        ``(False, message)`` describing the first failed check or the error
	        raised while opening or parsing the file.
	    """
	    try:
	        with wave.open(file_path, "rb") as reader:
	            params = reader.getparams()

	        channels = params.nchannels
	        width = params.sampwidth
	        rate = params.framerate
	        frames = params.nframes

	        if channels < 1:
	            return False, "WAV file has no audio channels."
	        if channels > 16:
	            return False, f"WAV file has too many channels ({channels}). Maximum supported is 16."

	        if width < 1:
	            return False, "WAV file has invalid sample width (less than 1 byte)."
	        if width > 4:
	            return False, (
	                f"WAV file has unsupported sample width ({width} bytes). "
	                "Maximum supported is 4 bytes (32-bit)."
	            )

	        if rate < 100:
	            return False, f"WAV file has invalid sample rate ({rate} Hz). Minimum supported is 100 Hz."
	        if rate > 384000:
	            return False, (
	                f"WAV file has unsupported sample rate ({rate} Hz). "
	                "Maximum supported is 384000 Hz."
	            )

	        if frames < 1:
	            return False, "WAV file contains no audio frames."

	        # The rate is known to be >= 100 here, so the division is safe.
	        seconds = frames / rate
	        if seconds < 0.1:
	            return False, f"Audio is too short ({seconds:.2f} seconds). Minimum duration is 0.1 seconds."
	        if seconds > 60:
	            return False, f"Audio is too long ({seconds:.0f} seconds). Maximum duration is 1 minute."

	        return True, None

	    except wave.Error as wav_error:
	        # Turn the most common parser complaints into friendlier messages.
	        detail = str(wav_error)
	        if "file does not start with RIFF id" in detail:
	            return False, (
	                "File has .wav extension but is not a valid WAV file. "
	                "It may be a different audio format renamed to .wav."
	            )
	        if "unknown format" in detail.lower():
	            return False, "WAV file uses an unsupported audio encoding format."
	        return False, f"Invalid WAV file structure: {detail}"

	    except EOFError:
	        return False, "WAV file is truncated or corrupted (unexpected end of file)."

	    except Exception as validation_error:
	        return False, f"Error validating WAV file: {validation_error}"

	def perform_comprehensive_audio_validation(file_path):
	    """Run the full validation pipeline on an audio file.

	    Steps, in order: the file exists and is readable, its extension is
	    supported, its format can be detected from the header, and, for files
	    detected as WAV, the WAV structure is valid.

	    Args:
	        file_path: Path of the audio file to validate.

	    Returns:
	        tuple: ``(is_valid, is_wav_format, detected_format, error_message)``.
	        On success ``error_message`` is ``None``. On failure before format
	        detection succeeds the result is ``(False, False, None, message)``;
	        a WAV structure failure yields ``(False, True, "wav", message)``.
	    """
	    readable, readable_error = validate_file_exists_and_readable(file_path)
	    if not readable:
	        return False, False, None, readable_error

	    if not is_supported_audio_extension(file_path):
	        suffix = get_audio_file_extension(file_path)
	        supported = ", ".join(SUPPORTED_AUDIO_EXTENSIONS)
	        return False, False, None, (
	            f"Unsupported file format '{suffix}'. "
	            f"Supported formats are: {supported}"
	        )

	    detected_format, detection_error = detect_audio_format_from_header(file_path)
	    if detected_format is None:
	        return False, False, None, detection_error

	    # Only WAV files get a structural check; other formats pass as-is.
	    if detected_format != "wav":
	        return True, False, detected_format, None

	    wav_ok, wav_error = validate_wav_file_structure(file_path)
	    if wav_ok:
	        return True, True, detected_format, None
	    return False, True, "wav", wav_error

	def perform_voice_clone_file_validation(file_path):
	    """Validate an audio file intended for voice cloning.

	    The size limit is checked first; only a file within the limit goes
	    through the comprehensive audio validation.

	    Args:
	        file_path: Path of the uploaded audio file.

	    Returns:
	        tuple: ``(is_valid, is_wav_format, detected_format, error_message)``,
	        the same shape ``perform_comprehensive_audio_validation`` returns. A
	        size failure yields ``(False, False, None, message)``.
	    """
	    size_ok, size_error = validate_file_size_for_voice_cloning(file_path)
	    if size_ok:
	        return perform_comprehensive_audio_validation(file_path)
	    return False, False, None, size_error

	def get_format_display_name(format_code):
	    """Return the display name for an audio format code.

	    Args:
	        format_code: Format code such as ``"wav"``, or ``None``.

	    Returns:
	        str: ``"Unknown"`` for ``None``; the entry from
	        ``FORMAT_DISPLAY_NAMES`` when the code is listed there; otherwise
	        the code itself in upper case.
	    """
	    if format_code is None:
	        return "Unknown"

	    try:
	        return FORMAT_DISPLAY_NAMES[format_code]
	    except KeyError:
	        # Not in the table: fall back to the upper-cased code.
	        return format_code.upper()