File size: 9,294 Bytes
dae9fa5
 
 
 
 
 
 
 
 
02b5975
 
dae9fa5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02b5975
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dae9fa5
 
 
 
 
 
 
 
 
 
 
 
02b5975
dae9fa5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02b5975
dae9fa5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02b5975
 
dae9fa5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02b5975
 
 
 
 
 
 
 
dae9fa5
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#

import os
import wave
from config import (
    SUPPORTED_AUDIO_EXTENSIONS,
    AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES,
    MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES
)

def build_format_display_names_from_supported_extensions():
    """Build the mapping from audio format code to its display name.

    Every entry of SUPPORTED_AUDIO_EXTENSIONS has its leading dots stripped
    to form the format code. A code present in
    AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES uses that override; any other code is
    displayed upper-cased. An "unknown" -> "Unknown" entry is always set last.

    Returns:
        dict: format code -> display name.
    """
    codes = (extension.lstrip(".") for extension in SUPPORTED_AUDIO_EXTENSIONS)

    display_names = {
        code: (
            AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES[code]
            if code in AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES
            else code.upper()
        )
        for code in codes
    }

    # Entry used for the "unknown" format code.
    display_names["unknown"] = "Unknown"
    return display_names

# Format code -> display name table, computed once when the module is imported.
FORMAT_DISPLAY_NAMES = build_format_display_names_from_supported_extensions()

def get_audio_file_extension(file_path):
    """Return the lower-cased extension of *file_path*, dot included.

    Returns:
        None when *file_path* is falsy (None or an empty string); otherwise
        the extension as reported by os.path.splitext, lower-cased. A path
        without an extension yields an empty string.
    """
    if file_path:
        return os.path.splitext(file_path)[1].lower()

    return None

def is_supported_audio_extension(file_path):
    """Tell whether the extension of *file_path* is a supported audio extension.

    Returns:
        False when no path is given; otherwise whether the lower-cased
        extension is a member of SUPPORTED_AUDIO_EXTENSIONS.
    """
    suffix = get_audio_file_extension(file_path)
    return suffix is not None and suffix in SUPPORTED_AUDIO_EXTENSIONS

def format_file_size_for_display(size_bytes):
    """Format a byte count as a short human-readable string.

    Sizes below 1024 are shown in bytes (singular "byte" for exactly 1),
    sizes below 1 MiB in KB with one decimal, and anything larger in MB with
    two decimals. Units are binary multiples (1 KB = 1024 bytes).

    Args:
        size_bytes: Size in bytes.

    Returns:
        str: For example "1 byte", "512 bytes", "1.5 KB" or "5.00 MB".
    """
    bytes_per_kb = 1024
    bytes_per_mb = bytes_per_kb * 1024

    if size_bytes < bytes_per_kb:
        unit = "byte" if size_bytes == 1 else "bytes"
        return f"{size_bytes} {unit}"

    size_kb = size_bytes / bytes_per_kb

    # Sizes a few bytes short of 1 MiB would round to "1024.0 KB"; report
    # those in MB instead so the KB figure never reaches 1024.
    if size_bytes < bytes_per_mb and round(size_kb, 1) < bytes_per_kb:
        return f"{size_kb:.1f} KB"

    return f"{size_bytes / bytes_per_mb:.2f} MB"

def validate_file_size_for_voice_cloning(file_path):
    """Check that the file does not exceed the voice-clone size limit.

    Args:
        file_path: Path of the uploaded audio file.

    Returns:
        tuple: (True, None) when the size is at most
        MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES, otherwise (False, message)
        describing the problem (no path, unreadable size, or file too large).
    """
    if not file_path:
        return False, "No audio file provided."

    try:
        actual_bytes = os.path.getsize(file_path)
    except OSError as size_error:
        return False, f"Cannot read file size: {size_error!s}"

    if actual_bytes <= MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES:
        return True, None

    # Over the limit: report both figures in human-readable form.
    limit_text = format_file_size_for_display(MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES)
    actual_text = format_file_size_for_display(actual_bytes)
    return False, (
        f"Audio file size ({actual_text}) exceeds the maximum allowed size of {limit_text}. "
        "Please upload a smaller audio file."
    )

def validate_file_exists_and_readable(file_path):
    """Check that *file_path* names an existing, non-trivial, readable file.

    The checks run in order: a path was given, it exists, it is a regular
    file, its size can be read, it is not empty, it is at least 44 bytes,
    and one byte can actually be read from it.

    Returns:
        tuple: (True, None) on success, otherwise (False, message) for the
        first check that failed.
    """
    if not file_path:
        return False, "No audio file provided."

    path_checks = (
        (os.path.exists, "Audio file does not exist."),
        (os.path.isfile, "The provided path is not a valid file."),
    )
    for passes, failure_message in path_checks:
        if not passes(file_path):
            return False, failure_message

    try:
        byte_count = os.path.getsize(file_path)
    except OSError as size_error:
        return False, f"Cannot read file size: {size_error!s}"

    # 44 presumably mirrors the size of a canonical WAV header - TODO confirm.
    if byte_count < 44:
        if byte_count == 0:
            return False, "Audio file is empty (0 bytes)."
        return False, "Audio file is too small to be a valid audio file."

    # Existence alone does not prove read permission, so probe one byte.
    try:
        with open(file_path, "rb") as probe:
            probe.read(1)
    except OSError as read_error:
        return False, f"Audio file is not readable: {read_error!s}"

    return True, None

def detect_audio_format_from_header(file_path):
    """Guess the audio format of a file from its first 32 bytes.

    Known signatures are tested in a fixed order; when none matches, the
    file extension is used as a fallback if it is a supported one.

    Returns:
        tuple: (format_code, None) when a format was determined,
        ("unknown", message) when neither header nor extension identified
        it, or (None, message) when the file is shorter than 4 bytes or
        could not be read.
    """
    try:
        with open(file_path, "rb") as audio_file:
            header = audio_file.read(32)

        if len(header) < 4:
            return None, "File is too small to determine audio format."

        # Slices of a too-short header are simply shorter and never equal a
        # full-length tag, so no further length checks are needed below.
        magic = header[:4]
        box_type = header[4:8]
        form_type = header[8:12]

        if magic == b"RIFF" and form_type == b"WAVE":
            return "wav", None

        if header.startswith(b"ID3"):
            return "mp3", None

        # Two-byte prefixes accepted as the start of an MP3 frame.
        mp3_frame_prefixes = (
            b"\xff\xfb",
            b"\xff\xfa",
            b"\xff\xf3",
            b"\xff\xf2",
            b"\xff\xe0",
            b"\xff\xe2",
            b"\xff\xe3",
        )
        if header[:2] in mp3_frame_prefixes:
            return "mp3", None

        if magic == b"fLaC":
            return "flac", None

        if magic == b"OggS":
            return "ogg", None

        if magic == b"FORM" and form_type in (b"AIFF", b"AIFC"):
            return "aiff", None

        if box_type == b"ftyp":
            return "m4a", None

        if magic == b"\x1aE\xdf\xa3":
            return "webm", None

        # Other box types at offset 4 that are also reported as m4a.
        if box_type in (b"mdat", b"moov", b"free", b"skip", b"wide"):
            return "m4a", None

        # No signature matched: trust the extension if it is a supported one.
        extension = get_audio_file_extension(file_path)
        if extension and extension in SUPPORTED_AUDIO_EXTENSIONS:
            return extension.lstrip("."), None

        return "unknown", "Could not determine audio format from file header. The file may be corrupted or in an unsupported format."

    except IOError as io_error:
        return None, f"Error reading file header: {io_error!s}"

    except Exception as detection_error:
        return None, f"Unexpected error detecting audio format: {detection_error!s}"

def validate_wav_file_structure(file_path):
    """Open *file_path* with the wave module and sanity-check its parameters.

    Accepted ranges: 1-16 channels, 1-4 bytes per sample, 100-384000 Hz,
    at least one frame, and a duration between 0.1 and 60 seconds.

    Returns:
        tuple: (True, None) when every check passes, otherwise
        (False, message) for the first failed check or for the error raised
        while parsing the file.
    """
    try:
        with wave.open(file_path, "rb") as reader:
            channels = reader.getnchannels()
            width = reader.getsampwidth()
            rate = reader.getframerate()
            frames = reader.getnframes()

        if channels < 1:
            return False, "WAV file has no audio channels."
        if channels > 16:
            return False, f"WAV file has too many channels ({channels}). Maximum supported is 16."

        if width < 1:
            return False, "WAV file has invalid sample width (less than 1 byte)."
        if width > 4:
            return False, f"WAV file has unsupported sample width ({width} bytes). Maximum supported is 4 bytes (32-bit)."

        if rate < 100:
            return False, f"WAV file has invalid sample rate ({rate} Hz). Minimum supported is 100 Hz."
        if rate > 384000:
            return False, f"WAV file has unsupported sample rate ({rate} Hz). Maximum supported is 384000 Hz."

        if frames < 1:
            return False, "WAV file contains no audio frames."

        # rate is known to be >= 100 here, so the division is safe.
        duration = frames / rate

        if duration < 0.1:
            return False, f"Audio is too short ({duration:.2f} seconds). Minimum duration is 0.1 seconds."
        if duration > 60:
            return False, f"Audio is too long ({duration:.0f} seconds). Maximum duration is 1 minute."

        return True, None

    except wave.Error as wav_error:
        detail = str(wav_error)

        # Map the wave module's known messages to friendlier wording.
        if "file does not start with RIFF id" in detail:
            return False, "File has .wav extension but is not a valid WAV file. It may be a different audio format renamed to .wav."
        if "unknown format" in detail.lower():
            return False, "WAV file uses an unsupported audio encoding format."

        return False, f"Invalid WAV file structure: {detail}"

    except EOFError:
        return False, "WAV file is truncated or corrupted (unexpected end of file)."

    except Exception as validation_error:
        return False, f"Error validating WAV file: {validation_error!s}"

def perform_comprehensive_audio_validation(file_path):
    """Run the full validation pipeline on an audio file.

    Steps, in order: the file exists and is readable, its extension is
    supported, its format can be detected, and - for WAV only - its
    structure is valid.

    Returns:
        tuple: (is_valid, is_wav_format, detected_format, error_message).
        error_message is None on success; detected_format is None when
        validation failed before a format was determined.
    """
    readable, readable_error = validate_file_exists_and_readable(file_path)
    if not readable:
        return False, False, None, readable_error

    if not is_supported_audio_extension(file_path):
        extension = get_audio_file_extension(file_path)
        supported = ", ".join(SUPPORTED_AUDIO_EXTENSIONS)
        return False, False, None, f"Unsupported file format '{extension}'. Supported formats are: {supported}"

    audio_format, format_error = detect_audio_format_from_header(file_path)
    if audio_format is None:
        return False, False, None, format_error

    # Only WAV files get a structural check; other formats pass as detected.
    if audio_format != "wav":
        return True, False, audio_format, None

    wav_ok, wav_error = validate_wav_file_structure(file_path)
    if not wav_ok:
        return False, True, "wav", wav_error

    return True, True, audio_format, None

def perform_voice_clone_file_validation(file_path):
    """Validate a voice-clone upload: size limit first, then the full audio checks.

    Returns:
        tuple: the same 4-tuple as perform_comprehensive_audio_validation,
        (is_valid, is_wav_format, detected_format, error_message). A size
        failure yields (False, False, None, message).
    """
    size_ok, size_error = validate_file_size_for_voice_cloning(file_path)

    if size_ok:
        return perform_comprehensive_audio_validation(file_path)

    return False, False, None, size_error

def get_format_display_name(format_code):
    """Return the display name for a format code.

    Returns:
        "Unknown" for None, the entry from FORMAT_DISPLAY_NAMES when the code
        is listed there, and otherwise the code itself upper-cased.
    """
    if format_code is None:
        return "Unknown"

    is_listed = format_code in FORMAT_DISPLAY_NAMES
    return FORMAT_DISPLAY_NAMES[format_code] if is_listed else format_code.upper()