Spaces:
Sleeping
Sleeping
print debug
Browse files- src/main.py +25 -2
src/main.py
CHANGED
|
@@ -38,7 +38,7 @@ def get_youtube_video_id(url, ignore_playlist=True):
|
|
| 38 |
http://www.youtube.com/embed/SA2iWivDJiE
|
| 39 |
http://www.youtube.com/v/SA2iWivDJiE?version=3&hl=en_US
|
| 40 |
"""
|
| 41 |
-
query = urlparse(url)
|
| 42 |
if query.hostname == 'youtu.be':
|
| 43 |
if query.path[1:] == 'watch':
|
| 44 |
return query.query[2:]
|
|
@@ -63,6 +63,7 @@ def get_youtube_video_id(url, ignore_playlist=True):
|
|
| 63 |
|
| 64 |
|
| 65 |
def yt_download(link):
|
|
|
|
| 66 |
ydl_opts = {
|
| 67 |
'format': 'bestaudio',
|
| 68 |
'outtmpl': '%(title)s',
|
|
@@ -77,6 +78,7 @@ def yt_download(link):
|
|
| 77 |
result = ydl.extract_info(link, download=True)
|
| 78 |
download_path = ydl.prepare_filename(result, outtmpl='%(title)s.mp3')
|
| 79 |
|
|
|
|
| 80 |
return download_path
|
| 81 |
|
| 82 |
|
|
@@ -88,6 +90,7 @@ def raise_exception(error_msg, is_webui):
|
|
| 88 |
|
| 89 |
|
| 90 |
def get_rvc_model(voice_model, is_webui):
|
|
|
|
| 91 |
rvc_model_filename, rvc_index_filename = None, None
|
| 92 |
model_dir = os.path.join(rvc_models_dir, voice_model)
|
| 93 |
for file in os.listdir(model_dir):
|
|
@@ -101,10 +104,12 @@ def get_rvc_model(voice_model, is_webui):
|
|
| 101 |
error_msg = f'No model file exists in {model_dir}.'
|
| 102 |
raise_exception(error_msg, is_webui)
|
| 103 |
|
|
|
|
| 104 |
return os.path.join(model_dir, rvc_model_filename), os.path.join(model_dir, rvc_index_filename) if rvc_index_filename else ''
|
| 105 |
|
| 106 |
|
| 107 |
def get_audio_paths(song_dir):
|
|
|
|
| 108 |
orig_song_path = None
|
| 109 |
instrumentals_path = None
|
| 110 |
main_vocals_dereverb_path = None
|
|
@@ -121,10 +126,12 @@ def get_audio_paths(song_dir):
|
|
| 121 |
elif file.endswith('_Vocals_Backup.wav'):
|
| 122 |
backup_vocals_path = os.path.join(song_dir, file)
|
| 123 |
|
|
|
|
| 124 |
return orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path
|
| 125 |
|
| 126 |
|
| 127 |
def convert_to_stereo(audio_path):
|
|
|
|
| 128 |
wave, sr = librosa.load(audio_path, mono=False, sr=44100)
|
| 129 |
|
| 130 |
# check if mono
|
|
@@ -132,12 +139,15 @@ def convert_to_stereo(audio_path):
|
|
| 132 |
stereo_path = f'{os.path.splitext(audio_path)[0]}_stereo.wav'
|
| 133 |
command = shlex.split(f'ffmpeg -y -loglevel error -i "{audio_path}" -ac 2 -f wav "{stereo_path}"')
|
| 134 |
subprocess.run(command)
|
|
|
|
| 135 |
return stereo_path
|
| 136 |
else:
|
|
|
|
| 137 |
return audio_path
|
| 138 |
|
| 139 |
|
| 140 |
def pitch_shift(audio_path, pitch_change):
|
|
|
|
| 141 |
output_path = f'{os.path.splitext(audio_path)[0]}_p{pitch_change}.wav'
|
| 142 |
if not os.path.exists(output_path):
|
| 143 |
y, sr = sf.read(audio_path)
|
|
@@ -146,16 +156,20 @@ def pitch_shift(audio_path, pitch_change):
|
|
| 146 |
y_shifted = tfm.build_array(input_array=y, sample_rate_in=sr)
|
| 147 |
sf.write(output_path, y_shifted, sr)
|
| 148 |
|
|
|
|
| 149 |
return output_path
|
| 150 |
|
| 151 |
|
| 152 |
def get_hash(filepath):
|
|
|
|
| 153 |
with open(filepath, 'rb') as f:
|
| 154 |
file_hash = hashlib.blake2b()
|
| 155 |
while chunk := f.read(8192):
|
| 156 |
file_hash.update(chunk)
|
| 157 |
|
| 158 |
-
|
|
|
|
|
|
|
| 159 |
|
| 160 |
|
| 161 |
def display_progress(message, percent, is_webui, progress=None):
|
|
@@ -166,6 +180,7 @@ def display_progress(message, percent, is_webui, progress=None):
|
|
| 166 |
|
| 167 |
|
| 168 |
def preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type, progress=None):
|
|
|
|
| 169 |
keep_orig = False
|
| 170 |
if input_type == 'yt':
|
| 171 |
display_progress('[~] Downloading song...', 0, is_webui, progress)
|
|
@@ -189,10 +204,12 @@ def preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type,
|
|
| 189 |
display_progress('[~] Applying DeReverb to Vocals...', 0.3, is_webui, progress)
|
| 190 |
_, main_vocals_dereverb_path = run_mdx(mdx_model_params, song_output_dir, os.path.join(mdxnet_models_dir, 'Reverb_HQ_By_FoxJoy.onnx'), main_vocals_path, invert_suffix='DeReverb', exclude_main=True, denoise=True)
|
| 191 |
|
|
|
|
| 192 |
return orig_song_path, vocals_path, instrumentals_path, main_vocals_path, backup_vocals_path, main_vocals_dereverb_path
|
| 193 |
|
| 194 |
|
| 195 |
def voice_change(voice_model, vocals_path, output_path, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui):
|
|
|
|
| 196 |
rvc_model_path, rvc_index_path = get_rvc_model(voice_model, is_webui)
|
| 197 |
device = 'cuda:0'
|
| 198 |
config = Config(device, True)
|
|
@@ -203,9 +220,11 @@ def voice_change(voice_model, vocals_path, output_path, pitch_change, f0_method,
|
|
| 203 |
rvc_infer(rvc_index_path, index_rate, vocals_path, output_path, pitch_change, f0_method, cpt, version, net_g, filter_radius, tgt_sr, rms_mix_rate, protect, crepe_hop_length, vc, hubert_model)
|
| 204 |
del hubert_model, cpt
|
| 205 |
gc.collect()
|
|
|
|
| 206 |
|
| 207 |
|
| 208 |
def add_audio_effects(audio_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping):
|
|
|
|
| 209 |
output_path = f'{os.path.splitext(audio_path)[0]}_mixed.wav'
|
| 210 |
|
| 211 |
# Initialize audio effects plugins
|
|
@@ -225,14 +244,17 @@ def add_audio_effects(audio_path, reverb_rm_size, reverb_wet, reverb_dry, reverb
|
|
| 225 |
effected = board(chunk, f.samplerate, reset=False)
|
| 226 |
o.write(effected)
|
| 227 |
|
|
|
|
| 228 |
return output_path
|
| 229 |
|
| 230 |
|
| 231 |
def combine_audio(audio_paths, output_path, main_gain, backup_gain, inst_gain, output_format):
|
|
|
|
| 232 |
main_vocal_audio = AudioSegment.from_wav(audio_paths[0]) - 4 + main_gain
|
| 233 |
backup_vocal_audio = AudioSegment.from_wav(audio_paths[1]) - 6 + backup_gain
|
| 234 |
instrumental_audio = AudioSegment.from_wav(audio_paths[2]) - 7 + inst_gain
|
| 235 |
main_vocal_audio.overlay(backup_vocal_audio).overlay(instrumental_audio).export(output_path, format=output_format)
|
|
|
|
| 236 |
|
| 237 |
@spaces.GPU
|
| 238 |
def song_cover_pipeline(song_input, voice_model, pitch_change, keep_files,
|
|
@@ -241,6 +263,7 @@ def song_cover_pipeline(song_input, voice_model, pitch_change, keep_files,
|
|
| 241 |
reverb_rm_size=0.15, reverb_wet=0.2, reverb_dry=0.8, reverb_damping=0.7, output_format='mp3',
|
| 242 |
progress=gr.Progress()):
|
| 243 |
try:
|
|
|
|
| 244 |
if not song_input or not voice_model:
|
| 245 |
raise_exception('Ensure that the song input field and voice model field is filled.', is_webui)
|
| 246 |
|
|
|
|
| 38 |
http://www.youtube.com/embed/SA2iWivDJiE
|
| 39 |
http://www.youtube.com/v/SA2iWivDJiE?version=3&hl=en_US
|
| 40 |
"""
|
| 41 |
+
query = urlparse(url, allow_fragments=True)
|
| 42 |
if query.hostname == 'youtu.be':
|
| 43 |
if query.path[1:] == 'watch':
|
| 44 |
return query.query[2:]
|
|
|
|
| 63 |
|
| 64 |
|
| 65 |
def yt_download(link):
|
| 66 |
+
print("[~] Downloading YouTube audio...")
|
| 67 |
ydl_opts = {
|
| 68 |
'format': 'bestaudio',
|
| 69 |
'outtmpl': '%(title)s',
|
|
|
|
| 78 |
result = ydl.extract_info(link, download=True)
|
| 79 |
download_path = ydl.prepare_filename(result, outtmpl='%(title)s.mp3')
|
| 80 |
|
| 81 |
+
print(f"[+] YouTube audio downloaded: {download_path}")
|
| 82 |
return download_path
|
| 83 |
|
| 84 |
|
|
|
|
| 90 |
|
| 91 |
|
| 92 |
def get_rvc_model(voice_model, is_webui):
|
| 93 |
+
print(f"[~] Getting RVC model: {voice_model}")
|
| 94 |
rvc_model_filename, rvc_index_filename = None, None
|
| 95 |
model_dir = os.path.join(rvc_models_dir, voice_model)
|
| 96 |
for file in os.listdir(model_dir):
|
|
|
|
| 104 |
error_msg = f'No model file exists in {model_dir}.'
|
| 105 |
raise_exception(error_msg, is_webui)
|
| 106 |
|
| 107 |
+
print(f"[+] RVC model found: {rvc_model_filename}")
|
| 108 |
return os.path.join(model_dir, rvc_model_filename), os.path.join(model_dir, rvc_index_filename) if rvc_index_filename else ''
|
| 109 |
|
| 110 |
|
| 111 |
def get_audio_paths(song_dir):
|
| 112 |
+
print(f"[~] Getting audio paths from: {song_dir}")
|
| 113 |
orig_song_path = None
|
| 114 |
instrumentals_path = None
|
| 115 |
main_vocals_dereverb_path = None
|
|
|
|
| 126 |
elif file.endswith('_Vocals_Backup.wav'):
|
| 127 |
backup_vocals_path = os.path.join(song_dir, file)
|
| 128 |
|
| 129 |
+
print(f"[+] Audio paths found: {orig_song_path}, {instrumentals_path}, {main_vocals_dereverb_path}, {backup_vocals_path}")
|
| 130 |
return orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path
|
| 131 |
|
| 132 |
|
| 133 |
def convert_to_stereo(audio_path):
|
| 134 |
+
print(f"[~] Converting to stereo: {audio_path}")
|
| 135 |
wave, sr = librosa.load(audio_path, mono=False, sr=44100)
|
| 136 |
|
| 137 |
# check if mono
|
|
|
|
| 139 |
stereo_path = f'{os.path.splitext(audio_path)[0]}_stereo.wav'
|
| 140 |
command = shlex.split(f'ffmpeg -y -loglevel error -i "{audio_path}" -ac 2 -f wav "{stereo_path}"')
|
| 141 |
subprocess.run(command)
|
| 142 |
+
print(f"[+] Converted to stereo: {stereo_path}")
|
| 143 |
return stereo_path
|
| 144 |
else:
|
| 145 |
+
print("[+] Audio already in stereo")
|
| 146 |
return audio_path
|
| 147 |
|
| 148 |
|
| 149 |
def pitch_shift(audio_path, pitch_change):
|
| 150 |
+
print(f"[~] Pitch shifting: {audio_path} by {pitch_change}")
|
| 151 |
output_path = f'{os.path.splitext(audio_path)[0]}_p{pitch_change}.wav'
|
| 152 |
if not os.path.exists(output_path):
|
| 153 |
y, sr = sf.read(audio_path)
|
|
|
|
| 156 |
y_shifted = tfm.build_array(input_array=y, sample_rate_in=sr)
|
| 157 |
sf.write(output_path, y_shifted, sr)
|
| 158 |
|
| 159 |
+
print(f"[+] Pitch shifted audio saved: {output_path}")
|
| 160 |
return output_path
|
| 161 |
|
| 162 |
|
| 163 |
def get_hash(filepath):
|
| 164 |
+
print(f"[~] Generating hash for: {filepath}")
|
| 165 |
with open(filepath, 'rb') as f:
|
| 166 |
file_hash = hashlib.blake2b()
|
| 167 |
while chunk := f.read(8192):
|
| 168 |
file_hash.update(chunk)
|
| 169 |
|
| 170 |
+
hash_value = file_hash.hexdigest()[:11]
|
| 171 |
+
print(f"[+] Hash generated: {hash_value}")
|
| 172 |
+
return hash_value
|
| 173 |
|
| 174 |
|
| 175 |
def display_progress(message, percent, is_webui, progress=None):
|
|
|
|
| 180 |
|
| 181 |
|
| 182 |
def preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type, progress=None):
|
| 183 |
+
print("[~] Starting song preprocessing...")
|
| 184 |
keep_orig = False
|
| 185 |
if input_type == 'yt':
|
| 186 |
display_progress('[~] Downloading song...', 0, is_webui, progress)
|
|
|
|
| 204 |
display_progress('[~] Applying DeReverb to Vocals...', 0.3, is_webui, progress)
|
| 205 |
_, main_vocals_dereverb_path = run_mdx(mdx_model_params, song_output_dir, os.path.join(mdxnet_models_dir, 'Reverb_HQ_By_FoxJoy.onnx'), main_vocals_path, invert_suffix='DeReverb', exclude_main=True, denoise=True)
|
| 206 |
|
| 207 |
+
print("[+] Song preprocessing completed")
|
| 208 |
return orig_song_path, vocals_path, instrumentals_path, main_vocals_path, backup_vocals_path, main_vocals_dereverb_path
|
| 209 |
|
| 210 |
|
| 211 |
def voice_change(voice_model, vocals_path, output_path, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui):
|
| 212 |
+
print(f"[~] Starting voice change: {voice_model}")
|
| 213 |
rvc_model_path, rvc_index_path = get_rvc_model(voice_model, is_webui)
|
| 214 |
device = 'cuda:0'
|
| 215 |
config = Config(device, True)
|
|
|
|
| 220 |
rvc_infer(rvc_index_path, index_rate, vocals_path, output_path, pitch_change, f0_method, cpt, version, net_g, filter_radius, tgt_sr, rms_mix_rate, protect, crepe_hop_length, vc, hubert_model)
|
| 221 |
del hubert_model, cpt
|
| 222 |
gc.collect()
|
| 223 |
+
print(f"[+] Voice change completed: {output_path}")
|
| 224 |
|
| 225 |
|
| 226 |
def add_audio_effects(audio_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping):
|
| 227 |
+
print(f"[~] Adding audio effects: {audio_path}")
|
| 228 |
output_path = f'{os.path.splitext(audio_path)[0]}_mixed.wav'
|
| 229 |
|
| 230 |
# Initialize audio effects plugins
|
|
|
|
| 244 |
effected = board(chunk, f.samplerate, reset=False)
|
| 245 |
o.write(effected)
|
| 246 |
|
| 247 |
+
print(f"[+] Audio effects added: {output_path}")
|
| 248 |
return output_path
|
| 249 |
|
| 250 |
|
| 251 |
def combine_audio(audio_paths, output_path, main_gain, backup_gain, inst_gain, output_format):
|
| 252 |
+
print(f"[~] Combining audio: {audio_paths}")
|
| 253 |
main_vocal_audio = AudioSegment.from_wav(audio_paths[0]) - 4 + main_gain
|
| 254 |
backup_vocal_audio = AudioSegment.from_wav(audio_paths[1]) - 6 + backup_gain
|
| 255 |
instrumental_audio = AudioSegment.from_wav(audio_paths[2]) - 7 + inst_gain
|
| 256 |
main_vocal_audio.overlay(backup_vocal_audio).overlay(instrumental_audio).export(output_path, format=output_format)
|
| 257 |
+
print(f"[+] Audio combined: {output_path}")
|
| 258 |
|
| 259 |
@spaces.GPU
|
| 260 |
def song_cover_pipeline(song_input, voice_model, pitch_change, keep_files,
|
|
|
|
| 263 |
reverb_rm_size=0.15, reverb_wet=0.2, reverb_dry=0.8, reverb_damping=0.7, output_format='mp3',
|
| 264 |
progress=gr.Progress()):
|
| 265 |
try:
|
| 266 |
+
print("[~] Starting AI Cover Generation Pipeline...")
|
| 267 |
if not song_input or not voice_model:
|
| 268 |
raise_exception('Ensure that the song input field and voice model field is filled.', is_webui)
|
| 269 |
|