Spaces:
Runtime error
Runtime error
pengdaqian
committed on
Commit
·
737aeb3
1
Parent(s):
27d3bc5
fix
Browse files- app.py +70 -20
- music/__init__.py +0 -0
- music/search.py +90 -0
- requirements.txt +2 -0
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
from vits.models import SynthesizerInfer
|
| 2 |
from omegaconf import OmegaConf
|
| 3 |
import torchcrepe
|
|
@@ -8,6 +9,7 @@ import gradio as gr
|
|
| 8 |
import librosa
|
| 9 |
import numpy as np
|
| 10 |
import soundfile
|
|
|
|
| 11 |
|
| 12 |
import logging
|
| 13 |
|
|
@@ -78,7 +80,6 @@ model.to(device)
|
|
| 78 |
|
| 79 |
|
| 80 |
def svc_change(argswave, argsspk):
|
| 81 |
-
|
| 82 |
argsppg = "svc_tmp.ppg.npy"
|
| 83 |
os.system(f"python whisper/inference.py -w {argswave} -p {argsppg}")
|
| 84 |
|
|
@@ -132,7 +133,7 @@ def svc_change(argswave, argsspk):
|
|
| 132 |
sub_pit = pit[cut_s:cut_e].unsqueeze(0).to(device)
|
| 133 |
sub_len = torch.LongTensor([cut_e - cut_s]).to(device)
|
| 134 |
sub_har = source[:, :, cut_s *
|
| 135 |
-
|
| 136 |
sub_out = model.inference(sub_ppg, sub_pit, spk, sub_len, sub_har)
|
| 137 |
sub_out = sub_out[0, 0].data.cpu().detach().numpy()
|
| 138 |
|
|
@@ -170,31 +171,80 @@ def svc_main(sid, input_audio):
|
|
| 170 |
audio = librosa.to_mono(audio.transpose(1, 0))
|
| 171 |
if sampling_rate != 16000:
|
| 172 |
audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
|
| 173 |
-
if
|
| 174 |
-
audio = audio[:16000*100]
|
| 175 |
wav_path = "temp.wav"
|
| 176 |
soundfile.write(wav_path, audio, 16000, format="wav")
|
| 177 |
out_audio = svc_change(wav_path, f"configs/singers/singer00{sid}.npy")
|
| 178 |
return "Success", (48000, out_audio)
|
| 179 |
|
| 180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
app = gr.Blocks()
|
| 182 |
with app:
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
app.launch()
|
|
|
|
| 1 |
+
from music.music import get_random_spit, get_albums
|
| 2 |
from vits.models import SynthesizerInfer
|
| 3 |
from omegaconf import OmegaConf
|
| 4 |
import torchcrepe
|
|
|
|
| 9 |
import librosa
|
| 10 |
import numpy as np
|
| 11 |
import soundfile
|
| 12 |
+
import random
|
| 13 |
|
| 14 |
import logging
|
| 15 |
|
|
|
|
| 80 |
|
| 81 |
|
| 82 |
def svc_change(argswave, argsspk):
|
|
|
|
| 83 |
argsppg = "svc_tmp.ppg.npy"
|
| 84 |
os.system(f"python whisper/inference.py -w {argswave} -p {argsppg}")
|
| 85 |
|
|
|
|
| 133 |
sub_pit = pit[cut_s:cut_e].unsqueeze(0).to(device)
|
| 134 |
sub_len = torch.LongTensor([cut_e - cut_s]).to(device)
|
| 135 |
sub_har = source[:, :, cut_s *
|
| 136 |
+
hop_size:cut_e * hop_size].to(device)
|
| 137 |
sub_out = model.inference(sub_ppg, sub_pit, spk, sub_len, sub_har)
|
| 138 |
sub_out = sub_out[0, 0].data.cpu().detach().numpy()
|
| 139 |
|
|
|
|
| 171 |
audio = librosa.to_mono(audio.transpose(1, 0))
|
| 172 |
if sampling_rate != 16000:
|
| 173 |
audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
|
| 174 |
+
if len(audio) > 16000 * 100:
|
| 175 |
+
audio = audio[:16000 * 100]
|
| 176 |
wav_path = "temp.wav"
|
| 177 |
soundfile.write(wav_path, audio, 16000, format="wav")
|
| 178 |
out_audio = svc_change(wav_path, f"configs/singers/singer00{sid}.npy")
|
| 179 |
return "Success", (48000, out_audio)
|
| 180 |
|
| 181 |
|
| 182 |
+
def auto_search(name):
    """Search for a song by keyword and return the path of a trimmed clip.

    One of the matching albums is picked at random, downloaded and trimmed
    by ``get_random_spit``; whatever that helper returns is passed through
    to the caller (the Gradio audio component wired to the search button).

    Args:
        name: Free-text keywords typed by the user.

    Returns:
        The value returned by ``get_random_spit`` for the chosen album, or
        ``None`` when the keywords are blank or the search found nothing
        usable.
    """
    # Nothing to search for: skip the network round-trip entirely.
    if not name or not name.strip():
        return None

    config = {'logfilepath': 'musicdl.log', 'savedir': 'downloaded', 'search_size_per_source': 5, 'proxies': {}}
    albums = get_albums(keywords=name, config=config)
    if not albums:
        # random.choice raises IndexError on an empty sequence, which would
        # surface to the user as an opaque failure.
        return None

    album = random.choice(albums)
    return get_random_spit(album)
|
| 188 |
+
|
| 189 |
+
|
| 190 |
app = gr.Blocks()
|
| 191 |
with app:
|
| 192 |
+
title = "Singer Voice Clone 0.1 Demo"
|
| 193 |
+
desc = """ small singer voice clone Demo App. <br />
|
| 194 |
+
Enter keywords auto search music to clone or upload music yourself
|
| 195 |
+
It's just a simplified demo, you can use more advanced features optimize music quality <br />"""
|
| 196 |
+
tutorial_link = "https://docs.cworld.ai"
|
| 197 |
+
|
| 198 |
+
gr.HTML(
|
| 199 |
+
f"""
|
| 200 |
+
<div style="text-align: center; margin: 0 auto;">
|
| 201 |
+
<div
|
| 202 |
+
style="
|
| 203 |
+
display: inline-flex;
|
| 204 |
+
align-items: center;
|
| 205 |
+
gap: 0.8rem;
|
| 206 |
+
font-size: 1.75rem;
|
| 207 |
+
"
|
| 208 |
+
>
|
| 209 |
+
<svg height="100%" stroke-miterlimit="10" style="fill-rule:nonzero;clip-rule:evenodd;stroke-linecap:round;stroke-linejoin:round;" version="1.1" viewBox="0 0 100 100" width="100%" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
| 210 |
+
<defs/>
|
| 211 |
+
<clipPath id="ArtboardFrame">
|
| 212 |
+
<rect height="100" width="100" x="0" y="0"/>
|
| 213 |
+
</clipPath>
|
| 214 |
+
<g clip-path="url(#ArtboardFrame)" id="SvgjsG2907">
|
| 215 |
+
<g opacity="1">
|
| 216 |
+
<g opacity="1">
|
| 217 |
+
<path d="M49.5597 6.74187C73.4486 6.74187 92.893 26.1863 92.893 50.0752C92.893 73.9641 73.4486 93.4085 49.5597 93.4085C25.6708 93.4085 6.22637 73.9641 6.22637 50.0752C6.22637 26.1863 25.6708 6.74187 49.5597 6.74187M49.5597 0.075206C21.893 0.075206-0.440293 22.4085-0.440293 50.0752C-0.440293 77.7419 21.893 100.075 49.5597 100.075C77.2264 100.075 99.5597 77.7419 99.5597 50.0752C99.5597 22.4085 77.2264 0.075206 49.5597 0.075206L49.5597 0.075206Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
|
| 218 |
+
<path d="M55.1153 77.853L44.0042 77.853L44.0042 72.2974C44.0042 69.1863 46.4486 66.7419 49.5597 66.7419L49.5597 66.7419C52.6708 66.7419 55.1153 69.1863 55.1153 72.2974L55.1153 77.853Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
|
| 219 |
+
<path d="M21.7819 33.4085L32.893 33.4085L32.893 33.4085L32.893 55.6308L32.893 55.6308L21.7819 55.6308L21.7819 55.6308L21.7819 33.4085L21.7819 33.4085Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
|
| 220 |
+
<path d="M66.2264 33.4085L77.3375 33.4085L77.3375 33.4085L77.3375 55.6308L77.3375 55.6308L66.2264 55.6308L66.2264 55.6308L66.2264 33.4085L66.2264 33.4085Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
|
| 221 |
+
</g>
|
| 222 |
+
</g>
|
| 223 |
+
</g>
|
| 224 |
+
</svg>
|
| 225 |
+
<h1 style="font-weight: 900; margin-bottom: 7px;margin-top:5px">
|
| 226 |
+
{title}
|
| 227 |
+
</h1>
|
| 228 |
+
</div>
|
| 229 |
+
<p style="margin-bottom: 10px; font-size: 94%; line-height: 23px;">
|
| 230 |
+
{desc}
|
| 231 |
+
There is the <a href="{tutorial_link}"> tutorial </a>
|
| 232 |
+
</p>
|
| 233 |
+
</div>
|
| 234 |
+
"""
|
| 235 |
+
)
|
| 236 |
+
|
| 237 |
+
sid = gr.Dropdown(label="Singer", choices=["22", "33", "47", "51"], value="47")
|
| 238 |
+
|
| 239 |
+
vc_input2 = gr.Textbox(label="Music Name")
|
| 240 |
+
vc_search = gr.Button("Auto Search", variant="primary")
|
| 241 |
+
vc_input3 = gr.Audio(label="Upload Music Yourself")
|
| 242 |
+
|
| 243 |
+
vc_search.click(auto_search, [vc_input2], [vc_input3])
|
| 244 |
+
|
| 245 |
+
vc_submit = gr.Button("Convert", variant="primary")
|
| 246 |
+
vc_output1 = gr.Textbox(label="Run Status")
|
| 247 |
+
vc_output2 = gr.Audio(label="Result Audio")
|
| 248 |
+
vc_submit.click(svc_main, [sid, vc_input3], [vc_output1, vc_output2])
|
| 249 |
|
| 250 |
app.launch()
|
music/__init__.py
ADDED
|
File without changes
|
music/search.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os.path
|
| 2 |
+
import random
|
| 3 |
+
|
| 4 |
+
from musicdl import musicdl
|
| 5 |
+
from musicdl.modules import Downloader
|
| 6 |
+
from pydub import AudioSegment
|
| 7 |
+
|
| 8 |
+
def is_integer(string):
    """Parse a string of decimal digits into an int, falling back to 0.

    Despite the predicate-style name this returns a number, not a bool.

    Args:
        string: Text to parse.

    Returns:
        The integer value when ``string`` consists solely of decimal
        digits; 0 for anything else (empty text, signs, separators,
        placeholders such as ``'-'``).
    """
    # isdecimal() instead of isdigit(): isdigit() also accepts characters
    # such as superscript digits, which int() refuses with ValueError.
    if string.isdecimal():
        return int(string)
    return 0
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def is_numeric(string):
    """Tell whether ``string`` is an unsigned integer or decimal number.

    Accepted forms are ``"123"`` and ``"123.45"``. Signs, exponents,
    surrounding whitespace, a missing integer part (``".5"``) or a missing
    fraction (``"5."``) are all rejected.

    Args:
        string: Text to check.

    Returns:
        ``True`` when the text matches one of the accepted forms,
        ``False`` otherwise.
    """
    # isdecimal() instead of isdigit(): isdigit() also accepts characters
    # such as superscript digits, which float() cannot convert.
    if string.isdecimal():
        return True
    whole, dot, fraction = string.partition('.')
    # A second '.' would end up inside `fraction` and fail isdecimal().
    return bool(dot) and whole.isdecimal() and fraction.isdecimal()
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def time_to_seconds(time_string):
    """Convert a colon-separated duration into a number of seconds.

    Accepts ``"HH:MM:SS"`` as well as the shorter ``"MM:SS"`` and ``"SS"``
    forms; missing leading fields count as zero. A field that is not made
    of decimal digits (for example the ``'-'`` of a ``"-:-:-"``
    placeholder) also counts as zero.

    Args:
        time_string: Duration text.

    Returns:
        Total seconds as an int.

    Raises:
        ValueError: If the text has more than three colon-separated fields.
    """
    fields = time_string.split(':')
    if len(fields) > 3:
        raise ValueError(f"too many fields in duration: {time_string!r}")

    # Non-numeric fields are treated as 0 rather than rejected.
    values = [int(field) if field.isdecimal() else 0 for field in fields]
    # Left-pad so "MM:SS" and "SS" line up with hours/minutes/seconds.
    hours, minutes, seconds = [0] * (3 - len(values)) + values
    return hours * 3600 + minutes * 60 + seconds
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def size_to_int(size_string):
    """Convert a size label such as ``"3.4MB"`` into a byte count.

    The last two characters are taken as the unit and everything before
    them as the number.

    Args:
        size_string: Size text ending in a two-character unit.

    Returns:
        The size in bytes as an int. A value with an unrecognised unit is
        returned unscaled. When the numeric part cannot be parsed, the
        float ``5.1 * 1024 * 1024`` is returned instead; that is just above
        the 5 MiB limit ``get_albums`` compares against, so such entries
        are filtered out there.
    """
    # Split off the trailing two-character unit from the numeric prefix.
    number_text, unit = size_string[:-2], size_string[-2:]
    if not is_numeric(number_text):
        return 5.1 * 1024 * 1024

    # Bytes per unit; any other unit leaves the number unscaled.
    bytes_per_unit = {
        'KB': 1024,
        'MB': 1024 * 1024,
        'GB': 1024 * 1024 * 1024,
        'TB': 1024 * 1024 * 1024 * 1024,
    }
    # Truncate to a whole number of bytes.
    return int(float(number_text) * bytes_per_unit.get(unit, 1))
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def get_albums(keywords, config):
    """Search the music sources and return the hits suitable for cloning.

    A hit is kept only if its song name has not been accepted already, its
    extension is ``mp3``, its reported file size is at most 5 MiB and its
    reported duration is at most 300 seconds.

    Args:
        keywords: Search text handed to the musicdl client.
        config: Configuration dict handed to ``musicdl.musicdl``.

    Returns:
        A list of the accepted hit dicts, in the order they were found.
    """
    sources = [
        'kugou', 'kuwo', 'qqmusic', 'qianqian', 'fivesing',
        'netease', 'migu', 'joox', 'yiting',
    ]
    searcher = musicdl.musicdl(config=config)
    hits_by_source = searcher.search(keywords, sources)

    seen_names = set()
    accepted = []
    for hits in hits_by_source.values():
        for hit in hits:
            # Checks short-circuit in this order, so the size and duration
            # parsers only run for not-yet-seen mp3 entries.
            rejected = (
                hit['songname'] in seen_names
                or hit['ext'] != 'mp3'
                or size_to_int(hit['filesize']) > 5 * 1024 * 1024
                or time_to_seconds(hit['duration']) > 300
            )
            if rejected:
                continue
            seen_names.add(hit['songname'])
            accepted.append(hit)
    return accepted
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def get_random_spit(songinfo):
    """Download a song and trim the saved file to its middle 30 seconds.

    The file is written to ``<savedir>/<savename>.<ext>`` (taken from
    ``songinfo``), loaded as MP3, cut to the 30-second window centred on
    the midpoint of the track, and exported back over the same path.
    Tracks shorter than 30 seconds are kept whole.

    Args:
        songinfo: Song description dict as produced by the search; must
            provide the ``savedir``, ``savename`` and ``ext`` keys.

    Returns:
        The path of the trimmed MP3 file.
    """
    d = Downloader(songinfo)
    d.start()
    save_path = os.path.join(songinfo["savedir"], f"{songinfo['savename']}.{songinfo['ext']}")
    song = AudioSegment.from_mp3(save_path)

    # pydub measures length and slices in milliseconds.
    length = len(song)
    half_window = 15 * 1000
    midpoint = length // 2
    # Clamp the window to the track so short songs are not sliced out of range.
    left_idx = max(midpoint - half_window, 0)
    right_idx = min(midpoint + half_window, length)

    middle_30s = song[left_idx:right_idx]
    middle_30s.export(save_path, format="mp3")
    # Callers use the returned path to load the clip.
    return save_path
|
requirements.txt
CHANGED
|
@@ -13,3 +13,5 @@ torchcrepe
|
|
| 13 |
transformers
|
| 14 |
tqdm
|
| 15 |
librosa
|
|
|
|
|
|
|
|
|
| 13 |
transformers
|
| 14 |
tqdm
|
| 15 |
librosa
|
| 16 |
+
pydub
|
| 17 |
+
musicdl
|