|
|
|
|
|
|
|
|
|
|
|
import os
import shutil
import sys
import tempfile
import urllib.parse
import urllib.request
import zipfile
from argparse import Namespace

from cog import BasePredictor, Input, Path as CogPath

# The pipeline code lives in ./src; it must be on sys.path before `main` is imported.
sys.path.insert(0, os.path.abspath("src"))

import main as m
|
|
|
|
|
|
|
|
def download_online_model(url, dir_name):
    """Download a zipped voice model and unpack it into the models directory.

    The archive is flattened: every file it contains is written directly into
    ``<m.rvc_models_dir>/<dir_name>`` under its base name, whatever folder
    structure the zip had. If that folder already exists, nothing is
    downloaded.

    Args:
        url: Location of the zip archive. When the URL contains
            ``pixeldrain.com`` it is rewritten to the pixeldrain API download
            endpoint, using the last path segment as the file id.
        dir_name: Name of the model folder to create under
            ``m.rvc_models_dir``.

    Raises:
        Whatever ``urllib.request.urlretrieve`` or ``zipfile`` raise on a
        failed download or a corrupt archive. In that case a partially
        extracted model folder is removed so a later call can retry.
    """
    print(f"[~] Downloading voice model with name {dir_name}...")
    zip_name = url.split("/")[-1]
    extraction_folder = os.path.join(m.rvc_models_dir, dir_name)
    if os.path.exists(extraction_folder):
        print(f"Voice model directory {dir_name} already exists! Skipping download.")
        return

    if "pixeldrain.com" in url:
        url = f"https://pixeldrain.com/api/file/{zip_name}"

    # Download into a throw-away directory so the archive never lingers in the
    # working directory and is cleaned up even if extraction fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        zip_path = os.path.join(tmp_dir, "model.zip")
        urllib.request.urlretrieve(url, zip_path)

        print("[~] Extracting zip...")
        try:
            with zipfile.ZipFile(zip_path, "r") as zip_ref:
                file_members = [
                    member for member in zip_ref.infolist() if not member.is_dir()
                ]
                # Only create the model folder when there is something to put
                # in it, so an empty archive does not look like a valid model.
                if file_members:
                    os.makedirs(extraction_folder, exist_ok=True)
                for member in file_members:
                    # basename() flattens the archive and also keeps entries
                    # such as "../x" from escaping the model folder.
                    target_path = os.path.join(
                        extraction_folder, os.path.basename(member.filename)
                    )
                    with zip_ref.open(member) as source, open(
                        target_path, "wb"
                    ) as target:
                        shutil.copyfileobj(source, target)
        except Exception:
            # A half-extracted folder would make the "already exists" guard
            # above skip every future download, so remove it before re-raising.
            shutil.rmtree(extraction_folder, ignore_errors=True)
            raise

    print(f"[+] {dir_name} Model successfully downloaded!")
|
|
|
|
|
|
|
|
class Predictor(BasePredictor):
    """Cog predictor that produces an AI voice cover of an uploaded song."""

    def setup(self) -> None:
        """Cog start-up hook; intentionally a no-op.

        Nothing is loaded ahead of time: the voice model is looked up by name
        inside ``predict`` on every call.
        """
        pass

    def predict(
        self,
        song_input: CogPath = Input(
            description="Upload your audio file here.",
            default=None,
        ),
        rvc_model: str = Input(
            description="RVC model for a specific voice. If using a custom model, this should match the name of the downloaded model. If a 'custom_rvc_model_download_url' is provided, this will be automatically set to the name of the downloaded model.",
            default="Squidward",
            # NOTE(review): these values are used verbatim as folder names under
            # m.rvc_models_dir, so spellings such as "Voilin" must stay in sync
            # with the folders on disk.
            choices=[
                "Squidward",
                "MrKrabs",
                "Plankton",
                "Drake",
                "Vader",
                "Trump",
                "Biden",
                "Obama",
                "Guitar",
                "Voilin",
                "CUSTOM",
                "SamA",
            ],
        ),
        custom_rvc_model_download_url: str = Input(
            description="URL to download a custom RVC model. If provided, the model will be downloaded (if it doesn't already exist) and used for prediction, regardless of the 'rvc_model' value.",
            default=None,
        ),
        pitch_change: str = Input(
            description="Adjust pitch of AI vocals. Options: `no-change`, `male-to-female`, `female-to-male`.",
            default="no-change",
            choices=["no-change", "male-to-female", "female-to-male"],
        ),
        index_rate: float = Input(
            description="Control how much of the AI's accent to leave in the vocals.",
            default=0.5,
            ge=0,
            le=1,
        ),
        filter_radius: int = Input(
            description="If >=3: apply median filtering median filtering to the harvested pitch results.",
            default=3,
            ge=0,
            le=7,
        ),
        rms_mix_rate: float = Input(
            description="Control how much to use the original vocal's loudness (0) or a fixed loudness (1).",
            default=0.25,
            ge=0,
            le=1,
        ),
        pitch_detection_algorithm: str = Input(
            description="Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals).",
            default="rmvpe",
            choices=["rmvpe", "mangio-crepe"],
        ),
        crepe_hop_length: int = Input(
            description="When `pitch_detection_algo` is set to `mangio-crepe`, this controls how often it checks for pitch changes in milliseconds. Lower values lead to longer conversions and higher risk of voice cracks, but better pitch accuracy.",
            default=128,
        ),
        protect: float = Input(
            description="Control how much of the original vocals' breath and voiceless consonants to leave in the AI vocals. Set 0.5 to disable.",
            default=0.33,
            ge=0,
            le=0.5,
        ),
        main_vocals_volume_change: float = Input(
            description="Control volume of main AI vocals. Use -3 to decrease the volume by 3 decibels, or 3 to increase the volume by 3 decibels.",
            default=0,
        ),
        backup_vocals_volume_change: float = Input(
            description="Control volume of backup AI vocals.",
            default=0,
        ),
        instrumental_volume_change: float = Input(
            description="Control volume of the background music/instrumentals.",
            default=0,
        ),
        pitch_change_all: float = Input(
            description="Change pitch/key of background music, backup vocals and AI vocals in semitones. Reduces sound quality slightly.",
            default=0,
        ),
        reverb_size: float = Input(
            description="The larger the room, the longer the reverb time.",
            default=0.15,
            ge=0,
            le=1,
        ),
        reverb_wetness: float = Input(
            description="Level of AI vocals with reverb.",
            default=0.2,
            ge=0,
            le=1,
        ),
        reverb_dryness: float = Input(
            description="Level of AI vocals without reverb.",
            default=0.8,
            ge=0,
            le=1,
        ),
        reverb_damping: float = Input(
            description="Absorption of high frequencies in the reverb.",
            default=0.7,
            ge=0,
            le=1,
        ),
        output_format: str = Input(
            description="wav for best quality and large file size, mp3 for decent quality and small file size.",
            default="mp3",
            choices=["mp3", "wav"],
        ),
    ) -> CogPath:
        """
        Runs a single prediction on the model.

        Required Parameters:
            song_input (CogPath): Upload your audio file here.
            rvc_model (str): RVC model for a specific voice. Default is "Squidward". If a 'custom_rvc_model_download_url' is provided, this will be automatically set to the name of the downloaded model.
            pitch_change (str): One of "no-change", "male-to-female" or "female-to-male". Passed to the pipeline as 0, 1 and -1 respectively. Defaults to "no-change".

        Optional Parameters:
            custom_rvc_model_download_url (str): URL to download a custom RVC model. If provided, the model will be downloaded (if it doesn't already exist) and used for prediction, regardless of the 'rvc_model' value. Defaults to None.
            index_rate (float): Control how much of the AI's accent to leave in the vocals. 0 <= INDEX_RATE <= 1. Defaults to 0.5.
            filter_radius (int): If >=3: apply median filtering to the harvested pitch results. 0 <= FILTER_RADIUS <= 7. Defaults to 3.
            rms_mix_rate (float): Control how much to use the original vocal's loudness (0) or a fixed loudness (1). 0 <= RMS_MIX_RATE <= 1. Defaults to 0.25.
            pitch_detection_algorithm (str): Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals). Defaults to "rmvpe".
            crepe_hop_length (int): Controls how often it checks for pitch changes in milliseconds when using mangio-crepe algo specifically. Lower values lead to longer conversions and higher risk of voice cracks, but better pitch accuracy. Defaults to 128.
            protect (float): Control how much of the original vocals' breath and voiceless consonants to leave in the AI vocals. Set 0.5 to disable. 0 <= PROTECT <= 0.5. Defaults to 0.33.
            main_vocals_volume_change (float): Control volume of main AI vocals. Use -3 to decrease the volume by 3 decibels, or 3 to increase the volume by 3 decibels. Defaults to 0.
            backup_vocals_volume_change (float): Control volume of backup AI vocals. Defaults to 0.
            instrumental_volume_change (float): Control volume of the background music/instrumentals. Defaults to 0.
            pitch_change_all (float): Change pitch/key of background music, backup vocals and AI vocals in semitones. Reduces sound quality slightly. Defaults to 0.
            reverb_size (float): The larger the room, the longer the reverb time. 0 <= REVERB_SIZE <= 1. Defaults to 0.15.
            reverb_wetness (float): Level of AI vocals with reverb. 0 <= REVERB_WETNESS <= 1. Defaults to 0.2.
            reverb_dryness (float): Level of AI vocals without reverb. 0 <= REVERB_DRYNESS <= 1. Defaults to 0.8.
            reverb_damping (float): Absorption of high frequencies in the reverb. 0 <= REVERB_DAMPING <= 1. Defaults to 0.7.
            output_format (str): wav for best quality and large file size, mp3 for decent quality and small file size. Defaults to "mp3".

        Returns:
            CogPath: The output path of the generated audio file.

        Raises:
            ValueError: If no audio file was supplied, or if no usable model
                name can be derived from 'custom_rvc_model_download_url'.
            FileNotFoundError: If the selected model folder does not exist
                under the models directory.
        """
        # The input is declared with default=None, so it can arrive empty;
        # str(None) would otherwise be handed to the pipeline as a path.
        if song_input is None:
            raise ValueError("No audio provided: 'song_input' is required.")

        if custom_rvc_model_download_url:
            print(
                "[!] Since URL was provided, we will try to download the model and use it (even if `rvc_model` is not set to 'CUSTOM')."
            )
            decoded_segment = urllib.parse.unquote(
                custom_rvc_model_download_url.split("/")[-1]
            )
            # Decoding can reintroduce "/" (from %2F) or "..": keep only the
            # final component so the model folder stays inside the models dir.
            custom_rvc_model_download_name = os.path.splitext(
                os.path.basename(decoded_segment)
            )[0]
            if custom_rvc_model_download_name in ("", ".", ".."):
                raise ValueError(
                    f"Could not derive a model name from URL '{custom_rvc_model_download_url}'."
                )
            print(
                f"[!] The model will be downloaded as '{custom_rvc_model_download_name}'."
            )
            download_online_model(
                url=custom_rvc_model_download_url,
                dir_name=custom_rvc_model_download_name,
            )
            rvc_model = custom_rvc_model_download_name

        # Translate the user-facing choice into the numeric shift the pipeline
        # expects; anything unrecognised keeps the original fallback of -1.
        pitch_shift = {"no-change": 0, "male-to-female": 1}.get(pitch_change, -1)

        model_dir = os.path.join(m.rvc_models_dir, rvc_model)
        if not os.path.exists(model_dir):
            raise FileNotFoundError(f"The folder {model_dir} does not exist.")

        cover_path = m.song_cover_pipeline(
            str(song_input),
            rvc_model,
            pitch_shift,
            False,  # keep_files
            main_gain=main_vocals_volume_change,
            backup_gain=backup_vocals_volume_change,
            inst_gain=instrumental_volume_change,
            index_rate=index_rate,
            filter_radius=filter_radius,
            rms_mix_rate=rms_mix_rate,
            f0_method=pitch_detection_algorithm,
            crepe_hop_length=crepe_hop_length,
            protect=protect,
            pitch_change_all=pitch_change_all,
            reverb_rm_size=reverb_size,
            reverb_wet=reverb_wetness,
            reverb_dry=reverb_dryness,
            reverb_damping=reverb_damping,
            output_format=output_format,
        )
        print(f"[+] Cover generated at {cover_path}")

        return CogPath(cover_path)
|
|
|