# --- Dependencies -------------------------------------------------------
# Standard library
import io
import json
import logging
import os
import shutil
import tempfile
from pathlib import Path

# Third-party
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import librosa
import torchcrepe
import parselmouth
from parselmouth.praat import call
import IPython.display as ipd

# Project-local
import utils
from utils.hparams import hparams
from preprocessing.data_gen_utils import get_pitch_parselmouth, get_pitch_crepe
from infer import *
from infer_tools.infer_tool import *

# Scratch directory for the lifetime of the process.
# NOTE(review): nothing visible below references temp_dir — uploads use
# NamedTemporaryFile(delete=False) instead; kept in case other code needs it.
temp_dir = tempfile.TemporaryDirectory()
|
|
def render_audio(ckpt_temp_file, config_temp_file, audio_temp_file, title, title2, title3, choice, noise_step, use_mel_as_base):
    """Run one DiffSVC inference pass and play the result in the Streamlit UI.

    Parameters
    ----------
    ckpt_temp_file : str
        Path to the uploaded model checkpoint (.ckpt).
    config_temp_file : str
        Path to the uploaded model config (.yaml).
    audio_temp_file : str
        Path to the uploaded input audio (.wav).
    title : int-like
        Key transpose in semitones (coerced to int).
    title2 : int-like
        PNDM speedup/acceleration factor (coerced to int).
    title3 : float-like
        Formant ("gender") shift ratio; 1.0 disables the shift.
    choice : bool
        True to extract pitch with CREPE instead of parselmouth.
    noise_step : int
        Diffusion noise step, used when `use_mel_as_base` is enabled.
    use_mel_as_base : bool
        Use the input audio's ground-truth mel spectrogram as the base.
    """
    logging.getLogger('numba').setLevel(logging.WARNING)  # silence numba's INFO spam
    key = int(title)
    accel = int(title2)

    # Build the SVC model from the uploaded checkpoint/config.
    project_name = "Unnamed"
    hubert_gpu = True
    svc_model = Svc(project_name, config_temp_file, hubert_gpu, ckpt_temp_file)
    print('model loaded')

    wav_fn = audio_temp_file
    wav_gen = 'que.wav'  # fixed output path; also the input to the formant shifter below

    with st.spinner("Rendering Audio..."):
        f0_tst, f0_pred, audio = run_clip(
            svc_model, file_path=wav_fn, key=key, acc=accel, use_crepe=choice,
            use_pe=False, thre=0.05, use_gt_mel=use_mel_as_base,
            add_noise_step=noise_step, project_name=project_name, out_path=wav_gen)

    # Optional formant ("gender") shift via Praat's "Change gender" command.
    # 75/500 are the pitch-floor/ceiling (Hz) for Praat's pitch analysis;
    # 0 and 1 leave new pitch median and pitch-range factor untouched.
    formant_shift_ratio = float(title3)
    if formant_shift_ratio != 1.0:
        sound = parselmouth.Sound(wav_gen)
        manipulated_sound = call(sound, "Change gender", 75, 500, formant_shift_ratio, 0, 1, 1)
        manipulated_sound.save("que.wav", "WAV")
        print("Gender correct!")

    # Play the finished render in the app.
    st.audio(wav_gen)
|
|
| |
# Page chrome: browser-tab title/icon, sidebar starts expanded.
st.set_page_config(
    page_title="DiffSVC Render",
    page_icon="🧊",
    initial_sidebar_state="expanded",
)

# Two-column layout — left column hosts the uploads, right column the settings.
col1, col2 = st.columns(2)
col1.title('DIFF-SVC Render')
col2.title('Settings')
# Checkpoint upload: persist the uploaded bytes to a temp .ckpt file so the
# model loader can read it from disk. delete=False keeps the file alive after
# the handle closes (Streamlit reruns this script on every interaction, so the
# path must outlive the `with` block; files are never cleaned up by design).
ckpt = col1.file_uploader("Choose your CKPT", type='ckpt')
if ckpt is not None:
    with tempfile.NamedTemporaryFile(mode="wb", suffix='.ckpt', delete=False) as temp:
        temp.write(ckpt.getvalue())
        ckpt_temp_file = temp.name
    print(ckpt_temp_file)
    # Success message shown only once a file has actually been saved
    # (replaces the original's `"ckpt_temp_file" in locals()` probe).
    st.success("File saved to: {}".format(ckpt_temp_file))
| |
# Config upload: same pattern as the checkpoint — spill the uploaded YAML to a
# persistent temp file so downstream code can open it by path.
config = col1.file_uploader("Choose your config", type='yaml')
if config is not None:
    with tempfile.NamedTemporaryFile(mode="wb", suffix='.yaml', delete=False) as temp:
        temp.write(config.getvalue())
        config_temp_file = temp.name
    print(config_temp_file)
    # Success message shown only once a file has actually been saved
    # (replaces the original's `"config_temp_file" in locals()` probe).
    st.success("File saved to: {}".format(config_temp_file))
|
|
# Input-audio upload: spill the uploaded WAV to a persistent temp file so
# librosa / the inference pipeline can read it by path.
audio = col1.file_uploader("Choose your audio", type=["wav"])
if audio is not None:
    with tempfile.NamedTemporaryFile(mode="wb", suffix='.wav', delete=False) as temp:
        temp.write(audio.getvalue())
        audio_temp_file = temp.name
    print(audio_temp_file)
    # Success message shown only once a file has actually been saved
    # (replaces the original's `"audio_temp_file" in locals()` probe).
    st.success("File saved to: {}".format(audio_temp_file))
|
|
# --- Render settings (right column) -------------------------------------
title = col2.number_input("Key", value=0, step=1, min_value=-12, max_value=12)
title2 = col2.number_input("Speedup", value=20, step=1, min_value=1, max_value=100)
title3 = col2.number_input("Gender Flag", value=1.00, step=0.01, min_value=0.70, max_value=1.30, help='Default is 1.0, it works by decimals, setting it at 1.05 will make your render sound more female-ish, setting it to 0.95 will make it sound more masculine, for example.')
choice = col2.checkbox('Use Crepe', value=False)
use_mel_as_base = col2.checkbox('Use Mel as Base', value=False, help='gt mel: Enabling this will use the input audio as a base and will unlock a new parameter, do not use this if you dont know what it does.')

# Noise step only matters (and is only shown) in gt-mel mode; the original
# assigned the 600 default in three places — once is enough.
noise_step = 600
if use_mel_as_base:
    noise_step = col2.number_input('Noise Step', value=600, min_value=1, max_value=1000, step=50)

# Access gate: the expected password comes from the environment.
password = col2.text_input("Enter password", help='Password can be got by agreeing to TOS and getting allowed after validation, you can go to the TOS here:')
correct_password = os.environ.get("gatepassword")
| |
# Kick off rendering when the button is pressed.
# BUG FIXES (review): the original (1) had the password check inverted — a
# *correct* password produced an error (one that leaked the password string)
# while any wrong password triggered rendering; (2) passed noise_step and
# choice in swapped positions relative to render_audio's
# (..., choice, noise_step, ...) signature, so the CREPE flag and diffusion
# step ended up in each other's parameters; (3) raised NameError when the
# button was pressed before all three files were uploaded.
if st.button("Render audio"):
    if password != correct_password:
        st.error("Incorrect password")
    elif not all(n in globals() for n in ("ckpt_temp_file", "config_temp_file", "audio_temp_file")):
        st.error("Please upload a checkpoint, config, and audio file first.")
    else:
        render_audio(ckpt_temp_file, config_temp_file, audio_temp_file, title, title2, title3, choice, noise_step, use_mel_as_base)
|
|