Delete main
Browse files. This view is limited to 50 files because it contains too many changes. See the raw diff.
- main/app/app.py +0 -524
- main/app/core/csrt.py +0 -72
- main/app/core/downloads.py +0 -208
- main/app/core/editing.py +0 -92
- main/app/core/f0_extract.py +0 -54
- main/app/core/inference.py +0 -441
- main/app/core/model_utils.py +0 -164
- main/app/core/presets.py +0 -166
- main/app/core/process.py +0 -135
- main/app/core/realtime.py +0 -174
- main/app/core/realtime_client.py +0 -114
- main/app/core/restart.py +0 -48
- main/app/core/separate.py +0 -95
- main/app/core/training.py +0 -265
- main/app/core/tts.py +0 -100
- main/app/core/ui.py +0 -362
- main/app/core/utils.py +0 -61
- main/app/parser.py +0 -369
- main/app/run_tensorboard.py +0 -32
- main/app/tabs/downloads/downloads.py +0 -112
- main/app/tabs/editing/child/audio_effects.py +0 -393
- main/app/tabs/editing/child/quirk.py +0 -48
- main/app/tabs/editing/editing.py +0 -20
- main/app/tabs/extra/child/convert_model.py +0 -31
- main/app/tabs/extra/child/create_srt.py +0 -56
- main/app/tabs/extra/child/f0_extract.py +0 -51
- main/app/tabs/extra/child/fushion.py +0 -45
- main/app/tabs/extra/child/read_model.py +0 -29
- main/app/tabs/extra/child/settings.py +0 -61
- main/app/tabs/extra/extra.py +0 -40
- main/app/tabs/inference/child/convert.py +0 -328
- main/app/tabs/inference/child/convert_tts.py +0 -280
- main/app/tabs/inference/child/convert_with_whisper.py +0 -164
- main/app/tabs/inference/child/separate.py +0 -263
- main/app/tabs/inference/inference.py +0 -30
- main/app/tabs/realtime/realtime.py +0 -226
- main/app/tabs/realtime/realtime_client.py +0 -210
- main/app/tabs/training/child/create_dataset.py +0 -282
- main/app/tabs/training/child/create_reference.py +0 -97
- main/app/tabs/training/child/training.py +0 -259
- main/app/tabs/training/training.py +0 -25
- main/app/variables.py +0 -117
- main/configs/config.json +0 -622
- main/configs/config.py +0 -131
- main/configs/rpc.py +0 -78
- main/configs/v1/32000.json +0 -46
- main/configs/v1/40000.json +0 -46
- main/configs/v1/48000.json +0 -46
- main/configs/v2/32000.json +0 -42
- main/configs/v2/40000.json +0 -42
main/app/app.py
DELETED
|
@@ -1,524 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import io
|
| 3 |
-
import ssl
|
| 4 |
-
import sys
|
| 5 |
-
import time
|
| 6 |
-
import codecs
|
| 7 |
-
import logging
|
| 8 |
-
import warnings
|
| 9 |
-
|
| 10 |
-
import gradio as gr
|
| 11 |
-
|
| 12 |
-
sys.path.append(os.getcwd())
|
| 13 |
-
start_time = time.time()
|
| 14 |
-
|
| 15 |
-
from main.app.tabs.extra.extra import extra_tab
|
| 16 |
-
from main.app.tabs.editing.editing import editing_tab
|
| 17 |
-
from main.app.tabs.training.training import training_tab
|
| 18 |
-
from main.app.tabs.downloads.downloads import download_tab
|
| 19 |
-
from main.app.tabs.inference.inference import inference_tab
|
| 20 |
-
from main.configs.rpc import connect_discord_ipc, send_discord_rpc
|
| 21 |
-
from main.app.variables import logger, config, translations, theme, font, configs, language, allow_disk
|
| 22 |
-
|
| 23 |
-
# NOTE(security): this swaps the stdlib's default HTTPS context factory for the
# *unverified* one, so code relying on that default no longer validates TLS
# certificates for the whole process. Kept as-is because callers may depend on
# it -- review before exposing this app to untrusted networks.
ssl._create_default_https_context = ssl._create_unverified_context

# Silence every Python warning for the lifetime of the process.
warnings.filterwarnings("ignore")

# Only let ERROR (and above) through from the chatty HTTP / web-server loggers.
for _noisy_logger in ("httpx", "gradio", "uvicorn", "httpcore", "urllib3"):
    logging.getLogger(_noisy_logger).setLevel(logging.ERROR)
|
| 28 |
-
|
| 29 |
-
# Browser-side JavaScript for the realtime client. The snippet is an arrow
# function that installs three helpers on `window`:
#   * getAudioDevices()          - asks for microphone permission and returns
#                                  {"inputs": {label: id}, "outputs": {label: id}}
#   * StreamAudioRealtime(...)   - opens the microphone, wires two AudioWorklets
#                                  (capture + ring-buffer playback) and streams
#                                  audio chunks over a WebSocket at /api/ws-audio
#   * StopAudioStream()          - tears all of the above down again
# The `__PLACEHOLDER__` tokens are substituted with translated strings by the
# chained `.replace()` calls after the literal.
#
# Fix: status updates that must not pop an alert used the Python-style call
# `setStatus(msg, use_alert=false)`. In JavaScript that is an assignment
# expression (implicit global in sloppy mode, ReferenceError in strict mode),
# so the flag is now passed positionally as `false`.
#
# NOTE(review): translations are spliced into JS string literals without
# escaping; a translation containing a double quote would break the script.
js_code = """
() => {
    window._activeStream = null;
    window._audioCtx = null;
    window._workletNode = null;
    window._playbackNode = null;
    window._ws = null;

    function setStatus(msg, use_alert = true) {
        const realtimeStatus = document.querySelector("#realtime-status-info h2.output-class");
        if (use_alert) alert(msg);

        if (realtimeStatus) {
            realtimeStatus.innerText = msg;
            realtimeStatus.style.whiteSpace = "nowrap";
            realtimeStatus.style.textAlign = "center";
        }
    }

    async function addModuleFromString(ctx, codeStr) {
        const blob = new Blob([codeStr], {type: 'application/javascript'});
        const url = URL.createObjectURL(blob);

        await ctx.audioWorklet.addModule(url);
        URL.revokeObjectURL(url);
    };

    function createOutputRoute(audioCtx, playbackNode, sinkId, gainValue = 1.0) {
        const dest = audioCtx.createMediaStreamDestination();
        const gainNode = audioCtx.createGain();
        gainNode.gain.value = gainValue;

        playbackNode.connect(gainNode);
        gainNode.connect(dest);

        const el = document.createElement('audio');
        el.autoplay = true;
        el.srcObject = dest.stream;
        el.style.display = 'none';
        document.body.appendChild(el);

        if (el.setSinkId) el.setSinkId(sinkId).catch(err => console.error(err));
        return { dest, gainNode, el };
    }

    const inputWorkletSource = `
        class InputProcessor extends AudioWorkletProcessor {
            constructor() {
                super();
                this.buffer = new Float32Array(0);
                this.block_frame = 128;
                this.port.onmessage = (e) => {
                    if (e.data && e.data.block_frame) this.block_frame = e.data.block_frame;
                };
            }

            process(inputs) {
                const input = inputs[0];
                if (!input || !input[0]) return true;
                const frame = input[0];

                const newBuf = new Float32Array(this.buffer.length + frame.length);
                newBuf.set(this.buffer, 0);
                newBuf.set(frame, this.buffer.length);
                this.buffer = newBuf;

                while (this.buffer.length >= this.block_frame) {
                    const chunk = this.buffer.slice(0, this.block_frame);

                    this.port.postMessage({chunk}, [chunk.buffer]);
                    this.buffer = this.buffer.slice(this.block_frame);
                }

                return true;
            }
        }
        registerProcessor('input-processor', InputProcessor);
    `;

    const playbackWorkletSource = `
        class PlaybackProcessor extends AudioWorkletProcessor {
            constructor(options) {
                super(options);
                const bufferSize = options.processorOptions && options.processorOptions.bufferSize ? options.processorOptions.bufferSize: 98304;
                this.buffer = new Float32Array(bufferSize);
                this.bufferCapacity = bufferSize;
                this.writePointer = 0;
                this.readPointer = 0;
                this.availableSamples = 0;
                this.port.onmessage = (e) => {
                    if (e.data && e.data.chunk) {
                        const chunk = new Float32Array(e.data.chunk);
                        const chunkSize = chunk.length;

                        if (this.availableSamples + chunkSize > this.bufferCapacity) return;

                        for (let i = 0; i < chunkSize; i++) {
                            this.buffer[this.writePointer] = chunk[i];
                            this.writePointer = (this.writePointer + 1) % this.bufferCapacity;
                        }

                        this.availableSamples += chunkSize;
                    }
                };
            }

            process(inputs, outputs) {
                const output = outputs[0];
                if (!output || !output[0]) return true;

                const frame = output[0];
                const frameSize = frame.length;

                if (this.availableSamples >= frameSize) {
                    for (let i = 0; i < frameSize; i++) {
                        frame[i] = this.buffer[this.readPointer];
                        this.readPointer = (this.readPointer + 1) % this.bufferCapacity;
                    }
                    this.availableSamples -= frameSize;
                } else {
                    frame.fill(0);
                }

                if (output.length > 1) output[1].set(output[0]);
                return true;
            }
        }
        registerProcessor('playback-processor', PlaybackProcessor);
    `;

    window.getAudioDevices = async function() {
        if (!navigator.mediaDevices) {
            setStatus("__MEDIA_DEVICES__");
            return {"inputs": {}, "outputs": {}};
        }

        try {
            await navigator.mediaDevices.getUserMedia({ audio: true });
        } catch (err) {
            console.error(err);
            setStatus("__MIC_INACCESSIBLE__")

            return {"inputs": {}, "outputs": {}};
        }

        const devices = await navigator.mediaDevices.enumerateDevices();
        const inputs = {};
        const outputs = {};

        for (const device of devices) {
            if (device.kind === "audioinput") {
                inputs[device.label] = device.deviceId
            } else if (device.kind === "audiooutput") {
                outputs[device.label] = device.deviceId
            }
        }

        if (!Object.keys(inputs).length && !Object.keys(outputs).length) return {"inputs": {}, "outputs": {}};
        return {"inputs": inputs, "outputs": outputs};
    };

    window.StreamAudioRealtime = async function(
        monitor,
        vad_enabled,
        input_audio_device,
        output_audio_device,
        monitor_output_device,
        input_audio_gain,
        output_audio_gain,
        monitor_audio_gain,
        chunk_size,
        pitch,
        model_pth,
        model_index,
        index_strength,
        onnx_f0_mode,
        f0_method,
        hop_length,
        embed_mode,
        embedders,
        custom_embedders,
        f0_autotune,
        proposal_pitch,
        f0_autotune_strength,
        proposal_pitch_threshold,
        rms_mix_rate,
        protect,
        filter_radius,
        silent_threshold,
        extra_convert_size,
        cross_fade_overlap_size,
        vad_sensitivity,
        vad_frame_ms,
        clean_audio,
        clean_strength
    ) {
        const SampleRate = 48000;
        const ReadChunkSize = Math.round(chunk_size * SampleRate / 1000 / 128);
        const block_frame = parseInt(ReadChunkSize) * 128;
        const ButtonState = { start_button: true, stop_button: false };
        const devices = await window.getAudioDevices();

        input_audio_device = devices["inputs"][input_audio_device];
        output_audio_device = devices["outputs"][output_audio_device];
        if (monitor && devices["outputs"][monitor_output_device]) monitor_output_device = devices["outputs"][monitor_output_device];

        try {
            if (!input_audio_device || !output_audio_device) {
                setStatus("__PROVIDE_AUDIO_DEVICE__");
                return ButtonState;
            }

            if (monitor && !monitor_output_device) {
                setStatus("__PROVIDE_MONITOR_DEVICE__");
                return ButtonState;
            }

            if (!model_pth) {
                setStatus("__PROVIDE_MODEL__")
                return ButtonState;
            }

            setStatus("__START_REALTIME__", false)

            const stream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    deviceId: { exact: input_audio_device },
                    channelCount: 1,
                    sampleRate: SampleRate,
                    echoCancellation: false,
                    noiseSuppression: false,
                    autoGainControl: false
                }
            });

            window._activeStream = stream;
            window._audioCtx = new AudioContext({ sampleRate: SampleRate, latencyHint: "interactive" });

            await addModuleFromString(window._audioCtx, inputWorkletSource);
            await addModuleFromString(window._audioCtx, playbackWorkletSource);

            const src = window._audioCtx.createMediaStreamSource(stream);
            const inputNode = new AudioWorkletNode(window._audioCtx, 'input-processor');
            const playbackNode = new AudioWorkletNode(window._audioCtx, 'playback-processor', {
                processorOptions: {
                    bufferSize: block_frame * 2
                }
            });

            inputNode.port.postMessage({ block_frame: block_frame });
            src.connect(inputNode);

            createOutputRoute(window._audioCtx, playbackNode, output_audio_device, output_audio_gain / 100);
            if (monitor && monitor_output_device) createOutputRoute(window._audioCtx, playbackNode, monitor_output_device, monitor_audio_gain / 100);

            const protocol = (location.protocol === "https:") ? "wss:" : "ws:";
            const wsUrl = protocol + '//' + location.hostname + `:${location.port}` + '/api/ws-audio';
            const ws = new WebSocket(wsUrl);

            ButtonState.start_button = false;
            ButtonState.stop_button = true;

            ws.binaryType = "arraybuffer";
            window._ws = ws;

            ws.onopen = () => {
                console.log("__WS_CONNECTED__")

                ws.send(
                    JSON.stringify({
                        type: 'init',
                        chunk_size: ReadChunkSize,
                        embedders: embedders,
                        model_pth: model_pth,
                        custom_embedders: custom_embedders,
                        cross_fade_overlap_size: cross_fade_overlap_size,
                        extra_convert_size: extra_convert_size,
                        model_index: model_index,
                        f0_method: f0_method,
                        f0_onnx: onnx_f0_mode,
                        embedders_mode: embed_mode,
                        hop_length: hop_length,
                        silent_threshold: silent_threshold,
                        vad_enabled: vad_enabled,
                        vad_sensitivity: vad_sensitivity,
                        vad_frame_ms: vad_frame_ms,
                        clean_audio: clean_audio,
                        clean_strength: clean_strength,
                        f0_up_key: pitch,
                        index_rate: index_strength,
                        protect: protect,
                        filter_radius: filter_radius,
                        rms_mix_rate: rms_mix_rate,
                        f0_autotune: f0_autotune,
                        f0_autotune_strength: f0_autotune_strength,
                        proposal_pitch: proposal_pitch,
                        proposal_pitch_threshold: proposal_pitch_threshold,
                        input_audio_gain: input_audio_gain
                    })
                );
            };

            inputNode.port.onmessage = (e) => {
                const chunk = e.data && e.data.chunk;

                if (!chunk) return;
                if (ws.readyState === WebSocket.OPEN) ws.send(chunk);
            };

            ws.onmessage = (ev) => {
                if (typeof ev.data === 'string') {
                    const msg = JSON.parse(ev.data);

                    if (msg.type === 'latency') setStatus(`__LATENCY__: ${msg.value.toFixed(1)} ms`, false)
                    if (msg.type === 'warnings') {
                        setStatus(msg.value);
                        StopAudioStream();
                    }

                    return;
                }

                const ab = ev.data;
                playbackNode.port.postMessage({ chunk: ab }, [ab]);
            };

            ws.onclose = () => console.log("__WS_CLOSED__");
            window._workletNode = inputNode;
            window._playbackNode = playbackNode;

            if (window._audioCtx.state === 'suspended') await window._audioCtx.resume();

            console.log("__REALTIME_STARTED__");
            return ButtonState;
        } catch (err) {
            console.error("__ERROR__", err);
            alert("__ERROR__" + err.message);

            return StopAudioStream();
        }
    };

    window.StopAudioStream = async function() {
        try {
            if (window._ws) {
                window._ws.close();
                window._ws = null;
            }

            if (window._activeStream) {
                window._activeStream.getTracks().forEach(t => t.stop());
                window._activeStream = null;
            }

            if (window._workletNode) {
                window._workletNode.disconnect();
                window._workletNode = null;
            }

            if (window._playbackNode) {
                window._playbackNode.disconnect();
                window._playbackNode = null;
            }

            if (window._audioCtx) {
                await window._audioCtx.close();
                window._audioCtx = null;
            }

            document.querySelectorAll('audio').forEach(a => a.remove());
            setStatus("__REALTIME_HAS_STOP__", false);

            return {"start_button": true, "stop_button": false};
        } catch (e) {
            setStatus(`__ERROR__ ${e}`);

            return {"start_button": false, "stop_button": true}
        }
    };
}
""".replace(
    "__MEDIA_DEVICES__", translations["media_devices"]
).replace(
    "__MIC_INACCESSIBLE__", translations["mic_inaccessible"]
).replace(
    "__PROVIDE_AUDIO_DEVICE__", translations["provide_audio_device"]
).replace(
    "__PROVIDE_MONITOR_DEVICE__", translations["provide_monitor_device"]
).replace(
    "__START_REALTIME__", translations["start_realtime"]
).replace(
    "__LATENCY__", translations['latency']
).replace(
    "__WS_CONNECTED__", translations["ws_connected"]
).replace(
    "__WS_CLOSED__", translations["ws_closed"]
).replace(
    "__REALTIME_STARTED__", translations["realtime_is_ready"]
).replace(
    "__ERROR__", translations["error_occurred"].format(e="")
).replace(
    "__REALTIME_HAS_STOP__", translations["realtime_has_stop"]
).replace(
    "__PROVIDE_MODEL__", translations["provide_file"].format(filename=translations["model"])
)
|
| 434 |
-
|
| 435 |
-
# Selects the browser-driven realtime tab (and the injected JS) instead of the
# server-side one. Currently hard-wired on; the former CLI switch was
# `"--client" in sys.argv`.
client_mode = True

# Stylesheet handed to Gradio: loads the configured font URL (or the Google
# Fonts "Courgette" default) and forces that family on every element.
_font_url = font or "https://fonts.googleapis.com/css2?family=Courgette&display=swap"
_app_css = "<style> @import url('{fonts}'); * {{font-family: 'Courgette', cursive !important;}} body, html {{font-family: 'Courgette', cursive !important;}} h1, h2, h3, h4, h5, h6, p, button, input, textarea, label, span, div, select {{font-family: 'Courgette', cursive !important;}} </style>".format(fonts=_font_url)

with gr.Blocks(
    title="📱 Vietnamese-RVC GUI BY ANH",
    js=js_code if client_mode else None,
    theme=theme,
    css=_app_css
) as app:
    # Page header: fixed banner plus the translated subtitle.
    gr.HTML("<h1 style='text-align: center;'>🎵VIETNAMESE RVC BY ANH🎵</h1>")
    gr.HTML(f"<h3 style='text-align: center;'>{translations['title']}</h3>")

    with gr.Tabs():
        inference_tab()
        editing_tab()

        # The realtime tab implementation is imported lazily so only the
        # variant matching `client_mode` is loaded.
        if client_mode:
            from main.app.tabs.realtime.realtime_client import realtime_client_tab
            realtime_client_tab()
        else:
            from main.app.tabs.realtime.realtime import realtime_tab
            realtime_tab()

        training_tab()
        download_tab()
        extra_tab(app)

    # Footer: three Markdown rows (ROT13-decoded link, terms of use, exemption).
    _footer_texts = (
        translations["rick_roll"].format(rickroll=codecs.decode('uggcf://jjj.lbhghor.pbz/jngpu?i=qDj4j9JtKpD', 'rot13')),
        translations["terms_of_use"],
        translations["exemption"],
    )

    for _footer_text in _footer_texts:
        with gr.Row():
            gr.Markdown(_footer_text)
|
| 469 |
-
|
| 470 |
-
if __name__ == "__main__":
    # Script entry point: launch the Gradio app, optionally mount the realtime
    # FastAPI app under /api, start Discord presence, then keep the process
    # alive (launch() is non-blocking because prevent_thread_lock=True).
    logger.info(config.device.replace("privateuseone", "dml"))
    logger.info(translations["start_app"])
    logger.info(translations["set_lang"].format(lang=language))

    port = configs.get("app_port", 7860)
    server_name = configs.get("server_name", "0.0.0.0")
    share = "--share" in sys.argv
    max_attempts = configs.get("num_of_restart", 5)

    # Swallow anything printed to stdout while launching; the finally-clause
    # guarantees the real stdout comes back even when we exit early.
    original_stdout = sys.stdout
    sys.stdout = io.StringIO()

    try:
        for _attempt in range(max_attempts):
            try:
                gradio_app, _, share_url = app.queue().launch(
                    favicon_path=configs["ico_path"],
                    server_name=server_name,
                    server_port=port,
                    show_error=configs.get("app_show_error", False),
                    inbrowser="--open" in sys.argv,
                    share=share,
                    allowed_paths=allow_disk,
                    prevent_thread_lock=True,
                    quiet=True
                )
                break
            except OSError:
                # Launch failed with OSError: retry on the next lower port number.
                logger.debug(translations["port"].format(port=port))
                port -= 1
            except Exception as e:
                logger.error(translations["error_occurred"].format(e=e))
                sys.exit(1)
        else:
            # Every attempt failed with OSError: without this guard the code
            # below would crash with NameError on `gradio_app` / `share_url`.
            logger.error(translations["error_occurred"].format(e=f"could not launch the app after {max_attempts} attempts"))
            sys.exit(1)

        if client_mode:
            from main.app.core.realtime_client import app as fastapi_app
            gradio_app.mount("/api", fastapi_app)
    finally:
        sys.stdout = original_stdout

    if configs.get("discord_presence", True):
        pipe = connect_discord_ipc()
        if pipe:
            try:
                logger.info(translations["start_rpc"])
                send_discord_rpc(pipe)
            except KeyboardInterrupt:
                logger.info(translations["stop_rpc"])
                pipe.close()

    logger.info(f"{translations['running_local_url']}: {server_name}:{port}")
    if share: logger.info(f"{translations['running_share_url']}: {share_url}")
    logger.info(f"{translations['gradio_start']}: {(time.time() - start_time):.2f}s")

    # Keep the main thread alive; the server runs in the background.
    while True:
        time.sleep(5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/csrt.py
DELETED
|
@@ -1,72 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
sys.path.append(os.getcwd())
|
| 5 |
-
|
| 6 |
-
from main.app.core.inference import whisper_process
|
| 7 |
-
from main.library.utils import check_spk_diarization
|
| 8 |
-
from main.app.core.ui import gr_info, gr_warning, process_output
|
| 9 |
-
from main.app.variables import config, translations, configs, logger
|
| 10 |
-
|
| 11 |
-
def create_srt(model_size, input_audio, output_file, word_timestamps):
    """Transcribe an audio file in a child process and write the result as SRT.

    Args:
        model_size: Model identifier passed to ``check_spk_diarization`` and
            ``whisper_process``.
        input_audio: Path of an existing audio file (not a directory).
        output_file: Destination path; ``.srt`` is appended when missing.
        word_timestamps: Forwarded unchanged to ``whisper_process``.

    Returns:
        A two-element list: a Gradio-style update dict pointing at the written
        file, and the full SRT text. ``[None, None]`` when the input or output
        path is invalid.
    """
    import multiprocessing as mp

    if not input_audio or not os.path.exists(input_audio) or os.path.isdir(input_audio):
        gr_warning(translations["input_not_valid"])
        return [None]*2

    # Validate *before* appending the extension: previously "" became ".srt"
    # (so this check could never fire) and None crashed on .endswith().
    if not output_file:
        gr_warning(translations["output_not_valid"])
        return [None]*2

    if not output_file.endswith(".srt"): output_file += ".srt"

    # A bare filename has an empty dirname; os.makedirs("") would raise.
    output_dir = os.path.dirname(output_file)
    if output_dir: os.makedirs(output_dir, exist_ok=True)

    output_file = process_output(output_file)

    check_spk_diarization(model_size, speechbrain=False)
    gr_info(translations["csrt"])

    try:
        mp.set_start_method("spawn")
    except RuntimeError:
        # The start method was already fixed earlier in this process.
        pass

    whisper_queue = mp.Queue()
    whisperprocess = mp.Process(target=whisper_process, args=(model_size, input_audio, configs, config.device, whisper_queue, word_timestamps))
    whisperprocess.start()

    # Blocks until the child posts its result; each segment is indexed by
    # "start", "end" and "text" below.
    segments = whisper_queue.get()

    entries = []

    with open(output_file, "w", encoding="utf-8") as f:
        for i, segment in enumerate(segments):
            start = segment["start"]
            end = segment["end"]
            text = segment["text"].strip()

            # One SRT cue: 1-based index, time range, text, blank separator.
            entry = f"{i+1}\n{format_timestamp(start)} --> {format_timestamp(end)}\n{text}\n\n"

            f.write(entry)
            entries.append(entry)

    info = "".join(entries)
    logger.info(info)

    gr_info(translations["success"])

    return [{"value": output_file, "visible": True, "__type__": "update"}, info]
|
| 64 |
-
|
| 65 |
-
def format_timestamp(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The previous implementation truncated ``seconds`` to an int before taking
    its fractional part, so the millisecond field was always ``000``. The value
    is now converted to whole milliseconds first (rounded to the nearest
    millisecond, clamped at zero) and then split, which also carries correctly
    across second/minute/hour boundaries.

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for ``3661.5``.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))

    total_seconds, miliseconds = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)

    return f"{hours:02}:{minutes:02}:{secs:02},{miliseconds:03}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/downloads.py
DELETED
|
@@ -1,208 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import re
|
| 3 |
-
import sys
|
| 4 |
-
import json
|
| 5 |
-
import codecs
|
| 6 |
-
import shutil
|
| 7 |
-
import yt_dlp
|
| 8 |
-
import warnings
|
| 9 |
-
import requests
|
| 10 |
-
|
| 11 |
-
from bs4 import BeautifulSoup
|
| 12 |
-
|
| 13 |
-
sys.path.append(os.getcwd())
|
| 14 |
-
|
| 15 |
-
from main.tools import huggingface, gdown, meganz, mediafire, pixeldrain
|
| 16 |
-
from main.app.variables import logger, translations, model_options, configs
|
| 17 |
-
from main.app.core.process import move_files_from_directory, fetch_pretrained_data, extract_name_model
|
| 18 |
-
from main.app.core.ui import gr_info, gr_warning, gr_error, process_output, replace_url, replace_modelname
|
| 19 |
-
|
| 20 |
-
def download_url(url):
    """Download the audio track of ``url`` with yt-dlp and convert it to WAV.

    The output name is derived from the media title: characters outside the
    whitelist in the regex below are dropped and whitespace runs become ``-``.

    Args:
        url: Media URL understood by yt-dlp.

    Returns:
        ``[audio_path, audio_path, success_message]`` on success, or
        ``[None, None, None]`` when no URL was supplied.
    """
    if not url:
        gr_warning(translations["provide_url"])
        return [None]*3

    audios_path = configs["audios_path"]
    if not os.path.exists(audios_path): os.makedirs(audios_path, exist_ok=True)

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        ydl_opts = {
            "format": "bestaudio/best",
            "postprocessors": [{
                "key": "FFmpegExtractAudio",
                "preferredcodec": "wav",
                "preferredquality": "192"
            }],
            "quiet": True,
            "no_warnings": True,
            "noplaylist": True,
            "verbose": False
        }

        gr_info(translations["start"].format(start=translations["download_music"]))

        # First pass: metadata only, to learn the title.
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            title = ydl.extract_info(url, download=False).get('title') or 'video'

        safe_title = re.sub(r'\s+', '-', re.sub(r'[^\w\s\u4e00-\u9fff\uac00-\ud7af\u0400-\u04FF\u1100-\u11FF]', '', title).strip())

        # A title made only of stripped characters used to yield "", which made
        # audio_output resolve to the audios directory itself -- and the rmtree
        # below would then wipe that whole directory.
        if not safe_title: safe_title = "video"

        audio_output = os.path.join(audios_path, safe_title)
        if os.path.exists(audio_output): shutil.rmtree(audio_output, ignore_errors=True)

        ydl_opts['outtmpl'] = audio_output

        # Second pass: actual download using the computed output template.
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            audio_output = process_output(audio_output + ".wav")

            ydl.download([url])

    gr_info(translations["success"])
    return [audio_output, audio_output, translations["success"]]
|
| 57 |
-
|
| 58 |
-
def move_file(file, download_dir, model):
    """Unpack a downloaded archive if needed and sort its files into place.

    Args:
        file: Path of the downloaded file; ``.zip`` archives are extracted
            into ``download_dir`` first.
        download_dir: Directory holding the downloaded/extracted files.
        model: Model name forwarded to ``move_files_from_directory``.
    """
    weights_dir, logs_dir = configs["weights_path"], configs["logs_path"]

    # Make sure both destination folders exist before anything is moved.
    for target_dir in (weights_dir, logs_dir):
        if not os.path.exists(target_dir):
            os.makedirs(target_dir, exist_ok=True)

    is_zip_archive = file.endswith(".zip")
    if is_zip_archive:
        shutil.unpack_archive(file, download_dir)

    move_files_from_directory(download_dir, weights_dir, logs_dir, model)
|
| 67 |
-
|
| 68 |
-
def download_model(url=None, model=None):
    """Download a model from a supported host and install it.

    The file is fetched into a temporary ``download_model`` directory (always
    removed afterwards), a model name is derived from the file name when
    ``model`` is not given, and the result is passed to ``move_file``.

    Returns the translated success message, the translated "unsupported URL"
    message, or the translated error message when an exception occurs. When
    ``url`` is empty the result of ``gr_warning`` is returned.
    """
    if not url:
        return gr_warning(translations["provide_url"])

    url = replace_url(url)
    download_dir = "download_model"

    os.makedirs(download_dir, exist_ok=True)

    # Host substring -> downloader. Lambdas keep the lookup lazy so only the
    # matching backend is touched; order matters because the first match wins.
    downloaders = (
        ("huggingface.co", lambda: huggingface.HF_download_file(url, download_dir)),
        ("google.com", lambda: gdown.gdown_download(url, download_dir)),
        ("mediafire.com", lambda: mediafire.Mediafire_Download(url, download_dir)),
        ("pixeldrain.com", lambda: pixeldrain.pixeldrain(url, download_dir)),
        ("mega.nz", lambda: meganz.mega_download_url(url, download_dir)),
    )

    try:
        gr_info(translations["start"].format(start=translations["download"]))

        for host, fetch in downloaders:
            if host in url:
                file = fetch()
                break
        else:
            gr_warning(translations["not_support_url"])
            return translations["not_support_url"]

        if not model:
            modelname = os.path.basename(file)

            if modelname.endswith(".index"):
                model = extract_name_model(modelname)
            else:
                model = os.path.splitext(modelname)[0]

            # extract_name_model may yield None; fall back to the bare stem.
            if model is None:
                model = os.path.splitext(modelname)[0]

        model = replace_modelname(model)

        move_file(file, download_dir, model)
        gr_info(translations["success"])

        return translations["success"]
    except Exception as e:
        gr_error(message=translations["error_occurred"].format(e=e))
        return translations["error_occurred"].format(e=e)
    finally:
        # Runs on every exit path, including the early "unsupported URL" return.
        shutil.rmtree(download_dir, ignore_errors=True)
|
| 104 |
-
|
| 105 |
-
def download_pretrained_model(choices, model, sample_rate):
    """Download pretrained weights either from the built-in list or from URLs.

    When ``choices`` equals ``translations["list_model"]``, ``model`` and
    ``sample_rate`` are keys into ``fetch_pretrained_data()`` and the selected
    file is fetched from the project's Hugging Face repository; the translated
    success message is returned.

    When ``choices`` equals ``translations["download_url"]``, ``model`` and
    ``sample_rate`` are treated as the two download URLs (the warnings label
    them "D" and "G"). Either may be omitted when the other one points at a
    ``.zip`` archive. This mode returns a pair of messages, or ``[None]*2``
    when a required URL is missing.

    Any other ``choices`` value returns ``None``.
    """
    pretraineds_custom_path = configs["pretrained_custom_path"]

    if choices == translations["list_model"]:
        paths = fetch_pretrained_data()[model][sample_rate]

        if not os.path.exists(pretraineds_custom_path): os.makedirs(pretraineds_custom_path, exist_ok=True)
        url = codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/cergenvarq_phfgbz/", "rot13") + paths

        gr_info(translations["download_pretrain"])
        file = huggingface.HF_download_file(replace_url(url), os.path.join(pretraineds_custom_path, paths))

        if file.endswith(".zip"):
            shutil.unpack_archive(file, pretraineds_custom_path)
            os.remove(file)

        gr_info(translations["success"])
        return translations["success"]
    elif choices == translations["download_url"]:
        zip_suffixes = (".zip", ".zip?download=true")

        # Guard against None/empty inputs before calling str methods, so a
        # missing URL produces the warning below instead of an AttributeError.
        pretrain_is_zip = any(bool(link) and link.endswith(zip_suffixes) for link in (model, sample_rate))
        urls = []

        if not model and not pretrain_is_zip:
            gr_warning(translations["provide_pretrain"].format(dg="D"))
            return [None]*2

        if not sample_rate and not pretrain_is_zip:
            gr_warning(translations["provide_pretrain"].format(dg="G"))
            return [None]*2

        gr_info(translations["download_pretrain"])

        # Mirror the list branch: make sure the destination exists up front.
        os.makedirs(pretraineds_custom_path, exist_ok=True)

        if model: urls.append(model)
        if sample_rate: urls.append(sample_rate)

        for url in urls:
            url = replace_url(url)

            if "huggingface.co" in url: file = huggingface.HF_download_file(url, pretraineds_custom_path)
            elif "google.com" in url: file = gdown.gdown_download(url, pretraineds_custom_path)
            elif "mediafire.com" in url: file = mediafire.Mediafire_Download(url, pretraineds_custom_path)
            elif "pixeldrain.com" in url: file = pixeldrain.pixeldrain(url, pretraineds_custom_path)
            elif "mega.nz" in url: file = meganz.mega_download_url(url, pretraineds_custom_path)
            else:
                gr_warning(translations["not_support_url"])
                return translations["not_support_url"], translations["not_support_url"]

            if file.endswith(".zip"):
                shutil.unpack_archive(file, pretraineds_custom_path)
                if os.path.exists(file): os.remove(file)

        gr_info(translations["success"])
        return translations["success"], translations["success"]
|
| 158 |
-
|
| 159 |
-
def fetch_models_data(search, timeout=30):
    """Collect every page of HTML table data returned for a model search.

    Posts ``search`` with an increasing ``page`` number to the search endpoint
    until a page comes back with an empty ``table`` field, a non-200 status is
    received, the body is not valid JSON, or the request fails.

    Args:
        search: Search string sent to the endpoint.
        timeout: Seconds to wait for each request. Without a timeout a stalled
            server would block this loop indefinitely; a timeout surfaces as a
            ``requests.RequestException`` and ends pagination like any other
            request failure.

    Returns:
        A list with one ``table`` string per fetched page (possibly empty).
    """
    all_table_data = []
    page = 1
    # Decode the endpoint once instead of on every iteration.
    endpoint = codecs.decode("uggcf://ibvpr-zbqryf.pbz/srgpu_qngn.cuc", "rot13")

    while True:
        try:
            response = requests.post(url=endpoint, data={"page": page, "search": search}, timeout=timeout)

            if response.status_code != 200:
                logger.debug(f"{translations['code_error']} {response.status_code}")
                break

            # "or ''" also covers an explicit null value for "table".
            table_data = response.json().get("table") or ""
            if not table_data.strip(): break

            all_table_data.append(table_data)
            page += 1
        except json.JSONDecodeError:
            logger.debug(translations["json_error"])
            break
        except requests.RequestException as e:
            logger.debug(translations["requests_error"].format(e=e))
            break

    return all_table_data
|
| 184 |
-
|
| 185 |
-
def search_models(name):
    """Search for models by name and fill ``model_options`` with the hits.

    Parses every table returned by ``fetch_models_data`` and keeps the rows
    whose download link contains ``"huggingface"``, mapping the cleaned model
    name to its URL in the module-level ``model_options`` dict (which is
    cleared first).

    Returns:
        ``[None]*2`` when ``name`` is empty or nothing was fetched; otherwise
        two component-update dicts: one carrying the choices, one making the
        download control visible.
    """
    if not name:
        gr_warning(translations["provide_name"])
        return [None]*2

    gr_info(translations["start"].format(start=translations["search"]))

    tables = fetch_models_data(name)

    if len(tables) == 0:
        gr_info(translations["not_found"].format(name=name))
        return [None]*2

    model_options.clear()

    for table in tables:
        for row in BeautifulSoup(table, "html.parser").select("tr"):
            name_tag = row.find("a", {"class": "fs-5"})
            url_tag = row.find("a", {"class": "btn btn-sm fw-bold btn-light ms-0 p-1 ps-2 pe-2"})

            # Check both tags BEFORE touching url_tag: a row lacking the link
            # would otherwise raise TypeError on the subscript below.
            if not name_tag or not url_tag: continue

            url = (url_tag.get("href") or "").replace("https://easyaivoice.com/run?url=", "")
            if "huggingface" in url: model_options[replace_modelname(name_tag.text)] = url

    gr_info(translations["found"].format(results=len(model_options)))
    return [{"value": "", "choices": model_options, "interactive": True, "visible": True, "__type__": "update"}, {"value": translations["downloads"], "visible": True, "__type__": "update"}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/editing.py
DELETED
|
@@ -1,92 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import random
|
| 4 |
-
import subprocess
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.variables import python, translations, configs
|
| 9 |
-
from main.app.core.ui import gr_info, gr_warning, process_output, replace_export_format
|
| 10 |
-
|
| 11 |
-
def audio_effects(input_path, output_path, resample, resample_sr, chorus_depth, chorus_rate, chorus_mix, chorus_delay, chorus_feedback, distortion_drive, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift, delay_seconds, delay_feedback, delay_mix, compressor_threshold, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold, limiter_release, gain_db, bitcrush_bit_depth, clipping_threshold, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost_db, bass_boost_frequency, treble_boost_db, treble_boost_frequency, fade_in_duration, fade_out_duration, export_format, chorus, distortion, reverb, delay, compressor, limiter, gain, bitcrush, clipping, phaser, treble_bass_boost, fade_in_out, audio_combination, audio_combination_input, main_vol, combine_vol):
    """Validate the paths and run the audio-effects script in a subprocess.

    Every effect parameter is forwarded as a command-line option to the script
    at ``configs["audio_effects_path"]``. ``--pitchshift`` is derived from
    ``pitch_shift != 0`` rather than from a separate toggle.

    Returns:
        ``None`` when the input file or the output path is invalid, otherwise
        ``replace_export_format(output_path, export_format)``.
    """
    if not input_path or not os.path.exists(input_path) or os.path.isdir(input_path):
        gr_warning(translations["input_not_valid"])
        return None

    if not output_path:
        gr_warning(translations["output_not_valid"])
        return None

    if os.path.isdir(output_path): output_path = os.path.join(output_path, f"audio_effects.{export_format}")

    # A bare file name has an empty dirname; creating a directory under the
    # file's own name would block the later write, so only create real parents.
    output_dir = os.path.dirname(output_path)
    if output_dir and not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)

    output_path = process_output(output_path)

    gr_info(translations["start"].format(start=translations["apply_effect"]))

    subprocess.run([
        python,
        configs["audio_effects_path"],
        "--input_path", input_path,
        "--output_path", output_path,
        "--resample", str(resample),
        "--resample_sr", str(resample_sr),
        "--chorus_depth", str(chorus_depth),
        "--chorus_rate", str(chorus_rate),
        "--chorus_mix", str(chorus_mix),
        "--chorus_delay", str(chorus_delay),
        "--chorus_feedback", str(chorus_feedback),
        "--drive_db", str(distortion_drive),
        "--reverb_room_size", str(reverb_room_size),
        "--reverb_damping", str(reverb_damping),
        "--reverb_wet_level", str(reverb_wet_level),
        "--reverb_dry_level", str(reverb_dry_level),
        "--reverb_width", str(reverb_width),
        "--reverb_freeze_mode", str(reverb_freeze_mode),
        "--pitch_shift", str(pitch_shift),
        "--delay_seconds", str(delay_seconds),
        "--delay_feedback", str(delay_feedback),
        "--delay_mix", str(delay_mix),
        "--compressor_threshold", str(compressor_threshold),
        "--compressor_ratio", str(compressor_ratio),
        "--compressor_attack_ms", str(compressor_attack_ms),
        "--compressor_release_ms", str(compressor_release_ms),
        "--limiter_threshold", str(limiter_threshold),
        "--limiter_release", str(limiter_release),
        "--gain_db", str(gain_db),
        "--bitcrush_bit_depth", str(bitcrush_bit_depth),
        "--clipping_threshold", str(clipping_threshold),
        "--phaser_rate_hz", str(phaser_rate_hz),
        "--phaser_depth", str(phaser_depth),
        "--phaser_centre_frequency_hz", str(phaser_centre_frequency_hz),
        "--phaser_feedback", str(phaser_feedback),
        "--phaser_mix", str(phaser_mix),
        "--bass_boost_db", str(bass_boost_db),
        "--bass_boost_frequency", str(bass_boost_frequency),
        "--treble_boost_db", str(treble_boost_db),
        "--treble_boost_frequency", str(treble_boost_frequency),
        "--fade_in_duration", str(fade_in_duration),
        "--fade_out_duration", str(fade_out_duration),
        "--export_format", export_format,
        "--chorus", str(chorus),
        "--distortion", str(distortion),
        "--reverb", str(reverb),
        "--pitchshift", str(pitch_shift != 0),
        "--delay", str(delay),
        "--compressor", str(compressor),
        "--limiter", str(limiter),
        "--gain", str(gain),
        "--bitcrush", str(bitcrush),
        "--clipping", str(clipping),
        "--phaser", str(phaser),
        "--treble_bass_boost", str(treble_bass_boost),
        "--fade_in_out", str(fade_in_out),
        "--audio_combination", str(audio_combination),
        "--audio_combination_input", audio_combination_input,
        "--main_volume", str(main_vol),
        "--combination_volume", str(combine_vol)
    ])

    gr_info(translations["success"])
    return replace_export_format(output_path, export_format)
|
| 32 |
-
|
| 33 |
-
def apply_voice_quirk(audio_path, mode, output_path, export_format):
    """Apply one of the numbered "quirk" effects to an audio file and save it.

    ``mode`` is a key of ``translations["quirk_choice"]``, which maps it to an
    integer effect id; id 0 picks a random effect between 1 and 16.

    Returns:
        ``None`` when the input file or the output path is invalid, otherwise
        the path of the written file (with the export-format extension).
    """
    if not audio_path or not os.path.exists(audio_path) or os.path.isdir(audio_path):
        gr_warning(translations["input_not_valid"])
        return None

    if not output_path:
        gr_warning(translations["output_not_valid"])
        return None

    if os.path.isdir(output_path): output_path = os.path.join(output_path, f"audio_quirk.{export_format}")

    # A bare file name has an empty dirname; creating a directory under the
    # file's own name would block the later write, so only create real parents.
    output_dir = os.path.dirname(output_path)
    if output_dir and not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)

    output_path = process_output(output_path)

    gr_info(translations["start"].format(start=translations["apply_effect"]))

    import librosa
    import numpy as np
    import soundfile as sf

    def vibrato(y, sr, freq=5, depth=0.003):
        # Re-index the signal with a sinusoidally varying sample offset,
        # clamped to the valid index range.
        positions = np.arange(len(y))
        offsets = (depth * np.sin(2 * np.pi * freq * (positions / sr))) * sr
        return y[np.clip((positions + offsets).astype(int), 0, len(y) - 1)]

    y, sr = librosa.load(audio_path, sr=None)
    output_path = replace_export_format(output_path, export_format)

    mode = translations["quirk_choice"][mode]
    if mode == 0: mode = random.randint(1, 16)

    if mode == 1: y *= np.random.uniform(0.5, 0.8, size=len(y))
    elif mode == 2: y = librosa.effects.pitch_shift(y=y + np.random.normal(0, 0.01, y.shape), sr=sr, n_steps=np.random.uniform(-1.5, -3.5))
    elif mode == 3: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=3), rate=1.2)
    elif mode == 4: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=8), rate=1.3)
    elif mode == 5: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=-3), rate=0.75)
    elif mode == 6: y *= np.sin(np.linspace(0, np.pi * 20, len(y))) * 0.5 + 0.5
    elif mode == 7: y = librosa.effects.time_stretch(vibrato(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=-4), sr, freq=3, depth=0.004), rate=0.85)
    elif mode == 8: y *= 0.6 + np.pad(y, (sr // 2, 0), mode='constant')[:len(y)] * 0.4
    elif mode == 9: y = librosa.effects.pitch_shift(y=y, sr=sr, n_steps=2) + np.sin(np.linspace(0, np.pi * 20, len(y))) * 0.02
    elif mode == 10: y = vibrato(y, sr, freq=8, depth=0.005)
    elif mode == 11: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=4), rate=1.25)
    elif mode == 12: y = np.hstack([np.pad(f, (0, int(len(f)*0.3)), mode='edge') for f in librosa.util.frame(y, frame_length=2048, hop_length=512).T])
    elif mode == 13: y = np.concatenate([y, np.sin(2 * np.pi * np.linspace(0, 1, int(0.05 * sr))) * 0.02])
    elif mode == 14: y += np.random.normal(0, 0.005, len(y))
    elif mode == 15:
        # Cut into 0.2 s chunks and play them back in random order.
        frame = int(sr * 0.2)
        chunks = [y[i:i + frame] for i in range(0, len(y), frame)]

        np.random.shuffle(chunks)
        y = np.concatenate(chunks)
    elif mode == 16:
        # Reverse every other 0.3 s segment in place.
        frame = int(sr * 0.3)

        for i in range(0, len(y), frame * 2):
            y[i:i+frame] = y[i:i+frame][::-1]

    sf.write(output_path, y, sr, format=export_format)
    gr_info(translations["success"])

    return output_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/f0_extract.py
DELETED
|
@@ -1,54 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
sys.path.append(os.getcwd())
|
| 5 |
-
|
| 6 |
-
from main.app.core.ui import gr_info, gr_warning
|
| 7 |
-
from main.app.variables import config, translations, configs
|
| 8 |
-
|
| 9 |
-
def f0_extract(audio, f0_method, f0_onnx):
    """Extract the pitch curve of an audio file and save it as text and plot.

    The audio is loaded at 16 kHz, the pitch is computed with the project's
    ``Generator`` using ``f0_method``, zero values are replaced by NaN, and
    the curve is converted to cents relative to MIDI note 0. Results are
    written to ``f0.txt`` and ``f0.png`` in a folder named after the audio
    file under ``configs["f0_path"]``.

    Returns:
        ``[txt_path, image_path]``, or ``[None]*2`` when ``audio`` is not an
        existing file.
    """
    if not (audio and os.path.exists(audio) and not os.path.isdir(audio)):
        gr_warning(translations["input_not_valid"])
        return [None]*2

    import librosa
    import numpy as np
    import matplotlib.pyplot as plt

    from main.library.utils import check_assets, load_audio
    from main.library.predictors.Generator import Generator

    check_assets(f0_method, "", f0_onnx, "")

    audio_stem = os.path.splitext(os.path.basename(audio))[0]
    f0_path = os.path.join(configs["f0_path"], audio_stem)
    image_path = os.path.join(f0_path, "f0.png")
    txt_path = os.path.join(f0_path, "f0.txt")

    gr_info(translations["start_extract"])

    if not os.path.exists(f0_path):
        os.makedirs(f0_path, exist_ok=True)

    samples = load_audio(audio, sample_rate=16000)
    generator = Generator(16000, 160, 50, 1100, 0.5, is_half=config.is_half, device=config.device, f0_onnx_mode=f0_onnx, del_onnx_model=f0_onnx)
    _, pitchf = generator.calculator(config.x_pad, f0_method, samples, 0, None, 3, False, 0, None, False)

    # Zeros become NaN so they show up as gaps instead of hitting log2(0).
    hz = np.array(pitchf, dtype=np.float32)
    hz[hz == 0] = np.nan

    f0 = 1200 * np.log2(hz / librosa.midi_to_hz(0))

    plt.figure(figsize=(10, 4))
    plt.plot(f0)
    plt.title(f0_method)
    plt.xlabel(translations["time_frames"])
    plt.ylabel(translations["Frequency"])
    plt.savefig(image_path)
    plt.close()

    with open(txt_path, "w") as f:
        f.writelines(f"{index * 100.0},{value}\n" for index, value in enumerate(f0))

    gr_info(translations["extract_done"])

    return [txt_path, image_path]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/inference.py
DELETED
|
@@ -1,441 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import re
|
| 3 |
-
import gc
|
| 4 |
-
import sys
|
| 5 |
-
import shutil
|
| 6 |
-
import datetime
|
| 7 |
-
import subprocess
|
| 8 |
-
|
| 9 |
-
import numpy as np
|
| 10 |
-
|
| 11 |
-
sys.path.append(os.getcwd())
|
| 12 |
-
|
| 13 |
-
from main.app.variables import logger, config, configs, translations, python
|
| 14 |
-
from main.app.core.ui import gr_info, gr_warning, gr_error, process_output, replace_export_format
|
| 15 |
-
|
| 16 |
-
def convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0_method, input_path, output_path, pth_path, index_path, f0_autotune, clean_audio, clean_strength, export_format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, f0_onnx, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold, audio_processing=False, alpha=0.5):
    """Run the conversion script in a subprocess with the given settings.

    Each argument becomes a ``--name value`` pair on the command line of the
    script at ``configs["convert_path"]``. Numeric and boolean settings are
    stringified; path-like and name-like settings are passed through as-is.
    """
    command = [python, configs["convert_path"]]

    # (flag, value) pairs in the order the script receives them.
    options = (
        ("--pitch", str(pitch)),
        ("--filter_radius", str(filter_radius)),
        ("--index_rate", str(index_rate)),
        ("--rms_mix_rate", str(rms_mix_rate)),
        ("--protect", str(protect)),
        ("--hop_length", str(hop_length)),
        ("--f0_method", f0_method),
        ("--input_path", input_path),
        ("--output_path", output_path),
        ("--pth_path", pth_path),
        ("--index_path", index_path),
        ("--f0_autotune", str(f0_autotune)),
        ("--clean_audio", str(clean_audio)),
        ("--clean_strength", str(clean_strength)),
        ("--export_format", export_format),
        ("--embedder_model", embedder_model),
        ("--resample_sr", str(resample_sr)),
        ("--split_audio", str(split_audio)),
        ("--f0_autotune_strength", str(f0_autotune_strength)),
        ("--checkpointing", str(checkpointing)),
        ("--f0_onnx", str(f0_onnx)),
        ("--embedders_mode", embedders_mode),
        ("--formant_shifting", str(formant_shifting)),
        ("--formant_qfrency", str(formant_qfrency)),
        ("--formant_timbre", str(formant_timbre)),
        ("--f0_file", f0_file),
        ("--proposal_pitch", str(proposal_pitch)),
        ("--proposal_pitch_threshold", str(proposal_pitch_threshold)),
        ("--audio_processing", str(audio_processing)),
        ("--alpha", str(alpha)),
    )

    for flag, value in options:
        command.extend((flag, value))

    subprocess.run(command)
|
| 51 |
-
|
| 52 |
-
def convert_audio(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, input_audio_name, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold, audio_processing=False, alpha=0.5):
    """Validate the options and run voice conversion in one of two modes.

    With ``use_audio`` set, the separated stems stored in
    ``configs["audios_path"]/input_audio_name`` are converted and optionally
    merged with the backing vocals and/or instruments. Otherwise ``input`` (a
    file, or a folder for batch conversion) is converted to ``output``.

    Returns:
        A six-element list. The last element is always a visibility-update
        dict. In ``use_audio`` mode the first five hold the converted vocals,
        converted backing, vocals+backing mix, converted original vocals and
        vocals+instruments mix (each possibly ``None``); in file mode only the
        first slot is filled. On validation failure, on a merge error, and
        after a batch run, the first five are all ``None``.
    """
    # "model and" keeps a None model from crashing os.path.exists, so the
    # validation below can report it properly.
    model_path = os.path.join(configs["weights_path"], model) if model and not os.path.exists(model) else model

    return_none = [None]*6
    return_none[5] = {"visible": True, "__type__": "update"}

    if not use_audio:
        if merge_instrument or not_merge_backing or convert_backing or use_original:
            gr_warning(translations["turn_on_use_audio"])
            return return_none

    if use_original:
        if convert_backing:
            gr_warning(translations["turn_off_convert_backup"])
            return return_none
        elif not_merge_backing:
            gr_warning(translations["turn_off_merge_backup"])
            return return_none

    if not model or not os.path.exists(model_path) or os.path.isdir(model_path) or not model.endswith((".pth", ".onnx")):
        gr_warning(translations["provide_file"].format(filename=translations["model"]))
        return return_none

    f0method, embedder_model = (method if method != "hybrid" else hybrid_method), (embedders if embedders != "custom" else custom_embedders)

    if use_audio:
        output_audio = os.path.join(configs["audios_path"], input_audio_name)

        from main.library.utils import pydub_load

        def get_audio_file(label):
            # First file in the stem folder whose name contains `label`, or
            # the translated "not found" marker.
            matching_files = [f for f in os.listdir(output_audio) if label in f]

            if not matching_files: return translations["notfound"]
            return os.path.join(output_audio, matching_files[0])

        output_path = os.path.join(output_audio, f"Convert_Vocals.{format}")
        output_backing = os.path.join(output_audio, f"Convert_Backing.{format}")
        output_merge_backup = os.path.join(output_audio, f"Vocals+Backing.{format}")
        output_merge_instrument = os.path.join(output_audio, f"Vocals+Instruments.{format}")

        # The original condition was inverted (it only ran when the folder
        # already existed). The folder must exist before it is listed below.
        os.makedirs(output_audio, exist_ok=True)
        output_path = process_output(output_path)

        if use_original:
            original_vocal = get_audio_file('Original_Vocals_No_Reverb.')

            if original_vocal == translations["notfound"]: original_vocal = get_audio_file('Original_Vocals.')

            if original_vocal == translations["notfound"]:
                gr_warning(translations["not_found_original_vocal"])
                return return_none

            input_path = original_vocal
        else:
            main_vocal = get_audio_file('Main_Vocals_No_Reverb.')
            backing_vocal = get_audio_file('Backing_Vocals.')

            if main_vocal == translations["notfound"]: main_vocal = get_audio_file('Main_Vocals.')
            if main_vocal == translations["notfound"]:
                gr_warning(translations["not_found_main_vocal"])
                return return_none

            if not not_merge_backing and backing_vocal == translations["notfound"]:
                gr_warning(translations["not_found_backing_vocal"])
                return return_none

            input_path = main_vocal
            backing_path = backing_vocal

        gr_info(translations["convert_vocal"])

        convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input_path, output_path, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)

        gr_info(translations["convert_success"])

        if convert_backing:
            output_backing = process_output(output_backing)

            gr_info(translations["convert_backup"])

            convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, backing_path, output_backing, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)

            gr_info(translations["convert_backup_success"])

        try:
            if not not_merge_backing and not use_original:
                backing_source = output_backing if convert_backing else backing_vocal

                output_merge_backup = process_output(output_merge_backup)

                gr_info(translations["merge_backup"])

                pydub_load(output_path, volume=-4).overlay(pydub_load(backing_source, volume=-6)).export(output_merge_backup, format=format)

                gr_info(translations["merge_success"])

            if merge_instrument:
                vocals = output_merge_backup if not not_merge_backing and not use_original else output_path

                output_merge_instrument = process_output(output_merge_instrument)

                gr_info(translations["merge_instruments_process"])

                instruments = get_audio_file('Instruments.')

                if instruments == translations["notfound"]:
                    gr_warning(translations["not_found_instruments"])
                    output_merge_instrument = None
                else: pydub_load(instruments, volume=-7).overlay(pydub_load(vocals, volume=-4 if use_original else None)).export(output_merge_instrument, format=format)

                gr_info(translations["merge_success"])
        except Exception as e:
            # Still best-effort (empty result), but report the failure instead
            # of swallowing it; a bare except would also trap
            # KeyboardInterrupt/SystemExit.
            logger.debug(e)
            gr_error(message=translations["error_occurred"].format(e=e))
            return return_none

        return [(None if use_original else output_path), output_backing, (None if not_merge_backing and use_original else output_merge_backup), (output_path if use_original else None), (output_merge_instrument if merge_instrument else None), {"visible": True, "__type__": "update"}]
    else:
        if not input or not os.path.exists(input):
            gr_warning(translations["input_not_valid"])
            return return_none

        if not output:
            gr_warning(translations["output_not_valid"])
            return return_none

        output = replace_export_format(output, format)

        if os.path.isdir(input):
            gr_info(translations["is_folder"])

            if not [f for f in os.listdir(input) if f.lower().endswith(("wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"))]:
                gr_warning(translations["not_found_in_folder"])
                return return_none

            gr_info(translations["batch_convert"])

            output_dir = os.path.dirname(output) or output
            convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input, output_dir, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)

            gr_info(translations["batch_convert_success"])

            return return_none
        else:
            # A bare file name has an empty dirname; do not create a directory
            # under the output file's own name.
            output_dir = os.path.dirname(output)

            if output_dir and not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
            output = process_output(output)

            gr_info(translations["convert_vocal"])

            convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input, output, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)

            gr_info(translations["convert_success"])

            return_none[0] = output
            return return_none
|
| 208 |
-
|
| 209 |
-
def convert_selection(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold, audio_processing=False, alpha=0.5):
    """Decide which audio to convert and build the eight UI updates.

    Without ``use_audio`` the given ``input`` is converted directly. With it,
    the sub-folders of ``configs["audios_path"]`` are the candidates: zero
    candidates yields a warning, exactly one is converted immediately, and
    several are offered back as dropdown choices without converting anything.

    Returns:
        An eight-element list: a dropdown update, five output slots, and two
        visibility updates.
    """
    empty_dropdown = {"choices": [], "value": "", "interactive": False, "visible": False, "__type__": "update"}
    shown = {"visible": True, "__type__": "update"}
    hidden = {"visible": False, "__type__": "update"}

    if not use_audio:
        main_convert = convert_audio(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, None, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)

        return [empty_dropdown, main_convert[0], None, None, None, None, shown, hidden]

    gr_info(translations["search_separate"])

    audio_suffixes = (".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3")
    audios_root = configs["audios_path"]

    # In debug mode every sub-folder counts; otherwise only folders that
    # contain at least one audio file.
    choice = []
    for entry in os.listdir(audios_root):
        folder = os.path.join(audios_root, entry)

        if not os.path.isdir(folder):
            continue

        if config.debug_mode or any(item.lower().endswith(audio_suffixes) for item in os.listdir(folder)):
            choice.append(entry)

    gr_info(translations["found_choice"].format(choice=len(choice)))

    if len(choice) == 0:
        gr_warning(translations["separator==0"])

        return [empty_dropdown, None, None, None, None, None, shown, hidden]

    if len(choice) == 1:
        convert_output = convert_audio(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, None, None, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, choice[0], checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)

        return [empty_dropdown, convert_output[0], convert_output[1], convert_output[2], convert_output[3], convert_output[4], shown, hidden]

    selection_dropdown = {"choices": choice, "value": choice[0], "interactive": True, "visible": True, "__type__": "update"}
    return [selection_dropdown, None, None, None, None, None, hidden, shown]
|
| 229 |
-
|
| 230 |
-
def whisper_process(model_size, input_audio, configs, device, out_queue, word_timestamps=True):
    """Transcribe ``input_audio`` with Whisper and post the outcome on ``out_queue``.

    Exactly one item is put on ``out_queue``: the ``"segments"`` entry of the
    transcription result on success, or the raised exception on failure.

    Args:
        model_size: Whisper model identifier forwarded to ``load_model``.
        input_audio: Audio input forwarded to ``transcribe``.
        configs: Mapping read for the optional ``"fp16"`` flag (defaults to False).
        device: Device forwarded to ``load_model``.
        out_queue: Queue-like object receiving the result or the exception.
        word_timestamps: Forwarded to ``transcribe``.
    """
    from main.library.speaker_diarization.whisper import load_model

    # Bind the name up front so the cleanup in ``finally`` cannot raise
    # UnboundLocalError when loading or transcription fails.
    segments = None

    try:
        segments = load_model(model_size, device=device).transcribe(input_audio, fp16=configs.get("fp16", False), word_timestamps=word_timestamps)
        out_queue.put(segments["segments"])
    except Exception as e:
        # Hand the failure to the consumer instead of dying silently.
        out_queue.put(e)
    finally:
        del segments
        gc.collect()
|
| 241 |
-
|
| 242 |
-
def convert_with_whisper(num_spk, model_size, cleaner, clean_strength, autotune, f0_autotune_strength, checkpointing, model_1, model_2, model_index_1, model_index_2, pitch_1, pitch_2, index_strength_1, index_strength_2, export_format, input_audio, output_audio, onnx_f0_mode, method, hybrid_method, hop_length, embed_mode, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, formant_shifting, formant_qfrency_1, formant_timbre_1, formant_qfrency_2, formant_timbre_2, proposal_pitch, proposal_pitch_threshold, audio_processing=False, alpha=0.5):
    """Split ``input_audio`` by speaker, convert each part with one of two models, and re-merge.

    Steps: transcribe with Whisper in a child process, embed every segment,
    cluster the embeddings into ``num_spk`` speakers, merge consecutive
    segments of the same speaker, export the merged segments into two
    temporary folders, run ``convert`` once per folder, and stitch the
    converted segments back together with silence for the gaps.

    Returns:
        The path of the merged output file, or ``None`` when validation fails
        or any step raises (the error is reported through ``gr_error``).
        The ``audios_temp`` working folder is always removed afterwards.
    """
    import librosa
    import multiprocessing as mp

    from pydub import AudioSegment
    from sklearn.cluster import AgglomerativeClustering

    from main.library.utils import clear_gpu_cache
    from main.library.speaker_diarization.audio import Audio
    from main.library.speaker_diarization.segment import Segment
    from main.library.utils import check_spk_diarization, pydub_load
    from main.library.speaker_diarization.embedding import SpeechBrainPretrainedSpeakerEmbedding

    def resolve_model(model):
        # An empty/None selection stays empty; os.path.exists(None) would raise TypeError.
        if not model:
            return ""
        return model if os.path.exists(model) else os.path.join(configs["weights_path"], model)

    def is_model_file(path):
        return bool(path) and os.path.exists(path) and not os.path.isdir(path) and path.endswith((".pth", ".onnx"))

    check_spk_diarization(model_size)
    model_pth_1, model_pth_2 = resolve_model(model_1), resolve_model(model_2)

    # Only give up when neither model is usable; one model may stand in for the other.
    if not is_model_file(model_pth_1) and not is_model_file(model_pth_2):
        gr_warning(translations["provide_file"].format(filename=translations["model"]))
        return None

    if not model_1: model_pth_1 = model_pth_2
    if not model_2: model_pth_2 = model_pth_1

    if not input_audio or not os.path.exists(input_audio) or os.path.isdir(input_audio):
        gr_warning(translations["input_not_valid"])
        return None

    if not output_audio:
        gr_warning(translations["output_not_valid"])
        return None

    output_audio = process_output(output_audio)
    gr_info(translations["start_whisper"])

    try:
        try:
            mp.set_start_method("spawn")
        except RuntimeError:
            # The start method can only be set once per interpreter; later calls are expected to fail.
            pass

        whisper_queue = mp.Queue()
        whisperprocess = mp.Process(target=whisper_process, args=(model_size, input_audio, configs, config.device, whisper_queue, True))
        whisperprocess.start()

        # Read the result before joining so the child is never blocked on a full pipe.
        segments = whisper_queue.get()
        whisperprocess.join()

        # The worker posts its exception on failure; surface it instead of
        # treating the exception object as a list of segments.
        if isinstance(segments, Exception): raise segments

        audio = Audio()

        embedding_model = SpeechBrainPretrainedSpeakerEmbedding(embedding=os.path.join(configs["speaker_diarization_path"], "models", "speechbrain"), device=config.device)
        y, sr = librosa.load(input_audio, sr=None)
        duration = len(y) / sr

        def segment_embedding(segment):
            # Clamp the segment end to the real audio duration before cropping.
            waveform, _ = audio.crop(input_audio, Segment(segment["start"], min(duration, segment["end"])))
            # Two-channel waveforms are averaged down to a single channel first.
            return embedding_model(waveform.mean(dim=0, keepdim=True)[None] if waveform.shape[0] == 2 else waveform[None])

        def fmt_time(secs):
            return datetime.timedelta(seconds=round(secs))

        def merge_audio(files_list, time_stamps, original_file_path, output_path, export_as):
            def extract_number(filename):
                match = re.search(r'_(\d+)', filename)
                return int(match.group(1)) if match else 0

            total_duration = len(pydub_load(original_file_path))
            combined = AudioSegment.empty()
            current_position = 0

            # Files are ordered by their segment number and paired with the timestamps in order.
            for file, (start_i, end_i) in zip(sorted(files_list, key=extract_number), time_stamps):
                # Pad the gap before this segment with silence.
                if start_i > current_position: combined += AudioSegment.silent(duration=start_i - current_position)

                combined += pydub_load(file)
                current_position = end_i

            # Pad the tail so the result spans the original duration.
            if current_position < total_duration: combined += AudioSegment.silent(duration=total_duration - current_position)
            combined.export(output_path, format=export_as)

            return output_path

        embeddings = np.zeros(shape=(len(segments), 192))
        for i, segment in enumerate(segments):
            embeddings[i] = segment_embedding(segment)

        labels = AgglomerativeClustering(num_spk).fit(np.nan_to_num(embeddings)).labels_
        for segment, label in zip(segments, labels):
            segment["speaker"] = 'SPEAKER ' + str(label + 1)

        merged_segments, current_text = [], []
        current_speaker, current_start = None, None

        # Collapse runs of consecutive segments spoken by the same speaker.
        for segment in segments:
            speaker = segment["speaker"]
            start_time = segment["start"]
            text = segment["text"][1:]  # drops the first character — presumably a leading space; TODO confirm

            if speaker == current_speaker:
                current_text.append(text)
                end_time = segment["end"]
            else:
                if current_speaker is not None: merged_segments.append({"speaker": current_speaker, "start": current_start, "end": end_time, "text": " ".join(current_text)})

                current_speaker = speaker
                current_start = start_time
                current_text = [text]
                end_time = segment["end"]

        if current_speaker is not None: merged_segments.append({"speaker": current_speaker, "start": current_start, "end": end_time, "text": " ".join(current_text)})

        gr_info(translations["whisper_done"])

        transcript = "".join(
            f"\n{segment['speaker']} {str(fmt_time(segment['start']))} - {str(fmt_time(segment['end']))}\n" + segment["text"] + "\n"
            for segment in merged_segments
        )
        logger.info(transcript)

        # Release the diarization models before the conversion models are loaded.
        del audio, embedding_model, segments, labels
        clear_gpu_cache()
        gc.collect()

        gr_info(translations["process_audio"])

        audio = pydub_load(input_audio)
        output_folder = "audios_temp"

        if os.path.exists(output_folder): shutil.rmtree(output_folder, ignore_errors=True)
        for f in [output_folder, os.path.join(output_folder, "1"), os.path.join(output_folder, "2")]:
            os.makedirs(f, exist_ok=True)

        time_stamps, processed_segments = [], []
        for i, segment in enumerate(merged_segments):
            start_ms = int(segment["start"] * 1000)
            end_ms = int(segment["end"] * 1000)

            index = i + 1
            # NOTE(review): the target folder is chosen by segment parity, not by the
            # speaker label — this relies on merged segments alternating between two speakers.
            speaker_folder = "1" if i % 2 == 1 else "2"

            segment_filename = os.path.join(output_folder, speaker_folder, f"segment_{index}.wav")
            audio[start_ms:end_ms].export(segment_filename, format="wav")

            processed_segments.append(os.path.join(output_folder, speaker_folder, f"segment_{index}_output.wav"))
            time_stamps.append((start_ms, end_ms))

        f0method, embedder_model = (method if method != "hybrid" else hybrid_method), (embedders if embedders != "custom" else custom_embedders)

        gr_info(translations["process_done_start_convert"])

        convert(pitch_1, filter_radius, index_strength_1, rms_mix_rate, protect, hop_length, f0method, os.path.join(output_folder, "1"), output_folder, model_pth_1, model_index_1, autotune, cleaner, clean_strength, "wav", embedder_model, resample_sr, False, f0_autotune_strength, checkpointing, onnx_f0_mode, embed_mode, formant_shifting, formant_qfrency_1, formant_timbre_1, "", proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)
        convert(pitch_2, filter_radius, index_strength_2, rms_mix_rate, protect, hop_length, f0method, os.path.join(output_folder, "2"), output_folder, model_pth_2, model_index_2, autotune, cleaner, clean_strength, "wav", embedder_model, resample_sr, False, f0_autotune_strength, checkpointing, onnx_f0_mode, embed_mode, formant_shifting, formant_qfrency_2, formant_timbre_2, "", proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)

        gr_info(translations["convert_success"])
        return merge_audio(processed_segments, time_stamps, input_audio, replace_export_format(output_audio, export_format), export_format)
    except Exception as e:
        gr_error(translations["error_occurred"].format(e=e))
        import traceback
        logger.debug(traceback.format_exc())
        return None
    finally:
        if os.path.exists("audios_temp"): shutil.rmtree("audios_temp", ignore_errors=True)
|
| 400 |
-
|
| 401 |
-
def convert_tts(clean, autotune, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold, audio_processing=False, alpha=0.5):
    """Validate the inputs and run ``convert`` on a TTS audio file.

    ``model`` is used as given when that path exists, otherwise it is looked up
    inside ``configs["weights_path"]``. When ``input`` is a directory, the first
    listed audio file whose name contains ``"tts"`` is used. When ``output`` is
    a directory, the result is written to ``tts.<format>`` inside it.

    Returns:
        The output path after conversion, or ``None`` (after a warning) when the
        model, input or output is not usable.
    """
    # Reject an empty selection first: os.path.exists(None) would raise TypeError.
    if not model:
        gr_warning(translations["provide_file"].format(filename=translations["model"]))
        return None

    model_path = model if os.path.exists(model) else os.path.join(configs["weights_path"], model)

    if not os.path.exists(model_path) or os.path.isdir(model_path) or not model_path.endswith((".pth", ".onnx")):
        gr_warning(translations["provide_file"].format(filename=translations["model"]))
        return None

    if not input or not os.path.exists(input):
        gr_warning(translations["input_not_valid"])
        return None

    if os.path.isdir(input):
        input_audio = [f for f in os.listdir(input) if "tts" in f and f.lower().endswith(("wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"))]

        if not input_audio:
            gr_warning(translations["not_found_in_folder"])
            return None

        input = os.path.join(input, input_audio[0])

    if not output:
        gr_warning(translations["output_not_valid"])
        return None

    output = replace_export_format(output, format)
    if os.path.isdir(output): output = os.path.join(output, f"tts.{format}")

    # A bare file name has no directory part; os.makedirs("") would raise FileNotFoundError.
    output_dir = os.path.dirname(output)
    if output_dir: os.makedirs(output_dir, exist_ok=True)

    output = process_output(output)

    f0method = method if method != "hybrid" else hybrid_method
    embedder_model = embedders if embedders != "custom" else custom_embedders

    gr_info(translations["convert_vocal"])

    convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input, output, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)

    gr_info(translations["convert_success"])
    return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/model_utils.py
DELETED
|
@@ -1,164 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import json
|
| 4 |
-
import torch
|
| 5 |
-
import datetime
|
| 6 |
-
|
| 7 |
-
sys.path.append(os.getcwd())
|
| 8 |
-
|
| 9 |
-
from main.app.core.ui import gr_info, gr_warning, gr_error
|
| 10 |
-
from main.app.variables import config, logger, translations, configs
|
| 11 |
-
|
| 12 |
-
def fushion_model_pth(name, pth_1, pth_2, ratio):
    """Blend the weights of two ``.pth`` models and save the result.

    Every weight is computed as ``ratio * model_1 + (1 - ratio) * model_2`` in
    float32 and stored as half precision. Metadata (``config``, ``sr``, ``f0``,
    ``version``, ``vocoder``, ``energy``) is taken from the first model. The
    merged file is written to ``configs["weights_path"]`` under ``name``
    (``.pth`` is appended when missing).

    Returns:
        A two-item list ``[message, output_path]``; ``output_path`` is ``None``
        whenever nothing was saved. On an unexpected exception the first item
        is the exception object itself.
    """
    if not name.endswith(".pth"): name = name + ".pth"

    if not pth_1 or not os.path.exists(pth_1) or not pth_1.endswith(".pth"):
        gr_warning(translations["provide_file"].format(filename=translations["model"] + " 1"))
        return [translations["provide_file"].format(filename=translations["model"] + " 1"), None]

    if not pth_2 or not os.path.exists(pth_2) or not pth_2.endswith(".pth"):
        gr_warning(translations["provide_file"].format(filename=translations["model"] + " 2"))
        return [translations["provide_file"].format(filename=translations["model"] + " 2"), None]

    from collections import OrderedDict

    def extract(ckpt):
        # Return the flat weight mapping (without the "enc_q" entries) so both
        # checkpoint layouts yield the same structure. The previous version wrapped
        # it in {"weight": ...}, which made the merge loop call .float() on a dict.
        return {key: value for key, value in ckpt["model"].items() if "enc_q" not in key}

    try:
        ckpt1 = torch.load(pth_1, map_location="cpu", weights_only=True)
        ckpt2 = torch.load(pth_2, map_location="cpu", weights_only=True)

        if ckpt1["sr"] != ckpt2["sr"]:
            gr_warning(translations["sr_not_same"])
            return [translations["sr_not_same"], None]

        cfg = ckpt1["config"]
        cfg_f0 = ckpt1["f0"]
        cfg_version = ckpt1["version"]
        cfg_sr = ckpt1["sr"]

        vocoder = ckpt1.get("vocoder", "Default")
        rms_extract = ckpt1.get("energy", False)

        ckpt1 = extract(ckpt1) if "model" in ckpt1 else ckpt1["weight"]
        ckpt2 = extract(ckpt2) if "model" in ckpt2 else ckpt2["weight"]

        if sorted(ckpt1.keys()) != sorted(ckpt2.keys()):
            gr_warning(translations["architectures_not_same"])
            return [translations["architectures_not_same"], None]

        gr_info(translations["start"].format(start=translations["fushion_model"]))

        opt = OrderedDict()
        opt["weight"] = {}

        for key, weight_1 in ckpt1.items():
            weight_2 = ckpt2[key]

            # "emb_g.weight" may differ in its first dimension; blend only the rows both models have.
            if key == "emb_g.weight" and weight_1.shape != weight_2.shape:
                min_shape0 = min(weight_1.shape[0], weight_2.shape[0])
                weight_1, weight_2 = weight_1[:min_shape0], weight_2[:min_shape0]

            opt["weight"][key] = (ratio * (weight_1.float()) + (1 - ratio) * (weight_2.float())).half()

        opt["config"] = cfg
        opt["sr"] = cfg_sr
        opt["f0"] = cfg_f0
        opt["version"] = cfg_version
        opt["infos"] = translations["model_fushion_info"].format(name=name, pth_1=pth_1, pth_2=pth_2, ratio=ratio)
        opt["vocoder"] = vocoder
        opt["energy"] = rms_extract

        output_model = configs["weights_path"]
        if not os.path.exists(output_model): os.makedirs(output_model, exist_ok=True)

        torch.save(opt, os.path.join(output_model, name))

        gr_info(translations["success"])
        return [translations["success"], os.path.join(output_model, name)]
    except Exception as e:
        gr_error(message=translations["error_occurred"].format(e=e))
        return [e, None]
|
| 89 |
-
|
| 90 |
-
def fushion_model(name, path_1, path_2, ratio):
    """Entry point for model fusion: validate the request and delegate to ``fushion_model_pth``.

    Returns:
        The ``[message, output_path]`` list produced by ``fushion_model_pth``,
        ``[message, None]`` when ``name`` is empty, or ``[None, None]`` when
        either path is not a ``.pth`` file name.
    """
    if not name:
        gr_warning(translations["provide_name_is_save"])
        return [translations["provide_name_is_save"], None]

    # Missing (None) paths are treated as an invalid format instead of raising AttributeError.
    both_pth = all(isinstance(path, str) and path.endswith(".pth") for path in (path_1, path_2))
    if both_pth:
        return fushion_model_pth(name, path_1, path_2, ratio)

    gr_warning(translations["format_not_valid"])
    return [None, None]
|
| 99 |
-
|
| 100 |
-
def onnx_export(model_path):
    """Export a ``.pth`` model to ONNX next to the source file.

    ``.pth`` is appended to ``model_path`` when missing; the output path is the
    same path with the trailing ``.pth`` swapped for ``.onnx``.

    Returns:
        Whatever ``onnx_exporter`` returns on success; otherwise the result of
        the ``gr_warning`` / ``gr_error`` call that reported the problem.
    """
    # Check for an empty value before touching string methods (None has no .endswith).
    if not model_path: return gr_warning(translations["provide_file"].format(filename=translations["model"]))

    if not model_path.endswith(".pth"): model_path += ".pth"
    if not os.path.exists(model_path): return gr_warning(translations["provide_file"].format(filename=translations["model"]))

    try:
        gr_info(translations["start_onnx_export"])

        from main.library.onnx.onnx_export import onnx_exporter

        # Replace only the trailing extension; str.replace would also rewrite
        # any earlier ".pth" occurrence in the path (e.g. in a folder name).
        onnx_path = model_path[:-len(".pth")] + ".onnx"
        output = onnx_exporter(model_path, onnx_path, is_half=config.is_half, device=config.device)

        gr_info(translations["success"])
        return output
    except Exception as e:
        return gr_error(e)
|
| 114 |
-
|
| 115 |
-
def model_info(path):
    """Read the metadata of a ``.pth`` or ``.onnx`` model and format it for display.

    For ``.pth`` files the checkpoint itself is the metadata mapping; for
    ``.onnx`` files the JSON stored in the ``model_info`` metadata property is
    used.

    Returns:
        The formatted ``translations["model_info"]`` text, or the result of
        ``gr_warning`` when the path is not a model file or carries no metadata.
    """
    if not path or not os.path.exists(path) or os.path.isdir(path) or not path.endswith((".pth", ".onnx")): return gr_warning(translations["provide_file"].format(filename=translations["model"]))

    def prettify_date(date_str):
        if date_str == translations["not_found_create_time"]: return None

        try:
            return datetime.datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S.%f").strftime("%Y-%m-%d %H:%M:%S")
        except ValueError as e:
            logger.debug(e)
            return translations["format_not_valid"]

    if path.endswith(".pth"):
        # NOTE(security): torch.load without weights_only=True unpickles arbitrary
        # objects; only inspect checkpoints from trusted sources.
        model_data = torch.load(path, map_location="cpu")
    else:
        import onnx

        model = onnx.load(path)
        model_data = next((json.loads(prop.value) for prop in model.metadata_props if prop.key == "model_info"), None)

    # An ONNX file without the "model_info" property has nothing to show.
    if model_data is None: return gr_warning(translations["not_found"].format(name="model_info"))

    gr_info(translations["read_info"])

    # Prefer "epoch", fall back to "info", and only then to a "not found" text.
    epochs = model_data.get("epoch", None)
    if epochs is None: epochs = model_data.get("info", None)
    if epochs is None: epochs = translations["not_found"].format(name=translations["epoch"])

    steps = model_data.get("step", translations["not_found"].format(name=translations["step"]))
    sr = model_data.get("sr", translations["not_found"].format(name=translations["sr"]))
    f0 = model_data.get("f0", translations["not_found"].format(name=translations["f0"]))
    version = model_data.get("version", translations["not_found"].format(name=translations["version"]))
    creation_date = model_data.get("creation_date", translations["not_found_create_time"])
    model_hash = model_data.get("model_hash", translations["not_found"].format(name="model_hash"))
    pitch_guidance = translations["trained_f0"] if f0 else translations["not_f0"]
    creation_date_str = prettify_date(creation_date) if creation_date else translations["not_found_create_time"]
    model_name = model_data.get("model_name", translations["unregistered"])
    model_author = model_data.get("author", translations["not_author"])
    vocoder = model_data.get("vocoder", "Default")
    rms_extract = model_data.get("energy", False)

    gr_info(translations["success"])
    return translations["model_info"].format(model_name=model_name, model_author=model_author, epochs=epochs, steps=steps, version=version, sr=sr, pitch_guidance=pitch_guidance, model_hash=model_hash, creation_date_str=creation_date_str, vocoder=vocoder, rms_extract=rms_extract)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/presets.py
DELETED
|
@@ -1,166 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import json
|
| 4 |
-
|
| 5 |
-
sys.path.append(os.getcwd())
|
| 6 |
-
|
| 7 |
-
from main.app.variables import translations, configs
|
| 8 |
-
from main.app.core.ui import gr_info, gr_warning, change_preset_choices, change_effect_preset_choices
|
| 9 |
-
|
| 10 |
-
def load_presets(presets, cleaner, autotune, pitch, clean_strength, index_strength, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, formant_shifting, formant_qfrency, formant_timbre, proposal_pitch, proposal_pitch_threshold):
    """Load a conversion preset and return the resulting control values.

    ``presets`` is a file name inside ``configs["presets_path"]``. Every value
    missing from the preset keeps the current value passed in by the caller.
    When ``presets`` is empty a warning is shown and the current values are
    returned unchanged.

    Returns:
        A list of 16 values in the same order as the parameters after ``presets``.
    """
    file = {}

    if presets:
        with open(os.path.join(configs["presets_path"], presets), encoding="utf-8") as f:
            file = json.load(f)

        # Announce the load only when a preset was actually read.
        gr_info(translations["load_presets"].format(presets=presets))
    else:
        gr_warning(translations["provide_file_settings"])

    # Insertion order defines the order of the returned list.
    current = {
        "cleaner": cleaner,
        "autotune": autotune,
        "pitch": pitch,
        "clean_strength": clean_strength,
        "index_strength": index_strength,
        "resample_sr": resample_sr,
        "filter_radius": filter_radius,
        "rms_mix_rate": rms_mix_rate,
        "protect": protect,
        "split_audio": split_audio,
        "f0_autotune_strength": f0_autotune_strength,
        "formant_shifting": formant_shifting,
        "formant_qfrency": formant_qfrency,
        "formant_timbre": formant_timbre,
        "proposal_pitch": proposal_pitch,
        "proposal_pitch_threshold": proposal_pitch_threshold,
    }

    return [file.get(key, value) for key, value in current.items()]
|
| 21 |
-
|
| 22 |
-
def save_presets(name, cleaner, autotune, pitch, clean_strength, index_strength, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, cleaner_chbox, autotune_chbox, pitch_chbox, index_strength_chbox, resample_sr_chbox, filter_radius_chbox, rms_mix_rate_chbox, protect_chbox, split_audio_chbox, formant_shifting_chbox, formant_shifting, formant_qfrency, formant_timbre, proposal_pitch, proposal_pitch_threshold):
    """Write the selected conversion settings to ``<name>.conversion.json``.

    Each ``*_chbox`` flag decides whether its group of values is stored; the
    proposal-pitch group is stored whenever ``proposal_pitch`` itself is truthy.
    The file is created inside ``configs["presets_path"]``.

    Returns:
        The result of ``change_preset_choices()`` after saving, or the result of
        ``gr_warning`` when ``name`` is empty or no ``*_chbox`` flag is set.
    """
    if not name:
        return gr_warning(translations["provide_filename_settings"])

    export_flags = (cleaner_chbox, autotune_chbox, pitch_chbox, index_strength_chbox, resample_sr_chbox, filter_radius_chbox, rms_mix_rate_chbox, protect_chbox, split_audio_chbox, formant_shifting_chbox)
    if not any(export_flags):
        return gr_warning(translations["choose1"])

    # (enabled, values) pairs; order determines key order in the saved JSON.
    groups = (
        (cleaner_chbox, {"cleaner": cleaner, "clean_strength": clean_strength}),
        (autotune_chbox, {"autotune": autotune, "f0_autotune_strength": f0_autotune_strength}),
        (pitch_chbox, {"pitch": pitch}),
        (index_strength_chbox, {"index_strength": index_strength}),
        (resample_sr_chbox, {"resample_sr": resample_sr}),
        (filter_radius_chbox, {"filter_radius": filter_radius}),
        (rms_mix_rate_chbox, {"rms_mix_rate": rms_mix_rate}),
        (protect_chbox, {"protect": protect}),
        (split_audio_chbox, {"split_audio": split_audio}),
        (formant_shifting_chbox, {"formant_shifting": formant_shifting, "formant_qfrency": formant_qfrency, "formant_timbre": formant_timbre}),
        (proposal_pitch, {"proposal_pitch": proposal_pitch, "proposal_pitch_threshold": proposal_pitch_threshold}),
    )

    settings = {
        key: value
        for enabled, values in groups
        if enabled
        for key, value in values.items()
    }

    preset_file = os.path.join(configs["presets_path"], name + ".conversion.json")
    with open(preset_file, "w") as f:
        f.write(json.dumps(settings, indent=4))

    gr_info(translations["export_settings"].format(name=name))
    return change_preset_choices()
|
| 36 |
-
|
| 37 |
-
def audio_effect_load_presets(presets, resample_checkbox, audio_effect_resample_sr, chorus_depth, chorus_rate_hz, chorus_mix, chorus_centre_delay_ms, chorus_feedback, distortion_drive_db, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift_semitones, delay_second, delay_feedback, delay_mix, compressor_threshold_db, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold_db, limiter_release_ms, gain_db, bitcrush_bit_depth, clipping_threshold_db, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost, bass_frequency, treble_boost, treble_frequency, fade_in, fade_out, chorus_check_box, distortion_checkbox, reverb_check_box, delay_check_box, compressor_check_box, limiter, gain_checkbox, bitcrush_checkbox, clipping_checkbox, phaser_check_box, bass_or_treble, fade):
    """Load an audio-effect preset and return the resulting control values.

    ``presets`` is a file name inside ``configs["presets_path"]``. Every value
    missing from the preset keeps the current value passed in by the caller.
    When ``presets`` is empty a warning is shown and the current values are
    returned unchanged.

    Returns:
        A list of 50 values in the same order as the parameters after ``presets``.
    """
    file = {}

    if presets:
        with open(os.path.join(configs["presets_path"], presets), encoding="utf-8") as f:
            file = json.load(f)

        # Announce the load only when a preset was actually read, so the
        # "nothing selected" warning is not followed by a success message.
        gr_info(translations["load_presets"].format(presets=presets))
    else:
        gr_warning(translations["provide_file_settings"])

    # Insertion order defines the order of the returned list.
    current = {
        "resample_checkbox": resample_checkbox,
        "audio_effect_resample_sr": audio_effect_resample_sr,
        "chorus_depth": chorus_depth,
        "chorus_rate_hz": chorus_rate_hz,
        "chorus_mix": chorus_mix,
        "chorus_centre_delay_ms": chorus_centre_delay_ms,
        "chorus_feedback": chorus_feedback,
        "distortion_drive_db": distortion_drive_db,
        "reverb_room_size": reverb_room_size,
        "reverb_damping": reverb_damping,
        "reverb_wet_level": reverb_wet_level,
        "reverb_dry_level": reverb_dry_level,
        "reverb_width": reverb_width,
        "reverb_freeze_mode": reverb_freeze_mode,
        "pitch_shift_semitones": pitch_shift_semitones,
        "delay_second": delay_second,
        "delay_feedback": delay_feedback,
        "delay_mix": delay_mix,
        "compressor_threshold_db": compressor_threshold_db,
        "compressor_ratio": compressor_ratio,
        "compressor_attack_ms": compressor_attack_ms,
        "compressor_release_ms": compressor_release_ms,
        "limiter_threshold_db": limiter_threshold_db,
        "limiter_release_ms": limiter_release_ms,
        "gain_db": gain_db,
        "bitcrush_bit_depth": bitcrush_bit_depth,
        "clipping_threshold_db": clipping_threshold_db,
        "phaser_rate_hz": phaser_rate_hz,
        "phaser_depth": phaser_depth,
        "phaser_centre_frequency_hz": phaser_centre_frequency_hz,
        "phaser_feedback": phaser_feedback,
        "phaser_mix": phaser_mix,
        "bass_boost": bass_boost,
        "bass_frequency": bass_frequency,
        "treble_boost": treble_boost,
        "treble_frequency": treble_frequency,
        "fade_in": fade_in,
        "fade_out": fade_out,
        "chorus_check_box": chorus_check_box,
        "distortion_checkbox": distortion_checkbox,
        "reverb_check_box": reverb_check_box,
        "delay_check_box": delay_check_box,
        "compressor_check_box": compressor_check_box,
        "limiter": limiter,
        "gain_checkbox": gain_checkbox,
        "bitcrush_checkbox": bitcrush_checkbox,
        "clipping_checkbox": clipping_checkbox,
        "phaser_check_box": phaser_check_box,
        "bass_or_treble": bass_or_treble,
        "fade": fade,
    }

    return [file.get(key, value) for key, value in current.items()]
|
| 73 |
-
|
| 74 |
-
def audio_effect_save_presets(name, resample_checkbox, audio_effect_resample_sr, chorus_depth, chorus_rate_hz, chorus_mix, chorus_centre_delay_ms, chorus_feedback, distortion_drive_db, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift_semitones, delay_second, delay_feedback, delay_mix, compressor_threshold_db, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold_db, limiter_release_ms, gain_db, bitcrush_bit_depth, clipping_threshold_db, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost, bass_frequency, treble_boost, treble_frequency, fade_in, fade_out, chorus_check_box, distortion_checkbox, reverb_check_box, delay_check_box, compressor_check_box, limiter, gain_checkbox, bitcrush_checkbox, clipping_checkbox, phaser_check_box, bass_or_treble, fade):
    """Write the enabled audio-effect settings to ``<name>.effect.json``.

    Only the parameter groups whose switch is truthy are stored; the pitch
    shift counts as enabled when it is non-zero. A warning is returned when
    no name is given or when no group is enabled. On success the refreshed
    preset choices from ``change_effect_preset_choices()`` are returned.
    """
    if not name:
        return gr_warning(translations["provide_filename_settings"])

    # One (switch, values) pair per effect group, in the order the keys are stored.
    sections = (
        (resample_checkbox, dict(
            resample_checkbox=resample_checkbox,
            audio_effect_resample_sr=audio_effect_resample_sr,
        )),
        (chorus_check_box, dict(
            chorus_check_box=chorus_check_box,
            chorus_depth=chorus_depth,
            chorus_rate_hz=chorus_rate_hz,
            chorus_mix=chorus_mix,
            chorus_centre_delay_ms=chorus_centre_delay_ms,
            chorus_feedback=chorus_feedback,
        )),
        (distortion_checkbox, dict(
            distortion_checkbox=distortion_checkbox,
            distortion_drive_db=distortion_drive_db,
        )),
        (reverb_check_box, dict(
            reverb_check_box=reverb_check_box,
            reverb_room_size=reverb_room_size,
            reverb_damping=reverb_damping,
            reverb_wet_level=reverb_wet_level,
            reverb_dry_level=reverb_dry_level,
            reverb_width=reverb_width,
            reverb_freeze_mode=reverb_freeze_mode,
        )),
        (pitch_shift_semitones != 0, dict(
            pitch_shift_semitones=pitch_shift_semitones,
        )),
        (delay_check_box, dict(
            delay_check_box=delay_check_box,
            delay_second=delay_second,
            delay_feedback=delay_feedback,
            delay_mix=delay_mix,
        )),
        (compressor_check_box, dict(
            compressor_check_box=compressor_check_box,
            compressor_threshold_db=compressor_threshold_db,
            compressor_ratio=compressor_ratio,
            compressor_attack_ms=compressor_attack_ms,
            compressor_release_ms=compressor_release_ms,
        )),
        (limiter, dict(
            limiter=limiter,
            limiter_threshold_db=limiter_threshold_db,
            limiter_release_ms=limiter_release_ms,
        )),
        (gain_checkbox, dict(
            gain_checkbox=gain_checkbox,
            gain_db=gain_db,
        )),
        (bitcrush_checkbox, dict(
            bitcrush_checkbox=bitcrush_checkbox,
            bitcrush_bit_depth=bitcrush_bit_depth,
        )),
        (clipping_checkbox, dict(
            clipping_checkbox=clipping_checkbox,
            clipping_threshold_db=clipping_threshold_db,
        )),
        (phaser_check_box, dict(
            phaser_check_box=phaser_check_box,
            phaser_rate_hz=phaser_rate_hz,
            phaser_depth=phaser_depth,
            phaser_centre_frequency_hz=phaser_centre_frequency_hz,
            phaser_feedback=phaser_feedback,
            phaser_mix=phaser_mix,
        )),
        (bass_or_treble, dict(
            bass_or_treble=bass_or_treble,
            bass_boost=bass_boost,
            bass_frequency=bass_frequency,
            treble_boost=treble_boost,
            treble_frequency=treble_frequency,
        )),
        (fade, dict(
            fade=fade,
            fade_in=fade_in,
            fade_out=fade_out,
        )),
    )

    if not any(enabled for enabled, _ in sections):
        return gr_warning(translations["choose1"])

    settings = {
        key: value
        for enabled, values in sections
        if enabled
        for key, value in values.items()
    }

    preset_file = os.path.join(configs["presets_path"], name + ".effect.json")
    with open(preset_file, "w") as handle:
        json.dump(settings, handle, indent=4)

    gr_info(translations["export_settings"].format(name=name))
    return change_effect_preset_choices()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/process.py
DELETED
|
@@ -1,135 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import re
|
| 3 |
-
import sys
|
| 4 |
-
import shutil
|
| 5 |
-
import codecs
|
| 6 |
-
import zipfile
|
| 7 |
-
import requests
|
| 8 |
-
|
| 9 |
-
sys.path.append(os.getcwd())
|
| 10 |
-
|
| 11 |
-
from main.app.variables import logger, translations, configs
|
| 12 |
-
from main.app.core.ui import gr_info, gr_warning, gr_error, process_output, replace_punctuation
|
| 13 |
-
|
| 14 |
-
def read_docx_text(path):
    """Return the text of a .docx file, one line per non-empty paragraph.

    The archive member ``word/document.xml`` is parsed; the text runs of
    each paragraph element are concatenated, and paragraphs that contain
    no text are dropped.
    """
    from xml.etree import ElementTree

    namespace = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
    paragraph_tag = namespace + 'p'
    text_tag = namespace + 't'

    with zipfile.ZipFile(path) as archive:
        root = ElementTree.fromstring(archive.read("word/document.xml"))

    lines = []
    for paragraph in root.iter(paragraph_tag):
        joined = ''.join(node.text for node in paragraph.iter(text_tag) if node.text)
        if joined:
            lines.append(joined)

    return '\n'.join(lines)
|
| 29 |
-
|
| 30 |
-
def process_input(file_path):
    """Load the text content of an uploaded file.

    ``.srt`` files yield an empty string, ``.docx`` files are read through
    ``read_docx_text`` and everything else is read as UTF-8 text.

    Returns:
        The file's text, or ``""`` when the file could not be read. On a
        read failure only the warning is shown; the success notification
        is no longer emitted after it.
    """
    if file_path.endswith(".srt"):
        file_contents = ""
    else:
        try:
            if file_path.endswith(".docx"):
                # A corrupt archive must not crash the handler either.
                file_contents = read_docx_text(file_path)
            else:
                with open(file_path, "r", encoding="utf-8") as file:
                    file_contents = file.read()
        except Exception as e:
            gr_warning(translations["read_error"])
            logger.debug(e)
            return ""

    gr_info(translations["upload_success"].format(name=translations["text"]))
    return file_contents
|
| 44 |
-
|
| 45 |
-
def move_files_from_directory(src_dir, dest_weights, dest_logs, model_name):
    """Sort the model files found under ``src_dir`` into their destinations.

    ``.index`` files go to ``<dest_logs>/<model_name>/`` under their
    punctuation-cleaned name. ``.pth`` and ``.onnx`` files whose name does
    not start with ``D_`` or ``G_`` go to ``dest_weights`` and are renamed
    to ``model_name`` plus the extension. Every target path is passed
    through ``process_output`` before the move. Other files are left alone.
    """
    weight_extensions = (".pth", ".onnx")

    for root, _, names in os.walk(src_dir):
        for entry in names:
            source = os.path.join(root, entry)

            if entry.endswith(".index"):
                model_log_dir = os.path.join(dest_logs, model_name)
                os.makedirs(model_log_dir, exist_ok=True)

                target = process_output(os.path.join(model_log_dir, replace_punctuation(entry)))
                shutil.move(source, target)
                continue

            if entry.startswith(("D_", "G_")):
                continue

            for extension in weight_extensions:
                if entry.endswith(extension):
                    target = process_output(os.path.join(dest_weights, model_name + extension))
                    shutil.move(source, target)
                    break
|
| 64 |
-
|
| 65 |
-
def extract_name_model(filename):
    """Extract the model name from a filename.

    After ``replace_punctuation`` the name is searched for an alphanumeric
    run that follows an underscore and is directly followed by ``_v`` (with
    optional digits). Returns that run, or ``None`` when nothing matches.
    """
    cleaned = replace_punctuation(filename)
    found = re.search(r"_([A-Za-z0-9]+)(?=_v\d*)", cleaned)

    if found is None:
        return None

    return found.group(1)
|
| 68 |
-
|
| 69 |
-
def save_drop_model(dropboxs):
    """Import dropped model files into the weights and logs folders.

    Each dropped file is first moved into a temporary staging folder:

    * ``.zip``   - unpacked, then its contents are distributed with
      ``move_files_from_directory`` under the archive's base name;
    * ``.pth`` / ``.onnx`` - moved into the weights folder;
    * ``.index`` - moved into ``<logs>/<model name>/``;
    * anything else - a warning is shown and processing stops.

    The staging folder is always removed afterwards. Always returns ``None``.
    """
    weight_folder = configs["weights_path"]
    logs_folder = configs["logs_path"]
    save_model_temp = "save_model_temp"

    # exist_ok already makes this idempotent; no separate existence test needed.
    for folder in (weight_folder, logs_folder, save_model_temp):
        os.makedirs(folder, exist_ok=True)

    try:
        for dropbox in dropboxs:
            shutil.move(dropbox, save_model_temp)
            file_name = os.path.basename(dropbox)
            staged_file = os.path.join(save_model_temp, file_name)

            if file_name.endswith(".zip"):
                shutil.unpack_archive(staged_file, save_model_temp)
                # Strip only the trailing extension; str.replace would also
                # remove any ".zip" occurring inside the name.
                model_name = file_name[:-len(".zip")]
                move_files_from_directory(save_model_temp, weight_folder, logs_folder, model_name)
            elif file_name.endswith((".pth", ".onnx")):
                output_file = process_output(os.path.join(weight_folder, file_name))
                shutil.move(staged_file, output_file)
            elif file_name.endswith(".index"):
                modelname = extract_name_model(file_name)
                if modelname is None:
                    modelname = os.path.splitext(os.path.basename(file_name))[0]

                model_logs = os.path.join(logs_folder, modelname)
                os.makedirs(model_logs, exist_ok=True)

                shutil.move(staged_file, model_logs)
            else:
                gr_warning(translations["unable_analyze_model"])
                return None

        gr_info(translations["upload_success"].format(name=translations["model"]))
        return None
    except Exception as e:
        gr_error(message=translations["error_occurred"].format(e=e))
        return None
    finally:
        shutil.rmtree(save_model_temp, ignore_errors=True)
|
| 109 |
-
|
| 110 |
-
def zip_file(name, pth, index):
    """Bundle a model file (and optionally its index) into a zip archive.

    Args:
        name: Model name; the archive is written to
            ``<logs_path>/<name>/<name>.zip``.
        pth: File name of a ``.pth`` or ``.onnx`` model inside the weights folder.
        index: Optional path of an index file to add to the archive.

    Returns:
        An update dict that makes the download component visible and points
        it at the archive, or the result of ``gr_warning`` when ``pth`` is
        missing, has the wrong extension or does not exist.
    """
    missing_model = translations["provide_file"].format(filename=translations["model"])

    # Validate before building the path: os.path.join raises on None.
    if not pth or not pth.endswith((".pth", ".onnx")):
        return gr_warning(missing_model)

    pth_path = os.path.join(configs["weights_path"], pth)
    if not os.path.exists(pth_path):
        return gr_warning(missing_model)

    zip_dir = os.path.join(configs["logs_path"], name)
    # The model's log folder may not exist yet (e.g. a model that was only imported).
    os.makedirs(zip_dir, exist_ok=True)
    zip_file_path = os.path.join(zip_dir, name + ".zip")
    gr_info(translations["start"].format(start=translations["zip"]))

    with zipfile.ZipFile(zip_file_path, 'w') as zipf:
        zipf.write(pth_path, os.path.basename(pth_path))
        if index:
            zipf.write(index, os.path.basename(index))

    gr_info(translations["success"])
    return {"visible": True, "value": zip_file_path, "__type__": "update"}
|
| 123 |
-
|
| 124 |
-
def fetch_pretrained_data():
    """Download the custom-pretrained catalogue as a dict.

    The URL is stored rot13-encoded. Any failure (network error, timeout,
    non-2xx status, invalid JSON) is logged at debug level and an empty
    dict is returned instead.
    """
    try:
        response = requests.get(
            codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/wfba/phfgbz_cergenvarq.wfba", "rot13"),
            timeout=30,  # without a timeout a stalled connection blocks the caller forever
        )
        response.raise_for_status()

        return response.json()
    except Exception as e:
        # Not a bare except: KeyboardInterrupt / SystemExit must propagate.
        logger.debug(e)
        return {}
|
| 132 |
-
|
| 133 |
-
def update_sample_rate_dropdown(model):
    """Build a dropdown update listing the sample rates offered for ``model``.

    Returns ``None`` when ``model`` equals ``translations["success"]`` (as
    before), and now also when the catalogue could not be fetched or has no
    entry for ``model`` - previously that case raised ``KeyError`` or
    ``IndexError``.
    """
    if model == translations["success"]:
        # Nothing to look up, so skip the network round trip entirely.
        return None

    data = fetch_pretrained_data()
    sample_rates = list(data.get(model) or {})
    if not sample_rates:
        return None

    return {"choices": sample_rates, "value": sample_rates[0], "__type__": "update"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/realtime.py
DELETED
|
@@ -1,174 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import time
|
| 4 |
-
|
| 5 |
-
sys.path.append(os.getcwd())
|
| 6 |
-
|
| 7 |
-
from main.app.variables import translations, configs
|
| 8 |
-
from main.app.core.ui import gr_info, gr_warning, audio_device
|
| 9 |
-
|
| 10 |
-
# Session state shared by realtime_start() and realtime_stop() through `global`.
running = False
callbacks = None
audio_manager = None

# Rate handed to the conversion pipeline vs. rate used for the audio devices.
PIPELINE_SAMPLE_RATE = 16000
DEVICE_SAMPLE_RATE = 48000

# Ready-made component updates that enable / disable a button.
interactive_true = dict(interactive=True, __type__="update")
interactive_false = dict(interactive=False, __type__="update")
|
| 17 |
-
|
| 18 |
-
def realtime_start(
    monitor,
    exclusive_mode,
    vad_enabled,
    input_audio_device,
    output_audio_device,
    monitor_output_device,
    input_audio_gain,
    output_audio_gain,
    monitor_audio_gain,
    input_asio_channels,
    output_asio_channels,
    monitor_asio_channels,
    chunk_size,
    pitch,
    model_pth,
    model_index,
    index_strength,
    onnx_f0_mode,
    f0_method,
    hop_length,
    embed_mode,
    embedders,
    custom_embedders,
    f0_autotune,
    proposal_pitch,
    f0_autotune_strength,
    proposal_pitch_threshold,
    rms_mix_rate,
    protect,
    filter_radius,
    silent_threshold,
    extra_convert_size,
    cross_fade_overlap_size,
    vad_sensitivity,
    vad_frame_ms,
    clean_audio,
    clean_strength
):
    """Start a realtime voice-conversion session and stream its status.

    This is a generator: every item is a ``(status text, update, update)``
    triple built from ``interactive_true`` / ``interactive_false``. After
    validating the devices and the model file it creates the
    ``AudioCallbacks`` pipeline, starts its audio manager and then reports
    the measured latency roughly every 0.1 s until the module-level session
    state is cleared (see ``realtime_stop``).
    """
    global running, callbacks, audio_manager

    gr_info(translations["start_realtime"])
    yield translations["start_realtime"], interactive_false, interactive_true

    if not input_audio_device or not output_audio_device:
        gr_warning(translations["provide_audio_device"])
        yield translations["provide_audio_device"], interactive_true, interactive_false
        return

    if monitor and not monitor_output_device:
        gr_warning(translations["provide_monitor_device"])
        yield translations["provide_monitor_device"], interactive_true, interactive_false
        return

    # Resolve bare file names against the weights folder. Guard against an
    # empty selection first: os.path.exists(None) raises TypeError.
    if model_pth and not os.path.exists(model_pth):
        model_pth = os.path.join(configs["weights_path"], model_pth)

    embedder_model = (embedders if embedders != "custom" else custom_embedders)

    if not model_pth or not os.path.exists(model_pth) or os.path.isdir(model_pth) or not model_pth.endswith((".pth", ".onnx")):
        gr_warning(translations["provide_file"].format(filename=translations["model"]))
        yield translations["provide_file"].format(filename=translations["model"]), interactive_true, interactive_false
        return

    # Only mark the session as running once validation has passed, so a
    # rejected request does not leave the flag stuck at True.
    running = True

    input_devices, output_devices = audio_device()
    input_device_id = input_devices[input_audio_device][0]
    output_device_id = output_devices[output_audio_device][0]
    output_monitor_id = output_devices[monitor_output_device][0] if monitor else None

    # Gains arrive as percentages.
    input_audio_gain /= 100.0
    output_audio_gain /= 100.0
    monitor_audio_gain /= 100.0

    # Milliseconds -> number of 128-sample blocks at the device rate.
    chunk_size = int(chunk_size * DEVICE_SAMPLE_RATE / 1000 / 128)

    from main.inference.realtime.callbacks import AudioCallbacks

    callbacks = AudioCallbacks(
        pass_through=False,
        read_chunk_size=chunk_size,
        cross_fade_overlap_size=cross_fade_overlap_size,
        input_sample_rate=DEVICE_SAMPLE_RATE,
        output_sample_rate=DEVICE_SAMPLE_RATE,
        extra_convert_size=extra_convert_size,
        model_path=model_pth,
        index_path=model_index,
        f0_method=f0_method,
        f0_onnx=onnx_f0_mode,
        embedder_model=embedder_model,
        embedders_mode=embed_mode,
        sample_rate=PIPELINE_SAMPLE_RATE,
        hop_length=hop_length,
        silent_threshold=silent_threshold,
        f0_up_key=pitch,
        index_rate=index_strength,
        protect=protect,
        filter_radius=filter_radius,
        rms_mix_rate=rms_mix_rate,
        f0_autotune=f0_autotune,
        f0_autotune_strength=f0_autotune_strength,
        proposal_pitch=proposal_pitch,
        proposal_pitch_threshold=proposal_pitch_threshold,
        input_audio_gain=input_audio_gain,
        output_audio_gain=output_audio_gain,
        monitor_audio_gain=monitor_audio_gain,
        monitor=monitor,
        vad_enabled=vad_enabled,
        vad_sensitivity=vad_sensitivity,
        vad_frame_ms=vad_frame_ms,
        clean_audio=clean_audio,
        clean_strength=clean_strength
    )

    audio_manager = callbacks.audio
    audio_manager.start(
        input_device_id=input_device_id,
        output_device_id=output_device_id,
        output_monitor_id=output_monitor_id,
        exclusive_mode=exclusive_mode,
        asio_input_channel=input_asio_channels,
        asio_output_channel=output_asio_channels,
        asio_output_monitor_channel=monitor_asio_channels,
        read_chunk_size=chunk_size,
        input_audio_sample_rate=DEVICE_SAMPLE_RATE,
        output_monitor_sample_rate=DEVICE_SAMPLE_RATE
    )

    gr_info(translations["realtime_is_ready"])

    while running and callbacks is not None and audio_manager is not None:
        time.sleep(0.1)
        if hasattr(callbacks, "latency"):
            yield f"{translations['latency']}: {callbacks.latency:.2f} ms", interactive_false, interactive_true

    # A generator's `return value` never reaches the consumer of the yielded
    # items, so the final status has to be yielded like the others.
    yield translations["realtime_has_stop"], interactive_true, interactive_false
|
| 151 |
-
|
| 152 |
-
def realtime_stop():
    """Stop the active realtime session, if there is one.

    Returns a ``(status text, update, update)`` triple. When no session is
    active a warning is shown and the "not found" text is returned;
    otherwise the audio manager is stopped, the shared state is cleared and
    the GPU cache is released.
    """
    global running, callbacks, audio_manager

    session_active = running and audio_manager is not None and callbacks is not None
    if not session_active:
        gr_warning(translations["realtime_not_found"])

        return translations["realtime_not_found"], interactive_true, interactive_false

    gr_info(translations["stop_realtime"])

    audio_manager.stop()
    running = False

    if hasattr(callbacks, "latency"):
        del callbacks.latency

    # Drop the module-level references so the pipeline objects can be freed.
    audio_manager = None
    callbacks = None
    gr_info(translations["realtime_has_stop"])

    from main.library.utils import clear_gpu_cache
    clear_gpu_cache()

    return translations["realtime_has_stop"], interactive_true, interactive_false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/realtime_client.py
DELETED
|
@@ -1,114 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import json
|
| 4 |
-
|
| 5 |
-
import numpy as np
|
| 6 |
-
|
| 7 |
-
from fastapi import FastAPI, WebSocketDisconnect, WebSocket
|
| 8 |
-
|
| 9 |
-
sys.path.append(os.getcwd())
|
| 10 |
-
|
| 11 |
-
from main.library.utils import clear_gpu_cache
|
| 12 |
-
from main.app.variables import configs, translations, logger
|
| 13 |
-
from main.inference.realtime.realtime import VoiceChanger, RVC_Realtime
|
| 14 |
-
|
| 15 |
-
# ASGI application exposing the /ws-audio websocket endpoint defined below.
app = FastAPI()
# Voice changer used by the active websocket connection; None when idle.
vc_instance = None

# Rate handed to the conversion pipeline vs. rate used for the client's audio.
PIPELINE_SAMPLE_RATE = 16000
DEVICE_SAMPLE_RATE = 48000
|
| 20 |
-
|
| 21 |
-
@app.websocket("/ws-audio")
async def websocket_audio(ws: WebSocket):
    """Serve one realtime voice-conversion session over a websocket.

    Protocol, as implemented below:

    1. The first message is a JSON text frame with the session parameters.
    2. Every following binary frame is read as float32 samples, padded or
       truncated to ``chunk_size * 128`` samples and converted.
    3. For each frame the server sends a JSON ``latency`` message followed
       by the converted audio as raw bytes.

    On any exit path the voice changer is released, the GPU cache is
    cleared and the socket is closed.
    """
    global vc_instance
    await ws.accept()

    logger.info(translations["ws_connected"])

    try:
        text = await ws.receive_text()
        params = json.loads(text)

        read_chunk_size = int(params["chunk_size"])
        block_frame = read_chunk_size * 128
        embedders = params["embedders"]

        model_pth = params["model_pth"]
        # Guard the lookup: os.path.exists(None) raises TypeError.
        if model_pth and not os.path.exists(model_pth):
            model_pth = os.path.join(configs["weights_path"], model_pth)

        if not model_pth or not os.path.exists(model_pth) or os.path.isdir(model_pth) or not model_pth.endswith((".pth", ".onnx")):
            warning = translations["provide_file"].format(filename=translations["model"])
            logger.warning(warning)
            await ws.send_text(json.dumps({"type": "warnings", "value": warning}))
            return

        logger.info(translations["start_realtime"])

        if vc_instance is None:
            vc_instance = VoiceChanger(
                read_chunk_size=read_chunk_size,
                cross_fade_overlap_size=params["cross_fade_overlap_size"],
                input_sample_rate=DEVICE_SAMPLE_RATE,
                extra_convert_size=params["extra_convert_size"]
            )
            vc_instance.initialize(vc_model=RVC_Realtime(
                model_path=model_pth,
                index_path=params["model_index"],
                f0_method=params["f0_method"],
                f0_onnx=params["f0_onnx"],
                embedder_model=(embedders if embedders != "custom" else params["custom_embedders"]),
                embedders_mode=params["embedders_mode"],
                sample_rate=PIPELINE_SAMPLE_RATE,
                hop_length=params["hop_length"],
                silent_threshold=params["silent_threshold"],
                input_sample_rate=DEVICE_SAMPLE_RATE,
                output_sample_rate=DEVICE_SAMPLE_RATE,
                vad_enabled=params["vad_enabled"],
                vad_sensitivity=params["vad_sensitivity"],
                vad_frame_ms=params["vad_frame_ms"],
                clean_audio=params["clean_audio"],
                clean_strength=params["clean_strength"]
            ))

        logger.info(translations["realtime_is_ready"])

        # The gain is a percentage and does not change during the session.
        input_gain = params["input_audio_gain"] / 100.0

        while 1:
            audio = await ws.receive_bytes()
            arr = np.frombuffer(audio, dtype=np.float32)

            if arr.size < block_frame:
                arr = np.pad(arr, (0, block_frame - arr.size)).astype(np.float32)
            elif arr.size > block_frame:
                arr = arr[:block_frame].astype(np.float32)

            audio_output, _, perf = vc_instance.on_request(
                arr * input_gain,
                f0_up_key=params["f0_up_key"],
                index_rate=params["index_rate"],
                protect=params["protect"],
                filter_radius=params["filter_radius"],
                rms_mix_rate=params["rms_mix_rate"],
                f0_autotune=params["f0_autotune"],
                f0_autotune_strength=params["f0_autotune_strength"],
                proposal_pitch=params["proposal_pitch"],
                proposal_pitch_threshold=params["proposal_pitch_threshold"]
            )

            await ws.send_text(json.dumps({"type": "latency", "value": perf[1]}))
            await ws.send_bytes(audio_output.tobytes())
    except WebSocketDisconnect:
        logger.info(translations["ws_disconnected"])
    except Exception as e:
        import traceback
        logger.debug(traceback.format_exc())
        logger.error(translations["error_occurred"].format(e=e))
    finally:
        # Release the model before clearing the GPU cache.
        vc_instance = None
        clear_gpu_cache()

        try:
            await ws.close()
        except Exception:
            # Best effort: the socket may already be closed. A bare `except`
            # here would also swallow task cancellation.
            pass

        logger.info(translations["ws_closed"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/restart.py
DELETED
|
@@ -1,48 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import json
|
| 4 |
-
import platform
|
| 5 |
-
import subprocess
|
| 6 |
-
|
| 7 |
-
sys.path.append(os.getcwd())
|
| 8 |
-
|
| 9 |
-
from main.app.core.ui import gr_info
|
| 10 |
-
from main.app.variables import python, translations, configs_json
|
| 11 |
-
|
| 12 |
-
def restart_app(app):
    """Close the running app and launch ``main/app/app.py`` again.

    The terminal is cleared first. The new process receives the current
    command-line arguments except ``--open``.
    """
    gr_info(translations["30s"])

    clear_command = "cls" if platform.system() == "Windows" else "clear"
    os.system(clear_command)

    app.close()

    forwarded_args = [arg for arg in sys.argv[1:] if arg != "--open"]
    entry_point = os.path.join("main", "app", "app.py")
    subprocess.run([python, entry_point, *forwarded_args])
|
| 18 |
-
|
| 19 |
-
def _update_config_and_restart(key, value, app):
    """Persist ``configs[key] = value`` and restart the app if it changed."""
    # Context manager instead of json.load(open(...)), which leaked the handle.
    with open(configs_json, "r") as f:
        configs = json.load(f)

    if value == configs[key]:
        return

    configs[key] = value

    with open(configs_json, "w") as f:
        json.dump(configs, f, indent=4)

    restart_app(app)


def change_language(lang, app):
    """Store ``lang`` as the configured language; restart when it differs."""
    _update_config_and_restart("language", lang, app)


def change_theme(theme, app):
    """Store ``theme`` as the configured theme; restart when it differs."""
    _update_config_and_restart("theme", theme, app)


def change_font(font, app):
    """Store ``font`` as the configured font; restart when it differs."""
    _update_config_and_restart("font", font, app)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/separate.py
DELETED
|
@@ -1,95 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import subprocess
|
| 4 |
-
|
| 5 |
-
sys.path.append(os.getcwd())
|
| 6 |
-
|
| 7 |
-
from main.app.core.ui import gr_info, gr_warning
|
| 8 |
-
from main.app.variables import python, translations, configs
|
| 9 |
-
|
| 10 |
-
def separate_music(
    input_path,
    output_dirs,
    export_format,
    model_name,
    karaoke_model,
    reverb_model,
    denoise_model,
    sample_rate,
    shifts,
    batch_size,
    overlap,
    aggression,
    hop_length,
    window_size,
    segments_size,
    post_process_threshold,
    enable_tta,
    enable_denoise,
    high_end_process,
    enable_post_process,
    separate_backing,
    separate_reverb
):
    """Run the separation script on one audio file and return the stem paths.

    Returns:
        A four-item list: original vocals, instruments, main vocals and
        backing vocals. The last two are ``None`` unless
        ``separate_backing`` is set. ``[None] * 4`` is returned when the
        input file or the output directory is not valid.
    """
    # If a file path was given, use its parent directory; otherwise keep it.
    output_dirs = os.path.dirname(output_dirs) or output_dirs

    if not input_path or not os.path.exists(input_path) or os.path.isdir(input_path):
        gr_warning(translations["input_not_valid"])
        return [None]*4

    # A missing output directory is rejected. The makedirs call that used to
    # follow this check could never run and has been removed.
    if not os.path.exists(output_dirs):
        gr_warning(translations["output_not_valid"])
        return [None]*4

    gr_info(translations["start"].format(start=translations["separator_music"]))

    subprocess.run([
        python, configs["separate_path"],
        "--input_path", input_path,
        "--output_dirs", output_dirs,
        "--export_format", export_format,
        "--model_name", model_name,
        "--karaoke_model", karaoke_model,
        "--reverb_model", reverb_model,
        "--denoise_model", denoise_model,
        "--sample_rate", str(sample_rate),
        "--shifts", str(shifts),
        "--batch_size", str(batch_size),
        "--overlap", str(overlap),
        "--aggression", str(aggression),
        "--hop_length", str(hop_length),
        "--window_size", str(window_size),
        "--segments_size", str(segments_size),
        "--post_process_threshold", str(post_process_threshold),
        "--enable_tta", str(enable_tta),
        "--enable_denoise", str(enable_denoise),
        "--high_end_process", str(high_end_process),
        "--enable_post_process", str(enable_post_process),
        "--separate_backing", str(separate_backing),
        "--separate_reverb", str(separate_reverb),
    ])

    gr_info(translations["success"])

    if not os.path.isfile(input_path):
        return [None]*4

    # Stems are expected in a sub-folder named after the input file.
    filename, _ = os.path.splitext(os.path.basename(input_path))
    output_dirs = os.path.join(output_dirs, filename)

    reverb_suffix = "_No_Reverb" if separate_reverb else ""
    original_vocals = os.path.join(output_dirs, f"Original_Vocals{reverb_suffix}.{export_format}")
    instruments = os.path.join(output_dirs, f"Instruments.{export_format}")

    main_vocals = backing_vocals = None
    if separate_backing:
        main_vocals = os.path.join(output_dirs, f"Main_Vocals{reverb_suffix}.{export_format}")
        backing_vocals = os.path.join(output_dirs, f"Backing_Vocals.{export_format}")

    return [original_vocals, instruments, main_vocals, backing_vocals]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/training.py
DELETED
|
@@ -1,265 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import time
|
| 4 |
-
import shutil
|
| 5 |
-
import codecs
|
| 6 |
-
import threading
|
| 7 |
-
import subprocess
|
| 8 |
-
|
| 9 |
-
sys.path.append(os.getcwd())
|
| 10 |
-
|
| 11 |
-
from main.tools import huggingface
|
| 12 |
-
from main.app.core.ui import gr_info, gr_warning
|
| 13 |
-
from main.app.variables import python, translations, configs
|
| 14 |
-
|
| 15 |
-
def if_done(done, p):
    """Wait for process *p* to exit, then set ``done[0]`` to True.

    Args:
        done: One-element mutable list used as a completion flag shared with
            the log reader.
        p: Object exposing ``poll()`` that returns ``None`` while running.
    """
    # Poll twice a second until the child reports an exit status.
    while p.poll() is None:
        time.sleep(0.5)

    done[0] = True
|
| 21 |
-
|
| 22 |
-
def log_read(done, name):
    """Stream the contents of ``app.log`` while a background job runs.

    The log file is truncated first. While the job is running, each yielded
    string contains the non-blank, non-DEBUG lines that mention *name*. Once
    ``done[0]`` becomes true a final snapshot is yielded that keeps every
    non-blank, non-DEBUG line regardless of *name*.

    Args:
        done: One-element list; ``done[0]`` turning true ends the polling loop.
        name: Substring a line must contain to be shown during polling.

    Yields:
        str: The filtered log text.
    """
    log_file = os.path.join(configs["logs_path"], "app.log")

    # Opening in "w" mode empties any log left over from a previous run.
    with open(log_file, "w", encoding="utf-8"):
        pass

    while True:
        with open(log_file, "r", encoding="utf-8") as handle:
            relevant = [
                row for row in handle.readlines()
                if "DEBUG" not in row and name in row and row.strip() != ""
            ]
            yield "".join(relevant)

        time.sleep(1)
        if done[0]:
            break

    with open(log_file, "r", encoding="utf-8") as handle:
        log = "".join(
            row for row in handle.readlines()
            if "DEBUG" not in row and row.strip() != ""
        )

    yield log
|
| 39 |
-
|
| 40 |
-
def create_dataset(
    input_data,
    output_dirs,
    skip_seconds,
    skip_start_audios,
    skip_end_audios,
    separate,
    model_name,
    reverb_model,
    denoise_model,
    sample_rate,
    shifts,
    batch_size,
    overlap,
    aggression,
    hop_length,
    window_size,
    segments_size,
    post_process_threshold,
    enable_tta,
    enable_denoise,
    high_end_process,
    enable_post_process,
    separate_reverb,
    clean_dataset,
    clean_strength
):
    """Run the dataset-creation script as a child process and stream its log.

    Every argument is forwarded, converted with ``str()``, as the value of the
    command-line option of the same name to the script at
    ``configs["create_dataset_path"]``.

    Yields:
        str: Log snapshots produced by ``log_read`` for "create_dataset".
    """
    gr_info(translations["start"].format(start=translations["create"]))

    options = {
        "--input_data": input_data,
        "--output_dirs": output_dirs,
        "--skip_seconds": skip_seconds,
        "--skip_start_audios": skip_start_audios,
        "--skip_end_audios": skip_end_audios,
        "--separate": separate,
        "--model_name": model_name,
        "--reverb_model": reverb_model,
        "--denoise_model": denoise_model,
        "--sample_rate": sample_rate,
        "--shifts": shifts,
        "--batch_size": batch_size,
        "--overlap": overlap,
        "--aggression": aggression,
        "--hop_length": hop_length,
        "--window_size": window_size,
        "--segments_size": segments_size,
        "--post_process_threshold": post_process_threshold,
        "--enable_tta": enable_tta,
        "--enable_denoise": enable_denoise,
        "--high_end_process": high_end_process,
        "--enable_post_process": enable_post_process,
        "--separate_reverb": separate_reverb,
        "--clean_dataset": clean_dataset,
        "--clean_strength": clean_strength,
    }

    # An argv list (no shell) keeps quotes, spaces and shell metacharacters in
    # user-supplied values from being interpreted by a shell.
    # NOTE(review): assumes `python` is a single interpreter path - confirm.
    command = [python, configs["create_dataset_path"]]
    for flag, value in options.items():
        command += [flag, str(value)]

    p = subprocess.Popen(command)
    done = [False]

    threading.Thread(target=if_done, args=(done, p)).start()

    for log in log_read(done, "create_dataset"):
        yield log
|
| 76 |
-
|
| 77 |
-
def create_reference(audio_path, reference_name, pitch_guidance, use_energy, version, embedder_model, embedders_mode, f0_method, f0_onnx, f0_up_key, filter_radius, f0_autotune, f0_autotune_strength, proposal_pitch, proposal_pitch_threshold, alpha=0.5):
    """Run the reference-creation script as a child process and stream its log.

    Every argument is forwarded, converted with ``str()``, as the value of the
    command-line option of the same name to the script at
    ``configs["create_reference_path"]``.

    Yields:
        str: Log snapshots produced by ``log_read`` for "create_reference".
    """
    gr_info(translations["start"].format(start=translations["create_reference"]))

    options = {
        "--audio_path": audio_path,
        "--reference_name": reference_name,
        "--pitch_guidance": pitch_guidance,
        "--use_energy": use_energy,
        "--version": version,
        "--embedder_model": embedder_model,
        "--embedders_mode": embedders_mode,
        "--f0_method": f0_method,
        "--f0_onnx": f0_onnx,
        "--f0_up_key": f0_up_key,
        "--filter_radius": filter_radius,
        "--f0_autotune": f0_autotune,
        "--f0_autotune_strength": f0_autotune_strength,
        "--proposal_pitch": proposal_pitch,
        "--proposal_pitch_threshold": proposal_pitch_threshold,
        "--alpha": alpha,
    }

    # An argv list (no shell) keeps quotes, spaces and shell metacharacters in
    # user-supplied values from being interpreted by a shell.
    # NOTE(review): assumes `python` is a single interpreter path - confirm.
    command = [python, configs["create_reference_path"]]
    for flag, value in options.items():
        command += [flag, str(value)]

    p = subprocess.Popen(command)
    done = [False]

    threading.Thread(target=if_done, args=(done, p)).start()

    for log in log_read(done, "create_reference"):
        yield log
|
| 87 |
-
|
| 88 |
-
def preprocess(model_name, sample_rate, cpu_core, cut_preprocess, process_effects, dataset, clean_dataset, clean_strength, chunk_len=3.0, overlap_len=0.3, normalization_mode="none"):
    """Run the preprocessing script for a model and stream its log.

    Any existing ``<logs_path>/<model_name>`` directory is removed before the
    script at ``configs["preprocess_path"]`` is launched, then recreated.

    Args:
        model_name: Name of the model; also the directory name under the logs path.
        sample_rate: String such as ``"40k"``; converted to an integer in Hz.
        dataset: Directory expected to contain at least one audio file.
        Remaining arguments are forwarded as command-line option values.

    Yields:
        str: Log snapshots produced by ``log_read`` for "preprocess". When
        validation fails a warning is shown and nothing is yielded.
    """
    sr = int(float(sample_rate.rstrip("k")) * 1000)

    if not model_name: return gr_warning(translations["provide_name"])

    audio_suffixes = ("wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3")
    # isdir (not exists) so that a path pointing at a regular file yields the
    # warning instead of an uncaught error from os.listdir.
    if not os.path.isdir(dataset) or not any(
        f.lower().endswith(audio_suffixes)
        for f in os.listdir(dataset)
        if os.path.isfile(os.path.join(dataset, f))
    ):
        return gr_warning(translations["not_found_data"])

    model_dir = os.path.join(configs["logs_path"], model_name)
    if os.path.exists(model_dir): shutil.rmtree(model_dir, ignore_errors=True)

    options = {
        "--model_name": model_name,
        "--dataset_path": dataset,
        "--sample_rate": sr,
        "--cpu_cores": cpu_core,
        "--cut_preprocess": cut_preprocess,
        "--process_effects": process_effects,
        "--clean_dataset": clean_dataset,
        "--clean_strength": clean_strength,
        "--chunk_len": chunk_len,
        "--overlap_len": overlap_len,
        "--normalization_mode": normalization_mode,
    }

    # An argv list (no shell) keeps quotes, spaces and shell metacharacters in
    # user-supplied values from being interpreted by a shell.
    # NOTE(review): assumes `python` is a single interpreter path - confirm.
    command = [python, configs["preprocess_path"]]
    for flag, value in options.items():
        command += [flag, str(value)]

    p = subprocess.Popen(command)
    done = [False]

    threading.Thread(target=if_done, args=(done, p)).start()
    os.makedirs(model_dir, exist_ok=True)

    for log in log_read(done, "preprocess"):
        yield log
|
| 105 |
-
|
| 106 |
-
def extract(model_name, version, method, pitch_guidance, hop_length, cpu_cores, gpu, sample_rate, embedders, custom_embedders, onnx_f0_mode, embedders_mode, f0_autotune, f0_autotune_strength, hybrid_method, rms_extract, alpha=0.5):
    """Run the feature-extraction script for a model and stream its log.

    Requires that both ``sliced_audios`` and ``sliced_audios_16k`` under the
    model's log directory contain at least one file; otherwise a warning is
    shown and nothing is yielded.

    Args:
        method: F0 method; when it is ``"hybrid"``, *hybrid_method* is used instead.
        embedders: Embedder model; when it is ``"custom"``, *custom_embedders* is used.
        sample_rate: String such as ``"40k"``; converted to an integer in Hz.
        Remaining arguments are forwarded as command-line option values.

    Yields:
        str: Log snapshots produced by ``log_read`` for "extract".
    """
    f0method = method if method != "hybrid" else hybrid_method
    embedder_model = embedders if embedders != "custom" else custom_embedders
    sr = int(float(sample_rate.rstrip("k")) * 1000)

    if not model_name: return gr_warning(translations["provide_name"])
    model_dir = os.path.join(configs["logs_path"], model_name)

    def _has_files(folder):
        # True when the folder holds at least one regular file.
        return any(os.path.isfile(os.path.join(folder, f)) for f in os.listdir(folder))

    try:
        ready = _has_files(os.path.join(model_dir, "sliced_audios")) and _has_files(os.path.join(model_dir, "sliced_audios_16k"))
    except OSError:
        # Missing or unreadable folders mean preprocessing has not been run.
        ready = False

    if not ready: return gr_warning(translations["not_found_data_preprocess"])

    options = {
        "--model_name": model_name,
        "--rvc_version": version,
        "--f0_method": f0method,
        "--pitch_guidance": pitch_guidance,
        "--hop_length": hop_length,
        "--cpu_cores": cpu_cores,
        "--gpu": gpu,
        "--sample_rate": sr,
        "--embedder_model": embedder_model,
        "--f0_onnx": onnx_f0_mode,
        "--embedders_mode": embedders_mode,
        "--f0_autotune": f0_autotune,
        "--f0_autotune_strength": f0_autotune_strength,
        "--rms_extract": rms_extract,
        "--alpha": alpha,
    }

    # An argv list (no shell) keeps quotes, spaces and shell metacharacters in
    # user-supplied values from being interpreted by a shell.
    # NOTE(review): assumes `python` is a single interpreter path - confirm.
    command = [python, configs["extract_path"]]
    for flag, value in options.items():
        command += [flag, str(value)]

    p = subprocess.Popen(command)
    done = [False]

    threading.Thread(target=if_done, args=(done, p)).start()
    os.makedirs(model_dir, exist_ok=True)

    for log in log_read(done, "extract"):
        yield log
|
| 126 |
-
|
| 127 |
-
def create_index(model_name, rvc_version, index_algorithm):
    """Run the index-creation script for a model and stream its log.

    Requires at least one file in ``<logs_path>/<model_name>/<rvc_version>_extracted``;
    otherwise a warning is shown and nothing is yielded.

    Args:
        model_name: Name of the model (directory under the logs path).
        rvc_version: Version string used in the ``*_extracted`` folder name.
        index_algorithm: Forwarded as the ``--index_algorithm`` option value.

    Yields:
        str: Log snapshots produced by ``log_read`` for "create_index".
    """
    if not model_name: return gr_warning(translations["provide_name"])
    model_dir = os.path.join(configs["logs_path"], model_name)
    extracted_dir = os.path.join(model_dir, f"{rvc_version}_extracted")

    try:
        has_features = any(os.path.isfile(os.path.join(extracted_dir, f)) for f in os.listdir(extracted_dir))
    except OSError:
        # Missing or unreadable folder means extraction has not been run.
        has_features = False

    if not has_features: return gr_warning(translations["not_found_data_extract"])

    # An argv list (no shell) keeps quotes, spaces and shell metacharacters in
    # user-supplied values from being interpreted by a shell.
    # NOTE(review): assumes `python` is a single interpreter path - confirm.
    command = [
        python, configs["create_index_path"],
        "--model_name", str(model_name),
        "--rvc_version", str(rvc_version),
        "--index_algorithm", str(index_algorithm),
    ]

    p = subprocess.Popen(command)
    done = [False]

    threading.Thread(target=if_done, args=(done, p)).start()
    os.makedirs(model_dir, exist_ok=True)

    for log in log_read(done, "create_index"):
        yield log
|
| 144 |
-
|
| 145 |
-
def training(model_name, rvc_version, save_every_epoch, save_only_latest, save_every_weights, total_epoch, sample_rate, batch_size, gpu, pitch_guidance, not_pretrain, custom_pretrained, pretrain_g, pretrain_d, detector, threshold, clean_up, cache, model_author, vocoder, checkpointing, deterministic, benchmark, optimizer, energy_use, custom_reference=False, reference_name="", multiscale_mel_loss=False):
    """Launch the training script for a model and stream the tail of its log.

    Steps: validate that extracted features exist, resolve the pretrained G/D
    checkpoints (bundled ones are downloaded when absent, custom ones are
    looked up on disk, or none when *not_pretrain* is set), resolve the optional
    reference folder, start the script at ``configs["train_path"]`` and write
    the child's PID to ``train_pid.txt`` in the model directory.

    Args:
        sample_rate: String such as ``"40k"``; used only to pick the bundled
            pretrained file names.
        not_pretrain: When true, no pretrained checkpoints are used.
        custom_pretrained: When true, *pretrain_g* / *pretrain_d* name the
            checkpoints (absolute path, or file under the custom pretrained dir).
        custom_reference / reference_name: Optional reference folder under
            ``configs["reference_path"]``; disabled with a warning when missing.
        Remaining arguments are forwarded as command-line option values.

    Yields:
        str: At most the last 50 lines of each log snapshot for "train". When
        validation fails a warning is shown and nothing is yielded.
    """
    sr = int(float(sample_rate.rstrip("k")) * 1000)
    if not model_name: return gr_warning(translations["provide_name"])

    model_dir = os.path.join(configs["logs_path"], model_name)
    pid_path = os.path.join(model_dir, "train_pid.txt")
    if os.path.exists(pid_path): os.remove(pid_path)

    extracted_dir = os.path.join(model_dir, f"{rvc_version}_extracted")
    try:
        has_features = any(os.path.isfile(os.path.join(extracted_dir, f)) for f in os.listdir(extracted_dir))
    except OSError:
        # Missing or unreadable folder means extraction has not been run.
        has_features = False

    if not has_features: return gr_warning(translations["not_found_data_extract"])

    if not not_pretrain:
        if not custom_pretrained:
            pretrain_dir = configs["pretrained_v2_path"] if rvc_version == 'v2' else configs["pretrained_v1_path"]
            download_version = codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/cergenvarq_", "rot13") + f"{rvc_version}/"

            # Bundled checkpoint file names keyed by pitch guidance, then sample rate.
            pretrained_selector = {
                True: {
                    32000: ("f0G32k.pth", "f0D32k.pth"),
                    40000: ("f0G40k.pth", "f0D40k.pth"),
                    48000: ("f0G48k.pth", "f0D48k.pth")
                },
                False: {
                    32000: ("G32k.pth", "D32k.pth"),
                    40000: ("G40k.pth", "D40k.pth"),
                    48000: ("G48k.pth", "D48k.pth")
                }
            }

            pg, pd = pretrained_selector[pitch_guidance][sr]

            # File names are prefixed with "ENERGY_" and/or "<vocoder>_" when applicable.
            prefix = ""
            if energy_use: prefix += "ENERGY_"
            if vocoder != 'Default': prefix += vocoder + "_"

            pg2, pd2 = prefix + pg, prefix + pd
            pretrained_G = os.path.join(pretrain_dir, pg2)
            pretrained_D = os.path.join(pretrain_dir, pd2)

            try:
                if not os.path.exists(pretrained_G):
                    gr_info(translations["download_pretrained"].format(dg="G", rvc_version=rvc_version))
                    huggingface.HF_download_file(download_version + pg2, os.path.join(pretrain_dir, pg2))

                if not os.path.exists(pretrained_D):
                    gr_info(translations["download_pretrained"].format(dg="D", rvc_version=rvc_version))
                    huggingface.HF_download_file(download_version + pd2, os.path.join(pretrain_dir, pd2))
            except Exception:
                # Best effort: a failed download falls back to training without
                # pretrained weights instead of aborting.
                gr_warning(translations["not_use_pretrain_error_download"])
                pretrained_G = pretrained_D = None
        else:
            if not pretrain_g: return gr_warning(translations["provide_pretrained"].format(dg="G"))
            if not pretrain_d: return gr_warning(translations["provide_pretrained"].format(dg="D"))

            pg2, pd2 = pretrain_g, pretrain_d
            # Use the given path as-is when it exists, else look in the custom pretrained dir.
            pretrained_G = pg2 if os.path.exists(pg2) else os.path.join(configs["pretrained_custom_path"], pg2)
            pretrained_D = pd2 if os.path.exists(pd2) else os.path.join(configs["pretrained_custom_path"], pd2)

            if not os.path.exists(pretrained_G): return gr_warning(translations["not_found_pretrain"].format(dg="G"))
            if not os.path.exists(pretrained_D): return gr_warning(translations["not_found_pretrain"].format(dg="D"))
    else:
        pretrained_G = pretrained_D = None
        gr_warning(translations["not_use_pretrain"])

    if custom_reference:
        reference_path = os.path.join(configs["reference_path"], reference_name)

        if not os.path.exists(reference_path):
            gr_warning(translations["not_found_reference"])

            custom_reference = False
            reference_path = None
    else: reference_path = None

    gr_info(translations["start"].format(start=translations["training"]))

    # None values are passed as the literal string "None", as before.
    options = {
        "--model_name": model_name,
        "--rvc_version": rvc_version,
        "--save_every_epoch": save_every_epoch,
        "--save_only_latest": save_only_latest,
        "--save_every_weights": save_every_weights,
        "--total_epoch": total_epoch,
        "--batch_size": batch_size,
        "--gpu": gpu,
        "--pitch_guidance": pitch_guidance,
        "--overtraining_detector": detector,
        "--overtraining_threshold": threshold,
        "--cleanup": clean_up,
        "--cache_data_in_gpu": cache,
        "--g_pretrained_path": pretrained_G,
        "--d_pretrained_path": pretrained_D,
        "--model_author": model_author,
        "--vocoder": vocoder,
        "--checkpointing": checkpointing,
        "--deterministic": deterministic,
        "--benchmark": benchmark,
        "--optimizer": optimizer,
        "--energy_use": energy_use,
        "--use_custom_reference": custom_reference,
        "--reference_path": reference_path,
        "--multiscale_mel_loss": multiscale_mel_loss,
    }

    # An argv list (no shell) keeps quotes, spaces and shell metacharacters in
    # user-supplied values from being interpreted by a shell, and makes p.pid
    # the PID of the training process itself rather than of a wrapping shell.
    # NOTE(review): assumes `python` is a single interpreter path - confirm.
    command = [python, configs["train_path"]]
    for flag, value in options.items():
        command += [flag, str(value)]

    p = subprocess.Popen(command)
    done = [False]

    with open(pid_path, "w") as pid_file:
        pid_file.write(str(p.pid))

    threading.Thread(target=if_done, args=(done, p)).start()

    for log in log_read(done, "train"):
        lines = log.splitlines()
        if len(lines) > 50: log = "\n".join(lines[-50:])
        yield log
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/tts.py
DELETED
|
@@ -1,100 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import pysrt
|
| 4 |
-
import codecs
|
| 5 |
-
import librosa
|
| 6 |
-
import asyncio
|
| 7 |
-
import requests
|
| 8 |
-
import tempfile
|
| 9 |
-
|
| 10 |
-
sys.path.append(os.getcwd())
|
| 11 |
-
|
| 12 |
-
from main.app.variables import translations
|
| 13 |
-
from main.app.core.ui import gr_info, gr_warning, gr_error
|
| 14 |
-
|
| 15 |
-
def synthesize_tts(prompt, voice, speed, output, pitch, google):
    """Synthesize *prompt* to the file *output* with Edge TTS or Google TTS.

    Args:
        prompt: Text to speak.
        voice: Edge voice name, or the ``tl`` request parameter for Google.
        speed: Edge rate in percent; for Google it is sent as ``ttsspeed`` and,
            when non-zero, also used as the librosa time-stretch rate.
        output: Destination audio file path.
        pitch: Edge pitch offset in Hz; for Google, pitch-shift steps when non-zero.
        google: Use the Google endpoint when true, Edge TTS otherwise.
    """
    if not google:
        from edge_tts import Communicate

        # edge-tts expects explicitly signed values such as "+10%" / "-5Hz".
        rate_arg = f"+{speed}%" if speed >= 0 else f"{speed}%"
        pitch_arg = f"+{pitch}Hz" if pitch >= 0 else f"{pitch}Hz"
        asyncio.run(Communicate(text=prompt, voice=voice, rate=rate_arg, pitch=pitch_arg).save(output))
        return

    endpoint = codecs.decode("uggcf://genafyngr.tbbtyr.pbz/genafyngr_ggf", "rot13")
    query = {"ie": "UTF-8", "q": prompt, "tl": voice, "ttsspeed": speed, "client": "tw-ob"}
    request_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
    response = requests.get(endpoint, params=query, headers=request_headers)

    if response.status_code != 200:
        gr_error(f"{response.status_code}, {response.text}")
        return

    with open(output, "wb") as f:
        f.write(response.content)

    if pitch == 0 and speed == 0:
        return

    # Post-process the downloaded audio in place.
    y, sr = librosa.load(output, sr=None)

    if pitch != 0: y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch)
    if speed != 0: y = librosa.effects.time_stretch(y, rate=speed)

    import soundfile as sf
    # The container format is taken from the output file's extension.
    audio_format = os.path.splitext(os.path.basename(output))[-1].lower().replace('.', '')
    sf.write(file=output, data=y, samplerate=sr, format=audio_format)
|
| 35 |
-
|
| 36 |
-
def srt_tts(srt_file, out_file, voice, rate = 0, sr = 24000, google = False):
    """Render an SRT subtitle file to one audio track aligned to the cue times.

    Each cue is synthesized separately, resampled to *sr* if needed, stretched
    or trimmed to the cue's duration and mixed into a buffer whose length is
    the end time of the last cue.

    Args:
        srt_file: Path to the ``.srt`` file.
        out_file: Destination audio file path.
        voice: Voice passed on to ``synthesize_tts``.
        rate: Value passed on to ``synthesize_tts`` (see note in the loop).
        sr: Sample rate of the produced track.
        google: Passed on to ``synthesize_tts``.

    Raises:
        ValueError: If the subtitle file contains no cues.
    """
    import numpy as np
    import soundfile as sf

    def time_stretch(y, sr, target_duration):
        # Fit y to exactly target_duration seconds: stretch, then pad or trim.
        n_target = int(round(target_duration * sr))

        # Empty audio would give a stretch rate of 0; just pad it below.
        if len(y) > 0:
            rate = (len(y) / sr) / target_duration
            if rate != 1.0: y = librosa.effects.time_stretch(y=y.astype(np.float32), rate=rate)

        return np.pad(y, (0, n_target - len(y))) if len(y) < n_target else y[:n_target]

    def pysrttime_to_seconds(t):
        return (t.hours * 60 + t.minutes) * 60 + t.seconds + t.milliseconds / 1000

    subs = pysrt.open(srt_file)
    if not subs: raise ValueError(translations["srt"])

    final_audio = np.zeros(int(round(pysrttime_to_seconds(subs[-1].end) * sr)), dtype=np.float32)

    with tempfile.TemporaryDirectory() as tempdir:
        for idx, seg in enumerate(subs):
            seg_seconds = pysrttime_to_seconds(seg.duration)
            start_sample = int(round(pysrttime_to_seconds(seg.start) * sr))

            # Skip cues that cannot contribute audio: a non-positive duration
            # would divide by zero in time_stretch, and a start beyond the
            # buffer would produce a negative slice bound below.
            if seg_seconds <= 0 or start_sample >= final_audio.shape[0]: continue

            wav_path = os.path.join(tempdir, f"seg_{idx}.wav")
            # NOTE(review): `rate` lands in synthesize_tts's `pitch` position and
            # speed is fixed at 0 - confirm this is intended.
            synthesize_tts(" ".join(seg.text.splitlines()), voice, 0, wav_path, rate, google)

            audio, file_sr = sf.read(wav_path, dtype=np.float32)
            # Linear-interpolation resample to the target rate.
            if file_sr != sr: audio = np.interp(np.linspace(0, len(audio) - 1, int(len(audio) * sr / file_sr)), np.arange(len(audio)), audio)
            adjusted = time_stretch(audio, sr, seg_seconds)

            end_sample = start_sample + adjusted.shape[0]

            # Clip a cue that would run past the end of the buffer.
            if end_sample > final_audio.shape[0]:
                adjusted = adjusted[: final_audio.shape[0] - start_sample]
                end_sample = final_audio.shape[0]

            final_audio[start_sample:end_sample] += adjusted

    sf.write(out_file, final_audio, sr)
|
| 74 |
-
|
| 75 |
-
def TTS(prompt, voice, speed, output, pitch, google, srt_input):
    """Validate the inputs and synthesize speech from text or an SRT file.

    Args:
        prompt: Text to speak; ignored when *srt_input* ends with ``.srt``.
        voice: Voice identifier; required.
        speed: Speed passed to ``synthesize_tts`` (text mode only).
        output: Output file path, or an existing directory in which
            ``tts.wav`` is created.
        pitch: Pitch passed to ``synthesize_tts`` (text mode only).
        google: Use Google TTS when true.
        srt_input: Optional path to a ``.srt`` file; ``None`` is treated as "".

    Returns:
        The output file path, or ``None`` when validation fails (a warning is shown).
    """
    if not srt_input: srt_input = ""
    use_srt = srt_input.endswith(".srt")

    if not prompt and not use_srt:
        gr_warning(translations["enter_the_text"])
        return None

    if not voice:
        gr_warning(translations["choose_voice"])
        return None

    if not output:
        gr_warning(translations["output_not_valid"])
        return None

    if os.path.isdir(output): output = os.path.join(output, "tts.wav")
    gr_info(translations["convert"].format(name=translations["text"]))

    # Only create the parent directory. A bare file name has no directory part
    # and must not be turned into a directory itself, or the later write to
    # `output` would fail.
    output_dir = os.path.dirname(output)
    if output_dir: os.makedirs(output_dir, exist_ok=True)

    if use_srt: srt_tts(srt_input, output, voice, 0, 24000, google)
    else: synthesize_tts(prompt, voice, speed, output, pitch, google)

    gr_info(translations["success"])
    return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/ui.py
DELETED
|
@@ -1,362 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import re
|
| 3 |
-
import sys
|
| 4 |
-
import json
|
| 5 |
-
import torch
|
| 6 |
-
import shutil
|
| 7 |
-
|
| 8 |
-
import gradio as gr
|
| 9 |
-
import sounddevice as sd
|
| 10 |
-
|
| 11 |
-
sys.path.append(os.getcwd())
|
| 12 |
-
|
| 13 |
-
from main.library.backends import directml, opencl
|
| 14 |
-
from main.inference.realtime.audio import list_audio_device
|
| 15 |
-
from main.app.variables import config, configs, configs_json, logger, translations, edgetts, google_tts_voice, method_f0, method_f0_full, vr_models, mdx_models, demucs_models, embedders_model, spin_model, whisper_model
|
| 16 |
-
|
| 17 |
-
def gr_info(message):
    """Show *message* as a 2-second Gradio info notice and log it at INFO level."""
    gr.Info(message, duration=2)
    logger.info(message)
|
| 20 |
-
|
| 21 |
-
def gr_warning(message):
    """Show *message* as a 2-second Gradio warning notice and log it at WARNING level."""
    gr.Warning(message, duration=2)
    logger.warning(message)
|
| 24 |
-
|
| 25 |
-
def gr_error(message):
    """Build a Gradio error for *message* (6-second duration) and log it at ERROR level."""
    # NOTE(review): the gr.Error object is constructed but not raised here -
    # confirm that callers do not rely on an exception being thrown.
    gr.Error(message=message, duration=6)
    logger.error(message)
|
| 28 |
-
|
| 29 |
-
def get_gpu_info():
    """Return a newline-separated description of the detected GPUs.

    CUDA devices are listed first (index, name, memory in GB). When none are
    found, the DirectML backend is queried if its ``torch_available`` flag is
    set, otherwise OpenCL under the same condition. The "no support" message is
    returned when nothing is found or ``config.cpu_mode`` is set.
    """
    ngpu = torch.cuda.device_count()
    gpu_infos = []

    for i in range(ngpu):
        if torch.cuda.is_available() or ngpu != 0:
            device_name = torch.cuda.get_device_name(i)
            # Bytes -> GB, biased by 0.4 before truncation.
            memory_gb = int(torch.cuda.get_device_properties(i).total_memory / 1024 / 1024 / 1024 + 0.4)
            gpu_infos.append(f"{i}: {device_name} ({memory_gb} GB)")

    if len(gpu_infos) == 0:
        if directml.torch_available:
            backend = directml
        elif opencl.torch_available:
            backend = opencl
        else:
            backend = None

        if backend is None:
            ngpu = 0
            gpu_infos = []
        else:
            ngpu = backend.device_count()
            gpu_infos = [f"{i}: {backend.device_name(i)}" for i in range(ngpu) if backend.is_available() or ngpu != 0]

    if len(gpu_infos) > 0 and not config.cpu_mode:
        return "\n".join(gpu_infos)

    return translations["no_support_gpu"]
|
| 49 |
-
|
| 50 |
-
def gpu_number_str():
    """Return the device indices joined by "-" (e.g. "0-1"), or "-" if unavailable.

    "-" is returned in CPU mode or when none of CUDA, DirectML or OpenCL reports
    itself available.
    """
    if config.cpu_mode:
        return "-"

    ngpu = torch.cuda.device_count()
    if ngpu == 0:
        # No CUDA devices: fall back to the DirectML or OpenCL device count.
        if directml.torch_available:
            ngpu = directml.device_count()
        else:
            ngpu = opencl.device_count()

    if torch.cuda.is_available() or directml.is_available() or opencl.is_available():
        return "-".join(str(i) for i in range(ngpu))

    return "-"
|
| 57 |
-
|
| 58 |
-
def change_f0_choices():
    """Build a dropdown update listing every ``.txt`` file under the F0 folder.

    Returns:
        dict: Update with the sorted absolute paths as choices and the first
        one (or "") as the value.
    """
    f0_files = []

    for root, _, files in os.walk(configs["f0_path"]):
        for f in files:
            if f.endswith(".txt"):
                f0_files.append(os.path.abspath(os.path.join(root, f)))

    f0_files.sort()
    selected = f0_files[0] if f0_files else ""

    return {"value": selected, "choices": f0_files, "__type__": "update"}
|
| 61 |
-
|
| 62 |
-
def change_audios_choices(input_audio):
    """Build a dropdown update listing the audio files under the audios folder.

    Args:
        input_audio: Current selection; kept as the value unless it is "".

    Returns:
        dict: Update with the sorted absolute paths as choices.
    """
    audio_exts = (".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3")
    audios = []

    for root, _, files in os.walk(configs["audios_path"]):
        for f in files:
            if os.path.splitext(f)[1].lower() in audio_exts:
                audios.append(os.path.abspath(os.path.join(root, f)))

    audios.sort()

    if input_audio != "":
        selected = input_audio
    else:
        selected = audios[0] if len(audios) >= 1 else ""

    return {"value": selected, "choices": audios, "__type__": "update"}
|
| 65 |
-
|
| 66 |
-
def change_reference_choices():
    """Build a dropdown update listing the reference folders.

    Folder names have a trailing ``_v<N>_<name>_<True|False>_<True|False>``
    suffix removed before being listed.

    Returns:
        dict: Update with the sorted names as choices and the first one (or "")
        as the value.
    """
    base_dir = configs["reference_path"]
    suffix_pattern = r'_v\d+_(?:[A-Za-z0-9_]+?)_(True|False)_(True|False)$'
    reference = []

    for name in os.listdir(base_dir):
        # isdir is False for missing paths, so no separate exists() check is needed.
        if os.path.isdir(os.path.join(base_dir, name)):
            reference.append(re.sub(suffix_pattern, '', name))

    reference.sort()
    selected = reference[0] if reference else ""

    return {"value": selected, "choices": reference, "__type__": "update"}
|
| 69 |
-
|
| 70 |
-
def change_models_choices():
    """Build dropdown updates for the model list and the index-file list.

    Models are ``.pth`` / ``.onnx`` files in the weights folder whose names do
    not start with ``G_`` or ``D_``. Index files are ``.index`` files anywhere
    under the logs folder whose names do not contain "trained".

    Returns:
        list[dict]: ``[model_update, index_update]``.
    """
    model = []
    for entry in os.listdir(configs["weights_path"]):
        if entry.endswith((".pth", ".onnx")) and not entry.startswith("G_") and not entry.startswith("D_"):
            model.append(entry)
    model.sort()

    index = []
    for root, _, files in os.walk(configs["logs_path"], topdown=False):
        for name in files:
            if name.endswith(".index") and "trained" not in name:
                index.append(os.path.join(root, name))
    index.sort()

    model_update = {"value": model[0] if model else "", "choices": model, "__type__": "update"}
    index_update = {"value": index[0] if index else "", "choices": index, "__type__": "update"}

    return [model_update, index_update]
|
| 73 |
-
|
| 74 |
-
def change_pretrained_choices():
    """Build dropdown updates for the custom pretrained D and G checkpoints.

    A ``.pth`` file in the custom pretrained folder is listed as a D checkpoint
    when its name contains "D" and as a G checkpoint when it contains "G".

    Returns:
        list[dict]: ``[d_update, g_update]`` (D first).
    """
    def _list_with(marker):
        # Sorted .pth files whose name contains the marker letter.
        return sorted([model for model in os.listdir(configs["pretrained_custom_path"]) if model.endswith(".pth") and marker in model])

    def _as_update(items):
        return {"choices": items, "value": items[0] if items else "", "__type__": "update"}

    pretrainD = _list_with("D")
    pretrainG = _list_with("G")

    return [_as_update(pretrainD), _as_update(pretrainG)]
|
| 79 |
-
|
| 80 |
-
def change_choices_del():
    """Build dropdown updates listing the deletable models and log folders.

    Returns:
        list[dict]: First the ``.pth`` files in the weights folder not starting
        with ``G_`` / ``D_``; second the directories in the logs folder except
        "mute" and "reference" (as joined paths). Only choices are updated.
    """
    logs_dir = configs["logs_path"]

    models = sorted(
        model for model in os.listdir(configs["weights_path"])
        if model.endswith(".pth") and not model.startswith("G_") and not model.startswith("D_")
    )

    folders = sorted(
        os.path.join(logs_dir, f) for f in os.listdir(logs_dir)
        if f not in ["mute", "reference"] and os.path.isdir(os.path.join(logs_dir, f))
    )

    return [{"choices": models, "__type__": "update"}, {"choices": folders, "__type__": "update"}]
|
| 82 |
-
|
| 83 |
-
def change_preset_choices():
    """Build a dropdown update listing the ``.conversion.json`` presets, with the value cleared."""
    presets = [f for f in os.listdir(configs["presets_path"]) if f.endswith(".conversion.json")]
    presets.sort()

    return {"value": "", "choices": presets, "__type__": "update"}
|
| 85 |
-
|
| 86 |
-
def change_effect_preset_choices():
    """Build a dropdown update listing the ``.effect.json`` presets, with the value cleared."""
    presets = [f for f in os.listdir(configs["presets_path"]) if f.endswith(".effect.json")]
    presets.sort()

    return {"value": "", "choices": presets, "__type__": "update"}
|
| 88 |
-
|
| 89 |
-
def change_tts_voice_choices(google):
    """Build a dropdown update with the Google or Edge voice list; its first entry is selected."""
    voices = google_tts_voice if google else edgetts

    return {"choices": voices, "value": voices[0], "__type__": "update"}
|
| 91 |
-
|
| 92 |
-
def change_backing_choices(backing, merge):
    """Lock a checkbox off while backing separation or merging is enabled.

    Args:
        backing: Whether backing separation is enabled.
        merge: Whether merging is enabled.

    Returns:
        dict: An update that unchecks and disables the component when either
        flag is truthy, otherwise one that only re-enables it.
    """
    if backing or merge:
        return {"value": False, "interactive": False, "__type__": "update"}

    # Both flags are falsy here. The former trailing `else` branch could never
    # run, because `not backing or not merge` is always true at this point.
    return {"interactive": True, "__type__": "update"}
|
| 96 |
-
|
| 97 |
-
def change_download_choices(select):
    """Compute visibility updates for the ten download widgets.

    Args:
        select: The chosen (translated) download-mode label.

    Returns:
        list[dict]: Ten ``{"visible": bool}`` updates; a warning is shown and
        all are hidden when the label is not recognised.
    """
    selects = [False]*10

    # Translation key of each mode -> indices of the widgets it reveals.
    modes = (
        ("download_url", (0, 1, 2)),
        ("download_from_csv", (3, 4)),
        ("search_models", (5, 6)),
        ("upload", (9,)),
    )

    for key, shown in modes:
        if select == translations[key]:
            for position in shown:
                selects[position] = True
            break
    else:
        gr_warning(translations["option_not_valid"])

    return [{"visible": flag, "__type__": "update"} for flag in selects]
|
| 107 |
-
|
| 108 |
-
def change_download_pretrained_choices(select):
    """Compute visibility updates for the seven pretrained-download widgets.

    Args:
        select: The chosen (translated) download-mode label.

    Returns:
        list[dict]: Seven ``{"visible": bool}`` updates; a warning is shown and
        all are hidden when the label is not recognised.
    """
    selects = [False]*7

    # Translation key of each mode -> indices of the widgets it reveals.
    modes = (
        ("download_url", (0, 1, 2)),
        ("list_model", (3, 4, 5)),
        ("upload", (6,)),
    )

    for key, shown in modes:
        if select == translations[key]:
            for position in shown:
                selects[position] = True
            break
    else:
        gr_warning(translations["option_not_valid"])

    return [{"visible": flag, "__type__": "update"} for flag in selects]
|
| 117 |
-
|
| 118 |
-
def get_index(model):
    """Find the index file that matches a model name.

    The model's base name up to the first "_" (and then up to the first ".")
    is searched for as a substring of each ``.index`` path under the logs
    folder, skipping files whose name contains "trained".

    Args:
        model: Model file name or path; may be ``None`` or "".

    Returns:
        dict | None: An update whose value is the first matching index path
        (or "" when none matches), or ``None`` when no model name is given.
    """
    # Guard first: os.path.basename(None) would raise TypeError.
    if not model:
        return None

    model = os.path.basename(model).split("_")[0]
    if not model:
        return None

    stem = model.split(".")[0]

    for root, _, files in os.walk(configs["logs_path"], topdown=False):
        for name in files:
            if name.endswith(".index") and "trained" not in name:
                candidate = os.path.join(root, name)
                if stem in candidate:
                    return {"value": candidate, "__type__": "update"}

    return {"value": "", "__type__": "update"}
|
| 121 |
-
|
| 122 |
-
def index_strength_show(index):
    """Show the index-strength control (reset to 0.5) only when *index* is an existing file."""
    # isfile already implies the path exists.
    has_index_file = index != "" and index is not None and os.path.isfile(index)

    return {"visible": has_index_file, "value": 0.5, "__type__": "update"}
|
| 124 |
-
|
| 125 |
-
def hoplength_show(method, hybrid_method=None):
    """Show the hop-length control when a hop-length-based F0 method is selected.

    Args:
        method: Selected F0 method name.
        hybrid_method: Optional hybrid method string that is also inspected.

    Returns:
        dict: ``{"visible": bool}`` update; true when either argument contains
        one of the listed method names.
    """
    hop_methods = ["mangio-crepe", "fcpe", "yin", "piptrack", "mangio-penn"]

    show = any(
        m in method or (hybrid_method is not None and m in hybrid_method)
        for m in hop_methods
    )

    return {"visible": show, "__type__": "update"}
|
| 136 |
-
|
| 137 |
-
def visible(value):
    """Return an update that sets a component's visibility to *value*."""
    update = dict(visible=value, __type__="update")
    return update
|
| 139 |
-
|
| 140 |
-
def valueFalse_interactive(value):
    """Return an update that unchecks a component and sets its interactivity to *value*."""
    update = dict(value=False, interactive=value, __type__="update")
    return update
|
| 142 |
-
|
| 143 |
-
def valueEmpty_visible1(value):
    """Return an update that clears a component's value and sets its visibility to *value*."""
    update = dict(value="", visible=value, __type__="update")
    return update
|
| 145 |
-
|
| 146 |
-
def pitch_guidance_lock(vocoders):
    """Return an update that checks the component; it stays editable only for the "Default" vocoder."""
    editable = vocoders == "Default"
    return {"value": True, "interactive": editable, "__type__": "update"}
|
| 148 |
-
|
| 149 |
-
def vocoders_lock(pitch, vocoders):
    """Return an update that keeps *vocoders* when *pitch* is truthy, else forces "Default".

    The component's interactivity is set to *pitch*.
    """
    if pitch:
        chosen = vocoders
    else:
        chosen = "Default"

    return {"value": chosen, "interactive": pitch, "__type__": "update"}
|
| 151 |
-
|
| 152 |
-
def unlock_f0(value):
    """Return an update that switches between the full and short F0 method lists.

    The selection is reset to "rmvpe" in both cases.
    """
    if value:
        methods = method_f0_full
    else:
        methods = method_f0

    return {"choices": methods, "value": "rmvpe", "__type__": "update"}
|
| 154 |
-
|
| 155 |
-
def unlock_vocoder(value, vocoder):
    """Return an update that keeps *vocoder* editable only when *value* is "v2".

    For any other value the selection is forced to "Default" and locked.
    """
    is_v2 = value == "v2"
    chosen = vocoder if is_v2 else "Default"

    return {"value": chosen, "interactive": is_v2, "__type__": "update"}
|
| 157 |
-
|
| 158 |
-
def unlock_ver(value, vocoder):
    """Return an update for the version selector based on the chosen vocoder.

    With the "Default" vocoder the value becomes "v2" and the component is
    interactive; with any other vocoder *value* is kept and the component is
    locked.
    """
    default_vocoder = vocoder == "Default"
    chosen = "v2" if default_vocoder else value

    return {"value": chosen, "interactive": default_vocoder, "__type__": "update"}
|
| 160 |
-
|
| 161 |
-
def change_embedders_mode(value):
    """Return an update with the embedder model list for the selected mode.

    "spin" and "whisper" select their own lists; any other mode uses the
    general embedder list. The first entry of the list is selected.
    """
    if value == "spin":
        models = spin_model
    elif value == "whisper":
        models = whisper_model
    else:
        models = embedders_model

    return {"value": models[0], "choices": models, "__type__": "update"}
|
| 168 |
-
|
| 169 |
-
def change_fp(fp):
    """Switch the configured precision and persist it to the config file.

    Args:
        fp: Requested precision name; ``"fp16"`` enables half precision, any
            other value disables it.

    Returns:
        str: The precision actually in effect, ``"fp16"`` or ``"fp32"``.
        ``"fp32"`` is returned without touching the config file when fp16 is
        requested on a device listed as unsupported.
    """
    fp16 = fp == "fp16"

    # Half precision is refused for these device identifiers.
    if fp16 and config.device in ["cpu", "mps", "ocl:0"]:
        gr_warning(translations["fp16_not_support"])
        return "fp32"

    gr_info(translations["start_update_precision"])

    # Read through a context manager so the handle is closed before the same
    # path is reopened for writing below.
    with open(configs_json, "r") as f:
        configs = json.load(f)

    # Keep the in-memory config object and the persisted file in sync.
    configs["fp16"] = config.is_half = fp16

    with open(configs_json, "w") as f:
        json.dump(configs, f, indent=4)

    gr_info(translations["success"])
    return "fp16" if fp16 else "fp32"
|
| 186 |
-
|
| 187 |
-
def process_output(file_path):
    """Resolve an output path that is free to be written.

    If the ``delete_exists_file`` setting (default ``True``) is truthy, an
    existing regular file at ``file_path`` is removed and the same path is
    returned. Otherwise the first non-existing ``<stem>_<n><ext>`` sibling
    (``n`` starting at 1) is returned, or ``file_path`` itself when nothing
    exists there yet.

    Args:
        file_path: Desired output path.

    Returns:
        str: Path that does not collide with an existing file.
    """
    overwrite = config.configs.get("delete_exists_file", True)

    if overwrite:
        if os.path.exists(file_path) and os.path.isfile(file_path):
            os.remove(file_path)
        return file_path

    if not os.path.exists(file_path):
        return file_path

    folder = os.path.dirname(file_path)
    stem, extension = os.path.splitext(os.path.basename(file_path))

    counter = 1
    while True:
        candidate = os.path.join(folder, f"{stem}_{counter}{extension}")
        if not os.path.exists(candidate):
            return candidate
        counter += 1
|
| 200 |
-
|
| 201 |
-
def shutil_move(input_path, output_path):
    """Move a file, resolving collisions at the destination first.

    When ``output_path`` is an existing directory the source's base name is
    appended to it. If the resulting destination already exists it is passed
    through ``process_output`` before the move.

    Args:
        input_path: Path of the file to move.
        output_path: Destination file path or existing directory.

    Returns:
        The value returned by ``shutil.move``.
    """
    if os.path.isdir(output_path):
        output_path = os.path.join(output_path, os.path.basename(input_path))

    if os.path.exists(output_path):
        destination = process_output(output_path)
    else:
        destination = output_path

    return shutil.move(input_path, destination)
|
| 205 |
-
|
| 206 |
-
def separate_change(model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise):
    """Compute the list of UI updates for the separation settings.

    The model families in use ("vr", "mdx", "demucs") are derived from the
    main model name and, when enabled, from the karaoke and reverb model
    names; each returned update toggles a component according to them.

    Args:
        model_name: Selected separation model, looked up in ``vr_models``,
            ``mdx_models`` and ``demucs_models``.
        karaoke_model: Karaoke model name; counted as "vr" when it starts
            with ``"VR"``, otherwise "mdx". Only read if ``separate_backing``.
        reverb_model: Reverb model name; counted as "mdx" when it starts with
            ``"MDX"``, otherwise "vr". Only read if ``separate_reverb``.
        enable_post_process: Post-process toggle state.
        separate_backing: Backing separation toggle state.
        separate_reverb: Reverb separation toggle state.
        enable_denoise: Denoise toggle state.

    Returns:
        list: Eleven ``visible`` updates followed by three
        ``valueFalse_interactive`` updates, in a fixed order.
    """
    if model_name in list(vr_models.keys()):
        model_type = "vr"
    elif model_name in list(mdx_models.keys()):
        model_type = "mdx"
    elif model_name in list(demucs_models.keys()):
        model_type = "demucs"
    else:
        model_type = ""

    karaoke_type = None
    if separate_backing:
        karaoke_type = "vr" if karaoke_model.startswith("VR") else "mdx"

    reverb_type = None
    if separate_reverb:
        reverb_type = "mdx" if reverb_model.startswith("MDX") else "vr"

    families = {model_type, karaoke_type, reverb_type}
    is_vr = "vr" in families
    is_mdx = "mdx" in families
    is_demucs = "demucs" in families

    # Order matters: each entry lines up with one output component.
    visibility_flags = (
        separate_backing,
        separate_reverb,
        is_mdx or is_demucs,
        is_mdx or is_demucs,
        is_mdx,
        is_mdx or is_vr,
        is_demucs,
        is_vr,
        is_vr,
        is_vr and enable_post_process,
        is_vr and enable_denoise,
    )

    updates = [visible(flag) for flag in visibility_flags]
    updates.extend(valueFalse_interactive(is_vr) for _ in range(3))
    return updates
|
| 233 |
-
|
| 234 |
-
def create_dataset_change(model_name, reverb_model, enable_post_process, separate_reverb, enable_denoise):
    """Compute the list of UI updates for the dataset-creation settings.

    The model families in use ("vr", "mdx", "demucs") are derived from the
    main model name and, when enabled, from the reverb model name; each
    returned update toggles a component according to them.

    Args:
        model_name: Selected separation model, looked up in ``vr_models``,
            ``mdx_models`` and ``demucs_models``.
        reverb_model: Reverb model name; counted as "mdx" when it starts with
            ``"MDX"``, otherwise "vr". Only read if ``separate_reverb``.
        enable_post_process: Post-process toggle state.
        separate_reverb: Reverb separation toggle state.
        enable_denoise: Denoise toggle state.

    Returns:
        list: Ten ``visible`` updates followed by three
        ``valueFalse_interactive`` updates, in a fixed order.
    """
    if model_name in list(vr_models.keys()):
        model_type = "vr"
    elif model_name in list(mdx_models.keys()):
        model_type = "mdx"
    elif model_name in list(demucs_models.keys()):
        model_type = "demucs"
    else:
        model_type = ""

    reverb_type = None
    if separate_reverb:
        reverb_type = "mdx" if reverb_model.startswith("MDX") else "vr"

    families = {model_type, reverb_type}
    is_vr = "vr" in families
    is_mdx = "mdx" in families
    is_demucs = "demucs" in families

    # Order matters: each entry lines up with one output component.
    visibility_flags = (
        separate_reverb,
        is_mdx or is_demucs,
        is_mdx or is_demucs,
        is_mdx,
        is_mdx or is_vr,
        is_demucs,
        is_vr,
        is_vr,
        is_vr and enable_post_process,
        is_vr and enable_denoise,
    )

    updates = [visible(flag) for flag in visibility_flags]
    updates.extend(valueFalse_interactive(is_vr) for _ in range(3))
    return updates
|
| 258 |
-
|
| 259 |
-
def audio_device():
    """Collect the available audio devices as labelled lookup tables.

    Devices come from ``list_audio_device()``. Output devices are ordered so
    that names containing "virtual" come first, then names containing "vb",
    then everything else; input devices use the reverse of that ordering.

    Returns:
        tuple[dict, dict]: ``(inputs, outputs)``. Each maps a label of the
        form ``"<position>: <name> (<host_api>)"`` to
        ``[device index, max channel count]``. Two empty dicts are returned
        when enumeration fails, so callers can keep using ``.get()`` and
        ``.keys()`` on the result.
    """
    def priority(name):
        # Lower rank sorts first.
        lowered = name.lower()
        if "virtual" in lowered:
            return 0
        if "vb" in lowered:
            return 1
        return 2

    try:
        input_devices, output_devices = list_audio_device()

        output_sorted = sorted(output_devices, key=lambda d: priority(d.name))
        input_sorted = sorted(input_devices, key=lambda d: priority(d.name), reverse=True)

        # enumerate() numbers by position, so two devices that compare equal
        # still get distinct labels (list.index() would give both the same).
        input_device_list = {
            f"{position}: {d.name} ({d.host_api})": [d.index, d.max_input_channels]
            for position, d in enumerate(input_sorted, start=1)
        }
        output_device_list = {
            f"{position}: {d.name} ({d.host_api})": [d.index, d.max_output_channels]
            for position, d in enumerate(output_sorted, start=1)
        }

        return input_device_list, output_device_list
    except Exception:
        # Best effort: device enumeration failures must not break the UI.
        return {}, {}
|
| 286 |
-
|
| 287 |
-
def update_audio_device(input_device, output_device, monitor_device, monitor):
    """Compute the UI updates that depend on the selected audio devices.

    Args:
        input_device: Label of the selected input device (may be empty).
        output_device: Label of the selected output device (may be empty).
        monitor_device: Label of the selected monitor device (may be empty).
        monitor: Whether monitoring is enabled.

    Returns:
        list: Four ``visible`` updates followed by three ``gr.update`` calls
        that set ``visible`` and ``maximum`` for the input, output and
        monitor channel controls.
    """
    input_channels_map, output_channels_map = audio_device()

    # A device counts as ASIO when its label contains "ASIO".
    input_is_asio = "ASIO" in input_device if input_device else False
    output_is_asio = "ASIO" in output_device if output_device else False
    monitor_is_asio = "ASIO" in monitor_device if monitor_device else False

    try:
        # Entry layout is [device index, max channels]; an unknown label
        # yields [] and the [1] lookup raises, which selects the fallback.
        input_max_ch = input_channels_map.get(input_device, [])[1]
        output_max_ch = output_channels_map.get(output_device, [])[1]
        monitor_max_ch = output_channels_map.get(monitor_device, [])[1] if monitor else 128
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit are not
        # swallowed; any lookup failure resets all three limits to -1.
        input_max_ch = output_max_ch = monitor_max_ch = -1

    return [
        visible(monitor),
        visible(monitor),
        visible(monitor_is_asio),
        visible(input_is_asio or output_is_asio or monitor_is_asio),
        gr.update(visible=input_is_asio, maximum=input_max_ch),
        gr.update(visible=output_is_asio, maximum=output_max_ch),
        gr.update(visible=monitor_is_asio, maximum=monitor_max_ch)
    ]
|
| 310 |
-
|
| 311 |
-
def change_audio_device_choices():
    """Rescan audio devices and build refreshed dropdown updates.

    ``sd`` is terminated and re-initialised before the device tables are
    rebuilt (presumably so newly attached devices are picked up; confirm
    against the ``sd`` module in use).

    Returns:
        list: Three update dictionaries (one built from the input labels, two
        from the output labels), each selecting the first label or ``""``
        when none is available.
    """
    sd._terminate()
    sd._initialize()

    input_channels_map, output_channels_map = audio_device()

    # list(mapping) yields the labels for a dict and also tolerates the empty
    # list that audio_device() returns on failure, where .keys() would raise.
    input_labels = list(input_channels_map)
    output_labels = list(output_channels_map)

    first_input = input_labels[0] if input_labels else ""
    first_output = output_labels[0] if output_labels else ""

    return [
        {"value": first_input, "choices": input_labels, "__type__": "update"},
        {"value": first_output, "choices": output_labels, "__type__": "update"},
        {"value": first_output, "choices": output_labels, "__type__": "update"}
    ]
|
| 323 |
-
|
| 324 |
-
def replace_punctuation(filename):
    """Normalise a file name by replacing or dropping punctuation.

    Spaces and ``|`` become ``_``; hyphens, round/square/curly brackets,
    commas and both quote characters are removed. Every run of three
    underscores is then collapsed to one and surrounding whitespace is
    stripped.

    Args:
        filename: Name to clean.

    Returns:
        str: The cleaned name.
    """
    # One pass: first two arguments map characters to "_", the third lists
    # the characters to delete.
    table = str.maketrans(" |", "__", "-()[],\"'{}")
    cleaned = filename.translate(table)
    return cleaned.replace("___", "_").strip()
|
| 326 |
-
|
| 327 |
-
def replace_url(url):
    """Rewrite a URL by swapping path and query fragments.

    ``/blob/`` is replaced with ``/resolve/``, ``?download=true`` is removed
    and surrounding whitespace is stripped.

    Args:
        url: URL text to rewrite.

    Returns:
        str: The rewritten URL.
    """
    resolved = url.replace("/blob/", "/resolve/")
    without_query = resolved.replace("?download=true", "")
    return without_query.strip()
|
| 329 |
-
|
| 330 |
-
def replace_modelname(modelname):
    """Strip known model file extensions and normalise the remaining name.

    Every occurrence of ``.onnx``, ``.pth``, ``.index`` and ``.zip`` is
    removed (in that order) before the result is passed through
    ``replace_punctuation``.

    Args:
        modelname: Model file name.

    Returns:
        str: The cleaned model name.
    """
    stripped = modelname
    for extension in (".onnx", ".pth", ".index", ".zip"):
        stripped = stripped.replace(extension, "")

    return replace_punctuation(stripped)
|
| 332 |
-
|
| 333 |
-
def replace_export_format(audio_path, export_format = "wav"):
    """Return ``audio_path`` with its file extension set to ``export_format``.

    Only the final extension of the last path component is swapped;
    directory names that happen to contain the same ``.ext`` text are left
    untouched.

    Args:
        audio_path: Path of the audio file.
        export_format: Target extension without the leading dot.

    Returns:
        str: ``audio_path`` unchanged when it already ends with the target
        extension or has no extension at all, otherwise the path with the
        extension replaced.
    """
    export_format = f".{export_format}"

    if audio_path.endswith(export_format):
        return audio_path

    root, extension = os.path.splitext(audio_path)

    # Paths without an extension were returned unchanged before; keep that.
    if not extension:
        return audio_path

    return root + export_format
|
| 337 |
-
|
| 338 |
-
def update_dropdowns_from_json(data):
    """Build three dropdown updates from a device description mapping.

    Args:
        data: Mapping that may contain ``"inputs"`` and ``"outputs"``
            sub-mappings; a falsy value clears all three dropdowns.

    Returns:
        list: Three ``gr.update`` results. The first uses the keys of
        ``data["inputs"]``, the other two the keys of ``data["outputs"]``;
        each selects the first key or ``None`` when there is none.
    """
    if not data:
        return [gr.update(choices=[], value=None) for _ in range(3)]

    inputs = list(data.get("inputs", {}).keys())
    outputs = list(data.get("outputs", {}).keys())

    first_input = inputs[0] if inputs else None
    first_output = outputs[0] if outputs else None

    return [
        gr.update(choices=inputs, value=first_input),
        gr.update(choices=outputs, value=first_output),
        gr.update(choices=outputs, value=first_output),
    ]
|
| 354 |
-
|
| 355 |
-
def update_button_from_json(data):
    """Build the interactivity updates for the start and stop buttons.

    Args:
        data: Mapping that may contain ``"start_button"`` and
            ``"stop_button"`` flags; when falsy, or when a key is missing,
            start defaults to ``True`` and stop to ``False``.

    Returns:
        list: Two ``gr.update`` results, start button first.
    """
    start_enabled, stop_enabled = True, False

    if data:
        start_enabled = data.get("start_button", True)
        stop_enabled = data.get("stop_button", False)

    return [
        gr.update(interactive=start_enabled),
        gr.update(interactive=stop_enabled)
    ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/utils.py
DELETED
|
@@ -1,61 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import json
|
| 4 |
-
import codecs
|
| 5 |
-
import requests
|
| 6 |
-
|
| 7 |
-
sys.path.append(os.getcwd())
|
| 8 |
-
|
| 9 |
-
from main.app.core.ui import gr_info, gr_warning
|
| 10 |
-
from main.app.variables import translations, configs
|
| 11 |
-
|
| 12 |
-
def stop_pid(pid_file, model_name=None, train=False):
    """Kill the processes recorded in a pid file (best effort).

    The pid file is ``assets/<pid_file>.txt`` when ``model_name`` is ``None``
    and ``<logs_path>/<model_name>/<pid_file>.txt`` otherwise. When ``train``
    is set, pids stored under ``"process_pids"`` in the model's
    ``config.json`` are killed as well and that key is removed from the file.

    Args:
        pid_file: Base name (without ``.txt``) of the pid file.
        model_name: Model whose log directory holds the pid file, or ``None``.
        train: Also stop the pids recorded in the model's ``config.json``.

    Returns:
        The result of ``gr_warning`` when the pid file is missing, otherwise
        ``None``. Errors are swallowed on purpose.
    """
    def kill_all(pids):
        # A pid that already exited must not stop the rest from being killed.
        for pid in pids:
            try:
                os.kill(pid, 9)
            except ProcessLookupError:
                continue

    try:
        pid_file_path = os.path.join("assets", f"{pid_file}.txt") if model_name is None else os.path.join(configs["logs_path"], model_name, f"{pid_file}.txt")

        if not os.path.exists(pid_file_path): return gr_warning(translations["not_found_pid"])

        # Separate handle name: the parameter is no longer shadowed. Blank
        # lines (e.g. a trailing newline) are skipped instead of failing int().
        with open(pid_file_path, "r") as f:
            pids = [int(line) for line in f if line.strip()]

        kill_all(pids)

        if os.path.exists(pid_file_path): os.remove(pid_file_path)

        # Without a model name there is no config.json to inspect; joining
        # None into the path used to raise a TypeError that was swallowed.
        if train and model_name is not None:
            config_path = os.path.join(configs["logs_path"], model_name, "config.json")

            if os.path.exists(config_path):
                with open(config_path, "r") as f:
                    pid_data = json.load(f)

                pids = pid_data.pop("process_pids", [])

                with open(config_path, "w") as f:
                    json.dump(pid_data, f, indent=4)

                kill_all(pids)

                # NOTE(review): source indentation was lost; the completion
                # notice is kept inside the training branch. Confirm against
                # the original file.
                gr_info(translations["end_pid"])
    except Exception:
        # Deliberate best effort: stopping must never crash the caller.
        pass
|
| 44 |
-
|
| 45 |
-
def google_translate(text, source='auto', target='vi'):
    """Translate ``text`` through an HTTP translation endpoint (best effort).

    The text is split on whitespace into chunks of at most 5000 characters,
    each chunk is translated separately, and the results are joined with a
    single space.

    Args:
        text: Text to translate.
        source: Source language code sent as ``sl``.
        target: Target language code sent as ``tl``.

    Returns:
        str: The translated text, or the original ``text`` when anything
        fails. For an empty string the result of ``gr_warning`` is returned.
    """
    if text == "": return gr_warning(translations["prompt_warning"])

    try:
        import textwrap

        def translate_chunk(chunk):
            # The timeout keeps a stalled connection from blocking forever.
            response = requests.get(
                codecs.decode("uggcf://genafyngr.tbbtyrncvf.pbz/genafyngr_n/fvatyr", "rot13"),
                params={'client': 'gtx', 'sl': source, 'tl': target, 'dt': 't', 'q': chunk},
                timeout=30
            )

            # A non-200 answer leaves this chunk untranslated.
            if response.status_code != 200: return chunk

            # Skip segments without text so join() cannot fail on None.
            return ''.join(segment[0] for segment in response.json()[0] if segment[0])

        chunks = textwrap.wrap(text, 5000, break_long_words=False, break_on_hyphens=False)

        # wrap() drops the whitespace at each break; rejoin with a space so
        # words on either side of a chunk boundary do not fuse together.
        return ' '.join(translate_chunk(chunk) for chunk in chunks)
    except Exception:
        # Best effort: fall back to the untranslated text on any failure.
        return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/parser.py
DELETED
|
@@ -1,369 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
sys.path.append(os.getcwd())
|
| 5 |
-
|
| 6 |
-
# The first command-line token selects the sub-command; None when absent.
argv = sys.argv[1] if len(sys.argv) > 1 else None

# NOTE: the dispatch chain that follows indexes this list by position, so the
# order of the entries must not change. The last entry previously lacked its
# "--" prefix, which made `--help_create_reference` an invalid argument.
argv_is_allows = ["--audio_effects", "--convert", "--create_dataset", "--create_index", "--extract", "--preprocess", "--separator_music", "--train", "--help_audio_effects", "--help_convert", "--help_create_dataset", "--help_create_index", "--help_extract", "--help_preprocess", "--help_separate_music", "--help_train", "--help", "--create_reference", "--help_create_reference"]

if argv not in argv_is_allows:
    print("Cú pháp không hợp lệ! Sử dụng --help để biết thêm")
    # sys.exit() instead of quit(): quit() is injected by the site module and
    # is not guaranteed to exist in every runtime.
    sys.exit()
|
| 16 |
-
|
| 17 |
-
if argv_is_allows[0] in argv: from main.inference.audio_effects import main
|
| 18 |
-
elif argv_is_allows[1] in argv: from main.inference.conversion.convert import main
|
| 19 |
-
elif argv_is_allows[2] in argv: from main.inference.create_dataset import main
|
| 20 |
-
elif argv_is_allows[3] in argv: from main.inference.create_index import main
|
| 21 |
-
elif argv_is_allows[4] in argv: from main.inference.extracting.extract import main
|
| 22 |
-
elif argv_is_allows[5] in argv: from main.inference.preprocess.preprocess import main
|
| 23 |
-
elif argv_is_allows[6] in argv: from main.inference.separate_music import main
|
| 24 |
-
elif argv_is_allows[7] in argv: from main.inference.training.train import main
|
| 25 |
-
elif argv_is_allows[17] in argv: from main.inference.create_reference import main
|
| 26 |
-
elif argv_is_allows[8] in argv:
|
| 27 |
-
print("""Các tham số của `--audio_effects`:
|
| 28 |
-
1. Đường dẫn tệp:
|
| 29 |
-
- `--input_path` (bắt buộc): Đường dẫn đến tệp âm thanh đầu vào.
|
| 30 |
-
- `--output_path` (mặc định: `./audios/apply_effects.wav`): Đường dẫn lưu tệp đầu ra.
|
| 31 |
-
- `--export_format` (mặc định: `wav`): Định dạng xuất tệp (`wav`, `mp3`, ...).
|
| 32 |
-
|
| 33 |
-
2. Lấy mẫu lại:
|
| 34 |
-
- `--resample` (mặc định: `False`): Có lấy mẫu lại hay không.
|
| 35 |
-
- `--resample_sr` (mặc định: `0`): Tần số lấy mẫu mới (Hz).
|
| 36 |
-
|
| 37 |
-
3. Hiệu ứng chorus:
|
| 38 |
-
- `--chorus`: Bật/tắt chorus.
|
| 39 |
-
- `--chorus_depth`, `--chorus_rate`, `--chorus_mix`, `--chorus_delay`, `--chorus_feedback`: Các thông số điều chỉnh chorus.
|
| 40 |
-
|
| 41 |
-
4. Hiệu ứng distortion:
|
| 42 |
-
- `--distortion`: Bật/tắt distortion.
|
| 43 |
-
- `--drive_db`: Mức độ méo âm thanh.
|
| 44 |
-
|
| 45 |
-
5. Hiệu ứng reverb:
|
| 46 |
-
- `--reverb`: Bật/tắt hồi âm.
|
| 47 |
-
- `--reverb_room_size`, `--reverb_damping`, `--reverb_wet_level`, `--reverb_dry_level`, `--reverb_width`, `--reverb_freeze_mode`: Điều chỉnh hồi âm.
|
| 48 |
-
|
| 49 |
-
6. Hiệu ứng pitch shift:
|
| 50 |
-
- `--pitchshift`: Bật/tắt thay đổi cao độ.
|
| 51 |
-
- `--pitch_shift`: Giá trị dịch cao độ.
|
| 52 |
-
|
| 53 |
-
7. Hiệu ứng delay:
|
| 54 |
-
- `--delay`: Bật/tắt delay.
|
| 55 |
-
- `--delay_seconds`, `--delay_feedback`, `--delay_mix`: Điều chỉnh thời gian trễ, phản hồi và hòa trộn.
|
| 56 |
-
|
| 57 |
-
8. Compressor:
|
| 58 |
-
- `--compressor`: Bật/tắt compressor.
|
| 59 |
-
- `--compressor_threshold`, `--compressor_ratio`, `--compressor_attack_ms`, `--compressor_release_ms`: Các thông số nén.
|
| 60 |
-
|
| 61 |
-
9. Limiter:
|
| 62 |
-
- `--limiter`: Bật/tắt giới hạn mức âm thanh.
|
| 63 |
-
- `--limiter_threshold`, `--limiter_release`: Ngưỡng giới hạn và thời gian nhả.
|
| 64 |
-
|
| 65 |
-
10. Gain (Khuếch đại):
|
| 66 |
-
- `--gain`: Bật/tắt gain.
|
| 67 |
-
- `--gain_db`: Mức gain (dB).
|
| 68 |
-
|
| 69 |
-
11. Bitcrush:
|
| 70 |
-
- `--bitcrush`: Bật/tắt hiệu ứng giảm độ phân giải.
|
| 71 |
-
- `--bitcrush_bit_depth`: Số bit của bitcrush.
|
| 72 |
-
|
| 73 |
-
12. Clipping:
|
| 74 |
-
- `--clipping`: Bật/tắt cắt âm thanh.
|
| 75 |
-
- `--clipping_threshold`: Ngưỡng clipping.
|
| 76 |
-
|
| 77 |
-
13. Phaser:
|
| 78 |
-
- `--phaser`: Bật/tắt hiệu ứng phaser.
|
| 79 |
-
- `--phaser_rate_hz`, `--phaser_depth`, `--phaser_centre_frequency_hz`, `--phaser_feedback`, `--phaser_mix`: Điều chỉnh hiệu ứng phaser.
|
| 80 |
-
|
| 81 |
-
14. Boost bass & treble:
|
| 82 |
-
- `--treble_bass_boost`: Bật/tắt tăng cường âm bass và treble.
|
| 83 |
-
- `--bass_boost_db`, `--bass_boost_frequency`, `--treble_boost_db`, `--treble_boost_frequency`: Các thông số tăng bass và treble.
|
| 84 |
-
|
| 85 |
-
15. Fade in & fade out:
|
| 86 |
-
- `--fade_in_out`: Bật/tắt hiệu ứng fade.
|
| 87 |
-
- `--fade_in_duration`, `--fade_out_duration`: Thời gian fade vào/ra.
|
| 88 |
-
|
| 89 |
-
16. Kết hợp âm thanh:
|
| 90 |
-
- `--audio_combination`: Bật/tắt ghép nhiều tệp âm thanh.
|
| 91 |
-
- `--audio_combination_input`: Đường dẫn tệp âm thanh bổ sung.
|
| 92 |
-
- `--main_volume`: Âm lượng của âm thanh chính.
|
| 93 |
-
- `--combination_volume`:: Âm lượng của âm thanh cần kết hợp.
|
| 94 |
-
""")
|
| 95 |
-
quit()
|
| 96 |
-
elif argv_is_allows[9] in argv:
|
| 97 |
-
print("""Các tham số của --convert:
|
| 98 |
-
1. Cấu hình xử lí giọng nói:
|
| 99 |
-
- `--pitch` (mặc định: `0`): Điều chỉnh cao độ.
|
| 100 |
-
- `--filter_radius` (mặc định: `3`): Độ mượt của đường F0.
|
| 101 |
-
- `--index_rate` (mặc định: `0.5`): Tỷ lệ sử dụng chỉ mục giọng nói.
|
| 102 |
-
- `--rms_mix_rate` (mặc định: `1`): Hệ số điều chỉnh biên độ âm lượng.
|
| 103 |
-
- `--protect` (mặc định: `0.33`): Bảo vệ phụ âm.
|
| 104 |
-
- `--hop_length` (mặc định: `64`): Bước nhảy khi xử lí âm thanh.
|
| 105 |
-
|
| 106 |
-
2. Cấu hình F0:
|
| 107 |
-
- `--f0_method` (mặc định: `rmvpe`): Phương pháp dự đoán F0 (`pm`, `dio`, `mangio-crepe-tiny`, `mangio-crepe-small`, `mangio-crepe-medium`, `mangio-crepe-large`, `mangio-crepe-full`, `crepe-tiny`, `crepe-small`, `crepe-medium`, `crepe-large`, `crepe-full`, `fcpe`, `fcpe-legacy`, `rmvpe`, `rmvpe-legacy`, `harvest`, `yin`, `pyin`, `swipe`).
|
| 108 |
-
- `--f0_autotune` (mặc định: `False`): Có tự động điều chỉnh F0 hay không.
|
| 109 |
-
- `--f0_autotune_strength` (mặc định: `1`): Cường độ hiệu chỉnh tự động F0.
|
| 110 |
-
- `--f0_file` (mặc định: ``): Đường dẫn tệp F0 có sẵn.
|
| 111 |
-
- `--f0_onnx` (mặc định: `False`): Có sử dụng phiên bản ONNX của F0 hay không.
|
| 112 |
-
- `--proposal_pitch` (mặc định: `False`): Đề xuất cao độ thay vì điều chỉnh thủ công.
|
| 113 |
-
- `--proposal_pitch_threshold` (mặc định: `0.0`): Ngưỡng tần số ước tính cao độ.
|
| 114 |
-
- `--alpha` (mặc định: `0.5`): Ngưỡng trộn cao độ khi ước tính cao độ hybrid.
|
| 115 |
-
|
| 116 |
-
3. Mô hình nhúng:
|
| 117 |
-
- `--embedder_model` (mặc định: `hubert_base`): Mô hình nhúng sử dụng.
|
| 118 |
-
- `--embedders_mode` (mặc định: `fairseq`): Chế độ nhúng (`fairseq`, `transformers`, `onnx`, `whisper`).
|
| 119 |
-
|
| 120 |
-
4. Đường dẫn tệp:
|
| 121 |
-
- `--input_path` (bắt buộc): Đường dẫn tệp âm thanh đầu vào.
|
| 122 |
-
- `--output_path` (mặc định: `./audios/output.wav`): Đường dẫn lưu tệp đầu ra.
|
| 123 |
-
- `--export_format` (mặc định: `wav`): Định dạng xuất tệp.
|
| 124 |
-
- `--pth_path` (bắt buộc): Đường dẫn đến tệp mô hình `.pth`.
|
| 125 |
-
- `--index_path` (mặc định: `None`): Đường dẫn tệp chỉ mục (nếu có).
|
| 126 |
-
|
| 127 |
-
5. Làm sạch âm thanh:
|
| 128 |
-
- `--clean_audio` (mặc định: `False`): Có áp dụng làm sạch âm thanh không.
|
| 129 |
-
- `--clean_strength` (mặc định: `0.7`): Mức độ làm sạch.
|
| 130 |
-
|
| 131 |
-
6. Resampling & chia nhỏ âm thanh:
|
| 132 |
-
- `--resample_sr` (mặc định: `0`): Tần số lấy mẫu mới (0 nghĩa là giữ nguyên).
|
| 133 |
-
- `--split_audio` (mặc định: `False`): Có chia nhỏ audio trước khi xử lí không.
|
| 134 |
-
|
| 135 |
-
7. Kiểm tra & tối ưu hóa:
|
| 136 |
-
- `--checkpointing` (mặc định: `False`): Bật/tắt checkpointing để tiết kiệm RAM.
|
| 137 |
-
|
| 138 |
-
8. Dịch formant:
|
| 139 |
-
- `--formant_shifting` (mặc định: `False`): Có bật hiệu ứng dịch formant không.
|
| 140 |
-
- `--formant_qfrency` (mặc định: `0.8`): Hệ số dịch formant theo tần số.
|
| 141 |
-
- `--formant_timbre` (mặc định: `0.8`): Hệ số thay đổi màu sắc giọng.
|
| 142 |
-
""")
|
| 143 |
-
quit()
|
| 144 |
-
elif argv_is_allows[10] in argv:
|
| 145 |
-
print("""Các tham số của --create_dataset:
|
| 146 |
-
1. Đường dẫn & cấu hình dataset:
|
| 147 |
-
- `--input_data` (bắt buộc): Đường dẫn liên kết đến âm thanh (Liên kết Youtube, có thể dùng dấu `,` để dùng nhiều liên kết).
|
| 148 |
-
- `--output_dirs` (mặc định: `./dataset`): Thư mục xuất dữ liệu đầu ra.
|
| 149 |
-
- `--sample_rate` (mặc định: `48000`): Tần số lấy mẫu cho âm thanh.
|
| 150 |
-
|
| 151 |
-
2. Làm sạch dữ liệu:
|
| 152 |
-
- `--clean_dataset` (mặc định: `False`): Có áp dụng làm sạch dữ liệu hay không.
|
| 153 |
-
- `--clean_strength` (mặc định: `0.7`): Mức độ làm sạch dữ liệu.
|
| 154 |
-
|
| 155 |
-
3. Tách giọng & hiệu ứng:
|
| 156 |
-
- `--separate` (mặc định: `True`): có tách nhạc hay không.
|
| 157 |
-
- `--separator_reverb` (mặc định: `False`): Có tách vang giọng không.
|
| 158 |
-
- `--model_name` (mặc định: `MDXNET_Main`): Mô hình tách nhạc ('Main_340', 'Main_390', 'Main_406', 'Main_427', 'Main_438', 'Inst_full_292', 'Inst_HQ_1', 'Inst_HQ_2', 'Inst_HQ_3', 'Inst_HQ_4', 'Inst_HQ_5', 'Kim_Vocal_1', 'Kim_Vocal_2', 'Kim_Inst', 'Inst_187_beta', 'Inst_82_beta', 'Inst_90_beta', 'Voc_FT', 'Crowd_HQ', 'MDXNET_9482', 'Inst_1', 'Inst_2', 'Inst_3', 'MDXNET_1_9703', 'MDXNET_2_9682', 'MDXNET_3_9662', 'Inst_Main', 'MDXNET_Main', 'HT-Tuned', 'HT-Normal', 'HD_MMI', 'HT_6S', 'HP-1', 'HP-2', 'HP-Vocal-1', 'HP-Vocal-2', 'HP2-1', 'HP2-2', 'HP2-3', 'SP-2B-1', 'SP-2B-2', 'SP-3B-1', 'SP-4B-1', 'SP-4B-2', 'SP-MID-1', 'SP-MID-2').
|
| 159 |
-
- `--reverb_model` (mặc định: `MDX-Reverb`): Mô hình tách nhạc ("MDX-Reverb", 'VR-Reverb', 'Echo-Aggressive', 'Echo-Normal').
|
| 160 |
-
- `--denoise_model` (mặc định: `Normal`): Mô hình tách nhạc ('Lite', 'Normal').
|
| 161 |
-
|
| 162 |
-
4. Cấu hình xử lí âm thanh:
|
| 163 |
-
- `--shifts` (mặc định: `2`): Số lượng dự đoán.
|
| 164 |
-
- `--batch_size` (mặc định: `1`): Kích thước lô.
|
| 165 |
-
- `--overlap` (mặc định: `0.25`): Mức độ chồng lấn giữa các đoạn.
|
| 166 |
-
- `--aggression` (mặc định: `5`): Cường độ chiết xuất thân chính.
|
| 167 |
-
- `--hop_length` (mặc định: `1024`): Bước nhảy MDX khi xử lí.
|
| 168 |
-
- `--window_size` (mặc định: `512`): Kích thước cửa sổ.
|
| 169 |
-
- `--segments_size` (mặc định: `256`): Kích thước phân đoạn âm thanh.
|
| 170 |
-
- `--post_process_threshold` (mặc định: `0.2`): Mức độ xử lí hậu kỳ sau khi tách nhạc.
|
| 171 |
-
|
| 172 |
-
5. Cấu hình xử lí âm thanh khác:
|
| 173 |
-
- `--enable_tta` (mặc định: `False`): Tăng cường suy luận.
|
| 174 |
-
- `--enable_denoise` (mặc định: `False`): Khữ tách nhạc.
|
| 175 |
-
- `--high_end_process` (mặc định: `False`): Xử lí dải cao.
|
| 176 |
-
- `--enable_post_process` (mặc định: `False`): Hậu xử lí.
|
| 177 |
-
|
| 178 |
-
6. Bỏ qua phần âm thanh:
|
| 179 |
-
- `--skip_seconds` (mặc định: `False`): Có bỏ qua giây âm thanh nào không.
|
| 180 |
-
- `--skip_start_audios` (mặc định: `0`): Thời gian (giây) cần bỏ qua ở đầu audio.
|
| 181 |
-
- `--skip_end_audios` (mặc định: `0`): Thời gian (giây) cần bỏ qua ở cuối audio.
|
| 182 |
-
""")
|
| 183 |
-
quit()
|
| 184 |
-
elif argv_is_allows[11] in argv:
|
| 185 |
-
print("""Các tham số của --create_index:
|
| 186 |
-
1. Thông tin mô hình:
|
| 187 |
-
- `--model_name` (bắt buộc): Tên mô hình.
|
| 188 |
-
- `--rvc_version` (mặc định: `v2`): Phiên bản (`v1`, `v2`).
|
| 189 |
-
- `--index_algorithm` (mặc định: `Auto`): Thuật toán index sử dụng (`Auto`, `Faiss`, `KMeans`).
|
| 190 |
-
""")
|
| 191 |
-
quit()
|
| 192 |
-
elif argv_is_allows[12] in argv:
|
| 193 |
-
print("""Các tham số của --extract:
|
| 194 |
-
1. Thông tin mô hình:
|
| 195 |
-
- `--model_name` (bắt buộc): Tên mô hình.
|
| 196 |
-
- `--rvc_version` (mặc định: `v2`): Phiên bản RVC (`v1`, `v2`).
|
| 197 |
-
|
| 198 |
-
2. Cấu hình F0:
|
| 199 |
-
- `--f0_method` (mặc định: `rmvpe`): Phương pháp dự đoán F0 (`pm`, `dio`, `mangio-crepe-tiny`, `mangio-crepe-small`, `mangio-crepe-medium`, `mangio-crepe-large`, `mangio-crepe-full`, `crepe-tiny`, `crepe-small`, `crepe-medium`, `crepe-large`, `crepe-full`, `fcpe`, `fcpe-legacy`, `rmvpe`, `rmvpe-legacy`, `harvest`, `yin`, `pyin`, `swipe`).
|
| 200 |
-
- `--f0_onnx` (mặc định: `False`): Có sử dụng phiên bản ONNX của F0 hay không.
|
| 201 |
-
- `--pitch_guidance` (mặc định: `True`): Có sử dụng hướng dẫn cao độ hay không.
|
| 202 |
-
- `--f0_autotune` (mặc định: `False`): Có tự động điều chỉnh F0 hay không.
|
| 203 |
-
- `--f0_autotune_strength` (mặc định: `1`): Cường độ hiệu chỉnh tự động F0.
|
| 204 |
-
- `--alpha` (mặc định: `0.5`): Ngưỡng trộn cao độ khi ước tính cao độ hybrid.
|
| 205 |
-
|
| 206 |
-
3. Cấu hình xử lí:
|
| 207 |
-
- `--hop_length` (mặc định: `128`): Độ dài bước nhảy trong quá trình xử lí.
|
| 208 |
-
- `--cpu_cores` (mặc định: `2`): Số lượng luồng CPU sử dụng.
|
| 209 |
-
- `--gpu` (mặc định: `-`): Chỉ định GPU sử dụng (ví dụ: `0` cho GPU đầu tiên, `-` để tắt GPU).
|
| 210 |
-
- `--sample_rate` (bắt buộc): Tần số lấy mẫu của âm thanh đầu vào.
|
| 211 |
-
|
| 212 |
-
4. Cấu hình nhúng:
|
| 213 |
-
- `--embedder_model` (mặc định: `hubert_base`): Tên mô hình nhúng.
|
| 214 |
-
- `--embedders_mode` (mặc định: `fairseq`): Chế độ nhúng (`fairseq`, `transformers`, `onnx`, `whisper`).
|
| 215 |
-
|
| 216 |
-
4. RMS:
|
| 217 |
-
- `--rms_extract` (mặc định: False): Trích xuất thêm năng lượng rms.
|
| 218 |
-
""")
|
| 219 |
-
quit()
|
| 220 |
-
elif argv_is_allows[13] in argv:
|
| 221 |
-
print("""Các tham số của --preprocess:
|
| 222 |
-
1. Thông tin mô hình:
|
| 223 |
-
- `--model_name` (bắt buộc): Tên mô hình.
|
| 224 |
-
|
| 225 |
-
2. Cấu hình dữ liệu:
|
| 226 |
-
- `--dataset_path` (mặc định: `./dataset`): Đường dẫn thư mục chứa tệp dữ liệu.
|
| 227 |
-
- `--sample_rate` (bắt buộc): Tần số lấy mẫu của dữ liệu âm thanh.
|
| 228 |
-
|
| 229 |
-
3. Cấu hình xử lí:
|
| 230 |
-
- `--cpu_cores` (mặc định: `2`): Số lượng luồng CPU sử dụng.
|
| 231 |
-
- `--cut_preprocess` (mặc định: `Automatic`): Cách cắt dữ liệu tiền xử l�� (`Automatic`, `Simple`, `Skip`).
|
| 232 |
-
- `--process_effects` (mặc định: `False`): Có áp dụng tiền xử lí hay không.
|
| 233 |
-
- `--clean_dataset` (mặc định: `False`): Có làm sạch tệp dữ liệu hay không.
|
| 234 |
-
- `--clean_strength` (mặc định: `0.7`): Độ mạnh của quá trình làm sạch dữ liệu.
|
| 235 |
-
|
| 236 |
-
4. Cấu hình khác:
|
| 237 |
-
- `--chunk_len` (mặc định: `3.0`): Độ dài của đoạn âm thanh cho phương pháp 'Simple'.
|
| 238 |
-
- `--overlap_len` (mặc định: `0.3`): Độ dài của phần chồng chéo giữa các lát cắt đối với phương pháp 'Simple'.
|
| 239 |
-
- `--normalization_mode` (mặc định: `none`): Có xử lí chuẩn hóa âm thanh không (`none`, `pre`, `post`)
|
| 240 |
-
""")
|
| 241 |
-
quit()
|
| 242 |
-
elif argv_is_allows[14] in argv:
|
| 243 |
-
print("""Các tham số của --separate_music:
|
| 244 |
-
1. Cấu hình đầu vào, đầu ra:
|
| 245 |
-
- `--input_path` (bắt buộc): Đường dẫn tệp âm thanh đầu vào.
|
| 246 |
-
- `--output_dirs` (mặc định: `./audios`): Thư mục lưu tệp đầu ra.
|
| 247 |
-
- `--export_format` (mặc định: `wav`): Định dạng xuất tệp (`wav`, `mp3`,...).
|
| 248 |
-
- `--sample_rate` (mặc định: `44100`): Tần số lấy mẫu của âm thanh đầu ra.
|
| 249 |
-
|
| 250 |
-
2. Cấu hình mô hình:
|
| 251 |
-
- `--model_name` (mặc định: `MDXNET_Main`): Mô hình tách nhạc ('Main_340', 'Main_390', 'Main_406', 'Main_427', 'Main_438', 'Inst_full_292', 'Inst_HQ_1', 'Inst_HQ_2', 'Inst_HQ_3', 'Inst_HQ_4', 'Inst_HQ_5', 'Kim_Vocal_1', 'Kim_Vocal_2', 'Kim_Inst', 'Inst_187_beta', 'Inst_82_beta', 'Inst_90_beta', 'Voc_FT', 'Crowd_HQ', 'MDXNET_9482', 'Inst_1', 'Inst_2', 'Inst_3', 'MDXNET_1_9703', 'MDXNET_2_9682', 'MDXNET_3_9662', 'Inst_Main', 'MDXNET_Main', 'HT-Tuned', 'HT-Normal', 'HD_MMI', 'HT_6S', 'HP-1', 'HP-2', 'HP-Vocal-1', 'HP-Vocal-2', 'HP2-1', 'HP2-2', 'HP2-3', 'SP-2B-1', 'SP-2B-2', 'SP-3B-1', 'SP-4B-1', 'SP-4B-2', 'SP-MID-1', 'SP-MID-2').
|
| 252 |
-
- `--karaoke_model` (mặc định: `MDX-Version-1`): Mô hình tách nhạc ('MDX-Version-1', 'MDX-Version-2', 'VR-Version-1', 'VR-Version-2').
|
| 253 |
-
- `--reverb_model` (mặc định: `MDX-Reverb`): Mô hình tách nhạc ("MDX-Reverb", 'VR-Reverb', 'Echo-Aggressive', 'Echo-Normal').
|
| 254 |
-
- `--denoise_model` (mặc định: `Normal`): Mô hình tách nhạc ('Lite', 'Normal').
|
| 255 |
-
|
| 256 |
-
3. Cấu hình xử lí âm thanh:
|
| 257 |
-
- `--shifts` (mặc định: `2`): Số lượng dự đoán.
|
| 258 |
-
- `--batch_size` (mặc định: `1`): Kích thước lô.
|
| 259 |
-
- `--overlap` (mặc định: `0.25`): Mức độ chồng lấn giữa các đoạn.
|
| 260 |
-
- `--aggression` (mặc định: `5`): Cường độ chiết xuất thân chính.
|
| 261 |
-
- `--hop_length` (mặc định: `1024`): Bước nhảy MDX khi xử lí.
|
| 262 |
-
- `--window_size` (mặc định: `512`): Kích thước cửa sổ.
|
| 263 |
-
- `--segments_size` (mặc định: `256`): Kích thước phân đoạn âm thanh.
|
| 264 |
-
- `--post_process_threshold` (mặc định: `0.2`): Mức độ xử lí hậu kỳ sau khi tách nhạc.
|
| 265 |
-
|
| 266 |
-
4. Cấu hình xử lí âm thanh khác:
|
| 267 |
-
- `--enable_tta` (mặc định: `False`): Tăng cường suy luận.
|
| 268 |
-
- `--enable_denoise` (mặc định: `False`): Khữ tách nhạc.
|
| 269 |
-
- `--high_end_process` (mặc định: `False`): Xử lí dải cao.
|
| 270 |
-
- `--enable_post_process` (mặc định: `False`): Hậu xử lí.
|
| 271 |
-
- `--separate_backing` (mặc định: `False`): Tách bè giọng.
|
| 272 |
-
- `--separate_reverb` (mặc định: `False`): Tách vang giọng.
|
| 273 |
-
""")
|
| 274 |
-
quit()
|
| 275 |
-
elif argv_is_allows[15] in argv:
|
| 276 |
-
print("""Các tham số của --train:
|
| 277 |
-
1. Cấu hình mô hình:
|
| 278 |
-
- `--model_name` (bắt buộc): Tên mô hình.
|
| 279 |
-
- `--rvc_version` (mặc định: `v2`): Phiên bản RVC (`v1`, `v2`).
|
| 280 |
-
- `--model_author` (tùy chọn): Tác giả của mô hình.
|
| 281 |
-
|
| 282 |
-
2. Cấu hình lưu:
|
| 283 |
-
- `--save_every_epoch` (bắt buộc): Số kỷ nguyên giữa mỗi lần lưu.
|
| 284 |
-
- `--save_only_latest` (mặc định: `True`): Chỉ lưu điểm mới nhất.
|
| 285 |
-
- `--save_every_weights` (mặc định: `True`): Lưu tất cả trọng số của mô hình.
|
| 286 |
-
|
| 287 |
-
3. Cấu hình huấn luyện:
|
| 288 |
-
- `--total_epoch` (mặc định: `300`): Tổng số kỷ nguyên huấn luyện.
|
| 289 |
-
- `--batch_size` (mặc định: `8`): Kích thước lô trong quá trình huấn luyện.
|
| 290 |
-
|
| 291 |
-
4. Cấu hình thiết bị:
|
| 292 |
-
- `--gpu` (mặc định: `0`): Chỉ định GPU để sử dụng (số hoặc `-` nếu không dùng GPU).
|
| 293 |
-
- `--cache_data_in_gpu` (mặc định: `False`): Lưu dữ liệu vào GPU để tăng tốc.
|
| 294 |
-
|
| 295 |
-
5. Cấu hình huấn luyện nâng cao:
|
| 296 |
-
- `--pitch_guidance` (mặc định: `True`): Sử dụng hướng dẫn cao độ.
|
| 297 |
-
- `--g_pretrained_path` (mặc định: ``): Đường dẫn đến trọng số G đã huấn luyện trước.
|
| 298 |
-
- `--d_pretrained_path` (mặc định: ``): Đường dẫn đến trọng số D đã huấn luyện trước.
|
| 299 |
-
- `--vocoder` (mặc định: `Default`): Bộ mã hóa được sử dụng (`Default`, `MRF-HiFi-GAN`, `RefineGAN`).
|
| 300 |
-
- `--energy_use` (mặc định: `False`): Sử dụng năng lượng rms.
|
| 301 |
-
|
| 302 |
-
6. Phát hiện huấn luyện quá mức:
|
| 303 |
-
- `--overtraining_detector` (mặc định: `False`): Bật/tắt chế độ phát hiện huấn luyện quá mức.
|
| 304 |
-
- `--overtraining_threshold` (mặc định: `50`): Ngưỡng để xác định huấn luyện quá mức.
|
| 305 |
-
|
| 306 |
-
7. Xử lí dữ liệu:
|
| 307 |
-
- `--cleanup` (mặc định: `False`): Dọn dẹp tệp huấn luyện cũ để tiến hành huấn luyện lại từ đầu.
|
| 308 |
-
|
| 309 |
-
8. Tối ưu:
|
| 310 |
-
- `--checkpointing` (mặc định: `False`): Bật/tắt checkpointing để tiết kiệm RAM.
|
| 311 |
-
- `--deterministic` (mặc định: `False`): Khi bật sẽ sử dụng các thuật toán có tính xác định cao, đảm bảo rằng mỗi lần chạy cùng một dữ liệu đầu vào sẽ cho kết quả giống nhau.
|
| 312 |
-
- `--benchmark` (mặc định: `False`): Khi bật sẽ thử nghiệm và chọn thuật toán tối ưu nhất cho phần cứng và kích thước cụ thể.
|
| 313 |
-
- `--optimizer` (mặc định: `AdamW`): Trình tối ưu hóa được sử dụng (`AdamW`, `RAdam`, `AnyPrecisionAdamW`).
|
| 314 |
-
- `--multiscale_mel_loss` (mặc định: `False`): So sánh phổ Mel của âm thanh thật và âm thanh giả ở nhiều thang độ khác nhau. Giúp mô hình học được chi tiết âm sắc, độ sáng và cấu trúc tần số tốt hơn, từ đó cải thiện chất lượng và độ tự nhiên của giọng nói đầu ra.
|
| 315 |
-
|
| 316 |
-
9. Bộ tham chiếu:
|
| 317 |
-
- `--use_custom_reference` (mặc định: `False`): Có tùy chỉnh bộ tham chiếu hay không.
|
| 318 |
-
- `--reference_path` (mặc định: `False`): Đường dẫn đến bộ tham chiếu.
|
| 319 |
-
""")
|
| 320 |
-
quit()
|
| 321 |
-
elif argv_is_allows[18] in argv:
|
| 322 |
-
print("""Các tham số của --create_reference:
|
| 323 |
-
1. Đường dẫn tệp:
|
| 324 |
-
- `--audio_path` (bắt buộc): Đường dẫn tệp âm thanh đầu vào.
|
| 325 |
-
- `--reference_name` (mặc định: `reference`): Đường dẫn lưu bộ tham chiếu đầu ra.
|
| 326 |
-
|
| 327 |
-
2. Cấu hình bộ tham chiếu:
|
| 328 |
-
- `--pitch_guidance` (mặc định: `True`): Sử dụng hướng dẫn cao độ.
|
| 329 |
-
- `--energy_use` (mặc định: `False`): Sử dụng năng lượng rms.
|
| 330 |
-
- `--version` (mặc định: `v2`): Phiên bản RVC (`v1`, `v2`).
|
| 331 |
-
|
| 332 |
-
3. Cấu hình nhúng:
|
| 333 |
-
- `--embedder_model` (mặc định: `hubert_base`): Tên mô hình nhúng.
|
| 334 |
-
- `--embedders_mode` (mặc định: `fairseq`): Chế độ nhúng (`fairseq`, `transformers`, `onnx`, `whisper`).
|
| 335 |
-
|
| 336 |
-
4. Cấu hình F0:
|
| 337 |
-
- `--f0_method` (mặc định: `rmvpe`): Phương pháp dự đoán F0 (`pm`, `dio`, `mangio-crepe-tiny`, `mangio-crepe-small`, `mangio-crepe-medium`, `mangio-crepe-large`, `mangio-crepe-full`, `crepe-tiny`, `crepe-small`, `crepe-medium`, `crepe-large`, `crepe-full`, `fcpe`, `fcpe-legacy`, `rmvpe`, `rmvpe-legacy`, `harvest`, `yin`, `pyin`, `swipe`).
|
| 338 |
-
- `--f0_onnx` (mặc định: `False`): Có sử dụng phiên bản ONNX của F0 hay không.
|
| 339 |
-
- `--f0_up_key` (mặc định: `0`): Điều chỉnh cao độ.
|
| 340 |
-
- `--filter_radius` (mặc định: `3`): Độ mượt của đường F0.
|
| 341 |
-
- `--f0_autotune` (mặc định: `False`): Có tự động điều chỉnh F0 hay không.
|
| 342 |
-
- `--f0_autotune_strength` (mặc định: `1`): Cường độ hiệu chỉnh tự động F0.
|
| 343 |
-
- `--f0_file` (mặc định: ``): Đường dẫn tệp F0 có sẵn.
|
| 344 |
-
- `--proposal_pitch` (mặc định: `False`): Đề xuất cao độ thay vì điều chỉnh thủ công.
|
| 345 |
-
- `--proposal_pitch_threshold` (mặc định: `0.0`): Ngưỡng tần số ước tính cao độ.
|
| 346 |
-
- `--alpha` (mặc định: `0.5`): Ngưỡng trộn cao độ khi ước tính cao độ hybrid.
|
| 347 |
-
""")
|
| 348 |
-
quit()
|
| 349 |
-
elif argv_is_allows[16] in argv:
|
| 350 |
-
print("""Sử dụng:
|
| 351 |
-
1. `--help_audio_effects`: Trợ giúp về phần thêm hiệu ứng âm thanh.
|
| 352 |
-
2. `--help_convert`: Trợ giúp về chuyển đổi âm thanh.
|
| 353 |
-
3. `--help_create_dataset`: Trợ giúp về tạo dữ liệu huấn luyện.
|
| 354 |
-
4. `--help_create_index`: Trợ giúp về tạo chỉ mục.
|
| 355 |
-
5. `--help_extract`: Trợ giúp về trích xuất dữ liệu huấn luyện.
|
| 356 |
-
6. `--help_preprocess`: Trợ giúp về xử lí trước dữ liệu.
|
| 357 |
-
7. `--help_separate_music`: Trợ giúp về tách nhạc.
|
| 358 |
-
8. `--help_train`: Trợ giúp về huấn luyện mô hình.
|
| 359 |
-
9. `--help_create_reference`: Trợ giúp về tạo bộ tham chiếu.
|
| 360 |
-
""")
|
| 361 |
-
quit()
|
| 362 |
-
|
| 363 |
-
if __name__ == "__main__":
    # Script entry point: pick the multiprocessing start method required by
    # the requested sub-command, then hand over to main().
    import torch.multiprocessing as mp

    # Training asks for "spawn" without forcing it.
    if "--train" in argv:
        mp.set_start_method("spawn")

    # Preprocessing and extraction force "spawn" even if a method is already set.
    if any(flag in argv for flag in ("--preprocess", "--extract")):
        mp.set_start_method("spawn", force=True)

    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/run_tensorboard.py
DELETED
|
@@ -1,32 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import time
|
| 4 |
-
import logging
|
| 5 |
-
import warnings
|
| 6 |
-
import webbrowser
|
| 7 |
-
|
| 8 |
-
from tensorboard import program
|
| 9 |
-
|
| 10 |
-
sys.path.append(os.getcwd())
|
| 11 |
-
|
| 12 |
-
from main.app.variables import config, translations, logger
|
| 13 |
-
|
| 14 |
-
def launch_tensorboard():
    """Start TensorBoard on the configured log directory and report its URL.

    The log directory and port are read from ``config.configs``.  All Python
    warnings are silenced and the "root" and "tensorboard" loggers are raised
    to ERROR level before the server is launched.  The URL is logged, and is
    also opened in a web browser when ``--open`` is present in ``sys.argv``.

    Returns:
        The translated ``tensorboard_url`` label followed by ``": "`` and the
        URL returned by TensorBoard.
    """
    warnings.filterwarnings("ignore")
    for logger_name in ("root", "tensorboard"):
        logging.getLogger(logger_name).setLevel(logging.ERROR)

    board = program.TensorBoard()
    board.configure(
        argv=[
            None,  # placeholder for the program-name slot of argv
            "--logdir",
            config.configs["logs_path"],
            f"--port={config.configs['tensorboard_port']}",
        ]
    )
    url = board.launch()

    message = f"{translations['tensorboard_url']}: {url}"
    logger.info(message)

    if "--open" in sys.argv:
        webbrowser.open(url)

    return message
|
| 27 |
-
|
| 28 |
-
if __name__ == "__main__":
    launch_tensorboard()

    # Block forever so the process stays alive after launching
    # (presumably the server runs in a background thread — confirm).
    while True:
        time.sleep(5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/downloads/downloads.py
DELETED
|
@@ -1,112 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.variables import translations, configs, models, model_options
|
| 9 |
-
from main.app.core.downloads import download_model, search_models, download_pretrained_model
|
| 10 |
-
from main.app.core.ui import change_download_choices, change_download_pretrained_choices, shutil_move
|
| 11 |
-
from main.app.core.process import fetch_pretrained_data, save_drop_model, update_sample_rate_dropdown
|
| 12 |
-
|
| 13 |
-
def download_tab():
    """Build the "downloads" tab and register its event handlers.

    The tab contains two accordions: one for models (download by URL, pick
    from the model list, search, or upload files) and one for pretrained G/D
    weights (download by URL, pick from the list, or upload files).  Only the
    "download by URL" widgets are visible initially; the two radio selectors
    drive the visibility of the others through their ``change`` handlers.

    Returns nothing.
    """
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a copy of this file whose indentation was lost;
    # confirm it against the rendered UI.

    downloads_label = translations["downloads"]
    url_choice = translations["download_url"]
    upload_choice = translations["upload"]

    def _download_listed_model(model):
        # Resolve the selected name through `models`, then download it.
        return download_model(models[model], model)

    def _download_found_model(model):
        # Resolve the selected name through `model_options`, then download it.
        return download_model(model_options[model], model)

    def _store_uploaded_pretrains(pretrain_upload):
        # Move every uploaded file into the custom pretrained directory.
        return [
            shutil_move(pretrain.name, configs["pretrained_custom_path"])
            for pretrain in pretrain_upload
        ]

    with gr.TabItem(downloads_label, visible=configs.get("downloads_tab", True)):
        gr.Markdown(translations["download_markdown"])
        with gr.Row():
            gr.Markdown(translations["download_markdown_2"])

        # ---- Model download section ----
        with gr.Row():
            with gr.Accordion(translations["model_download"], open=True):
                with gr.Row():
                    downloadmodel = gr.Radio(
                        label=translations["model_download_select"],
                        choices=[
                            url_choice,
                            translations["download_from_csv"],
                            translations["search_models"],
                            upload_choice,
                        ],
                        interactive=True,
                        value=url_choice,
                    )
                with gr.Row():
                    gr.Markdown("___")
                with gr.Column():
                    with gr.Row():
                        url_input = gr.Textbox(
                            label=translations["model_url"],
                            value="",
                            placeholder="https://...",
                            scale=6,
                        )
                        download_model_name = gr.Textbox(
                            label=translations["modelname"],
                            value="",
                            placeholder=translations["modelname"],
                            scale=2,
                        )
                        url_download = gr.Button(value=downloads_label, scale=2)
                with gr.Column():
                    model_browser = gr.Dropdown(
                        choices=models.keys(),
                        label=translations["model_warehouse"],
                        scale=8,
                        allow_custom_value=True,
                        visible=False,
                    )
                    download_from_browser = gr.Button(
                        value=translations["get_model"],
                        scale=2,
                        variant="primary",
                        visible=False,
                    )
                with gr.Column():
                    search_name = gr.Textbox(
                        label=translations["name_to_search"],
                        placeholder=translations["modelname"],
                        interactive=True,
                        scale=8,
                        visible=False,
                    )
                    search = gr.Button(translations["search_2"], scale=2, visible=False)
                    search_dropdown = gr.Dropdown(
                        label=translations["select_download_model"],
                        value="",
                        choices=[],
                        allow_custom_value=True,
                        interactive=False,
                        visible=False,
                    )
                    download = gr.Button(downloads_label, variant="primary", visible=False)
                with gr.Column():
                    model_upload = gr.Files(
                        label=translations["drop_model"],
                        file_types=[".pth", ".onnx", ".index", ".zip"],
                        visible=False,
                    )

        # ---- Pretrained weights download section ----
        with gr.Row():
            with gr.Accordion(translations["download_pretrained_2"], open=False):
                with gr.Row():
                    pretrain_download_choices = gr.Radio(
                        label=translations["model_download_select"],
                        choices=[url_choice, translations["list_model"], upload_choice],
                        value=url_choice,
                        interactive=True,
                    )
                with gr.Row():
                    gr.Markdown("___")
                with gr.Column():
                    with gr.Row():
                        pretrainD = gr.Textbox(
                            label=translations["pretrained_url"].format(dg="D"),
                            value="",
                            placeholder="https://...",
                            interactive=True,
                            scale=4,
                        )
                        pretrainG = gr.Textbox(
                            label=translations["pretrained_url"].format(dg="G"),
                            value="",
                            placeholder="https://...",
                            interactive=True,
                            scale=4,
                        )
                        download_pretrain_button = gr.Button(downloads_label, scale=2)
                with gr.Column():
                    with gr.Row():
                        pretrain_choices = gr.Dropdown(
                            label=translations["select_pretrain"],
                            info=translations["select_pretrain_info"],
                            choices=list(fetch_pretrained_data().keys()),
                            value="Titan_Medium",
                            allow_custom_value=True,
                            interactive=True,
                            scale=6,
                            visible=False,
                        )
                        sample_rate_pretrain = gr.Dropdown(
                            label=translations["pretrain_sr"],
                            info=translations["pretrain_sr"],
                            choices=["48k", "40k", "32k"],
                            value="48k",
                            interactive=True,
                            visible=False,
                        )
                        download_pretrain_choices_button = gr.Button(
                            downloads_label,
                            scale=2,
                            variant="primary",
                            visible=False,
                        )
                with gr.Row():
                    pretrain_upload = gr.Files(
                        label=translations["drop_pretrain"].format(dg="G, D"),
                        file_types=[".pth"],
                        visible=False,
                    )

        # ---- Event wiring: model downloads ----
        with gr.Row():
            url_download.click(
                fn=download_model,
                inputs=[url_input, download_model_name],
                outputs=[url_input],
                api_name="download_model",
            )
            download_from_browser.click(
                fn=_download_listed_model,
                inputs=[model_browser],
                outputs=[model_browser],
                api_name="download_browser",
            )
        with gr.Row():
            # Switching the download mode updates every model-section widget.
            downloadmodel.change(
                fn=change_download_choices,
                inputs=[downloadmodel],
                outputs=[
                    url_input,
                    download_model_name,
                    url_download,
                    model_browser,
                    download_from_browser,
                    search_name,
                    search,
                    search_dropdown,
                    download,
                    model_upload,
                ],
            )
            search.click(
                fn=search_models,
                inputs=[search_name],
                outputs=[search_dropdown, download],
            )
            model_upload.upload(
                fn=save_drop_model,
                inputs=[model_upload],
                outputs=[model_upload],
            )
            download.click(
                fn=_download_found_model,
                inputs=[search_dropdown],
                outputs=[search_dropdown],
                api_name="search_models",
            )

        # ---- Event wiring: pretrained weights ----
        with gr.Row():
            # Switching the download mode updates every pretrained-section widget.
            pretrain_download_choices.change(
                fn=change_download_pretrained_choices,
                inputs=[pretrain_download_choices],
                outputs=[
                    pretrainD,
                    pretrainG,
                    download_pretrain_button,
                    pretrain_choices,
                    sample_rate_pretrain,
                    download_pretrain_choices_button,
                    pretrain_upload,
                ],
            )
            pretrain_choices.change(
                fn=update_sample_rate_dropdown,
                inputs=[pretrain_choices],
                outputs=[sample_rate_pretrain],
            )
        with gr.Row():
            download_pretrain_button.click(
                fn=download_pretrained_model,
                inputs=[pretrain_download_choices, pretrainD, pretrainG],
                outputs=[pretrainD, pretrainG],
                api_name="download_pretrain_link",
            )
            download_pretrain_choices_button.click(
                fn=download_pretrained_model,
                inputs=[pretrain_download_choices, pretrain_choices, sample_rate_pretrain],
                outputs=[pretrain_choices],
                api_name="download_pretrain_choices",
            )
            pretrain_upload.upload(
                fn=_store_uploaded_pretrains,
                inputs=[pretrain_upload],
                outputs=[],
                api_name="upload_pretrain",
            )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/editing/child/audio_effects.py
DELETED
|
@@ -1,393 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.editing import audio_effects
|
| 9 |
-
from main.app.core.presets import audio_effect_load_presets, audio_effect_save_presets
|
| 10 |
-
from main.app.core.ui import visible, change_audios_choices, change_effect_preset_choices, shutil_move
|
| 11 |
-
from main.app.variables import translations, paths_for_files, sample_rate_choice, audio_effect_presets_file, configs, file_types, export_format_choices
|
| 12 |
-
|
| 13 |
-
def audio_effects_tab():
|
| 14 |
-
with gr.Row():
|
| 15 |
-
gr.Markdown(translations["audio_effects_edit"])
|
| 16 |
-
with gr.Row():
|
| 17 |
-
with gr.Column():
|
| 18 |
-
with gr.Row():
|
| 19 |
-
reverb_check_box = gr.Checkbox(label=translations["reverb"], value=False, interactive=True)
|
| 20 |
-
chorus_check_box = gr.Checkbox(label=translations["chorus"], value=False, interactive=True)
|
| 21 |
-
delay_check_box = gr.Checkbox(label=translations["delay"], value=False, interactive=True)
|
| 22 |
-
phaser_check_box = gr.Checkbox(label=translations["phaser"], value=False, interactive=True)
|
| 23 |
-
compressor_check_box = gr.Checkbox(label=translations["compressor"], value=False, interactive=True)
|
| 24 |
-
more_options = gr.Checkbox(label=translations["more_option"], value=False, interactive=True)
|
| 25 |
-
with gr.Row():
|
| 26 |
-
with gr.Accordion(translations["input_output"], open=False):
|
| 27 |
-
with gr.Row():
|
| 28 |
-
upload_audio = gr.Files(label=translations["drop_audio"], file_types=file_types)
|
| 29 |
-
with gr.Row():
|
| 30 |
-
audio_in_path = gr.Dropdown(label=translations["input_audio"], value="", choices=paths_for_files, info=translations["provide_audio"], interactive=True, allow_custom_value=True)
|
| 31 |
-
audio_out_path = gr.Textbox(label=translations["output_audio"], value="audios/audio_effects.wav", placeholder="audios/audio_effects.wav", info=translations["provide_output"], interactive=True)
|
| 32 |
-
with gr.Row():
|
| 33 |
-
with gr.Column():
|
| 34 |
-
audio_combination = gr.Checkbox(label=translations["merge_instruments"], value=False, interactive=True)
|
| 35 |
-
audio_combination_input = gr.Dropdown(label=translations["input_audio"], value="", choices=paths_for_files, info=translations["provide_audio"], interactive=True, allow_custom_value=True, visible=audio_combination.value)
|
| 36 |
-
with gr.Row():
|
| 37 |
-
main_vol = gr.Slider(minimum=-80, maximum=80, label=translations["main_volume"], info=translations["main_volume_info"], value=-4, step=1, interactive=True, visible=audio_combination.value)
|
| 38 |
-
combine_vol = gr.Slider(minimum=-80, maximum=80, label=translations["combination_volume"], info=translations["combination_volume_info"], value=-7, step=1, interactive=True, visible=audio_combination.value)
|
| 39 |
-
with gr.Row():
|
| 40 |
-
audio_effects_refresh = gr.Button(translations["refresh"])
|
| 41 |
-
with gr.Row():
|
| 42 |
-
audio_output_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
|
| 43 |
-
with gr.Row():
|
| 44 |
-
with gr.Accordion(translations["use_presets"], open=False):
|
| 45 |
-
with gr.Row():
|
| 46 |
-
presets_name = gr.Dropdown(label=translations["file_preset"], choices=audio_effect_presets_file, value=audio_effect_presets_file[0] if len(audio_effect_presets_file) > 0 else '', interactive=True, allow_custom_value=True)
|
| 47 |
-
with gr.Row():
|
| 48 |
-
load_click = gr.Button(translations["load_file"], variant="primary")
|
| 49 |
-
refresh_click = gr.Button(translations["refresh"])
|
| 50 |
-
with gr.Accordion(translations["export_file"], open=False):
|
| 51 |
-
with gr.Row():
|
| 52 |
-
with gr.Column():
|
| 53 |
-
name_to_save_file = gr.Textbox(label=translations["filename_to_save"])
|
| 54 |
-
save_file_button = gr.Button(translations["export_file"])
|
| 55 |
-
with gr.Row():
|
| 56 |
-
upload_presets = gr.Files(label=translations["upload_presets"], file_types=[".effect.json"])
|
| 57 |
-
with gr.Row():
|
| 58 |
-
apply_effects_button = gr.Button(translations["apply"], variant="primary", scale=2)
|
| 59 |
-
with gr.Row():
|
| 60 |
-
with gr.Column():
|
| 61 |
-
with gr.Row():
|
| 62 |
-
with gr.Accordion(translations["reverb"], open=False, visible=reverb_check_box.value) as reverb_accordion:
|
| 63 |
-
reverb_freeze_mode = gr.Checkbox(label=translations["reverb_freeze"], info=translations["reverb_freeze_info"], value=False, interactive=True)
|
| 64 |
-
reverb_room_size = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.15, label=translations["room_size"], info=translations["room_size_info"], interactive=True)
|
| 65 |
-
reverb_damping = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label=translations["damping"], info=translations["damping_info"], interactive=True)
|
| 66 |
-
reverb_wet_level = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.2, label=translations["wet_level"], info=translations["wet_level_info"], interactive=True)
|
| 67 |
-
reverb_dry_level = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.8, label=translations["dry_level"], info=translations["dry_level_info"], interactive=True)
|
| 68 |
-
reverb_width = gr.Slider(minimum=0, maximum=1, step=0.01, value=1, label=translations["width"], info=translations["width_info"], interactive=True)
|
| 69 |
-
with gr.Row():
|
| 70 |
-
with gr.Accordion(translations["chorus"], open=False, visible=chorus_check_box.value) as chorus_accordion:
|
| 71 |
-
chorus_depth = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["chorus_depth"], info=translations["chorus_depth_info"], interactive=True)
|
| 72 |
-
chorus_rate_hz = gr.Slider(minimum=0.1, maximum=10, step=0.1, value=1.5, label=translations["chorus_rate_hz"], info=translations["chorus_rate_hz_info"], interactive=True)
|
| 73 |
-
chorus_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["chorus_mix"], info=translations["chorus_mix_info"], interactive=True)
|
| 74 |
-
chorus_centre_delay_ms = gr.Slider(minimum=0, maximum=50, step=1, value=10, label=translations["chorus_centre_delay_ms"], info=translations["chorus_centre_delay_ms_info"], interactive=True)
|
| 75 |
-
chorus_feedback = gr.Slider(minimum=-1, maximum=1, step=0.01, value=0, label=translations["chorus_feedback"], info=translations["chorus_feedback_info"], interactive=True)
|
| 76 |
-
with gr.Row():
|
| 77 |
-
with gr.Accordion(translations["delay"], open=False, visible=delay_check_box.value) as delay_accordion:
|
| 78 |
-
delay_second = gr.Slider(minimum=0, maximum=5, step=0.01, value=0.5, label=translations["delay_seconds"], info=translations["delay_seconds_info"], interactive=True)
|
| 79 |
-
delay_feedback = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["delay_feedback"], info=translations["delay_feedback_info"], interactive=True)
|
| 80 |
-
delay_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["delay_mix"], info=translations["delay_mix_info"], interactive=True)
|
| 81 |
-
with gr.Column():
|
| 82 |
-
with gr.Row():
|
| 83 |
-
with gr.Accordion(translations["more_option"], open=False, visible=more_options.value) as more_accordion:
|
| 84 |
-
with gr.Row():
|
| 85 |
-
fade = gr.Checkbox(label=translations["fade"], value=False, interactive=True)
|
| 86 |
-
bass_or_treble = gr.Checkbox(label=translations["bass_or_treble"], value=False, interactive=True)
|
| 87 |
-
limiter = gr.Checkbox(label=translations["limiter"], value=False, interactive=True)
|
| 88 |
-
resample_checkbox = gr.Checkbox(label=translations["resample"], value=False, interactive=True)
|
| 89 |
-
with gr.Row():
|
| 90 |
-
distortion_checkbox = gr.Checkbox(label=translations["distortion"], value=False, interactive=True)
|
| 91 |
-
gain_checkbox = gr.Checkbox(label=translations["gain"], value=False, interactive=True)
|
| 92 |
-
bitcrush_checkbox = gr.Checkbox(label=translations["bitcrush"], value=False, interactive=True)
|
| 93 |
-
clipping_checkbox = gr.Checkbox(label=translations["clipping"], value=False, interactive=True)
|
| 94 |
-
with gr.Accordion(translations["fade"], open=True, visible=fade.value) as fade_accordion:
|
| 95 |
-
with gr.Row():
|
| 96 |
-
fade_in = gr.Slider(minimum=0, maximum=10000, step=100, value=0, label=translations["fade_in"], info=translations["fade_in_info"], interactive=True)
|
| 97 |
-
fade_out = gr.Slider(minimum=0, maximum=10000, step=100, value=0, label=translations["fade_out"], info=translations["fade_out_info"], interactive=True)
|
| 98 |
-
with gr.Accordion(translations["bass_or_treble"], open=True, visible=bass_or_treble.value) as bass_treble_accordion:
|
| 99 |
-
with gr.Row():
|
| 100 |
-
bass_boost = gr.Slider(minimum=0, maximum=20, step=1, value=0, label=translations["bass_boost"], info=translations["bass_boost_info"], interactive=True)
|
| 101 |
-
bass_frequency = gr.Slider(minimum=20, maximum=200, step=10, value=100, label=translations["bass_frequency"], info=translations["bass_frequency_info"], interactive=True)
|
| 102 |
-
with gr.Row():
|
| 103 |
-
treble_boost = gr.Slider(minimum=0, maximum=20, step=1, value=0, label=translations["treble_boost"], info=translations["treble_boost_info"], interactive=True)
|
| 104 |
-
treble_frequency = gr.Slider(minimum=1000, maximum=10000, step=500, value=3000, label=translations["treble_frequency"], info=translations["treble_frequency_info"], interactive=True)
|
| 105 |
-
with gr.Accordion(translations["limiter"], open=True, visible=limiter.value) as limiter_accordion:
|
| 106 |
-
with gr.Row():
|
| 107 |
-
limiter_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-1, label=translations["limiter_threshold_db"], info=translations["limiter_threshold_db_info"], interactive=True)
|
| 108 |
-
limiter_release_ms = gr.Slider(minimum=10, maximum=1000, step=1, value=100, label=translations["limiter_release_ms"], info=translations["limiter_release_ms_info"], interactive=True)
|
| 109 |
-
with gr.Column():
|
| 110 |
-
pitch_shift_semitones = gr.Slider(minimum=-20, maximum=20, step=1, value=0, label=translations["pitch"], info=translations["pitch_info"], interactive=True)
|
| 111 |
-
audio_effect_resample_sr = gr.Radio(choices=[0]+sample_rate_choice, value=0, label=translations["resample"], info=translations["resample_info"], interactive=True, visible=resample_checkbox.value)
|
| 112 |
-
distortion_drive_db = gr.Slider(minimum=0, maximum=50, step=1, value=20, label=translations["distortion"], info=translations["distortion_info"], interactive=True, visible=distortion_checkbox.value)
|
| 113 |
-
gain_db = gr.Slider(minimum=-60, maximum=60, step=1, value=0, label=translations["gain"], info=translations["gain_info"], interactive=True, visible=gain_checkbox.value)
|
| 114 |
-
clipping_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-1, label=translations["clipping_threshold_db"], info=translations["clipping_threshold_db_info"], interactive=True, visible=clipping_checkbox.value)
|
| 115 |
-
bitcrush_bit_depth = gr.Slider(minimum=1, maximum=24, step=1, value=16, label=translations["bitcrush_bit_depth"], info=translations["bitcrush_bit_depth_info"], interactive=True, visible=bitcrush_checkbox.value)
|
| 116 |
-
with gr.Row():
|
| 117 |
-
with gr.Accordion(translations["phaser"], open=False, visible=phaser_check_box.value) as phaser_accordion:
|
| 118 |
-
phaser_depth = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["phaser_depth"], info=translations["phaser_depth_info"], interactive=True)
|
| 119 |
-
phaser_rate_hz = gr.Slider(minimum=0.1, maximum=10, step=0.1, value=1, label=translations["phaser_rate_hz"], info=translations["phaser_rate_hz_info"], interactive=True)
|
| 120 |
-
phaser_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["phaser_mix"], info=translations["phaser_mix_info"], interactive=True)
|
| 121 |
-
phaser_centre_frequency_hz = gr.Slider(minimum=50, maximum=5000, step=10, value=1000, label=translations["phaser_centre_frequency_hz"], info=translations["phaser_centre_frequency_hz_info"], interactive=True)
|
| 122 |
-
phaser_feedback = gr.Slider(minimum=-1, maximum=1, step=0.01, value=0, label=translations["phaser_feedback"], info=translations["phaser_feedback_info"], interactive=True)
|
| 123 |
-
with gr.Row():
|
| 124 |
-
with gr.Accordion(translations["compressor"], open=False, visible=compressor_check_box.value) as compressor_accordion:
|
| 125 |
-
compressor_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-20, label=translations["compressor_threshold_db"], info=translations["compressor_threshold_db_info"], interactive=True)
|
| 126 |
-
compressor_ratio = gr.Slider(minimum=1, maximum=20, step=0.1, value=1, label=translations["compressor_ratio"], info=translations["compressor_ratio_info"], interactive=True)
|
| 127 |
-
compressor_attack_ms = gr.Slider(minimum=0.1, maximum=100, step=0.1, value=10, label=translations["compressor_attack_ms"], info=translations["compressor_attack_ms_info"], interactive=True)
|
| 128 |
-
compressor_release_ms = gr.Slider(minimum=10, maximum=1000, step=1, value=100, label=translations["compressor_release_ms"], info=translations["compressor_release_ms_info"], interactive=True)
|
| 129 |
-
with gr.Row():
|
| 130 |
-
gr.Markdown(translations["output_audio"])
|
| 131 |
-
with gr.Row():
|
| 132 |
-
audio_play_input = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
|
| 133 |
-
audio_play_output = gr.Audio(show_download_button=True, interactive=False, label=translations["output_audio"])
|
| 134 |
-
with gr.Row():
|
| 135 |
-
reverb_check_box.change(fn=visible, inputs=[reverb_check_box], outputs=[reverb_accordion])
|
| 136 |
-
chorus_check_box.change(fn=visible, inputs=[chorus_check_box], outputs=[chorus_accordion])
|
| 137 |
-
delay_check_box.change(fn=visible, inputs=[delay_check_box], outputs=[delay_accordion])
|
| 138 |
-
with gr.Row():
|
| 139 |
-
compressor_check_box.change(fn=visible, inputs=[compressor_check_box], outputs=[compressor_accordion])
|
| 140 |
-
phaser_check_box.change(fn=visible, inputs=[phaser_check_box], outputs=[phaser_accordion])
|
| 141 |
-
more_options.change(fn=visible, inputs=[more_options], outputs=[more_accordion])
|
| 142 |
-
with gr.Row():
|
| 143 |
-
fade.change(fn=visible, inputs=[fade], outputs=[fade_accordion])
|
| 144 |
-
bass_or_treble.change(fn=visible, inputs=[bass_or_treble], outputs=[bass_treble_accordion])
|
| 145 |
-
limiter.change(fn=visible, inputs=[limiter], outputs=[limiter_accordion])
|
| 146 |
-
resample_checkbox.change(fn=visible, inputs=[resample_checkbox], outputs=[audio_effect_resample_sr])
|
| 147 |
-
with gr.Row():
|
| 148 |
-
distortion_checkbox.change(fn=visible, inputs=[distortion_checkbox], outputs=[distortion_drive_db])
|
| 149 |
-
gain_checkbox.change(fn=visible, inputs=[gain_checkbox], outputs=[gain_db])
|
| 150 |
-
clipping_checkbox.change(fn=visible, inputs=[clipping_checkbox], outputs=[clipping_threshold_db])
|
| 151 |
-
bitcrush_checkbox.change(fn=visible, inputs=[bitcrush_checkbox], outputs=[bitcrush_bit_depth])
|
| 152 |
-
with gr.Row():
|
| 153 |
-
upload_audio.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[upload_audio], outputs=[audio_in_path])
|
| 154 |
-
audio_in_path.change(fn=lambda audio: audio if audio else None, inputs=[audio_in_path], outputs=[audio_play_input])
|
| 155 |
-
audio_effects_refresh.click(fn=lambda a, b: [change_audios_choices(a), change_audios_choices(b)], inputs=[audio_in_path, audio_combination_input], outputs=[audio_in_path, audio_combination_input])
|
| 156 |
-
with gr.Row():
|
| 157 |
-
more_options.change(fn=lambda: [False]*8, inputs=[], outputs=[fade, bass_or_treble, limiter, resample_checkbox, distortion_checkbox, gain_checkbox, clipping_checkbox, bitcrush_checkbox])
|
| 158 |
-
audio_combination.change(fn=visible, inputs=[audio_combination], outputs=[audio_combination_input])
|
| 159 |
-
audio_combination.change(fn=lambda a: [visible(a) for _ in range(2)], inputs=[audio_combination], outputs=[main_vol, combine_vol])
|
| 160 |
-
with gr.Row():
|
| 161 |
-
upload_presets.upload(fn=lambda presets_in: [shutil_move(preset.name, configs["presets_path"]) for preset in presets_in][0], inputs=[upload_presets], outputs=[presets_name])
|
| 162 |
-
refresh_click.click(fn=change_effect_preset_choices, inputs=[], outputs=[presets_name])
|
| 163 |
-
with gr.Row():
|
| 164 |
-
load_click.click(
|
| 165 |
-
fn=audio_effect_load_presets,
|
| 166 |
-
inputs=[
|
| 167 |
-
presets_name,
|
| 168 |
-
resample_checkbox,
|
| 169 |
-
audio_effect_resample_sr,
|
| 170 |
-
chorus_depth,
|
| 171 |
-
chorus_rate_hz,
|
| 172 |
-
chorus_mix,
|
| 173 |
-
chorus_centre_delay_ms,
|
| 174 |
-
chorus_feedback,
|
| 175 |
-
distortion_drive_db,
|
| 176 |
-
reverb_room_size,
|
| 177 |
-
reverb_damping,
|
| 178 |
-
reverb_wet_level,
|
| 179 |
-
reverb_dry_level,
|
| 180 |
-
reverb_width,
|
| 181 |
-
reverb_freeze_mode,
|
| 182 |
-
pitch_shift_semitones,
|
| 183 |
-
delay_second,
|
| 184 |
-
delay_feedback,
|
| 185 |
-
delay_mix,
|
| 186 |
-
compressor_threshold_db,
|
| 187 |
-
compressor_ratio,
|
| 188 |
-
compressor_attack_ms,
|
| 189 |
-
compressor_release_ms,
|
| 190 |
-
limiter_threshold_db,
|
| 191 |
-
limiter_release_ms,
|
| 192 |
-
gain_db,
|
| 193 |
-
bitcrush_bit_depth,
|
| 194 |
-
clipping_threshold_db,
|
| 195 |
-
phaser_rate_hz,
|
| 196 |
-
phaser_depth,
|
| 197 |
-
phaser_centre_frequency_hz,
|
| 198 |
-
phaser_feedback,
|
| 199 |
-
phaser_mix,
|
| 200 |
-
bass_boost,
|
| 201 |
-
bass_frequency,
|
| 202 |
-
treble_boost,
|
| 203 |
-
treble_frequency,
|
| 204 |
-
fade_in,
|
| 205 |
-
fade_out,
|
| 206 |
-
chorus_check_box,
|
| 207 |
-
distortion_checkbox,
|
| 208 |
-
reverb_check_box,
|
| 209 |
-
delay_check_box,
|
| 210 |
-
compressor_check_box,
|
| 211 |
-
limiter,
|
| 212 |
-
gain_checkbox,
|
| 213 |
-
bitcrush_checkbox,
|
| 214 |
-
clipping_checkbox,
|
| 215 |
-
phaser_check_box,
|
| 216 |
-
bass_or_treble,
|
| 217 |
-
fade
|
| 218 |
-
],
|
| 219 |
-
outputs=[
|
| 220 |
-
resample_checkbox,
|
| 221 |
-
audio_effect_resample_sr,
|
| 222 |
-
chorus_depth,
|
| 223 |
-
chorus_rate_hz,
|
| 224 |
-
chorus_mix,
|
| 225 |
-
chorus_centre_delay_ms,
|
| 226 |
-
chorus_feedback,
|
| 227 |
-
distortion_drive_db,
|
| 228 |
-
reverb_room_size,
|
| 229 |
-
reverb_damping,
|
| 230 |
-
reverb_wet_level,
|
| 231 |
-
reverb_dry_level,
|
| 232 |
-
reverb_width,
|
| 233 |
-
reverb_freeze_mode,
|
| 234 |
-
pitch_shift_semitones,
|
| 235 |
-
delay_second,
|
| 236 |
-
delay_feedback,
|
| 237 |
-
delay_mix,
|
| 238 |
-
compressor_threshold_db,
|
| 239 |
-
compressor_ratio,
|
| 240 |
-
compressor_attack_ms,
|
| 241 |
-
compressor_release_ms,
|
| 242 |
-
limiter_threshold_db,
|
| 243 |
-
limiter_release_ms,
|
| 244 |
-
gain_db,
|
| 245 |
-
bitcrush_bit_depth,
|
| 246 |
-
clipping_threshold_db,
|
| 247 |
-
phaser_rate_hz,
|
| 248 |
-
phaser_depth,
|
| 249 |
-
phaser_centre_frequency_hz,
|
| 250 |
-
phaser_feedback,
|
| 251 |
-
phaser_mix,
|
| 252 |
-
bass_boost,
|
| 253 |
-
bass_frequency,
|
| 254 |
-
treble_boost,
|
| 255 |
-
treble_frequency,
|
| 256 |
-
fade_in,
|
| 257 |
-
fade_out,
|
| 258 |
-
chorus_check_box,
|
| 259 |
-
distortion_checkbox,
|
| 260 |
-
reverb_check_box,
|
| 261 |
-
delay_check_box,
|
| 262 |
-
compressor_check_box,
|
| 263 |
-
limiter,
|
| 264 |
-
gain_checkbox,
|
| 265 |
-
bitcrush_checkbox,
|
| 266 |
-
clipping_checkbox,
|
| 267 |
-
phaser_check_box,
|
| 268 |
-
bass_or_treble,
|
| 269 |
-
fade
|
| 270 |
-
],
|
| 271 |
-
)
|
| 272 |
-
save_file_button.click(
|
| 273 |
-
fn=audio_effect_save_presets,
|
| 274 |
-
inputs=[
|
| 275 |
-
name_to_save_file,
|
| 276 |
-
resample_checkbox,
|
| 277 |
-
audio_effect_resample_sr,
|
| 278 |
-
chorus_depth,
|
| 279 |
-
chorus_rate_hz,
|
| 280 |
-
chorus_mix,
|
| 281 |
-
chorus_centre_delay_ms,
|
| 282 |
-
chorus_feedback,
|
| 283 |
-
distortion_drive_db,
|
| 284 |
-
reverb_room_size,
|
| 285 |
-
reverb_damping,
|
| 286 |
-
reverb_wet_level,
|
| 287 |
-
reverb_dry_level,
|
| 288 |
-
reverb_width,
|
| 289 |
-
reverb_freeze_mode,
|
| 290 |
-
pitch_shift_semitones,
|
| 291 |
-
delay_second,
|
| 292 |
-
delay_feedback,
|
| 293 |
-
delay_mix,
|
| 294 |
-
compressor_threshold_db,
|
| 295 |
-
compressor_ratio,
|
| 296 |
-
compressor_attack_ms,
|
| 297 |
-
compressor_release_ms,
|
| 298 |
-
limiter_threshold_db,
|
| 299 |
-
limiter_release_ms,
|
| 300 |
-
gain_db,
|
| 301 |
-
bitcrush_bit_depth,
|
| 302 |
-
clipping_threshold_db,
|
| 303 |
-
phaser_rate_hz,
|
| 304 |
-
phaser_depth,
|
| 305 |
-
phaser_centre_frequency_hz,
|
| 306 |
-
phaser_feedback,
|
| 307 |
-
phaser_mix,
|
| 308 |
-
bass_boost,
|
| 309 |
-
bass_frequency,
|
| 310 |
-
treble_boost,
|
| 311 |
-
treble_frequency,
|
| 312 |
-
fade_in,
|
| 313 |
-
fade_out,
|
| 314 |
-
chorus_check_box,
|
| 315 |
-
distortion_checkbox,
|
| 316 |
-
reverb_check_box,
|
| 317 |
-
delay_check_box,
|
| 318 |
-
compressor_check_box,
|
| 319 |
-
limiter,
|
| 320 |
-
gain_checkbox,
|
| 321 |
-
bitcrush_checkbox,
|
| 322 |
-
clipping_checkbox,
|
| 323 |
-
phaser_check_box,
|
| 324 |
-
bass_or_treble,
|
| 325 |
-
fade
|
| 326 |
-
],
|
| 327 |
-
outputs=[presets_name]
|
| 328 |
-
)
|
| 329 |
-
with gr.Row():
|
| 330 |
-
apply_effects_button.click(
|
| 331 |
-
fn=audio_effects,
|
| 332 |
-
inputs=[
|
| 333 |
-
audio_in_path,
|
| 334 |
-
audio_out_path,
|
| 335 |
-
resample_checkbox,
|
| 336 |
-
audio_effect_resample_sr,
|
| 337 |
-
chorus_depth,
|
| 338 |
-
chorus_rate_hz,
|
| 339 |
-
chorus_mix,
|
| 340 |
-
chorus_centre_delay_ms,
|
| 341 |
-
chorus_feedback,
|
| 342 |
-
distortion_drive_db,
|
| 343 |
-
reverb_room_size,
|
| 344 |
-
reverb_damping,
|
| 345 |
-
reverb_wet_level,
|
| 346 |
-
reverb_dry_level,
|
| 347 |
-
reverb_width,
|
| 348 |
-
reverb_freeze_mode,
|
| 349 |
-
pitch_shift_semitones,
|
| 350 |
-
delay_second,
|
| 351 |
-
delay_feedback,
|
| 352 |
-
delay_mix,
|
| 353 |
-
compressor_threshold_db,
|
| 354 |
-
compressor_ratio,
|
| 355 |
-
compressor_attack_ms,
|
| 356 |
-
compressor_release_ms,
|
| 357 |
-
limiter_threshold_db,
|
| 358 |
-
limiter_release_ms,
|
| 359 |
-
gain_db,
|
| 360 |
-
bitcrush_bit_depth,
|
| 361 |
-
clipping_threshold_db,
|
| 362 |
-
phaser_rate_hz,
|
| 363 |
-
phaser_depth,
|
| 364 |
-
phaser_centre_frequency_hz,
|
| 365 |
-
phaser_feedback,
|
| 366 |
-
phaser_mix,
|
| 367 |
-
bass_boost,
|
| 368 |
-
bass_frequency,
|
| 369 |
-
treble_boost,
|
| 370 |
-
treble_frequency,
|
| 371 |
-
fade_in,
|
| 372 |
-
fade_out,
|
| 373 |
-
audio_output_format,
|
| 374 |
-
chorus_check_box,
|
| 375 |
-
distortion_checkbox,
|
| 376 |
-
reverb_check_box,
|
| 377 |
-
delay_check_box,
|
| 378 |
-
compressor_check_box,
|
| 379 |
-
limiter,
|
| 380 |
-
gain_checkbox,
|
| 381 |
-
bitcrush_checkbox,
|
| 382 |
-
clipping_checkbox,
|
| 383 |
-
phaser_check_box,
|
| 384 |
-
bass_or_treble,
|
| 385 |
-
fade,
|
| 386 |
-
audio_combination,
|
| 387 |
-
audio_combination_input,
|
| 388 |
-
main_vol,
|
| 389 |
-
combine_vol
|
| 390 |
-
],
|
| 391 |
-
outputs=[audio_play_output],
|
| 392 |
-
api_name="audio_effects"
|
| 393 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/editing/child/quirk.py
DELETED
|
@@ -1,48 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.editing import apply_voice_quirk
|
| 9 |
-
from main.app.core.ui import change_audios_choices, shutil_move
|
| 10 |
-
from main.app.variables import translations, paths_for_files, configs, file_types, export_format_choices
|
| 11 |
-
|
| 12 |
-
def quirk_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["quirk_markdown"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
input_audio_play = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
|
| 17 |
-
with gr.Row():
|
| 18 |
-
quirk_choice = gr.Radio(label=translations["quirk_label"], info=translations["quirk_label_info"], choices=list(translations["quirk_choice"].keys()), interactive=True, value=list(translations["quirk_choice"].keys())[0])
|
| 19 |
-
with gr.Row():
|
| 20 |
-
apply_quirk_button = gr.Button(translations["apply"], variant="primary")
|
| 21 |
-
with gr.Row():
|
| 22 |
-
with gr.Accordion(translations["input_output"], open=False):
|
| 23 |
-
with gr.Row():
|
| 24 |
-
quirk_upload_audio = gr.Files(label=translations["drop_audio"], file_types=file_types)
|
| 25 |
-
with gr.Column():
|
| 26 |
-
quirk_export_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
|
| 27 |
-
quirk_input_path = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
|
| 28 |
-
quirk_output_path = gr.Textbox(label=translations["output_path"], value="audios/output.wav", placeholder="audios/output.wav", info=translations["output_path_info"], interactive=True)
|
| 29 |
-
with gr.Column():
|
| 30 |
-
quirk_refresh = gr.Button(translations["refresh"])
|
| 31 |
-
with gr.Row():
|
| 32 |
-
output_audio_play = gr.Audio(show_download_button=True, interactive=False, label=translations["output_audio"])
|
| 33 |
-
with gr.Row():
|
| 34 |
-
quirk_upload_audio.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[quirk_upload_audio], outputs=[quirk_input_path])
|
| 35 |
-
quirk_input_path.change(fn=lambda audio: audio if audio else None, inputs=[quirk_input_path], outputs=[input_audio_play])
|
| 36 |
-
quirk_refresh.click(fn=change_audios_choices, inputs=[quirk_input_path], outputs=[quirk_input_path])
|
| 37 |
-
with gr.Row():
|
| 38 |
-
apply_quirk_button.click(
|
| 39 |
-
fn=apply_voice_quirk,
|
| 40 |
-
inputs=[
|
| 41 |
-
quirk_input_path,
|
| 42 |
-
quirk_choice,
|
| 43 |
-
quirk_output_path,
|
| 44 |
-
quirk_export_format
|
| 45 |
-
],
|
| 46 |
-
outputs=[output_audio_play],
|
| 47 |
-
api_name="quirk"
|
| 48 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/editing/editing.py
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.variables import configs, translations
|
| 9 |
-
from main.app.tabs.editing.child.quirk import quirk_tab
|
| 10 |
-
from main.app.tabs.editing.child.audio_effects import audio_effects_tab
|
| 11 |
-
|
| 12 |
-
def editing_tab():
|
| 13 |
-
with gr.TabItem(translations["editing"], visible=configs.get("editing_tab", True)):
|
| 14 |
-
with gr.TabItem(translations["audio_effects"], visible=configs.get("effects_tab", True)):
|
| 15 |
-
gr.Markdown(translations["apply_audio_effects"])
|
| 16 |
-
audio_effects_tab()
|
| 17 |
-
|
| 18 |
-
with gr.TabItem(translations["quirk"], visible=configs.get("quirk", True)):
|
| 19 |
-
gr.Markdown(translations["quirk_info"])
|
| 20 |
-
quirk_tab()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/extra/child/convert_model.py
DELETED
|
@@ -1,31 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.ui import visible, shutil_move
|
| 9 |
-
from main.app.core.model_utils import onnx_export
|
| 10 |
-
from main.app.variables import translations, configs
|
| 11 |
-
|
| 12 |
-
def convert_model_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["pytorch2onnx_markdown"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
model_pth_upload = gr.File(label=translations["drop_model"], file_types=[".pth"])
|
| 17 |
-
with gr.Row():
|
| 18 |
-
convert_onnx = gr.Button(translations["convert_model"], variant="primary", scale=2)
|
| 19 |
-
with gr.Row():
|
| 20 |
-
model_pth_path = gr.Textbox(label=translations["model_path"], value="", placeholder="assets/weights/Model.pth", info=translations["model_path_info"], interactive=True)
|
| 21 |
-
with gr.Row():
|
| 22 |
-
output_model2 = gr.File(label=translations["output_model_path"], file_types=[".pth", ".onnx"], interactive=False, visible=False)
|
| 23 |
-
with gr.Row():
|
| 24 |
-
model_pth_upload.upload(fn=lambda model_pth_upload: shutil_move(model_pth_upload.name, configs["weights_path"]), inputs=[model_pth_upload], outputs=[model_pth_path])
|
| 25 |
-
convert_onnx.click(
|
| 26 |
-
fn=onnx_export,
|
| 27 |
-
inputs=[model_pth_path],
|
| 28 |
-
outputs=[output_model2],
|
| 29 |
-
api_name="model_onnx_export"
|
| 30 |
-
)
|
| 31 |
-
convert_onnx.click(fn=lambda: visible(True), inputs=[], outputs=[output_model2])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/extra/child/create_srt.py
DELETED
|
@@ -1,56 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.csrt import create_srt
|
| 9 |
-
from main.app.core.ui import shutil_move, change_audios_choices
|
| 10 |
-
from main.app.variables import translations, file_types, configs, paths_for_files
|
| 11 |
-
|
| 12 |
-
def create_srt_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["create_srt_markdown_2"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
with gr.Column():
|
| 17 |
-
srt_content = gr.Textbox(label=translations["srt_content"], value="", lines=9, max_lines=9, interactive=False)
|
| 18 |
-
with gr.Column():
|
| 19 |
-
word_timestamps = gr.Checkbox(label=translations["word_timestamps"], info=translations["word_timestamps_info"], value=False, interactive=True)
|
| 20 |
-
model_size = gr.Radio(label=translations["model_size"], info=translations["model_size_info"], choices=["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"], value="medium", interactive=True)
|
| 21 |
-
with gr.Row():
|
| 22 |
-
convert_button = gr.Button(translations["convert_audio"], variant="primary")
|
| 23 |
-
with gr.Row():
|
| 24 |
-
with gr.Accordion(translations["input_output"], open=False):
|
| 25 |
-
with gr.Column():
|
| 26 |
-
input_audio = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
|
| 27 |
-
output_file = gr.Textbox(label=translations["srt_output_file"], value="srt/output.srt", placeholder="srt/output.srt", interactive=True)
|
| 28 |
-
with gr.Column():
|
| 29 |
-
refresh = gr.Button(translations["refresh"])
|
| 30 |
-
with gr.Row():
|
| 31 |
-
input_file = gr.Files(label=translations["drop_audio"], file_types=file_types)
|
| 32 |
-
with gr.Row():
|
| 33 |
-
play_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
|
| 34 |
-
with gr.Row():
|
| 35 |
-
output_srt = gr.File(label=translations["srt_output_file"], file_types=[".srt"], interactive=False, visible=False)
|
| 36 |
-
with gr.Row():
|
| 37 |
-
input_file.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[input_file], outputs=[input_audio])
|
| 38 |
-
input_audio.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio], outputs=[play_audio])
|
| 39 |
-
refresh.click(fn=change_audios_choices, inputs=[input_audio], outputs=[input_audio])
|
| 40 |
-
with gr.Row():
|
| 41 |
-
convert_button.click(
|
| 42 |
-
fn=create_srt,
|
| 43 |
-
inputs=[
|
| 44 |
-
model_size,
|
| 45 |
-
input_audio,
|
| 46 |
-
output_file,
|
| 47 |
-
word_timestamps
|
| 48 |
-
],
|
| 49 |
-
outputs=[
|
| 50 |
-
output_srt,
|
| 51 |
-
srt_content
|
| 52 |
-
],
|
| 53 |
-
api_name="create_srt"
|
| 54 |
-
)
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/extra/child/f0_extract.py
DELETED
|
@@ -1,51 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.f0_extract import f0_extract
|
| 9 |
-
from main.app.core.ui import change_audios_choices, unlock_f0, shutil_move
|
| 10 |
-
from main.app.variables import translations, paths_for_files, method_f0, configs, file_types
|
| 11 |
-
|
| 12 |
-
def f0_extract_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["f0_extractor_markdown_2"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
extractor_button = gr.Button(translations["extract_button"].replace("2. ", ""), variant="primary")
|
| 17 |
-
with gr.Row():
|
| 18 |
-
with gr.Column():
|
| 19 |
-
upload_audio_file = gr.Files(label=translations["drop_audio"], file_types=file_types)
|
| 20 |
-
audioplay = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
|
| 21 |
-
with gr.Column():
|
| 22 |
-
with gr.Accordion(translations["f0_method"], open=False):
|
| 23 |
-
with gr.Group():
|
| 24 |
-
with gr.Row():
|
| 25 |
-
onnx_f0_mode3 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
|
| 26 |
-
unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
|
| 27 |
-
f0_method_extract = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=[m for m in method_f0 if m != "hybrid"], value="rmvpe", interactive=True)
|
| 28 |
-
with gr.Accordion(translations["audio_path"], open=True):
|
| 29 |
-
input_audio_path = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, allow_custom_value=True, interactive=True)
|
| 30 |
-
refresh_audio_button = gr.Button(translations["refresh"])
|
| 31 |
-
with gr.Row():
|
| 32 |
-
gr.Markdown("___")
|
| 33 |
-
with gr.Row():
|
| 34 |
-
file_output = gr.File(label="", file_types=[".txt"], interactive=False)
|
| 35 |
-
image_output = gr.Image(label="", interactive=False, show_download_button=True)
|
| 36 |
-
with gr.Row():
|
| 37 |
-
upload_audio_file.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[upload_audio_file], outputs=[input_audio_path])
|
| 38 |
-
input_audio_path.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio_path], outputs=[audioplay])
|
| 39 |
-
refresh_audio_button.click(fn=change_audios_choices, inputs=[input_audio_path], outputs=[input_audio_path])
|
| 40 |
-
with gr.Row():
|
| 41 |
-
unlock_full_method.change(fn=lambda method: {"choices": [m for m in unlock_f0(method)["choices"] if m != "hybrid"], "value": "rmvpe", "__type__": "update"}, inputs=[unlock_full_method], outputs=[f0_method_extract])
|
| 42 |
-
extractor_button.click(
|
| 43 |
-
fn=f0_extract,
|
| 44 |
-
inputs=[
|
| 45 |
-
input_audio_path,
|
| 46 |
-
f0_method_extract,
|
| 47 |
-
onnx_f0_mode3
|
| 48 |
-
],
|
| 49 |
-
outputs=[file_output, image_output],
|
| 50 |
-
api_name="f0_extract"
|
| 51 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/extra/child/fushion.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.ui import visible, shutil_move
|
| 9 |
-
from main.app.core.model_utils import fushion_model
|
| 10 |
-
from main.app.variables import translations, configs
|
| 11 |
-
|
| 12 |
-
def fushion_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["fushion_markdown_2"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
name_to_save = gr.Textbox(label=translations["modelname"], placeholder="Model.pth", value="", max_lines=1, interactive=True)
|
| 17 |
-
with gr.Row():
|
| 18 |
-
fushion_button = gr.Button(translations["fushion"], variant="primary", scale=4)
|
| 19 |
-
with gr.Column():
|
| 20 |
-
with gr.Row():
|
| 21 |
-
model_a = gr.File(label=f"{translations['model_name']} 1", file_types=[".pth", ".onnx"])
|
| 22 |
-
model_b = gr.File(label=f"{translations['model_name']} 2", file_types=[".pth", ".onnx"])
|
| 23 |
-
with gr.Row():
|
| 24 |
-
model_path_a = gr.Textbox(label=f"{translations['model_path']} 1", value="", placeholder="assets/weights/Model_1.pth")
|
| 25 |
-
model_path_b = gr.Textbox(label=f"{translations['model_path']} 2", value="", placeholder="assets/weights/Model_2.pth")
|
| 26 |
-
with gr.Row():
|
| 27 |
-
ratio = gr.Slider(minimum=0, maximum=1, label=translations["model_ratio"], info=translations["model_ratio_info"], value=0.5, interactive=True)
|
| 28 |
-
with gr.Row():
|
| 29 |
-
output_model = gr.File(label=translations["output_model_path"], file_types=[".pth", ".onnx"], interactive=False, visible=False)
|
| 30 |
-
with gr.Row():
|
| 31 |
-
model_a.upload(fn=lambda model: shutil_move(model.name, configs["weights_path"]), inputs=[model_a], outputs=[model_path_a])
|
| 32 |
-
model_b.upload(fn=lambda model: shutil_move(model.name, configs["weights_path"]), inputs=[model_b], outputs=[model_path_b])
|
| 33 |
-
with gr.Row():
|
| 34 |
-
fushion_button.click(
|
| 35 |
-
fn=fushion_model,
|
| 36 |
-
inputs=[
|
| 37 |
-
name_to_save,
|
| 38 |
-
model_path_a,
|
| 39 |
-
model_path_b,
|
| 40 |
-
ratio
|
| 41 |
-
],
|
| 42 |
-
outputs=[name_to_save, output_model],
|
| 43 |
-
api_name="fushion_model"
|
| 44 |
-
)
|
| 45 |
-
fushion_button.click(fn=lambda: visible(True), inputs=[], outputs=[output_model])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/extra/child/read_model.py
DELETED
|
@@ -1,29 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.ui import shutil_move
|
| 9 |
-
from main.app.core.model_utils import model_info
|
| 10 |
-
from main.app.variables import translations, configs
|
| 11 |
-
|
| 12 |
-
def read_model_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["read_model_markdown_2"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
model = gr.File(label=translations["drop_model"], file_types=[".pth", ".onnx"])
|
| 17 |
-
with gr.Row():
|
| 18 |
-
read_button = gr.Button(translations["readmodel"], variant="primary", scale=2)
|
| 19 |
-
with gr.Column():
|
| 20 |
-
model_path = gr.Textbox(label=translations["model_path"], value="", placeholder="assets/weights/Model.pth", info=translations["model_path_info"], interactive=True)
|
| 21 |
-
output_info = gr.Textbox(label=translations["modelinfo"], value="", interactive=False, scale=6)
|
| 22 |
-
with gr.Row():
|
| 23 |
-
model.upload(fn=lambda model: shutil_move(model.name, configs["weights_path"]), inputs=[model], outputs=[model_path])
|
| 24 |
-
read_button.click(
|
| 25 |
-
fn=model_info,
|
| 26 |
-
inputs=[model_path],
|
| 27 |
-
outputs=[output_info],
|
| 28 |
-
api_name="read_model"
|
| 29 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/extra/child/settings.py
DELETED
|
@@ -1,61 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.ui import change_fp
|
| 9 |
-
from main.app.core.utils import stop_pid
|
| 10 |
-
from main.app.core.restart import change_font, change_language, change_theme
|
| 11 |
-
from main.app.variables import translations, theme, font, configs, language, config
|
| 12 |
-
|
| 13 |
-
def settings_tab(app):
|
| 14 |
-
with gr.Row():
|
| 15 |
-
gr.Markdown(translations["settings_markdown_2"])
|
| 16 |
-
with gr.Row():
|
| 17 |
-
toggle_button = gr.Button(translations["change_light_dark"], variant="secondary", scale=2)
|
| 18 |
-
with gr.Row():
|
| 19 |
-
with gr.Column():
|
| 20 |
-
language_dropdown = gr.Dropdown(label=translations["lang"], interactive=True, info=translations["lang_restart"], choices=configs.get("support_language", "vi-VN"), value=language)
|
| 21 |
-
change_lang = gr.Button(translations["change_lang"], variant="primary", scale=2)
|
| 22 |
-
with gr.Column():
|
| 23 |
-
theme_dropdown = gr.Dropdown(label=translations["theme"], interactive=True, info=translations["theme_restart"], choices=configs.get("themes", theme), value=theme, allow_custom_value=True)
|
| 24 |
-
changetheme = gr.Button(translations["theme_button"], variant="primary", scale=2)
|
| 25 |
-
with gr.Row():
|
| 26 |
-
with gr.Column():
|
| 27 |
-
fp_choice = gr.Radio(choices=["fp16","fp32"], value="fp16" if configs.get("fp16", False) else "fp32", label=translations["precision"], info=translations["precision_info"], interactive=config.device not in ["cpu", "mps", "ocl:0"])
|
| 28 |
-
fp_button = gr.Button(translations["update_precision"], variant="secondary", scale=2)
|
| 29 |
-
with gr.Column():
|
| 30 |
-
font_choice = gr.Textbox(label=translations["font"], info=translations["font_info"], value=font, interactive=True)
|
| 31 |
-
font_button = gr.Button(translations["change_font"])
|
| 32 |
-
with gr.Row():
|
| 33 |
-
with gr.Column():
|
| 34 |
-
with gr.Accordion(translations["stop"], open=False, visible=True):
|
| 35 |
-
separate_stop = gr.Button(translations["stop_separate"])
|
| 36 |
-
convert_stop = gr.Button(translations["stop_convert"])
|
| 37 |
-
create_dataset_stop = gr.Button(translations["stop_create_dataset"])
|
| 38 |
-
with gr.Accordion(translations["stop_training"], open=False):
|
| 39 |
-
model_name_stop = gr.Textbox(label=translations["modelname"], info=translations["training_model_name"], value="", placeholder=translations["modelname"], interactive=True)
|
| 40 |
-
preprocess_stop = gr.Button(translations["stop_preprocess"])
|
| 41 |
-
extract_stop = gr.Button(translations["stop_extract"])
|
| 42 |
-
train_stop = gr.Button(translations["stop_training"])
|
| 43 |
-
with gr.Row():
|
| 44 |
-
toggle_button.click(fn=None, js="() => {document.body.classList.toggle('dark')}")
|
| 45 |
-
fp_button.click(fn=change_fp, inputs=[fp_choice], outputs=[fp_choice])
|
| 46 |
-
with gr.Row():
|
| 47 |
-
change_lang.click(fn=lambda a: change_language(a, app), inputs=[language_dropdown], outputs=[])
|
| 48 |
-
changetheme.click(fn=lambda a: change_theme(a, app) , inputs=[theme_dropdown], outputs=[])
|
| 49 |
-
font_button.click(fn=lambda a: change_font(a, app), inputs=[font_choice], outputs=[])
|
| 50 |
-
with gr.Row():
|
| 51 |
-
change_lang.click(fn=None, js="setTimeout(function() {location.reload()}, 30000)", inputs=[], outputs=[])
|
| 52 |
-
changetheme.click(fn=None, js="setTimeout(function() {location.reload()}, 30000)", inputs=[], outputs=[])
|
| 53 |
-
font_button.click(fn=None, js="setTimeout(function() {location.reload()}, 30000)", inputs=[], outputs=[])
|
| 54 |
-
with gr.Row():
|
| 55 |
-
separate_stop.click(fn=lambda: stop_pid("separate_pid", None, False), inputs=[], outputs=[])
|
| 56 |
-
convert_stop.click(fn=lambda: stop_pid("convert_pid", None, False), inputs=[], outputs=[])
|
| 57 |
-
create_dataset_stop.click(fn=lambda: stop_pid("create_dataset_pid", None, False), inputs=[], outputs=[])
|
| 58 |
-
with gr.Row():
|
| 59 |
-
preprocess_stop.click(fn=lambda model_name_stop: stop_pid("preprocess_pid", model_name_stop, False), inputs=[model_name_stop], outputs=[])
|
| 60 |
-
extract_stop.click(fn=lambda model_name_stop: stop_pid("extract_pid", model_name_stop, False), inputs=[model_name_stop], outputs=[])
|
| 61 |
-
train_stop.click(fn=lambda model_name_stop: stop_pid("train_pid", model_name_stop, True), inputs=[model_name_stop], outputs=[])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/extra/extra.py
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.variables import translations, configs
|
| 9 |
-
from main.app.tabs.extra.child.fushion import fushion_tab
|
| 10 |
-
from main.app.tabs.extra.child.settings import settings_tab
|
| 11 |
-
from main.app.tabs.extra.child.read_model import read_model_tab
|
| 12 |
-
from main.app.tabs.extra.child.f0_extract import f0_extract_tab
|
| 13 |
-
from main.app.tabs.extra.child.create_srt import create_srt_tab
|
| 14 |
-
from main.app.tabs.extra.child.convert_model import convert_model_tab
|
| 15 |
-
|
| 16 |
-
def extra_tab(app):
|
| 17 |
-
with gr.TabItem(translations["extra"], visible=configs.get("extra_tab", True)):
|
| 18 |
-
with gr.TabItem(translations["fushion"], visible=configs.get("fushion_tab", True)):
|
| 19 |
-
gr.Markdown(translations["fushion_markdown"])
|
| 20 |
-
fushion_tab()
|
| 21 |
-
|
| 22 |
-
with gr.TabItem(translations["read_model"], visible=configs.get("read_tab", True)):
|
| 23 |
-
gr.Markdown(translations["read_model_markdown"])
|
| 24 |
-
read_model_tab()
|
| 25 |
-
|
| 26 |
-
with gr.TabItem(translations["convert_model"], visible=configs.get("onnx_tab", True)):
|
| 27 |
-
gr.Markdown(translations["pytorch2onnx"])
|
| 28 |
-
convert_model_tab()
|
| 29 |
-
|
| 30 |
-
with gr.TabItem(translations["f0_extractor_tab"], visible=configs.get("f0_extractor_tab", True)):
|
| 31 |
-
gr.Markdown(translations["f0_extractor_markdown"])
|
| 32 |
-
f0_extract_tab()
|
| 33 |
-
|
| 34 |
-
with gr.TabItem(translations["create_srt_tab"], visible=configs.get("create_srt_tab", True)):
|
| 35 |
-
gr.Markdown(translations["create_srt_markdown"])
|
| 36 |
-
create_srt_tab()
|
| 37 |
-
|
| 38 |
-
with gr.TabItem(translations["settings"], visible=configs.get("settings_tab", True)):
|
| 39 |
-
gr.Markdown(translations["settings_markdown"])
|
| 40 |
-
settings_tab(app)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/inference/child/convert.py
DELETED
|
@@ -1,328 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.presets import load_presets, save_presets
|
| 9 |
-
from main.app.core.inference import convert_audio, convert_selection
|
| 10 |
-
from main.app.variables import translations, paths_for_files, sample_rate_choice, model_name, index_path, method_f0, f0_file, embedders_mode, embedders_model, presets_file, configs, file_types, export_format_choices, hybrid_f0_method
|
| 11 |
-
from main.app.core.ui import visible, valueFalse_interactive, change_audios_choices, change_f0_choices, unlock_f0, change_preset_choices, change_backing_choices, hoplength_show, change_models_choices, get_index, index_strength_show, change_embedders_mode, shutil_move
|
| 12 |
-
|
| 13 |
-
def convert_tab():
|
| 14 |
-
with gr.Row():
|
| 15 |
-
gr.Markdown(translations["convert_info"])
|
| 16 |
-
with gr.Row():
|
| 17 |
-
with gr.Column():
|
| 18 |
-
with gr.Group():
|
| 19 |
-
with gr.Row():
|
| 20 |
-
cleaner0 = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
|
| 21 |
-
autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
|
| 22 |
-
use_audio = gr.Checkbox(label=translations["use_audio"], value=False, interactive=True)
|
| 23 |
-
checkpointing = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
|
| 24 |
-
with gr.Row():
|
| 25 |
-
use_original = gr.Checkbox(label=translations["convert_original"], value=False, interactive=True, visible=use_audio.value)
|
| 26 |
-
convert_backing = gr.Checkbox(label=translations["convert_backing"], value=False, interactive=True, visible=use_audio.value)
|
| 27 |
-
not_merge_backing = gr.Checkbox(label=translations["not_merge_backing"], value=False, interactive=True, visible=use_audio.value)
|
| 28 |
-
merge_instrument = gr.Checkbox(label=translations["merge_instruments"], value=False, interactive=True, visible=use_audio.value)
|
| 29 |
-
with gr.Row():
|
| 30 |
-
pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
|
| 31 |
-
clean_strength0 = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner0.value)
|
| 32 |
-
with gr.Row():
|
| 33 |
-
with gr.Column():
|
| 34 |
-
audio_select = gr.Dropdown(label=translations["select_separate"], choices=[], value="", interactive=True, allow_custom_value=True, visible=False)
|
| 35 |
-
convert_button_2 = gr.Button(translations["convert_audio"], visible=False)
|
| 36 |
-
with gr.Row():
|
| 37 |
-
with gr.Column():
|
| 38 |
-
convert_button = gr.Button(translations["convert_audio"], variant="primary")
|
| 39 |
-
with gr.Row():
|
| 40 |
-
with gr.Column():
|
| 41 |
-
input0 = gr.Files(label=translations["drop_audio"], file_types=file_types)
|
| 42 |
-
play_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
|
| 43 |
-
with gr.Column():
|
| 44 |
-
with gr.Accordion(translations["model_accordion"], open=True):
|
| 45 |
-
with gr.Row():
|
| 46 |
-
model_pth = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 47 |
-
model_index = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 48 |
-
with gr.Row():
|
| 49 |
-
refresh = gr.Button(translations["refresh"])
|
| 50 |
-
with gr.Row():
|
| 51 |
-
index_strength = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index.value != "")
|
| 52 |
-
with gr.Accordion(translations["input_output"], open=False):
|
| 53 |
-
with gr.Column():
|
| 54 |
-
export_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
|
| 55 |
-
input_audio0 = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
|
| 56 |
-
output_audio = gr.Textbox(label=translations["output_path"], value="audios/output.wav", placeholder="audios/output.wav", info=translations["output_path_info"], interactive=True)
|
| 57 |
-
with gr.Column():
|
| 58 |
-
refresh0 = gr.Button(translations["refresh"])
|
| 59 |
-
with gr.Accordion(translations["setting"], open=False):
|
| 60 |
-
with gr.Accordion(translations["f0_method"], open=False):
|
| 61 |
-
with gr.Group():
|
| 62 |
-
with gr.Row():
|
| 63 |
-
onnx_f0_mode = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
|
| 64 |
-
unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
|
| 65 |
-
method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
|
| 66 |
-
hybrid_method = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=method.value == "hybrid")
|
| 67 |
-
hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
|
| 68 |
-
alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
|
| 69 |
-
with gr.Accordion(translations["f0_file"], open=False):
|
| 70 |
-
upload_f0_file = gr.File(label=translations["upload_f0"], file_types=[".txt"])
|
| 71 |
-
f0_file_dropdown = gr.Dropdown(label=translations["f0_file_2"], value="", choices=f0_file, allow_custom_value=True, interactive=True)
|
| 72 |
-
refresh_f0_file = gr.Button(translations["refresh"])
|
| 73 |
-
with gr.Accordion(translations["hubert_model"], open=False):
|
| 74 |
-
embed_mode = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
| 75 |
-
embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
| 76 |
-
custom_embedders = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders.value == "custom")
|
| 77 |
-
with gr.Accordion(translations["use_presets"], open=False):
|
| 78 |
-
with gr.Row():
|
| 79 |
-
presets_name = gr.Dropdown(label=translations["file_preset"], choices=presets_file, value=presets_file[0] if len(presets_file) > 0 else '', interactive=True, allow_custom_value=True)
|
| 80 |
-
with gr.Row():
|
| 81 |
-
load_click = gr.Button(translations["load_file"], variant="primary")
|
| 82 |
-
refresh_click = gr.Button(translations["refresh"])
|
| 83 |
-
with gr.Accordion(translations["export_file"], open=False):
|
| 84 |
-
with gr.Row():
|
| 85 |
-
with gr.Column():
|
| 86 |
-
with gr.Group():
|
| 87 |
-
with gr.Row():
|
| 88 |
-
cleaner_chbox = gr.Checkbox(label=translations["save_clean"], value=True, interactive=True)
|
| 89 |
-
autotune_chbox = gr.Checkbox(label=translations["save_autotune"], value=True, interactive=True)
|
| 90 |
-
pitch_chbox = gr.Checkbox(label=translations["save_pitch"], value=True, interactive=True)
|
| 91 |
-
index_strength_chbox = gr.Checkbox(label=translations["save_index_2"], value=True, interactive=True)
|
| 92 |
-
resample_sr_chbox = gr.Checkbox(label=translations["save_resample"], value=True, interactive=True)
|
| 93 |
-
filter_radius_chbox = gr.Checkbox(label=translations["save_filter"], value=True, interactive=True)
|
| 94 |
-
rms_mix_rate_chbox = gr.Checkbox(label=translations["save_envelope"], value=True, interactive=True)
|
| 95 |
-
protect_chbox = gr.Checkbox(label=translations["save_protect"], value=True, interactive=True)
|
| 96 |
-
split_audio_chbox = gr.Checkbox(label=translations["save_split"], value=True, interactive=True)
|
| 97 |
-
formant_shifting_chbox = gr.Checkbox(label=translations["formantshift"], value=True, interactive=True)
|
| 98 |
-
with gr.Row():
|
| 99 |
-
with gr.Column():
|
| 100 |
-
name_to_save_file = gr.Textbox(label=translations["filename_to_save"])
|
| 101 |
-
save_file_button = gr.Button(translations["export_file"])
|
| 102 |
-
with gr.Row():
|
| 103 |
-
upload_presets = gr.Files(label=translations["upload_presets"], file_types=[".conversion.json"])
|
| 104 |
-
with gr.Column():
|
| 105 |
-
with gr.Group():
|
| 106 |
-
with gr.Row():
|
| 107 |
-
split_audio = gr.Checkbox(label=translations["split_audio"], value=False, interactive=True)
|
| 108 |
-
formant_shifting = gr.Checkbox(label=translations["formantshift"], value=False, interactive=True)
|
| 109 |
-
with gr.Row():
|
| 110 |
-
proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
|
| 111 |
-
audio_processing = gr.Checkbox(label=translations["audio_processing"], value=False, interactive=True)
|
| 112 |
-
resample_sr = gr.Radio(choices=[0]+sample_rate_choice, label=translations["resample"], info=translations["resample_info"], value=0, interactive=True)
|
| 113 |
-
proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
|
| 114 |
-
f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune.value)
|
| 115 |
-
filter_radius = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
|
| 116 |
-
rms_mix_rate = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
|
| 117 |
-
protect = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
|
| 118 |
-
with gr.Row():
|
| 119 |
-
formant_qfrency = gr.Slider(value=1.0, label=translations["formant_qfrency"], info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 120 |
-
formant_timbre = gr.Slider(value=1.0, label=translations["formant_timbre"], info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 121 |
-
with gr.Row():
|
| 122 |
-
gr.Markdown(translations["output_convert"])
|
| 123 |
-
with gr.Row():
|
| 124 |
-
main_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["main_convert"])
|
| 125 |
-
backing_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["convert_backing"], visible=convert_backing.value)
|
| 126 |
-
main_backing = gr.Audio(show_download_button=True, interactive=False, label=translations["main_or_backing"], visible=convert_backing.value)
|
| 127 |
-
with gr.Row():
|
| 128 |
-
original_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["convert_original"], visible=use_original.value)
|
| 129 |
-
vocal_instrument = gr.Audio(show_download_button=True, interactive=False, label=translations["voice_or_instruments"], visible=merge_instrument.value)
|
| 130 |
-
with gr.Row():
|
| 131 |
-
upload_f0_file.upload(fn=lambda inp: shutil_move(inp.name, configs["f0_path"]), inputs=[upload_f0_file], outputs=[f0_file_dropdown])
|
| 132 |
-
refresh_f0_file.click(fn=change_f0_choices, inputs=[], outputs=[f0_file_dropdown])
|
| 133 |
-
unlock_full_method.change(fn=unlock_f0, inputs=[unlock_full_method], outputs=[method])
|
| 134 |
-
with gr.Row():
|
| 135 |
-
load_click.click(
|
| 136 |
-
fn=load_presets,
|
| 137 |
-
inputs=[
|
| 138 |
-
presets_name,
|
| 139 |
-
cleaner0,
|
| 140 |
-
autotune,
|
| 141 |
-
pitch,
|
| 142 |
-
clean_strength0,
|
| 143 |
-
index_strength,
|
| 144 |
-
resample_sr,
|
| 145 |
-
filter_radius,
|
| 146 |
-
rms_mix_rate,
|
| 147 |
-
protect,
|
| 148 |
-
split_audio,
|
| 149 |
-
f0_autotune_strength,
|
| 150 |
-
formant_shifting,
|
| 151 |
-
formant_qfrency,
|
| 152 |
-
formant_timbre,
|
| 153 |
-
proposal_pitch,
|
| 154 |
-
proposal_pitch_threshold
|
| 155 |
-
],
|
| 156 |
-
outputs=[
|
| 157 |
-
cleaner0,
|
| 158 |
-
autotune,
|
| 159 |
-
pitch,
|
| 160 |
-
clean_strength0,
|
| 161 |
-
index_strength,
|
| 162 |
-
resample_sr,
|
| 163 |
-
filter_radius,
|
| 164 |
-
rms_mix_rate,
|
| 165 |
-
protect,
|
| 166 |
-
split_audio,
|
| 167 |
-
f0_autotune_strength,
|
| 168 |
-
formant_shifting,
|
| 169 |
-
formant_qfrency,
|
| 170 |
-
formant_timbre,
|
| 171 |
-
proposal_pitch,
|
| 172 |
-
proposal_pitch_threshold
|
| 173 |
-
]
|
| 174 |
-
)
|
| 175 |
-
refresh_click.click(fn=change_preset_choices, inputs=[], outputs=[presets_name])
|
| 176 |
-
save_file_button.click(
|
| 177 |
-
fn=save_presets,
|
| 178 |
-
inputs=[
|
| 179 |
-
name_to_save_file,
|
| 180 |
-
cleaner0,
|
| 181 |
-
autotune,
|
| 182 |
-
pitch,
|
| 183 |
-
clean_strength0,
|
| 184 |
-
index_strength,
|
| 185 |
-
resample_sr,
|
| 186 |
-
filter_radius,
|
| 187 |
-
rms_mix_rate,
|
| 188 |
-
protect,
|
| 189 |
-
split_audio,
|
| 190 |
-
f0_autotune_strength,
|
| 191 |
-
cleaner_chbox,
|
| 192 |
-
autotune_chbox,
|
| 193 |
-
pitch_chbox,
|
| 194 |
-
index_strength_chbox,
|
| 195 |
-
resample_sr_chbox,
|
| 196 |
-
filter_radius_chbox,
|
| 197 |
-
rms_mix_rate_chbox,
|
| 198 |
-
protect_chbox,
|
| 199 |
-
split_audio_chbox,
|
| 200 |
-
formant_shifting_chbox,
|
| 201 |
-
formant_shifting,
|
| 202 |
-
formant_qfrency,
|
| 203 |
-
formant_timbre,
|
| 204 |
-
proposal_pitch,
|
| 205 |
-
proposal_pitch_threshold
|
| 206 |
-
],
|
| 207 |
-
outputs=[presets_name]
|
| 208 |
-
)
|
| 209 |
-
with gr.Row():
|
| 210 |
-
upload_presets.upload(fn=lambda presets_in: [shutil_move(preset.name, configs["presets_path"]) for preset in presets_in][0], inputs=[upload_presets], outputs=[presets_name])
|
| 211 |
-
autotune.change(fn=visible, inputs=[autotune], outputs=[f0_autotune_strength])
|
| 212 |
-
use_audio.change(fn=lambda a: [visible(a), visible(a), visible(a), visible(a), visible(a), valueFalse_interactive(a), valueFalse_interactive(a), valueFalse_interactive(a), valueFalse_interactive(a), visible(not a), visible(not a), visible(not a), visible(not a)], inputs=[use_audio], outputs=[main_backing, use_original, convert_backing, not_merge_backing, merge_instrument, use_original, convert_backing, not_merge_backing, merge_instrument, input_audio0, output_audio, input0, play_audio])
|
| 213 |
-
with gr.Row():
|
| 214 |
-
convert_backing.change(fn=lambda a,b: [change_backing_choices(a, b), visible(a)], inputs=[convert_backing, not_merge_backing], outputs=[use_original, backing_convert])
|
| 215 |
-
use_original.change(fn=lambda audio, original: [visible(original), visible(not original), visible(audio and not original), valueFalse_interactive(not original), valueFalse_interactive(not original)], inputs=[use_audio, use_original], outputs=[original_convert, main_convert, main_backing, convert_backing, not_merge_backing])
|
| 216 |
-
cleaner0.change(fn=visible, inputs=[cleaner0], outputs=[clean_strength0])
|
| 217 |
-
with gr.Row():
|
| 218 |
-
merge_instrument.change(fn=visible, inputs=[merge_instrument], outputs=[vocal_instrument])
|
| 219 |
-
not_merge_backing.change(fn=lambda audio, merge, cvb: [visible(audio and not merge), change_backing_choices(cvb, merge)], inputs=[use_audio, not_merge_backing, convert_backing], outputs=[main_backing, use_original])
|
| 220 |
-
method.change(fn=lambda method, hybrid: [visible(method == "hybrid"), visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[method, hybrid_method], outputs=[hybrid_method, alpha, hop_length])
|
| 221 |
-
with gr.Row():
|
| 222 |
-
hybrid_method.change(fn=hoplength_show, inputs=[method, hybrid_method], outputs=[hop_length])
|
| 223 |
-
refresh.click(fn=change_models_choices, inputs=[], outputs=[model_pth, model_index])
|
| 224 |
-
model_pth.change(fn=get_index, inputs=[model_pth], outputs=[model_index])
|
| 225 |
-
with gr.Row():
|
| 226 |
-
input0.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[input0], outputs=[input_audio0])
|
| 227 |
-
input_audio0.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio0], outputs=[play_audio])
|
| 228 |
-
formant_shifting.change(fn=lambda a: [visible(a) for _ in range(2)], inputs=[formant_shifting], outputs=[formant_qfrency, formant_timbre])
|
| 229 |
-
with gr.Row():
|
| 230 |
-
embedders.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders], outputs=[custom_embedders])
|
| 231 |
-
refresh0.click(fn=change_audios_choices, inputs=[input_audio0], outputs=[input_audio0])
|
| 232 |
-
model_index.change(fn=index_strength_show, inputs=[model_index], outputs=[index_strength])
|
| 233 |
-
with gr.Row():
|
| 234 |
-
convert_button.click(fn=lambda: visible(False), inputs=[], outputs=[convert_button])
|
| 235 |
-
convert_button_2.click(fn=lambda: [visible(False), visible(False)], inputs=[], outputs=[audio_select, convert_button_2])
|
| 236 |
-
with gr.Row():
|
| 237 |
-
proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
|
| 238 |
-
embed_mode.change(fn=change_embedders_mode, inputs=[embed_mode], outputs=[embedders])
|
| 239 |
-
with gr.Row():
|
| 240 |
-
convert_button.click(
|
| 241 |
-
fn=convert_selection,
|
| 242 |
-
inputs=[
|
| 243 |
-
cleaner0,
|
| 244 |
-
autotune,
|
| 245 |
-
use_audio,
|
| 246 |
-
use_original,
|
| 247 |
-
convert_backing,
|
| 248 |
-
not_merge_backing,
|
| 249 |
-
merge_instrument,
|
| 250 |
-
pitch,
|
| 251 |
-
clean_strength0,
|
| 252 |
-
model_pth,
|
| 253 |
-
model_index,
|
| 254 |
-
index_strength,
|
| 255 |
-
input_audio0,
|
| 256 |
-
output_audio,
|
| 257 |
-
export_format,
|
| 258 |
-
method,
|
| 259 |
-
hybrid_method,
|
| 260 |
-
hop_length,
|
| 261 |
-
embedders,
|
| 262 |
-
custom_embedders,
|
| 263 |
-
resample_sr,
|
| 264 |
-
filter_radius,
|
| 265 |
-
rms_mix_rate,
|
| 266 |
-
protect,
|
| 267 |
-
split_audio,
|
| 268 |
-
f0_autotune_strength,
|
| 269 |
-
checkpointing,
|
| 270 |
-
onnx_f0_mode,
|
| 271 |
-
formant_shifting,
|
| 272 |
-
formant_qfrency,
|
| 273 |
-
formant_timbre,
|
| 274 |
-
f0_file_dropdown,
|
| 275 |
-
embed_mode,
|
| 276 |
-
proposal_pitch,
|
| 277 |
-
proposal_pitch_threshold,
|
| 278 |
-
audio_processing,
|
| 279 |
-
alpha
|
| 280 |
-
],
|
| 281 |
-
outputs=[audio_select, main_convert, backing_convert, main_backing, original_convert, vocal_instrument, convert_button, convert_button_2],
|
| 282 |
-
api_name="convert_selection"
|
| 283 |
-
)
|
| 284 |
-
convert_button_2.click(
|
| 285 |
-
fn=convert_audio,
|
| 286 |
-
inputs=[
|
| 287 |
-
cleaner0,
|
| 288 |
-
autotune,
|
| 289 |
-
use_audio,
|
| 290 |
-
use_original,
|
| 291 |
-
convert_backing,
|
| 292 |
-
not_merge_backing,
|
| 293 |
-
merge_instrument,
|
| 294 |
-
pitch,
|
| 295 |
-
clean_strength0,
|
| 296 |
-
model_pth,
|
| 297 |
-
model_index,
|
| 298 |
-
index_strength,
|
| 299 |
-
input_audio0,
|
| 300 |
-
output_audio,
|
| 301 |
-
export_format,
|
| 302 |
-
method,
|
| 303 |
-
hybrid_method,
|
| 304 |
-
hop_length,
|
| 305 |
-
embedders,
|
| 306 |
-
custom_embedders,
|
| 307 |
-
resample_sr,
|
| 308 |
-
filter_radius,
|
| 309 |
-
rms_mix_rate,
|
| 310 |
-
protect,
|
| 311 |
-
split_audio,
|
| 312 |
-
f0_autotune_strength,
|
| 313 |
-
audio_select,
|
| 314 |
-
checkpointing,
|
| 315 |
-
onnx_f0_mode,
|
| 316 |
-
formant_shifting,
|
| 317 |
-
formant_qfrency,
|
| 318 |
-
formant_timbre,
|
| 319 |
-
f0_file_dropdown,
|
| 320 |
-
embed_mode,
|
| 321 |
-
proposal_pitch,
|
| 322 |
-
proposal_pitch_threshold,
|
| 323 |
-
audio_processing,
|
| 324 |
-
alpha
|
| 325 |
-
],
|
| 326 |
-
outputs=[main_convert, backing_convert, main_backing, original_convert, vocal_instrument, convert_button],
|
| 327 |
-
api_name="convert_audio"
|
| 328 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/inference/child/convert_tts.py
DELETED
|
@@ -1,280 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.tts import TTS
|
| 9 |
-
from main.app.core.process import process_input
|
| 10 |
-
from main.app.core.inference import convert_tts
|
| 11 |
-
from main.app.core.utils import google_translate
|
| 12 |
-
from main.app.core.presets import save_presets, load_presets
|
| 13 |
-
from main.app.core.ui import visible, change_f0_choices, unlock_f0, hoplength_show, change_models_choices, get_index, index_strength_show, change_embedders_mode, change_tts_voice_choices, shutil_move, change_preset_choices
|
| 14 |
-
from main.app.variables import translations, sample_rate_choice, model_name, index_path, method_f0, f0_file, embedders_mode, embedders_model, edgetts, google_tts_voice, configs, presets_file, export_format_choices, hybrid_f0_method
|
| 15 |
-
|
| 16 |
-
def convert_tts_tab():
|
| 17 |
-
with gr.Row():
|
| 18 |
-
gr.Markdown(translations["convert_text_markdown_2"])
|
| 19 |
-
with gr.Row():
|
| 20 |
-
with gr.Column():
|
| 21 |
-
with gr.Group():
|
| 22 |
-
with gr.Row():
|
| 23 |
-
use_txt = gr.Checkbox(label=translations["input_txt"], value=False, interactive=True)
|
| 24 |
-
google_tts_check_box = gr.Checkbox(label=translations["googletts"], value=False, interactive=True)
|
| 25 |
-
prompt = gr.Textbox(label=translations["text_to_speech"], value="", placeholder="Hello Words", lines=3)
|
| 26 |
-
with gr.Column():
|
| 27 |
-
speed = gr.Slider(label=translations["voice_speed"], info=translations["voice_speed_info"], minimum=-100, maximum=100, value=0, step=1)
|
| 28 |
-
pitch0 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
|
| 29 |
-
with gr.Row():
|
| 30 |
-
tts_button = gr.Button(translations["tts_1"], variant="primary", scale=2)
|
| 31 |
-
convert_button0 = gr.Button(translations["tts_2"], variant="secondary", scale=2)
|
| 32 |
-
with gr.Row():
|
| 33 |
-
with gr.Column():
|
| 34 |
-
txt_input = gr.File(label=translations["drop_text"], file_types=[".txt", ".srt", ".docx"], visible=use_txt.value)
|
| 35 |
-
tts_voice = gr.Dropdown(label=translations["voice"], choices=edgetts, interactive=True, value="vi-VN-NamMinhNeural")
|
| 36 |
-
tts_pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info_2"], label=translations["pitch"], value=0, interactive=True)
|
| 37 |
-
with gr.Accordion(translations["translate"], open=False):
|
| 38 |
-
with gr.Row():
|
| 39 |
-
source_lang = gr.Dropdown(label=translations["source_lang"], choices=["auto"]+google_tts_voice, interactive=True, value="auto")
|
| 40 |
-
target_lang = gr.Dropdown(label=translations["target_lang"], choices=google_tts_voice, interactive=True, value="en")
|
| 41 |
-
translate_button = gr.Button(translations["translate"])
|
| 42 |
-
with gr.Column():
|
| 43 |
-
with gr.Accordion(translations["model_accordion"], open=True):
|
| 44 |
-
with gr.Row():
|
| 45 |
-
model_pth0 = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 46 |
-
model_index0 = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 47 |
-
with gr.Row():
|
| 48 |
-
refresh1 = gr.Button(translations["refresh"])
|
| 49 |
-
with gr.Row():
|
| 50 |
-
index_strength0 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index0.value != "")
|
| 51 |
-
with gr.Accordion(translations["output_path"], open=False):
|
| 52 |
-
export_format0 = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
|
| 53 |
-
output_audio0 = gr.Textbox(label=translations["output_tts"], value="audios/tts.wav", placeholder="audios/tts.wav", info=translations["tts_output"], interactive=True)
|
| 54 |
-
output_audio1 = gr.Textbox(label=translations["output_tts_convert"], value="audios/tts-convert.wav", placeholder="audios/tts-convert.wav", info=translations["tts_output"], interactive=True)
|
| 55 |
-
with gr.Accordion(translations["setting"], open=False):
|
| 56 |
-
with gr.Accordion(translations["f0_method"], open=False):
|
| 57 |
-
with gr.Group():
|
| 58 |
-
with gr.Row():
|
| 59 |
-
onnx_f0_mode1 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
|
| 60 |
-
unlock_full_method3 = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
|
| 61 |
-
method0 = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
|
| 62 |
-
hybrid_method0 = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=method0.value == "hybrid")
|
| 63 |
-
hop_length0 = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
|
| 64 |
-
alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
|
| 65 |
-
with gr.Accordion(translations["f0_file"], open=False):
|
| 66 |
-
upload_f0_file0 = gr.File(label=translations["upload_f0"], file_types=[".txt"])
|
| 67 |
-
f0_file_dropdown0 = gr.Dropdown(label=translations["f0_file_2"], value="", choices=f0_file, allow_custom_value=True, interactive=True)
|
| 68 |
-
refresh_f0_file0 = gr.Button(translations["refresh"])
|
| 69 |
-
with gr.Accordion(translations["hubert_model"], open=False):
|
| 70 |
-
embed_mode1 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
| 71 |
-
embedders0 = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
| 72 |
-
custom_embedders0 = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders0.value == "custom")
|
| 73 |
-
with gr.Accordion(translations["use_presets"], open=False):
|
| 74 |
-
with gr.Row():
|
| 75 |
-
presets_name = gr.Dropdown(label=translations["file_preset"], choices=presets_file, value=presets_file[0] if len(presets_file) > 0 else '', interactive=True, allow_custom_value=True)
|
| 76 |
-
with gr.Row():
|
| 77 |
-
load_click = gr.Button(translations["load_file"], variant="primary")
|
| 78 |
-
refresh_click = gr.Button(translations["refresh"])
|
| 79 |
-
with gr.Accordion(translations["export_file"], open=False):
|
| 80 |
-
with gr.Row():
|
| 81 |
-
with gr.Column():
|
| 82 |
-
with gr.Group():
|
| 83 |
-
with gr.Row():
|
| 84 |
-
cleaner_chbox = gr.Checkbox(label=translations["save_clean"], value=True, interactive=True)
|
| 85 |
-
autotune_chbox = gr.Checkbox(label=translations["save_autotune"], value=True, interactive=True)
|
| 86 |
-
pitch_chbox = gr.Checkbox(label=translations["save_pitch"], value=True, interactive=True)
|
| 87 |
-
index_strength_chbox = gr.Checkbox(label=translations["save_index_2"], value=True, interactive=True)
|
| 88 |
-
resample_sr_chbox = gr.Checkbox(label=translations["save_resample"], value=True, interactive=True)
|
| 89 |
-
filter_radius_chbox = gr.Checkbox(label=translations["save_filter"], value=True, interactive=True)
|
| 90 |
-
rms_mix_rate_chbox = gr.Checkbox(label=translations["save_envelope"], value=True, interactive=True)
|
| 91 |
-
protect_chbox = gr.Checkbox(label=translations["save_protect"], value=True, interactive=True)
|
| 92 |
-
split_audio_chbox = gr.Checkbox(label=translations["save_split"], value=True, interactive=True)
|
| 93 |
-
formant_shifting_chbox = gr.Checkbox(label=translations["formantshift"], value=True, interactive=True)
|
| 94 |
-
with gr.Row():
|
| 95 |
-
with gr.Column():
|
| 96 |
-
name_to_save_file = gr.Textbox(label=translations["filename_to_save"])
|
| 97 |
-
save_file_button = gr.Button(translations["export_file"])
|
| 98 |
-
with gr.Row():
|
| 99 |
-
upload_presets = gr.Files(label=translations["upload_presets"], file_types=[".conversion.json"])
|
| 100 |
-
with gr.Group():
|
| 101 |
-
with gr.Row():
|
| 102 |
-
audio_processing = gr.Checkbox(label=translations["audio_processing"], value=False, interactive=True)
|
| 103 |
-
with gr.Row():
|
| 104 |
-
formant_shifting1 = gr.Checkbox(label=translations["formantshift"], value=False, interactive=True)
|
| 105 |
-
split_audio0 = gr.Checkbox(label=translations["split_audio"], value=False, interactive=True)
|
| 106 |
-
cleaner1 = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
|
| 107 |
-
with gr.Row():
|
| 108 |
-
autotune3 = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
|
| 109 |
-
checkpointing0 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
|
| 110 |
-
proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
|
| 111 |
-
with gr.Column():
|
| 112 |
-
resample_sr0 = gr.Radio(choices=[0]+sample_rate_choice, label=translations["resample"], info=translations["resample_info"], value=0, interactive=True)
|
| 113 |
-
proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
|
| 114 |
-
f0_autotune_strength0 = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune3.value)
|
| 115 |
-
clean_strength1 = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner1.value)
|
| 116 |
-
filter_radius0 = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
|
| 117 |
-
rms_mix_rate0 = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
|
| 118 |
-
protect0 = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
|
| 119 |
-
with gr.Row():
|
| 120 |
-
formant_qfrency1 = gr.Slider(value=1.0, label=translations["formant_qfrency"], info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 121 |
-
formant_timbre1 = gr.Slider(value=1.0, label=translations["formant_timbre"], info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 122 |
-
with gr.Row():
|
| 123 |
-
gr.Markdown(translations["output_tts_markdown"])
|
| 124 |
-
with gr.Row():
|
| 125 |
-
tts_voice_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["output_text_to_speech"])
|
| 126 |
-
tts_voice_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["output_file_tts_convert"])
|
| 127 |
-
with gr.Row():
|
| 128 |
-
load_click.click(
|
| 129 |
-
fn=load_presets,
|
| 130 |
-
inputs=[
|
| 131 |
-
presets_name,
|
| 132 |
-
cleaner1,
|
| 133 |
-
autotune3,
|
| 134 |
-
pitch0,
|
| 135 |
-
clean_strength1,
|
| 136 |
-
index_strength0,
|
| 137 |
-
resample_sr0,
|
| 138 |
-
filter_radius0,
|
| 139 |
-
rms_mix_rate0,
|
| 140 |
-
protect0,
|
| 141 |
-
split_audio0,
|
| 142 |
-
f0_autotune_strength0,
|
| 143 |
-
formant_shifting1,
|
| 144 |
-
formant_qfrency1,
|
| 145 |
-
formant_timbre1,
|
| 146 |
-
proposal_pitch,
|
| 147 |
-
proposal_pitch_threshold
|
| 148 |
-
],
|
| 149 |
-
outputs=[
|
| 150 |
-
cleaner1,
|
| 151 |
-
autotune3,
|
| 152 |
-
pitch0,
|
| 153 |
-
clean_strength1,
|
| 154 |
-
index_strength0,
|
| 155 |
-
resample_sr0,
|
| 156 |
-
filter_radius0,
|
| 157 |
-
rms_mix_rate0,
|
| 158 |
-
protect0,
|
| 159 |
-
split_audio0,
|
| 160 |
-
f0_autotune_strength0,
|
| 161 |
-
formant_shifting1,
|
| 162 |
-
formant_qfrency1,
|
| 163 |
-
formant_timbre1,
|
| 164 |
-
proposal_pitch,
|
| 165 |
-
proposal_pitch_threshold
|
| 166 |
-
]
|
| 167 |
-
)
|
| 168 |
-
refresh_click.click(fn=change_preset_choices, inputs=[], outputs=[presets_name])
|
| 169 |
-
save_file_button.click(
|
| 170 |
-
fn=save_presets,
|
| 171 |
-
inputs=[
|
| 172 |
-
name_to_save_file,
|
| 173 |
-
cleaner1,
|
| 174 |
-
autotune3,
|
| 175 |
-
pitch0,
|
| 176 |
-
clean_strength1,
|
| 177 |
-
index_strength0,
|
| 178 |
-
resample_sr0,
|
| 179 |
-
filter_radius0,
|
| 180 |
-
rms_mix_rate0,
|
| 181 |
-
protect0,
|
| 182 |
-
split_audio0,
|
| 183 |
-
f0_autotune_strength0,
|
| 184 |
-
cleaner_chbox,
|
| 185 |
-
autotune_chbox,
|
| 186 |
-
pitch_chbox,
|
| 187 |
-
index_strength_chbox,
|
| 188 |
-
resample_sr_chbox,
|
| 189 |
-
filter_radius_chbox,
|
| 190 |
-
rms_mix_rate_chbox,
|
| 191 |
-
protect_chbox,
|
| 192 |
-
split_audio_chbox,
|
| 193 |
-
formant_shifting_chbox,
|
| 194 |
-
formant_shifting1,
|
| 195 |
-
formant_qfrency1,
|
| 196 |
-
formant_timbre1,
|
| 197 |
-
proposal_pitch,
|
| 198 |
-
proposal_pitch_threshold
|
| 199 |
-
],
|
| 200 |
-
outputs=[presets_name]
|
| 201 |
-
)
|
| 202 |
-
with gr.Row():
|
| 203 |
-
proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
|
| 204 |
-
upload_presets.upload(fn=lambda presets_in: [shutil_move(preset.name, configs["presets_path"]) for preset in presets_in][0], inputs=[upload_presets], outputs=[presets_name])
|
| 205 |
-
translate_button.click(fn=google_translate, inputs=[prompt, source_lang, target_lang], outputs=[prompt], api_name="google_translate")
|
| 206 |
-
with gr.Row():
|
| 207 |
-
unlock_full_method3.change(fn=unlock_f0, inputs=[unlock_full_method3], outputs=[method0])
|
| 208 |
-
upload_f0_file0.upload(fn=lambda inp: shutil_move(inp.name, configs["f0_path"]), inputs=[upload_f0_file0], outputs=[f0_file_dropdown0])
|
| 209 |
-
refresh_f0_file0.click(fn=change_f0_choices, inputs=[], outputs=[f0_file_dropdown0])
|
| 210 |
-
with gr.Row():
|
| 211 |
-
embed_mode1.change(fn=change_embedders_mode, inputs=[embed_mode1], outputs=[embedders0])
|
| 212 |
-
autotune3.change(fn=visible, inputs=[autotune3], outputs=[f0_autotune_strength0])
|
| 213 |
-
model_pth0.change(fn=get_index, inputs=[model_pth0], outputs=[model_index0])
|
| 214 |
-
with gr.Row():
|
| 215 |
-
cleaner1.change(fn=visible, inputs=[cleaner1], outputs=[clean_strength1])
|
| 216 |
-
method0.change(fn=lambda method, hybrid: [visible(method == "hybrid"), visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[method0, hybrid_method0], outputs=[hybrid_method0, alpha, hop_length0])
|
| 217 |
-
hybrid_method0.change(fn=hoplength_show, inputs=[method0, hybrid_method0], outputs=[hop_length0])
|
| 218 |
-
with gr.Row():
|
| 219 |
-
refresh1.click(fn=change_models_choices, inputs=[], outputs=[model_pth0, model_index0])
|
| 220 |
-
embedders0.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders0], outputs=[custom_embedders0])
|
| 221 |
-
formant_shifting1.change(fn=lambda a: [visible(a) for _ in range(2)], inputs=[formant_shifting1], outputs=[formant_qfrency1, formant_timbre1])
|
| 222 |
-
with gr.Row():
|
| 223 |
-
model_index0.change(fn=index_strength_show, inputs=[model_index0], outputs=[index_strength0])
|
| 224 |
-
txt_input.upload(fn=process_input, inputs=[txt_input], outputs=[prompt])
|
| 225 |
-
use_txt.change(fn=visible, inputs=[use_txt], outputs=[txt_input])
|
| 226 |
-
with gr.Row():
|
| 227 |
-
google_tts_check_box.change(fn=change_tts_voice_choices, inputs=[google_tts_check_box], outputs=[tts_voice])
|
| 228 |
-
tts_button.click(
|
| 229 |
-
fn=TTS,
|
| 230 |
-
inputs=[
|
| 231 |
-
prompt,
|
| 232 |
-
tts_voice,
|
| 233 |
-
speed,
|
| 234 |
-
output_audio0,
|
| 235 |
-
tts_pitch,
|
| 236 |
-
google_tts_check_box,
|
| 237 |
-
txt_input
|
| 238 |
-
],
|
| 239 |
-
outputs=[tts_voice_audio],
|
| 240 |
-
api_name="text-to-speech"
|
| 241 |
-
)
|
| 242 |
-
convert_button0.click(
|
| 243 |
-
fn=convert_tts,
|
| 244 |
-
inputs=[
|
| 245 |
-
cleaner1,
|
| 246 |
-
autotune3,
|
| 247 |
-
pitch0,
|
| 248 |
-
clean_strength1,
|
| 249 |
-
model_pth0,
|
| 250 |
-
model_index0,
|
| 251 |
-
index_strength0,
|
| 252 |
-
output_audio0,
|
| 253 |
-
output_audio1,
|
| 254 |
-
export_format0,
|
| 255 |
-
method0,
|
| 256 |
-
hybrid_method0,
|
| 257 |
-
hop_length0,
|
| 258 |
-
embedders0,
|
| 259 |
-
custom_embedders0,
|
| 260 |
-
resample_sr0,
|
| 261 |
-
filter_radius0,
|
| 262 |
-
rms_mix_rate0,
|
| 263 |
-
protect0,
|
| 264 |
-
split_audio0,
|
| 265 |
-
f0_autotune_strength0,
|
| 266 |
-
checkpointing0,
|
| 267 |
-
onnx_f0_mode1,
|
| 268 |
-
formant_shifting1,
|
| 269 |
-
formant_qfrency1,
|
| 270 |
-
formant_timbre1,
|
| 271 |
-
f0_file_dropdown0,
|
| 272 |
-
embed_mode1,
|
| 273 |
-
proposal_pitch,
|
| 274 |
-
proposal_pitch_threshold,
|
| 275 |
-
audio_processing,
|
| 276 |
-
alpha
|
| 277 |
-
],
|
| 278 |
-
outputs=[tts_voice_convert],
|
| 279 |
-
api_name="convert_tts"
|
| 280 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/inference/child/convert_with_whisper.py
DELETED
|
@@ -1,164 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.inference import convert_with_whisper
|
| 9 |
-
from main.app.core.ui import visible, change_audios_choices, unlock_f0, hoplength_show, change_models_choices, get_index, index_strength_show, change_embedders_mode, shutil_move
|
| 10 |
-
from main.app.variables import translations, paths_for_files, sample_rate_choice, model_name, index_path, method_f0, embedders_mode, embedders_model, configs, file_types, export_format_choices, whisper_model, hybrid_f0_method
|
| 11 |
-
|
| 12 |
-
def convert_with_whisper_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["convert_with_whisper_info"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
with gr.Column():
|
| 17 |
-
with gr.Group():
|
| 18 |
-
with gr.Row():
|
| 19 |
-
cleaner2 = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
|
| 20 |
-
autotune2 = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
|
| 21 |
-
checkpointing2 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
|
| 22 |
-
formant_shifting2 = gr.Checkbox(label=translations["formantshift"], value=False, interactive=True)
|
| 23 |
-
proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
|
| 24 |
-
audio_processing = gr.Checkbox(label=translations["audio_processing"], value=False, interactive=True)
|
| 25 |
-
with gr.Row():
|
| 26 |
-
num_spk = gr.Slider(minimum=2, maximum=8, step=1, info=translations["num_spk_info"], label=translations["num_spk"], value=2, interactive=True)
|
| 27 |
-
with gr.Row():
|
| 28 |
-
with gr.Column():
|
| 29 |
-
convert_button3 = gr.Button(translations["convert_audio"], variant="primary")
|
| 30 |
-
with gr.Row():
|
| 31 |
-
with gr.Column():
|
| 32 |
-
with gr.Accordion(translations["model_accordion"] + " 1", open=True):
|
| 33 |
-
with gr.Row():
|
| 34 |
-
model_pth2 = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 35 |
-
model_index2 = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 36 |
-
with gr.Row():
|
| 37 |
-
refresh2 = gr.Button(translations["refresh"])
|
| 38 |
-
with gr.Row():
|
| 39 |
-
pitch3 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
|
| 40 |
-
index_strength2 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index2.value != "")
|
| 41 |
-
with gr.Accordion(translations["input_output"], open=False):
|
| 42 |
-
with gr.Column():
|
| 43 |
-
export_format2 = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
|
| 44 |
-
input_audio1 = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
|
| 45 |
-
output_audio2 = gr.Textbox(label=translations["output_path"], value="audios/output.wav", placeholder="audios/output.wav", info=translations["output_path_info"], interactive=True)
|
| 46 |
-
with gr.Column():
|
| 47 |
-
refresh4 = gr.Button(translations["refresh"])
|
| 48 |
-
with gr.Row():
|
| 49 |
-
input2 = gr.Files(label=translations["drop_audio"], file_types=file_types)
|
| 50 |
-
with gr.Column():
|
| 51 |
-
with gr.Accordion(translations["model_accordion"] + " 2", open=True):
|
| 52 |
-
with gr.Row():
|
| 53 |
-
model_pth3 = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 54 |
-
model_index3 = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 55 |
-
with gr.Row():
|
| 56 |
-
refresh3 = gr.Button(translations["refresh"])
|
| 57 |
-
with gr.Row():
|
| 58 |
-
pitch4 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
|
| 59 |
-
index_strength3 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index3.value != "")
|
| 60 |
-
with gr.Accordion(translations["setting"], open=False):
|
| 61 |
-
with gr.Row():
|
| 62 |
-
model_size = gr.Radio(label=translations["model_size"], info=translations["model_size_info"], choices=whisper_model, value="medium", interactive=True)
|
| 63 |
-
with gr.Accordion(translations["f0_method"], open=False):
|
| 64 |
-
with gr.Group():
|
| 65 |
-
with gr.Row():
|
| 66 |
-
onnx_f0_mode4 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
|
| 67 |
-
unlock_full_method2 = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
|
| 68 |
-
method3 = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
|
| 69 |
-
hybrid_method3 = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=method3.value == "hybrid")
|
| 70 |
-
hop_length3 = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
|
| 71 |
-
alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
|
| 72 |
-
with gr.Accordion(translations["hubert_model"], open=False):
|
| 73 |
-
embed_mode3 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
| 74 |
-
embedders3 = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
| 75 |
-
custom_embedders3 = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders3.value == "custom")
|
| 76 |
-
with gr.Column():
|
| 77 |
-
resample_sr3 = gr.Radio(choices=[0]+sample_rate_choice, label=translations["resample"], info=translations["resample_info"], value=0, interactive=True)
|
| 78 |
-
proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
|
| 79 |
-
clean_strength3 = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner2.value)
|
| 80 |
-
f0_autotune_strength3 = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune2.value)
|
| 81 |
-
filter_radius3 = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
|
| 82 |
-
rms_mix_rate3 = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
|
| 83 |
-
protect3 = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
|
| 84 |
-
with gr.Row():
|
| 85 |
-
formant_qfrency3 = gr.Slider(value=1.0, label=translations["formant_qfrency"] + " 1", info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 86 |
-
formant_timbre3 = gr.Slider(value=1.0, label=translations["formant_timbre"] + " 1", info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 87 |
-
with gr.Row():
|
| 88 |
-
formant_qfrency4 = gr.Slider(value=1.0, label=translations["formant_qfrency"] + " 2", info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 89 |
-
formant_timbre4 = gr.Slider(value=1.0, label=translations["formant_timbre"] + " 2", info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 90 |
-
with gr.Row():
|
| 91 |
-
gr.Markdown(translations["input_output"])
|
| 92 |
-
with gr.Row():
|
| 93 |
-
play_audio2 = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
|
| 94 |
-
play_audio3 = gr.Audio(show_download_button=True, interactive=False, label=translations["output_file_tts_convert"])
|
| 95 |
-
with gr.Row():
|
| 96 |
-
autotune2.change(fn=visible, inputs=[autotune2], outputs=[f0_autotune_strength3])
|
| 97 |
-
cleaner2.change(fn=visible, inputs=[cleaner2], outputs=[clean_strength3])
|
| 98 |
-
method3.change(fn=lambda method, hybrid: [visible(method == "hybrid"), visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[method3, hybrid_method3], outputs=[hybrid_method3, alpha, hop_length3])
|
| 99 |
-
with gr.Row():
|
| 100 |
-
hybrid_method3.change(fn=hoplength_show, inputs=[method3, hybrid_method3], outputs=[hop_length3])
|
| 101 |
-
refresh2.click(fn=change_models_choices, inputs=[], outputs=[model_pth2, model_index2])
|
| 102 |
-
model_pth2.change(fn=get_index, inputs=[model_pth2], outputs=[model_index2])
|
| 103 |
-
with gr.Row():
|
| 104 |
-
refresh3.click(fn=change_models_choices, inputs=[], outputs=[model_pth3, model_index3])
|
| 105 |
-
model_pth3.change(fn=get_index, inputs=[model_pth3], outputs=[model_index3])
|
| 106 |
-
input2.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[input2], outputs=[input_audio1])
|
| 107 |
-
with gr.Row():
|
| 108 |
-
input_audio1.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio1], outputs=[play_audio2])
|
| 109 |
-
formant_shifting2.change(fn=lambda a: [visible(a) for _ in range(4)], inputs=[formant_shifting2], outputs=[formant_qfrency3, formant_timbre3, formant_qfrency4, formant_timbre4])
|
| 110 |
-
embedders3.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders3], outputs=[custom_embedders3])
|
| 111 |
-
with gr.Row():
|
| 112 |
-
refresh4.click(fn=change_audios_choices, inputs=[input_audio1], outputs=[input_audio1])
|
| 113 |
-
model_index2.change(fn=index_strength_show, inputs=[model_index2], outputs=[index_strength2])
|
| 114 |
-
model_index3.change(fn=index_strength_show, inputs=[model_index3], outputs=[index_strength3])
|
| 115 |
-
with gr.Row():
|
| 116 |
-
unlock_full_method2.change(fn=unlock_f0, inputs=[unlock_full_method2], outputs=[method3])
|
| 117 |
-
embed_mode3.change(fn=change_embedders_mode, inputs=[embed_mode3], outputs=[embedders3])
|
| 118 |
-
proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
|
| 119 |
-
with gr.Row():
|
| 120 |
-
convert_button3.click(
|
| 121 |
-
fn=convert_with_whisper,
|
| 122 |
-
inputs=[
|
| 123 |
-
num_spk,
|
| 124 |
-
model_size,
|
| 125 |
-
cleaner2,
|
| 126 |
-
clean_strength3,
|
| 127 |
-
autotune2,
|
| 128 |
-
f0_autotune_strength3,
|
| 129 |
-
checkpointing2,
|
| 130 |
-
model_pth2,
|
| 131 |
-
model_pth3,
|
| 132 |
-
model_index2,
|
| 133 |
-
model_index3,
|
| 134 |
-
pitch3,
|
| 135 |
-
pitch4,
|
| 136 |
-
index_strength2,
|
| 137 |
-
index_strength3,
|
| 138 |
-
export_format2,
|
| 139 |
-
input_audio1,
|
| 140 |
-
output_audio2,
|
| 141 |
-
onnx_f0_mode4,
|
| 142 |
-
method3,
|
| 143 |
-
hybrid_method3,
|
| 144 |
-
hop_length3,
|
| 145 |
-
embed_mode3,
|
| 146 |
-
embedders3,
|
| 147 |
-
custom_embedders3,
|
| 148 |
-
resample_sr3,
|
| 149 |
-
filter_radius3,
|
| 150 |
-
rms_mix_rate3,
|
| 151 |
-
protect3,
|
| 152 |
-
formant_shifting2,
|
| 153 |
-
formant_qfrency3,
|
| 154 |
-
formant_timbre3,
|
| 155 |
-
formant_qfrency4,
|
| 156 |
-
formant_timbre4,
|
| 157 |
-
proposal_pitch,
|
| 158 |
-
proposal_pitch_threshold,
|
| 159 |
-
audio_processing,
|
| 160 |
-
alpha
|
| 161 |
-
],
|
| 162 |
-
outputs=[play_audio3],
|
| 163 |
-
api_name="convert_with_whisper"
|
| 164 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/inference/child/separate.py
DELETED
|
@@ -1,263 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.downloads import download_url
|
| 9 |
-
from main.app.core.separate import separate_music
|
| 10 |
-
from main.app.core.ui import visible, valueFalse_interactive, change_audios_choices, shutil_move, separate_change
|
| 11 |
-
from main.app.variables import translations, uvr_model, karaoke_models, reverb_models, vr_models, denoise_models, mdx_models, paths_for_files, sample_rate_choice, configs, file_types, export_format_choices
|
| 12 |
-
|
| 13 |
-
def separate_tab():
|
| 14 |
-
with gr.Row():
|
| 15 |
-
gr.Markdown(translations["4_part"])
|
| 16 |
-
with gr.Row():
|
| 17 |
-
with gr.Column():
|
| 18 |
-
with gr.Group():
|
| 19 |
-
with gr.Row():
|
| 20 |
-
enable_denoise = gr.Checkbox(label=translations["denoise_mdx"], value=False, interactive=False)
|
| 21 |
-
separate_backing = gr.Checkbox(label=translations["separator_backing"], value=False, interactive=True)
|
| 22 |
-
separate_reverb = gr.Checkbox(label=translations["dereveb_audio"], value=False, interactive=True)
|
| 23 |
-
enable_tta = gr.Checkbox(label=translations["enable_tta"], value=False, interactive=False)
|
| 24 |
-
high_end_process = gr.Checkbox(label=translations["high_end_process"], value=False, interactive=False)
|
| 25 |
-
enable_post_process = gr.Checkbox(label=translations["enable_post_process"], value=False, interactive=False)
|
| 26 |
-
with gr.Row():
|
| 27 |
-
model_name = gr.Dropdown(label=translations["separator_model"], value=uvr_model[0], choices=uvr_model, interactive=True)
|
| 28 |
-
karaoke_model = gr.Dropdown(label=translations["separator_backing_model"], value=list(karaoke_models.keys())[0], choices=list(karaoke_models.keys()), interactive=True, visible=separate_backing.value)
|
| 29 |
-
reverb_model = gr.Dropdown(label=translations["dereveb_model"], value=list(reverb_models.keys())[0], choices=list(reverb_models.keys()), interactive=True, visible=separate_reverb.value)
|
| 30 |
-
denoise_model = gr.Dropdown(label=translations["denoise_model"], value=list(denoise_models.keys())[0], choices=list(denoise_models.keys()), interactive=True, visible=enable_denoise.value and model_name.value in list(vr_models.keys()))
|
| 31 |
-
with gr.Row():
|
| 32 |
-
with gr.Column():
|
| 33 |
-
separate_button = gr.Button(translations["separator_tab"], variant="primary")
|
| 34 |
-
with gr.Row():
|
| 35 |
-
with gr.Column():
|
| 36 |
-
with gr.Group():
|
| 37 |
-
with gr.Row():
|
| 38 |
-
shifts = gr.Slider(label=translations["shift"], info=translations["shift_info"], minimum=1, maximum=20, value=2, step=1, interactive=True)
|
| 39 |
-
batch_size = gr.Slider(label=translations["batch_size"], info=translations["mdx_batch_size_info"], minimum=1, maximum=64, value=1, step=1, interactive=True, visible=False)
|
| 40 |
-
with gr.Row():
|
| 41 |
-
segments_size = gr.Slider(label=translations["segments_size"], info=translations["segments_size_info"], minimum=32, maximum=3072, value=256, step=32, interactive=True)
|
| 42 |
-
aggression = gr.Slider(label=translations['aggression'], info=translations["aggression_info"], minimum=1, maximum=50, value=5, step=1, interactive=True, visible=False)
|
| 43 |
-
drop_audio = gr.Files(label=translations["drop_audio"], file_types=file_types)
|
| 44 |
-
with gr.Accordion(translations["use_url"], open=False):
|
| 45 |
-
url = gr.Textbox(label=translations["url_audio"], value="", placeholder="https://www.youtube.com/...", scale=6)
|
| 46 |
-
download_button = gr.Button(translations["downloads"])
|
| 47 |
-
with gr.Column():
|
| 48 |
-
with gr.Group():
|
| 49 |
-
with gr.Row():
|
| 50 |
-
overlap = gr.Radio(label=translations["overlap"], info=translations["overlap_info"], choices=["0.25", "0.5", "0.75", "0.99"], value="0.25", interactive=True)
|
| 51 |
-
with gr.Row():
|
| 52 |
-
window_size = gr.Slider(label=translations["window_size"], info=translations["window_size_info"], minimum=320, maximum=1024, value=512, step=32, interactive=True, visible=False)
|
| 53 |
-
hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=8192, value=1024, step=1, interactive=True, visible=False)
|
| 54 |
-
post_process_threshold = gr.Slider(label=translations['post_process_threshold'], info=translations["post_process_threshold_info"], minimum=0.1, maximum=0.3, value=0.2, step=0.1, interactive=True, visible=False)
|
| 55 |
-
sample_rate = gr.Radio(choices=sample_rate_choice, value=44100, label=translations["sr"], info=translations["sr_info"], interactive=True)
|
| 56 |
-
with gr.Accordion(translations["input_output"], open=False):
|
| 57 |
-
export_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
|
| 58 |
-
input_audio = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, allow_custom_value=True, interactive=True)
|
| 59 |
-
refresh_audio = gr.Button(translations["refresh"])
|
| 60 |
-
output_dirs = gr.Textbox(label=translations["output_folder"], value="audios", placeholder="audios", info=translations["output_folder_info"], interactive=True)
|
| 61 |
-
audio_input = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
|
| 62 |
-
with gr.Row():
|
| 63 |
-
gr.Markdown(translations["output_separator"])
|
| 64 |
-
with gr.Row():
|
| 65 |
-
instruments_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["instruments"])
|
| 66 |
-
original_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["original_vocal"])
|
| 67 |
-
main_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["main_vocal"], visible=separate_backing.value)
|
| 68 |
-
backing_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["backing_vocal"], visible=separate_backing.value)
|
| 69 |
-
with gr.Row():
|
| 70 |
-
model_name.change(fn=lambda a: valueFalse_interactive(a in list(mdx_models.keys()) + list(vr_models.keys())), inputs=[model_name], outputs=[enable_denoise])
|
| 71 |
-
separate_backing.change(fn=lambda a, b: valueFalse_interactive(a or b), inputs=[separate_backing, separate_reverb], outputs=[enable_denoise])
|
| 72 |
-
separate_reverb.change(fn=lambda a, b: valueFalse_interactive(a or b), inputs=[separate_backing, separate_reverb], outputs=[enable_denoise])
|
| 73 |
-
with gr.Row():
|
| 74 |
-
input_audio.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio], outputs=[audio_input])
|
| 75 |
-
drop_audio.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[drop_audio], outputs=[input_audio])
|
| 76 |
-
refresh_audio.click(fn=change_audios_choices, inputs=[input_audio], outputs=[input_audio])
|
| 77 |
-
with gr.Row():
|
| 78 |
-
separate_backing.change(fn=lambda a: [visible(a) for _ in range(2)], inputs=[separate_backing], outputs=[main_vocals, backing_vocals])
|
| 79 |
-
download_button.click(
|
| 80 |
-
fn=download_url,
|
| 81 |
-
inputs=[url],
|
| 82 |
-
outputs=[input_audio, audio_input, url],
|
| 83 |
-
api_name='download_url'
|
| 84 |
-
)
|
| 85 |
-
with gr.Row():
|
| 86 |
-
model_name.change(
|
| 87 |
-
fn=separate_change,
|
| 88 |
-
inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
|
| 89 |
-
outputs=[
|
| 90 |
-
karaoke_model,
|
| 91 |
-
reverb_model,
|
| 92 |
-
overlap,
|
| 93 |
-
segments_size,
|
| 94 |
-
hop_length,
|
| 95 |
-
batch_size,
|
| 96 |
-
shifts,
|
| 97 |
-
window_size,
|
| 98 |
-
aggression,
|
| 99 |
-
post_process_threshold,
|
| 100 |
-
denoise_model,
|
| 101 |
-
enable_tta,
|
| 102 |
-
high_end_process,
|
| 103 |
-
enable_post_process,
|
| 104 |
-
]
|
| 105 |
-
)
|
| 106 |
-
with gr.Row():
|
| 107 |
-
karaoke_model.change(
|
| 108 |
-
fn=separate_change,
|
| 109 |
-
inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
|
| 110 |
-
outputs=[
|
| 111 |
-
karaoke_model,
|
| 112 |
-
reverb_model,
|
| 113 |
-
overlap,
|
| 114 |
-
segments_size,
|
| 115 |
-
hop_length,
|
| 116 |
-
batch_size,
|
| 117 |
-
shifts,
|
| 118 |
-
window_size,
|
| 119 |
-
aggression,
|
| 120 |
-
post_process_threshold,
|
| 121 |
-
denoise_model,
|
| 122 |
-
enable_tta,
|
| 123 |
-
high_end_process,
|
| 124 |
-
enable_post_process,
|
| 125 |
-
]
|
| 126 |
-
)
|
| 127 |
-
separate_backing.change(
|
| 128 |
-
fn=separate_change,
|
| 129 |
-
inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
|
| 130 |
-
outputs=[
|
| 131 |
-
karaoke_model,
|
| 132 |
-
reverb_model,
|
| 133 |
-
overlap,
|
| 134 |
-
segments_size,
|
| 135 |
-
hop_length,
|
| 136 |
-
batch_size,
|
| 137 |
-
shifts,
|
| 138 |
-
window_size,
|
| 139 |
-
aggression,
|
| 140 |
-
post_process_threshold,
|
| 141 |
-
denoise_model,
|
| 142 |
-
enable_tta,
|
| 143 |
-
high_end_process,
|
| 144 |
-
enable_post_process,
|
| 145 |
-
]
|
| 146 |
-
)
|
| 147 |
-
with gr.Row():
|
| 148 |
-
reverb_model.change(
|
| 149 |
-
fn=separate_change,
|
| 150 |
-
inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
|
| 151 |
-
outputs=[
|
| 152 |
-
karaoke_model,
|
| 153 |
-
reverb_model,
|
| 154 |
-
overlap,
|
| 155 |
-
segments_size,
|
| 156 |
-
hop_length,
|
| 157 |
-
batch_size,
|
| 158 |
-
shifts,
|
| 159 |
-
window_size,
|
| 160 |
-
aggression,
|
| 161 |
-
post_process_threshold,
|
| 162 |
-
denoise_model,
|
| 163 |
-
enable_tta,
|
| 164 |
-
high_end_process,
|
| 165 |
-
enable_post_process,
|
| 166 |
-
]
|
| 167 |
-
)
|
| 168 |
-
separate_reverb.change(
|
| 169 |
-
fn=separate_change,
|
| 170 |
-
inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
|
| 171 |
-
outputs=[
|
| 172 |
-
karaoke_model,
|
| 173 |
-
reverb_model,
|
| 174 |
-
overlap,
|
| 175 |
-
segments_size,
|
| 176 |
-
hop_length,
|
| 177 |
-
batch_size,
|
| 178 |
-
shifts,
|
| 179 |
-
window_size,
|
| 180 |
-
aggression,
|
| 181 |
-
post_process_threshold,
|
| 182 |
-
denoise_model,
|
| 183 |
-
enable_tta,
|
| 184 |
-
high_end_process,
|
| 185 |
-
enable_post_process,
|
| 186 |
-
]
|
| 187 |
-
)
|
| 188 |
-
with gr.Row():
|
| 189 |
-
enable_denoise.change(
|
| 190 |
-
fn=separate_change,
|
| 191 |
-
inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
|
| 192 |
-
outputs=[
|
| 193 |
-
karaoke_model,
|
| 194 |
-
reverb_model,
|
| 195 |
-
overlap,
|
| 196 |
-
segments_size,
|
| 197 |
-
hop_length,
|
| 198 |
-
batch_size,
|
| 199 |
-
shifts,
|
| 200 |
-
window_size,
|
| 201 |
-
aggression,
|
| 202 |
-
post_process_threshold,
|
| 203 |
-
denoise_model,
|
| 204 |
-
enable_tta,
|
| 205 |
-
high_end_process,
|
| 206 |
-
enable_post_process,
|
| 207 |
-
]
|
| 208 |
-
)
|
| 209 |
-
enable_post_process.change(
|
| 210 |
-
fn=separate_change,
|
| 211 |
-
inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
|
| 212 |
-
outputs=[
|
| 213 |
-
karaoke_model,
|
| 214 |
-
reverb_model,
|
| 215 |
-
overlap,
|
| 216 |
-
segments_size,
|
| 217 |
-
hop_length,
|
| 218 |
-
batch_size,
|
| 219 |
-
shifts,
|
| 220 |
-
window_size,
|
| 221 |
-
aggression,
|
| 222 |
-
post_process_threshold,
|
| 223 |
-
denoise_model,
|
| 224 |
-
enable_tta,
|
| 225 |
-
high_end_process,
|
| 226 |
-
enable_post_process,
|
| 227 |
-
]
|
| 228 |
-
)
|
| 229 |
-
with gr.Row():
|
| 230 |
-
separate_button.click(
|
| 231 |
-
fn=separate_music,
|
| 232 |
-
inputs=[
|
| 233 |
-
input_audio,
|
| 234 |
-
output_dirs,
|
| 235 |
-
export_format,
|
| 236 |
-
model_name,
|
| 237 |
-
karaoke_model,
|
| 238 |
-
reverb_model,
|
| 239 |
-
denoise_model,
|
| 240 |
-
sample_rate,
|
| 241 |
-
shifts,
|
| 242 |
-
batch_size,
|
| 243 |
-
overlap,
|
| 244 |
-
aggression,
|
| 245 |
-
hop_length,
|
| 246 |
-
window_size,
|
| 247 |
-
segments_size,
|
| 248 |
-
post_process_threshold,
|
| 249 |
-
enable_tta,
|
| 250 |
-
enable_denoise,
|
| 251 |
-
high_end_process,
|
| 252 |
-
enable_post_process,
|
| 253 |
-
separate_backing,
|
| 254 |
-
separate_reverb
|
| 255 |
-
],
|
| 256 |
-
outputs=[
|
| 257 |
-
original_vocals,
|
| 258 |
-
instruments_audio,
|
| 259 |
-
main_vocals,
|
| 260 |
-
backing_vocals
|
| 261 |
-
],
|
| 262 |
-
api_name="separate_music"
|
| 263 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/inference/inference.py
DELETED
|
@@ -1,30 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.variables import translations, configs
|
| 9 |
-
from main.app.tabs.inference.child.convert import convert_tab
|
| 10 |
-
from main.app.tabs.inference.child.separate import separate_tab
|
| 11 |
-
from main.app.tabs.inference.child.convert_tts import convert_tts_tab
|
| 12 |
-
from main.app.tabs.inference.child.convert_with_whisper import convert_with_whisper_tab
|
| 13 |
-
|
| 14 |
-
def inference_tab():
|
| 15 |
-
with gr.TabItem(translations["inference"], visible=configs.get("inference_tab", True)):
|
| 16 |
-
with gr.TabItem(translations["separator_tab"], visible=configs.get("separator_tab", True)):
|
| 17 |
-
gr.Markdown(f"## {translations['separator_tab']}")
|
| 18 |
-
separate_tab()
|
| 19 |
-
|
| 20 |
-
with gr.TabItem(translations["convert_audio"], visible=configs.get("convert_tab", True)):
|
| 21 |
-
gr.Markdown(f"## {translations['convert_audio']}")
|
| 22 |
-
convert_tab()
|
| 23 |
-
|
| 24 |
-
with gr.TabItem(translations["convert_with_whisper"], visible=configs.get("convert_with_whisper", True)):
|
| 25 |
-
gr.Markdown(f"## {translations['convert_with_whisper']}")
|
| 26 |
-
convert_with_whisper_tab()
|
| 27 |
-
|
| 28 |
-
with gr.TabItem(translations["convert_text"], visible=configs.get("tts_tab", True)):
|
| 29 |
-
gr.Markdown(translations["convert_text_markdown"])
|
| 30 |
-
convert_tts_tab()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/realtime/realtime.py
DELETED
|
@@ -1,226 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.realtime import realtime_start, realtime_stop
|
| 9 |
-
from main.app.variables import translations, configs, model_name, index_path, method_f0, embedders_mode, embedders_model
|
| 10 |
-
from main.app.core.ui import change_models_choices, get_index, index_strength_show, unlock_f0, hoplength_show, change_embedders_mode, visible, audio_device, change_audio_device_choices, update_audio_device
|
| 11 |
-
|
| 12 |
-
input_channels_map, output_channels_map = audio_device()
|
| 13 |
-
|
| 14 |
-
def realtime_tab():
|
| 15 |
-
with gr.TabItem(translations["realtime"], visible=configs.get("realtime_tab", True)):
|
| 16 |
-
gr.Markdown(translations["realtime_markdown"])
|
| 17 |
-
with gr.Row():
|
| 18 |
-
gr.Markdown(translations["realtime_markdown_2"])
|
| 19 |
-
with gr.Row():
|
| 20 |
-
status = gr.Label(label=translations["realtime_latency"], value=translations["realtime_not_startup"])
|
| 21 |
-
with gr.Row():
|
| 22 |
-
monitor = gr.Checkbox(label=translations["monitor"], value=False, interactive=True)
|
| 23 |
-
exclusive_mode = gr.Checkbox(label=translations["exclusive_mode"], value=False, interactive=True)
|
| 24 |
-
vad_enabled = gr.Checkbox(label=translations["vad_enabled"], value=False, interactive=True)
|
| 25 |
-
clean_audio = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
|
| 26 |
-
with gr.Row():
|
| 27 |
-
with gr.Accordion(translations["audio_device"], open=True):
|
| 28 |
-
with gr.Row():
|
| 29 |
-
input_audio_device = gr.Dropdown(label=translations["input_audio_device_label"], info=translations["input_audio_device_info"], choices=list(input_channels_map.keys()), value=list(input_channels_map.keys())[0] if len(list(input_channels_map.keys())) >= 1 else "", interactive=True)
|
| 30 |
-
output_audio_device = gr.Dropdown(label=translations["output_audio_device_label"], info=translations["output_audio_device_info"], choices=list(output_channels_map.keys()), value=list(output_channels_map.keys())[0] if len(list(output_channels_map.keys())) >= 1 else "", interactive=True)
|
| 31 |
-
monitor_output_device = gr.Dropdown(label=translations["monitor_output_device_label"], info=translations["monitor_output_device_info"], choices=list(output_channels_map.keys()), value=list(output_channels_map.keys())[0] if len(list(output_channels_map.keys())) >= 1 else "", interactive=True, visible=False)
|
| 32 |
-
with gr.Row():
|
| 33 |
-
input_audio_gain = gr.Slider(minimum=0, maximum=2500, label=translations["input_audio_gain_label"], info=translations["input_audio_gain_info"], value=100, step=1, interactive=True)
|
| 34 |
-
output_audio_gain = gr.Slider(minimum=0, maximum=4000, label=translations["output_audio_gain_label"], info=translations["output_audio_gain_info"], value=100, step=1, interactive=True)
|
| 35 |
-
monitor_audio_gain = gr.Slider(minimum=0, maximum=4000, label=translations["monitor_audio_gain_label"], info=translations["monitor_audio_gain_info"], value=100, step=1, interactive=True, visible=False)
|
| 36 |
-
with gr.Row(visible=False) as asio_row:
|
| 37 |
-
input_asio_channels = gr.Slider(minimum=-1, maximum=128, label=translations["input_asio_channels_label"], info=translations["input_asio_channels_info"], value=-1, step=1, interactive=True, visible=False)
|
| 38 |
-
output_asio_channels = gr.Slider(minimum=-1, maximum=128, label=translations["output_asio_channels_label"], info=translations["output_asio_channels_info"], value=-1, step=1, interactive=True, visible=False)
|
| 39 |
-
monitor_asio_channels = gr.Slider(minimum=-1, maximum=128, label=translations["monitor_asio_channels_label"], info=translations["monitor_asio_channels_info"], value=-1, step=1, interactive=True, visible=False)
|
| 40 |
-
with gr.Row():
|
| 41 |
-
refresh_audio_device = gr.Button(value=translations["refresh_audio_device"], variant="secondary")
|
| 42 |
-
with gr.Row():
|
| 43 |
-
start_realtime = gr.Button(value=translations["start_realtime_button"], variant="primary", interactive=True)
|
| 44 |
-
stop_realtime = gr.Button(value=translations["stop_realtime_button"], variant="stop", interactive=False)
|
| 45 |
-
with gr.Row():
|
| 46 |
-
chunk_size = gr.Slider(minimum=2.7, maximum=2730.7, step=0.1, label=translations["chunk_size"], info=translations["chunk_size_info"], value=1024, interactive=True)
|
| 47 |
-
pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
|
| 48 |
-
with gr.Row():
|
| 49 |
-
with gr.Column():
|
| 50 |
-
with gr.Accordion(translations["model_accordion"], open=True):
|
| 51 |
-
with gr.Row():
|
| 52 |
-
model_pth = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 53 |
-
model_index = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 54 |
-
with gr.Row():
|
| 55 |
-
model_refresh = gr.Button(translations["refresh"])
|
| 56 |
-
with gr.Row():
|
| 57 |
-
index_strength = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index.value != "")
|
| 58 |
-
with gr.Column():
|
| 59 |
-
with gr.Accordion(translations["f0_method"], open=True):
|
| 60 |
-
with gr.Group():
|
| 61 |
-
with gr.Row():
|
| 62 |
-
onnx_f0_mode = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
|
| 63 |
-
unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
|
| 64 |
-
f0_method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=[m for m in method_f0 if m != "hybrid"], value="rmvpe", interactive=True)
|
| 65 |
-
hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
|
| 66 |
-
with gr.Column():
|
| 67 |
-
with gr.Accordion(translations["hubert_model"], open=True):
|
| 68 |
-
embed_mode = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
| 69 |
-
embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
| 70 |
-
custom_embedders = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders.value == "custom")
|
| 71 |
-
with gr.Row():
|
| 72 |
-
with gr.Accordion(translations["setting"], open=True):
|
| 73 |
-
with gr.Row():
|
| 74 |
-
f0_autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
|
| 75 |
-
proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
|
| 76 |
-
with gr.Group():
|
| 77 |
-
with gr.Row():
|
| 78 |
-
f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=f0_autotune.value)
|
| 79 |
-
proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
|
| 80 |
-
with gr.Row():
|
| 81 |
-
rms_mix_rate = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
|
| 82 |
-
protect = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
|
| 83 |
-
with gr.Row():
|
| 84 |
-
clean_strength = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
|
| 85 |
-
filter_radius = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
|
| 86 |
-
with gr.Column():
|
| 87 |
-
silent_threshold = gr.Slider(minimum=-90, maximum=-60, label=translations["silent_threshold_label"], info=translations["silent_threshold_info"], value=-90, step=1, interactive=True)
|
| 88 |
-
extra_convert_size = gr.Slider(minimum=0.1, maximum=5, label=translations["extra_convert_size_label"], info=translations["extra_convert_size_info"], value=0.5, step=0.1, interactive=True)
|
| 89 |
-
cross_fade_overlap_size = gr.Slider(minimum=0.05, maximum=0.2, label=translations["cross_fade_overlap_size_label"], info=translations["cross_fade_overlap_size_info"], value=0.1, step=0.01, interactive=True)
|
| 90 |
-
with gr.Row():
|
| 91 |
-
vad_sensitivity = gr.Slider(minimum=0, maximum=3, label=translations["vad_sensitivity_label"], info=translations["vad_sensitivity_info"], value=3, step=1, interactive=True, visible=vad_enabled.value)
|
| 92 |
-
vad_frame_ms = gr.Slider(minimum=10, maximum=30, label=translations["vad_frame_ms_label"], info=translations["vad_frame_ms_info"], value=30, step=10, interactive=True, visible=vad_enabled.value)
|
| 93 |
-
with gr.Row():
|
| 94 |
-
model_pth.change(
|
| 95 |
-
fn=get_index,
|
| 96 |
-
inputs=[model_pth],
|
| 97 |
-
outputs=[model_index]
|
| 98 |
-
)
|
| 99 |
-
model_index.change(
|
| 100 |
-
fn=index_strength_show,
|
| 101 |
-
inputs=[model_index],
|
| 102 |
-
outputs=[index_strength]
|
| 103 |
-
)
|
| 104 |
-
model_refresh.click(
|
| 105 |
-
fn=change_models_choices,
|
| 106 |
-
inputs=[],
|
| 107 |
-
outputs=[model_pth, model_index]
|
| 108 |
-
)
|
| 109 |
-
with gr.Row():
|
| 110 |
-
unlock_full_method.change(
|
| 111 |
-
fn=lambda f0_method: {"choices": [m for m in unlock_f0(f0_method)["choices"] if m != "hybrid"], "value": "rmvpe", "__type__": "update"},
|
| 112 |
-
inputs=[unlock_full_method],
|
| 113 |
-
outputs=[f0_method]
|
| 114 |
-
)
|
| 115 |
-
f0_method.change(
|
| 116 |
-
fn=lambda f0_method: hoplength_show(f0_method, None),
|
| 117 |
-
inputs=[f0_method],
|
| 118 |
-
outputs=[hop_length]
|
| 119 |
-
)
|
| 120 |
-
embed_mode.change(
|
| 121 |
-
fn=change_embedders_mode,
|
| 122 |
-
inputs=[embed_mode],
|
| 123 |
-
outputs=[embedders]
|
| 124 |
-
)
|
| 125 |
-
with gr.Row():
|
| 126 |
-
embedders.change(
|
| 127 |
-
fn=lambda embedders: visible(embedders == "custom"),
|
| 128 |
-
inputs=[embedders],
|
| 129 |
-
outputs=[custom_embedders]
|
| 130 |
-
)
|
| 131 |
-
input_audio_device.change(
|
| 132 |
-
fn=update_audio_device,
|
| 133 |
-
inputs=[input_audio_device, output_audio_device, monitor_output_device, monitor],
|
| 134 |
-
outputs=[monitor_output_device, monitor_audio_gain, monitor_asio_channels, asio_row, input_asio_channels, output_asio_channels, monitor_asio_channels]
|
| 135 |
-
)
|
| 136 |
-
output_audio_device.change(
|
| 137 |
-
fn=update_audio_device,
|
| 138 |
-
inputs=[input_audio_device, output_audio_device, monitor_output_device, monitor],
|
| 139 |
-
outputs=[monitor_output_device, monitor_audio_gain, monitor_asio_channels, asio_row, input_asio_channels, output_asio_channels, monitor_asio_channels]
|
| 140 |
-
)
|
| 141 |
-
with gr.Row():
|
| 142 |
-
monitor_output_device.change(
|
| 143 |
-
fn=update_audio_device,
|
| 144 |
-
inputs=[input_audio_device, output_audio_device, monitor_output_device, monitor],
|
| 145 |
-
outputs=[monitor_output_device, monitor_audio_gain, monitor_asio_channels, asio_row, input_asio_channels, output_asio_channels, monitor_asio_channels]
|
| 146 |
-
)
|
| 147 |
-
monitor.change(
|
| 148 |
-
fn=update_audio_device,
|
| 149 |
-
inputs=[input_audio_device, output_audio_device, monitor_output_device, monitor],
|
| 150 |
-
outputs=[monitor_output_device, monitor_audio_gain, monitor_asio_channels, asio_row, input_asio_channels, output_asio_channels, monitor_asio_channels]
|
| 151 |
-
)
|
| 152 |
-
f0_autotune.change(
|
| 153 |
-
fn=visible,
|
| 154 |
-
inputs=[f0_autotune],
|
| 155 |
-
outputs=[f0_autotune_strength]
|
| 156 |
-
)
|
| 157 |
-
with gr.Row():
|
| 158 |
-
proposal_pitch.change(
|
| 159 |
-
fn=visible,
|
| 160 |
-
inputs=[proposal_pitch],
|
| 161 |
-
outputs=[proposal_pitch_threshold]
|
| 162 |
-
)
|
| 163 |
-
vad_enabled.change(
|
| 164 |
-
fn=lambda a: [visible(a) for _ in range(2)],
|
| 165 |
-
inputs=[vad_enabled],
|
| 166 |
-
outputs=[vad_sensitivity, vad_frame_ms]
|
| 167 |
-
)
|
| 168 |
-
refresh_audio_device.click(
|
| 169 |
-
fn=change_audio_device_choices,
|
| 170 |
-
inputs=[],
|
| 171 |
-
outputs=[input_audio_device, output_audio_device, monitor_output_device]
|
| 172 |
-
)
|
| 173 |
-
with gr.Row():
|
| 174 |
-
clean_audio.change(
|
| 175 |
-
fn=visible,
|
| 176 |
-
inputs=[clean_audio],
|
| 177 |
-
outputs=[clean_strength]
|
| 178 |
-
)
|
| 179 |
-
start_realtime.click(
|
| 180 |
-
fn=realtime_start,
|
| 181 |
-
inputs=[
|
| 182 |
-
monitor,
|
| 183 |
-
exclusive_mode,
|
| 184 |
-
vad_enabled,
|
| 185 |
-
input_audio_device,
|
| 186 |
-
output_audio_device,
|
| 187 |
-
monitor_output_device,
|
| 188 |
-
input_audio_gain,
|
| 189 |
-
output_audio_gain,
|
| 190 |
-
monitor_audio_gain,
|
| 191 |
-
input_asio_channels,
|
| 192 |
-
output_asio_channels,
|
| 193 |
-
monitor_asio_channels,
|
| 194 |
-
chunk_size,
|
| 195 |
-
pitch,
|
| 196 |
-
model_pth,
|
| 197 |
-
model_index,
|
| 198 |
-
index_strength,
|
| 199 |
-
onnx_f0_mode,
|
| 200 |
-
f0_method,
|
| 201 |
-
hop_length,
|
| 202 |
-
embed_mode,
|
| 203 |
-
embedders,
|
| 204 |
-
custom_embedders,
|
| 205 |
-
f0_autotune,
|
| 206 |
-
proposal_pitch,
|
| 207 |
-
f0_autotune_strength,
|
| 208 |
-
proposal_pitch_threshold,
|
| 209 |
-
rms_mix_rate,
|
| 210 |
-
protect,
|
| 211 |
-
filter_radius,
|
| 212 |
-
silent_threshold,
|
| 213 |
-
extra_convert_size,
|
| 214 |
-
cross_fade_overlap_size,
|
| 215 |
-
vad_sensitivity,
|
| 216 |
-
vad_frame_ms,
|
| 217 |
-
clean_audio,
|
| 218 |
-
clean_strength
|
| 219 |
-
],
|
| 220 |
-
outputs=[status, start_realtime, stop_realtime]
|
| 221 |
-
)
|
| 222 |
-
stop_realtime.click(
|
| 223 |
-
fn=realtime_stop,
|
| 224 |
-
inputs=[],
|
| 225 |
-
outputs=[status, start_realtime, stop_realtime]
|
| 226 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/realtime/realtime_client.py
DELETED
|
@@ -1,210 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.variables import translations, configs, model_name, index_path, method_f0, embedders_mode, embedders_model
|
| 9 |
-
from main.app.core.ui import change_models_choices, get_index, index_strength_show, unlock_f0, hoplength_show, change_embedders_mode, visible, update_dropdowns_from_json, update_button_from_json
|
| 10 |
-
|
| 11 |
-
def realtime_client_tab():
|
| 12 |
-
with gr.TabItem(translations["realtime_client"], visible=configs.get("realtime_client_tab", True)):
|
| 13 |
-
gr.Markdown(translations["realtime_markdown"])
|
| 14 |
-
with gr.Row():
|
| 15 |
-
gr.Markdown(translations["realtime_markdown_2"])
|
| 16 |
-
with gr.Row():
|
| 17 |
-
gr.Label(label=translations["realtime_latency"], value=translations["realtime_not_startup"], elem_id="realtime-status-info")
|
| 18 |
-
with gr.Row():
|
| 19 |
-
monitor = gr.Checkbox(label=translations["monitor"], value=False, interactive=True)
|
| 20 |
-
vad_enabled = gr.Checkbox(label=translations["vad_enabled"], value=False, interactive=True)
|
| 21 |
-
clean_audio = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
|
| 22 |
-
with gr.Row():
|
| 23 |
-
with gr.Accordion(translations["audio_device"], open=True):
|
| 24 |
-
with gr.Row():
|
| 25 |
-
input_audio_device = gr.Dropdown(label=translations["input_audio_device_label"], info=translations["input_audio_device_info"], choices=[], value=None, interactive=True)
|
| 26 |
-
output_audio_device = gr.Dropdown(label=translations["output_audio_device_label"], info=translations["output_audio_device_info"], choices=[], value=None, interactive=True)
|
| 27 |
-
monitor_output_device = gr.Dropdown(label=translations["monitor_output_device_label"], info=translations["monitor_output_device_info"], choices=[], value=None, interactive=True, visible=False)
|
| 28 |
-
with gr.Row():
|
| 29 |
-
input_audio_gain = gr.Slider(minimum=0, maximum=2500, label=translations["input_audio_gain_label"], info=translations["input_audio_gain_info"], value=100, step=1, interactive=True)
|
| 30 |
-
output_audio_gain = gr.Slider(minimum=0, maximum=4000, label=translations["output_audio_gain_label"], info=translations["output_audio_gain_info"], value=100, step=1, interactive=True)
|
| 31 |
-
monitor_audio_gain = gr.Slider(minimum=0, maximum=4000, label=translations["monitor_audio_gain_label"], info=translations["monitor_audio_gain_info"], value=100, step=1, interactive=True, visible=False)
|
| 32 |
-
with gr.Row():
|
| 33 |
-
refresh_audio_device = gr.Button(value=translations["refresh_audio_device"], variant="secondary")
|
| 34 |
-
with gr.Row():
|
| 35 |
-
start_realtime = gr.Button(value=translations["start_realtime_button"], variant="primary", interactive=True)
|
| 36 |
-
stop_realtime = gr.Button(value=translations["stop_realtime_button"], variant="stop", interactive=False)
|
| 37 |
-
with gr.Row():
|
| 38 |
-
chunk_size = gr.Slider(minimum=2.7, maximum=2730.7, step=0.1, label=translations["chunk_size"], info=translations["chunk_size_info"], value=1024, interactive=True)
|
| 39 |
-
pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
|
| 40 |
-
with gr.Row():
|
| 41 |
-
with gr.Column():
|
| 42 |
-
with gr.Accordion(translations["model_accordion"], open=True):
|
| 43 |
-
with gr.Row():
|
| 44 |
-
model_pth = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 45 |
-
model_index = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 46 |
-
with gr.Row():
|
| 47 |
-
model_refresh = gr.Button(translations["refresh"])
|
| 48 |
-
with gr.Row():
|
| 49 |
-
index_strength = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index.value != "")
|
| 50 |
-
with gr.Column():
|
| 51 |
-
with gr.Accordion(translations["f0_method"], open=True):
|
| 52 |
-
with gr.Group():
|
| 53 |
-
with gr.Row():
|
| 54 |
-
onnx_f0_mode = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
|
| 55 |
-
unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
|
| 56 |
-
f0_method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=[m for m in method_f0 if m != "hybrid"], value="rmvpe", interactive=True)
|
| 57 |
-
hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
|
| 58 |
-
with gr.Column():
|
| 59 |
-
with gr.Accordion(translations["hubert_model"], open=True):
|
| 60 |
-
embed_mode = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
| 61 |
-
embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
| 62 |
-
custom_embedders = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders.value == "custom")
|
| 63 |
-
with gr.Row():
|
| 64 |
-
with gr.Accordion(translations["setting"], open=True):
|
| 65 |
-
with gr.Row():
|
| 66 |
-
f0_autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
|
| 67 |
-
proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
|
| 68 |
-
with gr.Group():
|
| 69 |
-
with gr.Row():
|
| 70 |
-
f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=f0_autotune.value)
|
| 71 |
-
proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
|
| 72 |
-
with gr.Row():
|
| 73 |
-
rms_mix_rate = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
|
| 74 |
-
protect = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
|
| 75 |
-
with gr.Row():
|
| 76 |
-
clean_strength = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
|
| 77 |
-
filter_radius = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
|
| 78 |
-
with gr.Column():
|
| 79 |
-
silent_threshold = gr.Slider(minimum=-90, maximum=-60, label=translations["silent_threshold_label"], info=translations["silent_threshold_info"], value=-90, step=1, interactive=True)
|
| 80 |
-
extra_convert_size = gr.Slider(minimum=0.1, maximum=5, label=translations["extra_convert_size_label"], info=translations["extra_convert_size_info"], value=0.5, step=0.1, interactive=True)
|
| 81 |
-
cross_fade_overlap_size = gr.Slider(minimum=0.05, maximum=0.2, label=translations["cross_fade_overlap_size_label"], info=translations["cross_fade_overlap_size_info"], value=0.1, step=0.01, interactive=True)
|
| 82 |
-
with gr.Row():
|
| 83 |
-
vad_sensitivity = gr.Slider(minimum=0, maximum=3, label=translations["vad_sensitivity_label"], info=translations["vad_sensitivity_info"], value=3, step=1, interactive=True, visible=vad_enabled.value)
|
| 84 |
-
vad_frame_ms = gr.Slider(minimum=10, maximum=30, label=translations["vad_frame_ms_label"], info=translations["vad_frame_ms_info"], value=30, step=10, interactive=True, visible=vad_enabled.value)
|
| 85 |
-
with gr.Row():
|
| 86 |
-
json_audio_hidden = gr.JSON(visible=False)
|
| 87 |
-
json_button_hidden = gr.JSON(visible=False)
|
| 88 |
-
with gr.Row():
|
| 89 |
-
model_pth.change(
|
| 90 |
-
fn=get_index,
|
| 91 |
-
inputs=[model_pth],
|
| 92 |
-
outputs=[model_index]
|
| 93 |
-
)
|
| 94 |
-
model_index.change(
|
| 95 |
-
fn=index_strength_show,
|
| 96 |
-
inputs=[model_index],
|
| 97 |
-
outputs=[index_strength]
|
| 98 |
-
)
|
| 99 |
-
model_refresh.click(
|
| 100 |
-
fn=change_models_choices,
|
| 101 |
-
inputs=[],
|
| 102 |
-
outputs=[model_pth, model_index]
|
| 103 |
-
)
|
| 104 |
-
with gr.Row():
|
| 105 |
-
unlock_full_method.change(
|
| 106 |
-
fn=lambda f0_method: {"choices": [m for m in unlock_f0(f0_method)["choices"] if m != "hybrid"], "value": "rmvpe", "__type__": "update"},
|
| 107 |
-
inputs=[unlock_full_method],
|
| 108 |
-
outputs=[f0_method]
|
| 109 |
-
)
|
| 110 |
-
f0_method.change(
|
| 111 |
-
fn=lambda f0_method: hoplength_show(f0_method, None),
|
| 112 |
-
inputs=[f0_method],
|
| 113 |
-
outputs=[hop_length]
|
| 114 |
-
)
|
| 115 |
-
embed_mode.change(
|
| 116 |
-
fn=change_embedders_mode,
|
| 117 |
-
inputs=[embed_mode],
|
| 118 |
-
outputs=[embedders]
|
| 119 |
-
)
|
| 120 |
-
with gr.Row():
|
| 121 |
-
embedders.change(
|
| 122 |
-
fn=lambda embedders: visible(embedders == "custom"),
|
| 123 |
-
inputs=[embedders],
|
| 124 |
-
outputs=[custom_embedders]
|
| 125 |
-
)
|
| 126 |
-
f0_autotune.change(
|
| 127 |
-
fn=visible,
|
| 128 |
-
inputs=[f0_autotune],
|
| 129 |
-
outputs=[f0_autotune_strength]
|
| 130 |
-
)
|
| 131 |
-
clean_audio.change(
|
| 132 |
-
fn=visible,
|
| 133 |
-
inputs=[clean_audio],
|
| 134 |
-
outputs=[clean_strength]
|
| 135 |
-
)
|
| 136 |
-
with gr.Row():
|
| 137 |
-
proposal_pitch.change(
|
| 138 |
-
fn=visible,
|
| 139 |
-
inputs=[proposal_pitch],
|
| 140 |
-
outputs=[proposal_pitch_threshold]
|
| 141 |
-
)
|
| 142 |
-
vad_enabled.change(
|
| 143 |
-
fn=lambda a: [visible(a) for _ in range(2)],
|
| 144 |
-
inputs=[vad_enabled],
|
| 145 |
-
outputs=[vad_sensitivity, vad_frame_ms]
|
| 146 |
-
)
|
| 147 |
-
refresh_audio_device.click(
|
| 148 |
-
fn=None,
|
| 149 |
-
js="getAudioDevices",
|
| 150 |
-
inputs=[],
|
| 151 |
-
outputs=json_audio_hidden
|
| 152 |
-
)
|
| 153 |
-
with gr.Row():
|
| 154 |
-
json_audio_hidden.change(
|
| 155 |
-
fn=update_dropdowns_from_json,
|
| 156 |
-
inputs=[json_audio_hidden],
|
| 157 |
-
outputs=[input_audio_device, output_audio_device, monitor_output_device]
|
| 158 |
-
)
|
| 159 |
-
json_button_hidden.change(
|
| 160 |
-
fn=update_button_from_json,
|
| 161 |
-
inputs=[json_button_hidden],
|
| 162 |
-
outputs=[start_realtime, stop_realtime]
|
| 163 |
-
)
|
| 164 |
-
with gr.Row():
|
| 165 |
-
start_realtime.click(
|
| 166 |
-
fn=None,
|
| 167 |
-
js="StreamAudioRealtime",
|
| 168 |
-
inputs=[
|
| 169 |
-
monitor,
|
| 170 |
-
vad_enabled,
|
| 171 |
-
input_audio_device,
|
| 172 |
-
output_audio_device,
|
| 173 |
-
monitor_output_device,
|
| 174 |
-
input_audio_gain,
|
| 175 |
-
output_audio_gain,
|
| 176 |
-
monitor_audio_gain,
|
| 177 |
-
chunk_size,
|
| 178 |
-
pitch,
|
| 179 |
-
model_pth,
|
| 180 |
-
model_index,
|
| 181 |
-
index_strength,
|
| 182 |
-
onnx_f0_mode,
|
| 183 |
-
f0_method,
|
| 184 |
-
hop_length,
|
| 185 |
-
embed_mode,
|
| 186 |
-
embedders,
|
| 187 |
-
custom_embedders,
|
| 188 |
-
f0_autotune,
|
| 189 |
-
proposal_pitch,
|
| 190 |
-
f0_autotune_strength,
|
| 191 |
-
proposal_pitch_threshold,
|
| 192 |
-
rms_mix_rate,
|
| 193 |
-
protect,
|
| 194 |
-
filter_radius,
|
| 195 |
-
silent_threshold,
|
| 196 |
-
extra_convert_size,
|
| 197 |
-
cross_fade_overlap_size,
|
| 198 |
-
vad_sensitivity,
|
| 199 |
-
vad_frame_ms,
|
| 200 |
-
clean_audio,
|
| 201 |
-
clean_strength
|
| 202 |
-
],
|
| 203 |
-
outputs=[json_button_hidden]
|
| 204 |
-
)
|
| 205 |
-
stop_realtime.click(
|
| 206 |
-
fn=None,
|
| 207 |
-
js="StopAudioStream",
|
| 208 |
-
inputs=[],
|
| 209 |
-
outputs=[json_button_hidden]
|
| 210 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/training/child/create_dataset.py
DELETED
|
@@ -1,282 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.training import create_dataset
|
| 9 |
-
from main.app.core.ui import visible, valueFalse_interactive, create_dataset_change
|
| 10 |
-
from main.app.variables import translations, sample_rate_choice, uvr_model, reverb_models, denoise_models, vr_models, mdx_models
|
| 11 |
-
|
| 12 |
-
def create_dataset_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["create_dataset_markdown_2"])
|
| 15 |
-
with gr.Group():
|
| 16 |
-
with gr.Row():
|
| 17 |
-
separate = gr.Checkbox(label=translations["separator_tab"], value=False, interactive=True)
|
| 18 |
-
clean_dataset = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
|
| 19 |
-
skip_seconds = gr.Checkbox(label=translations["skip"], value=False, interactive=True)
|
| 20 |
-
separate_reverb = gr.Checkbox(label=translations["dereveb_audio"], value=False, interactive=False)
|
| 21 |
-
with gr.Row(visible=False) as row:
|
| 22 |
-
enable_tta = gr.Checkbox(label=translations["enable_tta"], value=False, interactive=False)
|
| 23 |
-
high_end_process = gr.Checkbox(label=translations["high_end_process"], value=False, interactive=False)
|
| 24 |
-
enable_post_process = gr.Checkbox(label=translations["enable_post_process"], value=False, interactive=False)
|
| 25 |
-
enable_denoise = gr.Checkbox(label=translations["denoise_mdx"], value=False, interactive=False)
|
| 26 |
-
with gr.Row():
|
| 27 |
-
dataset_url = gr.Textbox(label=translations["url_audio"], info=translations["create_dataset_url"], value="", placeholder="https://www.youtube.com/...", interactive=True, scale=5)
|
| 28 |
-
output_dataset = gr.Textbox(label=translations["output_data"], info=translations["output_data_info"], value="dataset", placeholder="dataset", interactive=True)
|
| 29 |
-
with gr.Row():
|
| 30 |
-
create_dataset_button = gr.Button(translations["createdataset"], variant="primary", scale=2, min_width=4000)
|
| 31 |
-
with gr.Row(visible=False) as row_2:
|
| 32 |
-
model_name = gr.Dropdown(label=translations["separator_model"], value=uvr_model[0], choices=uvr_model, interactive=True)
|
| 33 |
-
reverb_model = gr.Dropdown(label=translations["dereveb_model"], value=list(reverb_models.keys())[0], choices=list(reverb_models.keys()), interactive=True)
|
| 34 |
-
denoise_model = gr.Dropdown(label=translations["denoise_model"], value=list(denoise_models.keys())[0], choices=list(denoise_models.keys()), interactive=True, visible=False)
|
| 35 |
-
with gr.Row():
|
| 36 |
-
with gr.Column(visible=False) as row_3:
|
| 37 |
-
with gr.Group():
|
| 38 |
-
with gr.Row():
|
| 39 |
-
overlap = gr.Radio(label=translations["overlap"], info=translations["overlap_info"], choices=["0.25", "0.5", "0.75", "0.99"], value="0.25", interactive=True)
|
| 40 |
-
with gr.Row():
|
| 41 |
-
window_size = gr.Slider(label=translations["window_size"], info=translations["window_size_info"], minimum=320, maximum=1024, value=512, step=32, interactive=True, visible=False)
|
| 42 |
-
with gr.Row():
|
| 43 |
-
shifts = gr.Slider(label=translations["shift"], info=translations["shift_info"], minimum=1, maximum=20, value=2, step=1, interactive=True)
|
| 44 |
-
segments_size = gr.Slider(label=translations["segments_size"], info=translations["segments_size_info"], minimum=32, maximum=3072, value=256, step=32, interactive=True)
|
| 45 |
-
with gr.Row():
|
| 46 |
-
batch_size = gr.Slider(label=translations["batch_size"], info=translations["mdx_batch_size_info"], minimum=1, maximum=64, value=1, step=1, interactive=True, visible=False)
|
| 47 |
-
hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=8192, value=1024, step=1, interactive=True, visible=False)
|
| 48 |
-
with gr.Row():
|
| 49 |
-
post_process_threshold = gr.Slider(label=translations['post_process_threshold'], info=translations["post_process_threshold_info"], minimum=0.1, maximum=0.3, value=0.2, step=0.1, interactive=True, visible=False)
|
| 50 |
-
aggression = gr.Slider(label=translations['aggression'], info=translations["aggression_info"], minimum=1, maximum=50, value=5, step=1, interactive=True, visible=False)
|
| 51 |
-
with gr.Column():
|
| 52 |
-
sample_rate = gr.Radio(choices=sample_rate_choice, value=48000, label=translations["sr"], info=translations["sr_info"], interactive=True)
|
| 53 |
-
clean_strength = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.5, label=translations["clean_strength"], info=translations["clean_strength_info"], interactive=True, visible=False)
|
| 54 |
-
with gr.Row():
|
| 55 |
-
skip_start = gr.Textbox(label=translations["skip_start"], info=translations["skip_start_info"], value="", placeholder="0,...", interactive=True, visible=skip_seconds.value)
|
| 56 |
-
skip_end = gr.Textbox(label=translations["skip_end"], info=translations["skip_end_info"], value="", placeholder="0,...", interactive=True, visible=skip_seconds.value)
|
| 57 |
-
create_dataset_info = gr.Textbox(label=translations["create_dataset_info"], value="", interactive=False, lines=2)
|
| 58 |
-
with gr.Row():
|
| 59 |
-
separate.change(
|
| 60 |
-
fn=lambda a: [visible(a) for _ in range(3)],
|
| 61 |
-
inputs=[separate],
|
| 62 |
-
outputs=[
|
| 63 |
-
row,
|
| 64 |
-
row_2,
|
| 65 |
-
row_3
|
| 66 |
-
]
|
| 67 |
-
)
|
| 68 |
-
separate.change(
|
| 69 |
-
fn=valueFalse_interactive,
|
| 70 |
-
inputs=[separate],
|
| 71 |
-
outputs=[separate_reverb]
|
| 72 |
-
)
|
| 73 |
-
separate.change(
|
| 74 |
-
fn=create_dataset_change,
|
| 75 |
-
inputs=[
|
| 76 |
-
model_name,
|
| 77 |
-
reverb_model,
|
| 78 |
-
enable_post_process,
|
| 79 |
-
separate_reverb,
|
| 80 |
-
enable_denoise
|
| 81 |
-
],
|
| 82 |
-
outputs=[
|
| 83 |
-
reverb_model,
|
| 84 |
-
overlap,
|
| 85 |
-
segments_size,
|
| 86 |
-
hop_length,
|
| 87 |
-
batch_size,
|
| 88 |
-
shifts,
|
| 89 |
-
window_size,
|
| 90 |
-
aggression,
|
| 91 |
-
post_process_threshold,
|
| 92 |
-
denoise_model,
|
| 93 |
-
enable_tta,
|
| 94 |
-
high_end_process,
|
| 95 |
-
enable_post_process,
|
| 96 |
-
]
|
| 97 |
-
)
|
| 98 |
-
with gr.Row():
|
| 99 |
-
model_name.change(
|
| 100 |
-
fn=create_dataset_change,
|
| 101 |
-
inputs=[
|
| 102 |
-
model_name,
|
| 103 |
-
reverb_model,
|
| 104 |
-
enable_post_process,
|
| 105 |
-
separate_reverb,
|
| 106 |
-
enable_denoise
|
| 107 |
-
],
|
| 108 |
-
outputs=[
|
| 109 |
-
reverb_model,
|
| 110 |
-
overlap,
|
| 111 |
-
segments_size,
|
| 112 |
-
hop_length,
|
| 113 |
-
batch_size,
|
| 114 |
-
shifts,
|
| 115 |
-
window_size,
|
| 116 |
-
aggression,
|
| 117 |
-
post_process_threshold,
|
| 118 |
-
denoise_model,
|
| 119 |
-
enable_tta,
|
| 120 |
-
high_end_process,
|
| 121 |
-
enable_post_process,
|
| 122 |
-
]
|
| 123 |
-
)
|
| 124 |
-
reverb_model.change(
|
| 125 |
-
fn=create_dataset_change,
|
| 126 |
-
inputs=[
|
| 127 |
-
model_name,
|
| 128 |
-
reverb_model,
|
| 129 |
-
enable_post_process,
|
| 130 |
-
separate_reverb,
|
| 131 |
-
enable_denoise
|
| 132 |
-
],
|
| 133 |
-
outputs=[
|
| 134 |
-
reverb_model,
|
| 135 |
-
overlap,
|
| 136 |
-
segments_size,
|
| 137 |
-
hop_length,
|
| 138 |
-
batch_size,
|
| 139 |
-
shifts,
|
| 140 |
-
window_size,
|
| 141 |
-
aggression,
|
| 142 |
-
post_process_threshold,
|
| 143 |
-
denoise_model,
|
| 144 |
-
enable_tta,
|
| 145 |
-
high_end_process,
|
| 146 |
-
enable_post_process,
|
| 147 |
-
]
|
| 148 |
-
)
|
| 149 |
-
denoise_model.change(
|
| 150 |
-
fn=create_dataset_change,
|
| 151 |
-
inputs=[
|
| 152 |
-
model_name,
|
| 153 |
-
reverb_model,
|
| 154 |
-
enable_post_process,
|
| 155 |
-
separate_reverb,
|
| 156 |
-
enable_denoise
|
| 157 |
-
],
|
| 158 |
-
outputs=[
|
| 159 |
-
reverb_model,
|
| 160 |
-
overlap,
|
| 161 |
-
segments_size,
|
| 162 |
-
hop_length,
|
| 163 |
-
batch_size,
|
| 164 |
-
shifts,
|
| 165 |
-
window_size,
|
| 166 |
-
aggression,
|
| 167 |
-
post_process_threshold,
|
| 168 |
-
denoise_model,
|
| 169 |
-
enable_tta,
|
| 170 |
-
high_end_process,
|
| 171 |
-
enable_post_process,
|
| 172 |
-
]
|
| 173 |
-
)
|
| 174 |
-
with gr.Row():
|
| 175 |
-
separate_reverb.change(
|
| 176 |
-
fn=create_dataset_change,
|
| 177 |
-
inputs=[
|
| 178 |
-
model_name,
|
| 179 |
-
reverb_model,
|
| 180 |
-
enable_post_process,
|
| 181 |
-
separate_reverb,
|
| 182 |
-
enable_denoise
|
| 183 |
-
],
|
| 184 |
-
outputs=[
|
| 185 |
-
reverb_model,
|
| 186 |
-
overlap,
|
| 187 |
-
segments_size,
|
| 188 |
-
hop_length,
|
| 189 |
-
batch_size,
|
| 190 |
-
shifts,
|
| 191 |
-
window_size,
|
| 192 |
-
aggression,
|
| 193 |
-
post_process_threshold,
|
| 194 |
-
denoise_model,
|
| 195 |
-
enable_tta,
|
| 196 |
-
high_end_process,
|
| 197 |
-
enable_post_process,
|
| 198 |
-
]
|
| 199 |
-
)
|
| 200 |
-
enable_denoise.change(
|
| 201 |
-
fn=create_dataset_change,
|
| 202 |
-
inputs=[
|
| 203 |
-
model_name,
|
| 204 |
-
reverb_model,
|
| 205 |
-
enable_post_process,
|
| 206 |
-
separate_reverb,
|
| 207 |
-
enable_denoise
|
| 208 |
-
],
|
| 209 |
-
outputs=[
|
| 210 |
-
reverb_model,
|
| 211 |
-
overlap,
|
| 212 |
-
segments_size,
|
| 213 |
-
hop_length,
|
| 214 |
-
batch_size,
|
| 215 |
-
shifts,
|
| 216 |
-
window_size,
|
| 217 |
-
aggression,
|
| 218 |
-
post_process_threshold,
|
| 219 |
-
denoise_model,
|
| 220 |
-
enable_tta,
|
| 221 |
-
high_end_process,
|
| 222 |
-
enable_post_process,
|
| 223 |
-
]
|
| 224 |
-
)
|
| 225 |
-
with gr.Row():
|
| 226 |
-
skip_seconds.change(
|
| 227 |
-
fn=lambda a: [visible(a) for _ in range(2)],
|
| 228 |
-
inputs=[skip_seconds],
|
| 229 |
-
outputs=[
|
| 230 |
-
skip_start,
|
| 231 |
-
skip_end
|
| 232 |
-
]
|
| 233 |
-
)
|
| 234 |
-
clean_dataset.change(
|
| 235 |
-
fn=visible,
|
| 236 |
-
inputs=[clean_dataset],
|
| 237 |
-
outputs=[clean_strength]
|
| 238 |
-
)
|
| 239 |
-
with gr.Row():
|
| 240 |
-
model_name.change(
|
| 241 |
-
fn=lambda a: valueFalse_interactive(a in list(mdx_models.keys()) + list(vr_models.keys())),
|
| 242 |
-
inputs=[model_name],
|
| 243 |
-
outputs=[enable_denoise]
|
| 244 |
-
)
|
| 245 |
-
separate_reverb.change(
|
| 246 |
-
fn=valueFalse_interactive,
|
| 247 |
-
inputs=[separate_reverb],
|
| 248 |
-
outputs=[enable_denoise]
|
| 249 |
-
)
|
| 250 |
-
with gr.Row():
|
| 251 |
-
create_dataset_button.click(
|
| 252 |
-
fn=create_dataset,
|
| 253 |
-
inputs=[
|
| 254 |
-
dataset_url,
|
| 255 |
-
output_dataset,
|
| 256 |
-
skip_seconds,
|
| 257 |
-
skip_start,
|
| 258 |
-
skip_end,
|
| 259 |
-
separate,
|
| 260 |
-
model_name,
|
| 261 |
-
reverb_model,
|
| 262 |
-
denoise_model,
|
| 263 |
-
sample_rate,
|
| 264 |
-
shifts,
|
| 265 |
-
batch_size,
|
| 266 |
-
overlap,
|
| 267 |
-
aggression,
|
| 268 |
-
hop_length,
|
| 269 |
-
window_size,
|
| 270 |
-
segments_size,
|
| 271 |
-
post_process_threshold,
|
| 272 |
-
enable_tta,
|
| 273 |
-
enable_denoise,
|
| 274 |
-
high_end_process,
|
| 275 |
-
enable_post_process,
|
| 276 |
-
separate_reverb,
|
| 277 |
-
clean_dataset,
|
| 278 |
-
clean_strength
|
| 279 |
-
],
|
| 280 |
-
outputs=[create_dataset_info],
|
| 281 |
-
api_name="create_dataset"
|
| 282 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/training/child/create_reference.py
DELETED
|
@@ -1,97 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.training import create_reference
|
| 9 |
-
from main.app.core.ui import visible, change_audios_choices, unlock_f0, shutil_move, change_embedders_mode
|
| 10 |
-
from main.app.variables import translations, paths_for_files, method_f0, hybrid_f0_method, file_types, configs, embedders_model, embedders_mode
|
| 11 |
-
|
| 12 |
-
def create_reference_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["create_reference_markdown_2"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
pitch_guidance = gr.Checkbox(label=translations["training_pitch"], value=True, interactive=True)
|
| 17 |
-
use_energy = gr.Checkbox(label=translations["train&energy"], value=False, interactive=True)
|
| 18 |
-
f0_autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
|
| 19 |
-
proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
|
| 20 |
-
with gr.Row():
|
| 21 |
-
create_reference_button = gr.Button(translations["create_reference"], variant="primary")
|
| 22 |
-
with gr.Row():
|
| 23 |
-
f0_up_key = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
|
| 24 |
-
proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
|
| 25 |
-
with gr.Row():
|
| 26 |
-
filter_radius = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
|
| 27 |
-
f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=f0_autotune.value)
|
| 28 |
-
with gr.Row():
|
| 29 |
-
with gr.Column():
|
| 30 |
-
with gr.Accordion(translations["input_output"], open=False):
|
| 31 |
-
with gr.Column():
|
| 32 |
-
input_audio = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
|
| 33 |
-
reference_name = gr.Textbox(label=translations["reference_name"], value="reference", placeholder="reference", info=translations["reference_name_info"], interactive=True)
|
| 34 |
-
with gr.Column():
|
| 35 |
-
refresh_audio = gr.Button(translations["refresh"])
|
| 36 |
-
with gr.Column():
|
| 37 |
-
upload_audio = gr.Files(label=translations["drop_audio"], file_types=file_types)
|
| 38 |
-
with gr.Column():
|
| 39 |
-
play_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
|
| 40 |
-
with gr.Column() as f0_method_column:
|
| 41 |
-
with gr.Accordion(label=translations["f0_method"], open=False):
|
| 42 |
-
with gr.Group():
|
| 43 |
-
with gr.Row():
|
| 44 |
-
onnx_f0 = gr.Checkbox(label=translations["f0_onnx_mode"], value=False, interactive=True)
|
| 45 |
-
unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], value=False, interactive=True)
|
| 46 |
-
f0_method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
|
| 47 |
-
f0_hybrid_method = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=f0_method.value == "hybrid")
|
| 48 |
-
with gr.Row():
|
| 49 |
-
alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
|
| 50 |
-
with gr.Column():
|
| 51 |
-
with gr.Accordion(label=translations["hubert_model"], open=False):
|
| 52 |
-
with gr.Row():
|
| 53 |
-
version = gr.Radio(label=translations["training_version"], info=translations["training_version_info"], choices=["v1", "v2"], value="v2", interactive=True)
|
| 54 |
-
with gr.Group():
|
| 55 |
-
embedder_mode = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
| 56 |
-
embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
| 57 |
-
with gr.Row():
|
| 58 |
-
embedders_custom = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders.value == "custom")
|
| 59 |
-
with gr.Row():
|
| 60 |
-
create_reference_info = gr.Textbox(label=translations["reference_info"], value="", interactive=False, lines=2)
|
| 61 |
-
with gr.Row():
|
| 62 |
-
f0_autotune.change(fn=visible, inputs=[f0_autotune], outputs=[f0_autotune_strength])
|
| 63 |
-
proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
|
| 64 |
-
unlock_full_method.change(fn=unlock_f0, inputs=[unlock_full_method], outputs=[f0_method])
|
| 65 |
-
with gr.Row():
|
| 66 |
-
input_audio.change(fn=lambda audio: audio, inputs=[input_audio], outputs=[play_audio])
|
| 67 |
-
refresh_audio.click(fn=change_audios_choices, inputs=[input_audio], outputs=[input_audio])
|
| 68 |
-
f0_method.change(fn=lambda method: [visible(method == "hybrid") for _ in range(2)], inputs=[f0_method], outputs=[f0_hybrid_method, alpha])
|
| 69 |
-
with gr.Row():
|
| 70 |
-
upload_audio.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[upload_audio], outputs=[input_audio])
|
| 71 |
-
embedder_mode.change(fn=change_embedders_mode, inputs=[embedder_mode], outputs=[embedders])
|
| 72 |
-
embedders.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders], outputs=[embedders_custom])
|
| 73 |
-
with gr.Row():
|
| 74 |
-
pitch_guidance.change(fn=visible, inputs=[pitch_guidance], outputs=[f0_method_column])
|
| 75 |
-
create_reference_button.click(
|
| 76 |
-
fn=create_reference,
|
| 77 |
-
inputs=[
|
| 78 |
-
input_audio,
|
| 79 |
-
reference_name,
|
| 80 |
-
pitch_guidance,
|
| 81 |
-
use_energy,
|
| 82 |
-
version,
|
| 83 |
-
embedders,
|
| 84 |
-
embedder_mode,
|
| 85 |
-
f0_method,
|
| 86 |
-
onnx_f0,
|
| 87 |
-
f0_up_key,
|
| 88 |
-
filter_radius,
|
| 89 |
-
f0_autotune,
|
| 90 |
-
f0_autotune_strength,
|
| 91 |
-
proposal_pitch,
|
| 92 |
-
proposal_pitch_threshold,
|
| 93 |
-
alpha
|
| 94 |
-
],
|
| 95 |
-
outputs=[create_reference_info],
|
| 96 |
-
api_name="create_reference"
|
| 97 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/training/child/training.py
DELETED
|
@@ -1,259 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.process import zip_file
|
| 9 |
-
from main.app.core.training import preprocess, extract, create_index, training
|
| 10 |
-
from main.app.variables import translations, model_name, index_path, method_f0, embedders_mode, embedders_model, pretrainedD, pretrainedG, config, file_types, hybrid_f0_method, reference_list
|
| 11 |
-
from main.app.core.ui import gr_warning, visible, unlock_f0, hoplength_show, change_models_choices, get_gpu_info, change_embedders_mode, pitch_guidance_lock, vocoders_lock, unlock_ver, unlock_vocoder, change_pretrained_choices, gpu_number_str, shutil_move, change_reference_choices
|
| 12 |
-
|
| 13 |
-
def training_model_tab():
|
| 14 |
-
with gr.Row():
|
| 15 |
-
gr.Markdown(translations["training_markdown"])
|
| 16 |
-
with gr.Row():
|
| 17 |
-
with gr.Column():
|
| 18 |
-
with gr.Row():
|
| 19 |
-
with gr.Column():
|
| 20 |
-
training_name = gr.Textbox(label=translations["modelname"], info=translations["training_model_name"], value="", placeholder=translations["modelname"], interactive=True)
|
| 21 |
-
training_sr = gr.Radio(label=translations["sample_rate"], info=translations["sample_rate_info"], choices=["32k", "40k", "48k"], value="48k", interactive=True)
|
| 22 |
-
training_ver = gr.Radio(label=translations["training_version"], info=translations["training_version_info"], choices=["v1", "v2"], value="v2", interactive=True)
|
| 23 |
-
with gr.Row():
|
| 24 |
-
clean_dataset = gr.Checkbox(label=translations["clear_dataset"], value=False, interactive=True)
|
| 25 |
-
process_effects = gr.Checkbox(label=translations["preprocess_effect"], value=False, interactive=True)
|
| 26 |
-
training_f0 = gr.Checkbox(label=translations["training_pitch"], value=True, interactive=True)
|
| 27 |
-
custom_reference = gr.Checkbox(label=translations["custom_reference"], value=False, interactive=True)
|
| 28 |
-
checkpointing1 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
|
| 29 |
-
upload = gr.Checkbox(label=translations["upload_dataset"], value=False, interactive=True)
|
| 30 |
-
with gr.Row():
|
| 31 |
-
preprocess_split_audio_mode = gr.Radio(label=translations["split_audio_mode"], info=translations["split_audio_mode_info"], value="Automatic", choices=["Automatic", "Simple", "Skip"], interactive=True)
|
| 32 |
-
preprocess_normalization_mode = gr.Radio(label=translations["normalization_mode"], info=translations["normalization_mode_info"], value="none", choices=["none", "pre", "post"], interactive=True)
|
| 33 |
-
with gr.Row(visible=custom_reference.value) as custom_reference_row:
|
| 34 |
-
with gr.Accordion(translations["custom_reference"], open=True):
|
| 35 |
-
reference_name = gr.Dropdown(label=translations["reference_name"], info=translations["reference_name_info"], choices=reference_list, value=reference_list[0] if len(reference_list) >= 1 else "", allow_custom_value=True, interactive=True)
|
| 36 |
-
reference_refresh = gr.Button(translations["refresh"], scale=2)
|
| 37 |
-
with gr.Row(visible=clean_dataset.value) as clean_dataset_row:
|
| 38 |
-
clean_dataset_strength = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.7, step=0.1, interactive=True)
|
| 39 |
-
with gr.Column():
|
| 40 |
-
preprocess_button = gr.Button(translations["preprocess_button"], scale=2)
|
| 41 |
-
upload_dataset = gr.Files(label=translations["drop_audio"], file_types=file_types, visible=upload.value)
|
| 42 |
-
preprocess_info = gr.Textbox(label=translations["preprocess_info"], value="", interactive=False, container=True, lines=2)
|
| 43 |
-
with gr.Column():
|
| 44 |
-
with gr.Row():
|
| 45 |
-
with gr.Column():
|
| 46 |
-
with gr.Accordion(label=translations["f0_method"], open=False):
|
| 47 |
-
with gr.Group():
|
| 48 |
-
with gr.Row():
|
| 49 |
-
onnx_f0_mode2 = gr.Checkbox(label=translations["f0_onnx_mode"], value=False, interactive=True)
|
| 50 |
-
unlock_full_method4 = gr.Checkbox(label=translations["f0_unlock"], value=False, interactive=True)
|
| 51 |
-
autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
|
| 52 |
-
extract_method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
|
| 53 |
-
extract_hybrid_method = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=extract_method.value == "hybrid")
|
| 54 |
-
extract_hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
|
| 55 |
-
f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune.value)
|
| 56 |
-
alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
|
| 57 |
-
with gr.Accordion(label=translations["hubert_model"], open=False):
|
| 58 |
-
with gr.Group():
|
| 59 |
-
embed_mode2 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
| 60 |
-
extract_embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
| 61 |
-
with gr.Row():
|
| 62 |
-
extract_embedders_custom = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=extract_embedders.value == "custom")
|
| 63 |
-
with gr.Column():
|
| 64 |
-
extract_button = gr.Button(translations["extract_button"], scale=2)
|
| 65 |
-
extract_info = gr.Textbox(label=translations["extract_info"], value="", interactive=False, lines=2)
|
| 66 |
-
with gr.Column():
|
| 67 |
-
with gr.Row():
|
| 68 |
-
with gr.Column():
|
| 69 |
-
total_epochs = gr.Slider(label=translations["total_epoch"], info=translations["total_epoch_info"], minimum=1, maximum=10000, value=300, step=1, interactive=True)
|
| 70 |
-
save_epochs = gr.Slider(label=translations["save_epoch"], info=translations["save_epoch_info"], minimum=1, maximum=10000, value=50, step=1, interactive=True)
|
| 71 |
-
with gr.Column():
|
| 72 |
-
index_button = gr.Button(f"3. {translations['create_index']}", variant="primary", scale=2)
|
| 73 |
-
training_button = gr.Button(f"4. {translations['training_model']}", variant="primary", scale=2)
|
| 74 |
-
with gr.Row():
|
| 75 |
-
with gr.Accordion(label=translations["setting"], open=False):
|
| 76 |
-
with gr.Row():
|
| 77 |
-
index_algorithm = gr.Radio(label=translations["index_algorithm"], info=translations["index_algorithm_info"], choices=["Auto", "Faiss", "KMeans"], value="Auto", interactive=True)
|
| 78 |
-
with gr.Row():
|
| 79 |
-
cache_in_gpu = gr.Checkbox(label=translations["cache_in_gpu"], info=translations["cache_in_gpu_info"], value=True, interactive=True)
|
| 80 |
-
rms_extract = gr.Checkbox(label=translations["train&energy"], info=translations["train&energy_info"], value=False, interactive=True)
|
| 81 |
-
overtraining_detector = gr.Checkbox(label=translations["overtraining_detector"], info=translations["overtraining_detector_info"], value=False, interactive=True)
|
| 82 |
-
with gr.Row():
|
| 83 |
-
custom_dataset = gr.Checkbox(label=translations["custom_dataset"], info=translations["custom_dataset_info"], value=False, interactive=True)
|
| 84 |
-
save_only_latest = gr.Checkbox(label=translations["save_only_latest"], info=translations["save_only_latest_info"], value=True, interactive=True)
|
| 85 |
-
save_every_weights = gr.Checkbox(label=translations["save_every_weights"], info=translations["save_every_weights_info"], value=True, interactive=True)
|
| 86 |
-
with gr.Row():
|
| 87 |
-
clean_up = gr.Checkbox(label=translations["cleanup_training"], info=translations["cleanup_training_info"], value=False, interactive=True)
|
| 88 |
-
not_use_pretrain = gr.Checkbox(label=translations["not_use_pretrain_2"], info=translations["not_use_pretrain_info"], value=False, interactive=True)
|
| 89 |
-
custom_pretrain = gr.Checkbox(label=translations["custom_pretrain"], info=translations["custom_pretrain_info"], value=False, interactive=True)
|
| 90 |
-
with gr.Column():
|
| 91 |
-
dataset_path = gr.Textbox(label=translations["dataset_folder"], value="dataset", interactive=True, visible=custom_dataset.value)
|
| 92 |
-
with gr.Column():
|
| 93 |
-
with gr.Row(visible=False) as simple_option:
|
| 94 |
-
chunk_len = gr.Slider(minimum=0.5, maximum=5.0, value=3.0, step=0.1, label=translations["chunk_length"], info=translations["chunk_length_info"], interactive=True)
|
| 95 |
-
overlap_len = gr.Slider(minimum=0.0, maximum=0.4, value=0.3, step=0.1, label=translations["overlap_length"], info=translations["overlap_length_info"], interactive=True)
|
| 96 |
-
threshold = gr.Slider(minimum=1, maximum=100, value=50, step=1, label=translations["threshold"], interactive=True, visible=overtraining_detector.value)
|
| 97 |
-
with gr.Accordion(translations["setting_cpu_gpu"], open=False):
|
| 98 |
-
with gr.Column():
|
| 99 |
-
gpu_number = gr.Textbox(label=translations["gpu_number"], value=gpu_number_str(), info=translations["gpu_number_info"], interactive=True)
|
| 100 |
-
gpu_info = gr.Textbox(label=translations["gpu_info"], value=get_gpu_info(), info=translations["gpu_info_2"], interactive=False)
|
| 101 |
-
cpu_core = gr.Slider(label=translations["cpu_core"], info=translations["cpu_core_info"], minimum=1, maximum=os.cpu_count(), value=os.cpu_count(), step=1, interactive=True)
|
| 102 |
-
train_batch_size = gr.Slider(label=translations["batch_size"], info=translations["batch_size_info"], minimum=1, maximum=64, value=8, step=1, interactive=True)
|
| 103 |
-
with gr.Group():
|
| 104 |
-
multiscale_mel_loss = gr.Checkbox(label=translations["multiscale_mel_loss"], info=translations["multiscale_mel_loss_info"], value=False, interactive=True)
|
| 105 |
-
vocoders = gr.Radio(label=translations["vocoder"], info=translations["vocoder_info"], choices=["Default", "MRF-HiFi-GAN", "RefineGAN"], value="Default", interactive=True)
|
| 106 |
-
with gr.Row():
|
| 107 |
-
deterministic = gr.Checkbox(label=translations["deterministic"], info=translations["deterministic_info"], value=False, interactive=config.device.startswith("cuda"))
|
| 108 |
-
benchmark = gr.Checkbox(label=translations["benchmark"], info=translations["benchmark_info"], value=False, interactive=config.device.startswith("cuda"))
|
| 109 |
-
with gr.Row():
|
| 110 |
-
optimizer = gr.Radio(label=translations["optimizer"], info=translations["optimizer_info"], value="AdamW", choices=["AdamW", "RAdam", "AnyPrecisionAdamW"], interactive=True)
|
| 111 |
-
with gr.Row():
|
| 112 |
-
model_author = gr.Textbox(label=translations["training_author"], info=translations["training_author_info"], value="", placeholder=translations["training_author"], interactive=True)
|
| 113 |
-
with gr.Row():
|
| 114 |
-
with gr.Column():
|
| 115 |
-
with gr.Accordion(translations["custom_pretrain_info"], open=False, visible=custom_pretrain.value and not not_use_pretrain.value) as pretrain_setting:
|
| 116 |
-
pretrained_D = gr.Dropdown(label=translations["pretrain_file"].format(dg="D"), choices=pretrainedD, value=pretrainedD[0] if len(pretrainedD) > 0 else '', interactive=True, allow_custom_value=True)
|
| 117 |
-
pretrained_G = gr.Dropdown(label=translations["pretrain_file"].format(dg="G"), choices=pretrainedG, value=pretrainedG[0] if len(pretrainedG) > 0 else '', interactive=True, allow_custom_value=True)
|
| 118 |
-
refresh_pretrain = gr.Button(translations["refresh"], scale=2)
|
| 119 |
-
with gr.Row():
|
| 120 |
-
training_info = gr.Textbox(label=translations["train_info"], value="", interactive=False, lines=3)
|
| 121 |
-
with gr.Row():
|
| 122 |
-
with gr.Column():
|
| 123 |
-
with gr.Accordion(translations["export_model"], open=False):
|
| 124 |
-
with gr.Row():
|
| 125 |
-
model_file = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 126 |
-
index_file = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 127 |
-
with gr.Row():
|
| 128 |
-
refresh_file = gr.Button(f"1. {translations['refresh']}", scale=2)
|
| 129 |
-
zip_model = gr.Button(translations["zip_model"], variant="primary", scale=2)
|
| 130 |
-
with gr.Row():
|
| 131 |
-
zip_output = gr.File(label=translations["output_zip"], file_types=[".zip"], interactive=False, visible=False)
|
| 132 |
-
with gr.Row():
|
| 133 |
-
vocoders.change(fn=pitch_guidance_lock, inputs=[vocoders], outputs=[training_f0])
|
| 134 |
-
training_f0.change(fn=vocoders_lock, inputs=[training_f0, vocoders], outputs=[vocoders])
|
| 135 |
-
unlock_full_method4.change(fn=unlock_f0, inputs=[unlock_full_method4], outputs=[extract_method])
|
| 136 |
-
with gr.Row():
|
| 137 |
-
refresh_file.click(fn=change_models_choices, inputs=[], outputs=[model_file, index_file])
|
| 138 |
-
zip_model.click(fn=zip_file, inputs=[training_name, model_file, index_file], outputs=[zip_output])
|
| 139 |
-
dataset_path.change(fn=lambda folder: os.makedirs(folder, exist_ok=True), inputs=[dataset_path], outputs=[])
|
| 140 |
-
with gr.Row():
|
| 141 |
-
upload.change(fn=visible, inputs=[upload], outputs=[upload_dataset])
|
| 142 |
-
overtraining_detector.change(fn=visible, inputs=[overtraining_detector], outputs=[threshold])
|
| 143 |
-
clean_dataset.change(fn=visible, inputs=[clean_dataset], outputs=[clean_dataset_row])
|
| 144 |
-
with gr.Row():
|
| 145 |
-
custom_dataset.change(fn=lambda custom_dataset: [visible(custom_dataset), "dataset"],inputs=[custom_dataset], outputs=[dataset_path, dataset_path])
|
| 146 |
-
training_ver.change(fn=unlock_vocoder, inputs=[training_ver, vocoders], outputs=[vocoders])
|
| 147 |
-
vocoders.change(fn=unlock_ver, inputs=[training_ver, vocoders], outputs=[training_ver])
|
| 148 |
-
with gr.Row():
|
| 149 |
-
custom_reference.change(fn=visible, inputs=[custom_reference], outputs=[custom_reference_row])
|
| 150 |
-
extract_method.change(fn=lambda method, hybrid: [visible(method == "hybrid"), visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[extract_method, extract_hybrid_method], outputs=[extract_hybrid_method, alpha, extract_hop_length])
|
| 151 |
-
extract_hybrid_method.change(fn=hoplength_show, inputs=[extract_method, extract_hybrid_method], outputs=[extract_hop_length])
|
| 152 |
-
with gr.Row():
|
| 153 |
-
autotune.change(fn=visible, inputs=[autotune], outputs=[f0_autotune_strength])
|
| 154 |
-
preprocess_split_audio_mode.change(fn=lambda a: visible(a == "Simple"), inputs=[preprocess_split_audio_mode], outputs=[simple_option])
|
| 155 |
-
upload_dataset.upload(
|
| 156 |
-
fn=lambda files, folder: [shutil_move(f.name, os.path.join(folder, os.path.split(f.name)[1])) for f in files] if folder != "" else gr_warning(translations["dataset_folder1"]),
|
| 157 |
-
inputs=[upload_dataset, dataset_path],
|
| 158 |
-
outputs=[],
|
| 159 |
-
api_name="upload_dataset"
|
| 160 |
-
)
|
| 161 |
-
with gr.Row():
|
| 162 |
-
not_use_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
|
| 163 |
-
custom_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
|
| 164 |
-
refresh_pretrain.click(fn=change_pretrained_choices, inputs=[], outputs=[pretrained_D, pretrained_G])
|
| 165 |
-
with gr.Row():
|
| 166 |
-
preprocess_button.click(
|
| 167 |
-
fn=preprocess,
|
| 168 |
-
inputs=[
|
| 169 |
-
training_name,
|
| 170 |
-
training_sr,
|
| 171 |
-
cpu_core,
|
| 172 |
-
preprocess_split_audio_mode,
|
| 173 |
-
process_effects,
|
| 174 |
-
dataset_path,
|
| 175 |
-
clean_dataset,
|
| 176 |
-
clean_dataset_strength,
|
| 177 |
-
chunk_len,
|
| 178 |
-
overlap_len,
|
| 179 |
-
preprocess_normalization_mode
|
| 180 |
-
],
|
| 181 |
-
outputs=[preprocess_info],
|
| 182 |
-
api_name="preprocess"
|
| 183 |
-
)
|
| 184 |
-
with gr.Row():
|
| 185 |
-
embed_mode2.change(fn=change_embedders_mode, inputs=[embed_mode2], outputs=[extract_embedders])
|
| 186 |
-
extract_embedders.change(fn=lambda extract_embedders: visible(extract_embedders == "custom"), inputs=[extract_embedders], outputs=[extract_embedders_custom])
|
| 187 |
-
reference_refresh.click(fn=change_reference_choices, inputs=[], outputs=[reference_name])
|
| 188 |
-
with gr.Row():
|
| 189 |
-
extract_button.click(
|
| 190 |
-
fn=extract,
|
| 191 |
-
inputs=[
|
| 192 |
-
training_name,
|
| 193 |
-
training_ver,
|
| 194 |
-
extract_method,
|
| 195 |
-
training_f0,
|
| 196 |
-
extract_hop_length,
|
| 197 |
-
cpu_core,
|
| 198 |
-
gpu_number,
|
| 199 |
-
training_sr,
|
| 200 |
-
extract_embedders,
|
| 201 |
-
extract_embedders_custom,
|
| 202 |
-
onnx_f0_mode2,
|
| 203 |
-
embed_mode2,
|
| 204 |
-
autotune,
|
| 205 |
-
f0_autotune_strength,
|
| 206 |
-
extract_hybrid_method,
|
| 207 |
-
rms_extract,
|
| 208 |
-
alpha
|
| 209 |
-
],
|
| 210 |
-
outputs=[extract_info],
|
| 211 |
-
api_name="extract"
|
| 212 |
-
)
|
| 213 |
-
with gr.Row():
|
| 214 |
-
index_button.click(
|
| 215 |
-
fn=create_index,
|
| 216 |
-
inputs=[
|
| 217 |
-
training_name,
|
| 218 |
-
training_ver,
|
| 219 |
-
index_algorithm
|
| 220 |
-
],
|
| 221 |
-
outputs=[training_info],
|
| 222 |
-
api_name="create_index"
|
| 223 |
-
)
|
| 224 |
-
with gr.Row():
|
| 225 |
-
training_button.click(
|
| 226 |
-
fn=training,
|
| 227 |
-
inputs=[
|
| 228 |
-
training_name,
|
| 229 |
-
training_ver,
|
| 230 |
-
save_epochs,
|
| 231 |
-
save_only_latest,
|
| 232 |
-
save_every_weights,
|
| 233 |
-
total_epochs,
|
| 234 |
-
training_sr,
|
| 235 |
-
train_batch_size,
|
| 236 |
-
gpu_number,
|
| 237 |
-
training_f0,
|
| 238 |
-
not_use_pretrain,
|
| 239 |
-
custom_pretrain,
|
| 240 |
-
pretrained_G,
|
| 241 |
-
pretrained_D,
|
| 242 |
-
overtraining_detector,
|
| 243 |
-
threshold,
|
| 244 |
-
clean_up,
|
| 245 |
-
cache_in_gpu,
|
| 246 |
-
model_author,
|
| 247 |
-
vocoders,
|
| 248 |
-
checkpointing1,
|
| 249 |
-
deterministic,
|
| 250 |
-
benchmark,
|
| 251 |
-
optimizer,
|
| 252 |
-
rms_extract,
|
| 253 |
-
custom_reference,
|
| 254 |
-
reference_name,
|
| 255 |
-
multiscale_mel_loss
|
| 256 |
-
],
|
| 257 |
-
outputs=[training_info],
|
| 258 |
-
api_name="training_model"
|
| 259 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/training/training.py
DELETED
|
@@ -1,25 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.variables import translations, configs
|
| 9 |
-
from main.app.tabs.training.child.training import training_model_tab
|
| 10 |
-
from main.app.tabs.training.child.create_dataset import create_dataset_tab
|
| 11 |
-
from main.app.tabs.training.child.create_reference import create_reference_tab
|
| 12 |
-
|
| 13 |
-
def training_tab():
|
| 14 |
-
with gr.TabItem(translations["training_model"], visible=configs.get("create_and_training_tab", True)):
|
| 15 |
-
with gr.TabItem(translations["createdataset"], visible=configs.get("create_dataset_tab", True)):
|
| 16 |
-
gr.Markdown(translations["create_dataset_markdown"])
|
| 17 |
-
create_dataset_tab()
|
| 18 |
-
|
| 19 |
-
with gr.TabItem(translations["create_reference"], visible=configs.get("create_reference_tab", True)):
|
| 20 |
-
gr.Markdown(translations["create_reference_markdown"])
|
| 21 |
-
create_reference_tab()
|
| 22 |
-
|
| 23 |
-
with gr.TabItem(translations["training_model"], visible=configs.get("training_tab", True)):
|
| 24 |
-
gr.Markdown(f"## {translations['training_model']}")
|
| 25 |
-
training_model_tab()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/variables.py
DELETED
|
@@ -1,117 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import csv
|
| 4 |
-
import json
|
| 5 |
-
import codecs
|
| 6 |
-
import logging
|
| 7 |
-
import urllib.request
|
| 8 |
-
import logging.handlers
|
| 9 |
-
|
| 10 |
-
sys.path.append(os.getcwd())
|
| 11 |
-
|
| 12 |
-
from main.configs.config import Config
|
| 13 |
-
|
| 14 |
-
# Module-level logger. Propagation is disabled so records are emitted only by
# the handlers attached below.
logger = logging.getLogger(__name__)
logger.propagate = False

config = Config()
python = sys.executable
translations = config.translations
configs_json = os.path.join("main", "configs", "config.json")

# Read the JSON settings through a context manager so the handle is closed
# immediately instead of lingering until garbage collection.
with open(configs_json, "r", encoding="utf-8") as f:
    configs = json.load(f)

if not logger.hasHandlers():
    # Console and file output share one record layout.
    log_format = "\n%(asctime)s.%(msecs)03d | %(levelname)s | %(module)s | %(message)s"
    date_format = "%Y-%m-%d %H:%M:%S"

    console_handler = logging.StreamHandler()
    console_formatter = logging.Formatter(fmt=log_format, datefmt=date_format)
    console_handler.setFormatter(console_formatter)
    console_handler.setLevel(logging.DEBUG if config.debug_mode else logging.INFO)

    # RotatingFileHandler opens its file right away and raises if the target
    # directory does not exist, so create it first.
    os.makedirs(configs["logs_path"], exist_ok=True)
    file_handler = logging.handlers.RotatingFileHandler(os.path.join(configs["logs_path"], "app.log"), maxBytes=5*1024*1024, backupCount=3, encoding='utf-8')
    file_formatter = logging.Formatter(fmt=log_format, datefmt=date_format)
    file_handler.setFormatter(file_formatter)
    file_handler.setLevel(logging.DEBUG)

    logger.addHandler(console_handler)
    logger.addHandler(file_handler)
    logger.setLevel(logging.DEBUG)

# When fp16 is enabled in the settings but the selected device is one of the
# listed back-ends, warn, switch it off in memory and persist the change.
if config.device in ["cpu", "mps", "ocl:0"] and configs.get("fp16", False):
    logger.warning(translations["fp16_not_support"])
    configs["fp16"] = config.is_half = False

    # NOTE(review): the write-back is assumed to belong inside this branch;
    # the original indentation was not available. Confirm.
    with open(configs_json, "w", encoding="utf-8") as f:
        json.dump(configs, f, indent=4)
|
| 42 |
-
|
| 43 |
-
# Mutable registries; both start empty here.
models = {}
model_options = {}

# Choice lists for f0 methods (short list, full list, hybrid combinations).
method_f0 = ["mangio-crepe-full", "crepe-full", "fcpe", "rmvpe", "harvest", "pyin", "hybrid"]
method_f0_full = ["pm-ac", "pm-cc", "pm-shs", "dio", "mangio-crepe-tiny", "mangio-crepe-small", "mangio-crepe-medium", "mangio-crepe-large", "mangio-crepe-full", "crepe-tiny", "crepe-small", "crepe-medium", "crepe-large", "crepe-full", "fcpe", "fcpe-legacy", "fcpe-previous", "rmvpe", "rmvpe-clipping", "rmvpe-medfilt", "rmvpe-clipping-medfilt", "harvest", "yin", "pyin", "swipe", "piptrack", "penn", "mangio-penn", "djcm", "djcm-clipping", "djcm-medfilt", "djcm-clipping-medfilt", "swift", "pesto", "hybrid"]
hybrid_f0_method = ["hybrid[pm+dio]", "hybrid[pm+crepe-tiny]", "hybrid[pm+crepe]", "hybrid[pm+fcpe]", "hybrid[pm+rmvpe]", "hybrid[pm+harvest]", "hybrid[pm+yin]", "hybrid[dio+crepe-tiny]", "hybrid[dio+crepe]", "hybrid[dio+fcpe]", "hybrid[dio+rmvpe]", "hybrid[dio+harvest]", "hybrid[dio+yin]", "hybrid[crepe-tiny+crepe]", "hybrid[crepe-tiny+fcpe]", "hybrid[crepe-tiny+rmvpe]", "hybrid[crepe-tiny+harvest]", "hybrid[crepe+fcpe]", "hybrid[crepe+rmvpe]", "hybrid[crepe+harvest]", "hybrid[crepe+yin]", "hybrid[fcpe+rmvpe]", "hybrid[fcpe+harvest]", "hybrid[fcpe+yin]", "hybrid[rmvpe+harvest]", "hybrid[rmvpe+yin]", "hybrid[harvest+yin]"]

# Choice lists for embedder back-ends and model names.
embedders_mode = ["fairseq", "onnx", "transformers", "spin", "whisper"]
embedders_model = ["contentvec_base", "hubert_base", "vietnamese_hubert_base", "japanese_hubert_base", "korean_hubert_base", "chinese_hubert_base", "portuguese_hubert_base", "custom"]
spin_model = ["spin-v1", "spin-v2"]
whisper_model = ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"]

# Audio extensions (with dot) and export formats (without dot). Defined before
# the directory scans so the audio scan reuses this list instead of repeating it.
file_types = [".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"]
export_format_choices = ["wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"]

# Absolute paths of every file under audios_path whose lower-cased extension is
# listed in file_types.
paths_for_files = sorted(os.path.abspath(os.path.join(root, f)) for root, _, files in os.walk(configs["audios_path"]) for f in files if os.path.splitext(f)[1].lower() in file_types)
# Names of the sub-directories of reference_path (isdir already implies exists).
reference_list = sorted(name for name in os.listdir(configs["reference_path"]) if os.path.isdir(os.path.join(configs["reference_path"], name)))
# .pth/.onnx files in weights_path, excluding names starting with "G_" or "D_".
model_name = sorted(model for model in os.listdir(configs["weights_path"]) if model.endswith((".pth", ".onnx")) and not model.startswith(("G_", "D_")))
# .index files anywhere under logs_path, skipping names that contain "trained".
index_path = sorted(os.path.join(root, name) for root, _, files in os.walk(configs["logs_path"], topdown=False) for name in files if name.endswith(".index") and "trained" not in name)

# Custom pretrained checkpoints, split by whether the file name contains "D" or "G".
pretrainedD = [model for model in os.listdir(configs["pretrained_custom_path"]) if model.endswith(".pth") and "D" in model]
pretrainedG = [model for model in os.listdir(configs["pretrained_custom_path"]) if model.endswith(".pth") and "G" in model]

# Preset files, separated by suffix, and .txt files found under f0_path.
presets_file = sorted(f for f in os.listdir(configs["presets_path"]) if f.endswith(".conversion.json"))
audio_effect_presets_file = sorted(f for f in os.listdir(configs["presets_path"]) if f.endswith(".effect.json"))
f0_file = sorted(os.path.abspath(os.path.join(root, f)) for root, _, files in os.walk(configs["f0_path"]) for f in files if f.endswith(".txt"))

language = configs.get("language", "vi-VN")
theme = configs.get("theme", "NoCrypt/miku")

edgetts = configs.get("edge_tts", ["vi-VN-HoaiMyNeural", "vi-VN-NamMinhNeural"])
google_tts_voice = configs.get("google_tts_voice", ["vi", "en"])

# These three are combined through .keys() below, so their fallback must be a
# mapping; the previous "" fallback raised AttributeError when a key was missing.
vr_models = configs.get("vr_models", {})
demucs_models = configs.get("demucs_models", {})
mdx_models = configs.get("mdx_models", {})
# Fallbacks left unchanged: how these three are consumed is not visible here.
karaoke_models = configs.get("karaoke_models", "")
reverb_models = configs.get("reverb_models", "")
denoise_models = configs.get("denoise_models", "")
uvr_model = list(demucs_models.keys()) + list(vr_models.keys()) + list(mdx_models.keys())

font = configs.get("font", "https://fonts.googleapis.com/css2?family=Courgette&display=swap")
sample_rate_choice = [8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000, 96000]
csv_path = configs["csv_path"]
|
| 87 |
-
|
| 88 |
-
# Build `allow_disk`: with --allow_all_disk on Windows it holds every logical
# drive string reported by win32api; otherwise it is an empty list.
if "--allow_all_disk" in sys.argv and sys.platform == "win32":
    try:
        import win32api
    except ImportError:
        import importlib
        import subprocess

        # Pass an argument list (no shell) so an interpreter path containing
        # spaces, e.g. under "Program Files", is not split into several words.
        subprocess.run([sys.executable, "-m", "pip", "install", "pywin32"], check=False)
        # Let the import system notice the freshly installed package.
        importlib.invalidate_caches()
        import win32api

    # The API returns a NUL-separated string with a trailing NUL; drop the
    # empty last element produced by the split.
    allow_disk = win32api.GetLogicalDriveStrings().split('\x00')[:-1]
else:
    allow_disk = []
|
| 97 |
-
|
| 98 |
-
# Fill `models` (Filename -> URL) from the spreadsheet. A local copy at
# csv_path is used when present; otherwise the sheet is downloaded and cached.
# This stays best-effort: any failure leaves `models` as it is, but the reason
# is now logged instead of being discarded.
# NOTE(review): the row loop is assumed to run for both the cached and the
# downloaded case; the original indentation was not available. Confirm.
try:
    if os.path.exists(csv_path):
        with open(csv_path, newline='', encoding='utf-8') as csv_file:
            reader = list(csv.DictReader(csv_file))
    else:
        sheet_url = codecs.decode("uggcf://qbpf.tbbtyr.pbz/fcernqfurrgf/q/1gNHnDeRULtEfz1Yieaw14USUQjWJy0Oq9k0DrCrjApb/rkcbeg?sbezng=pfi&tvq=1977693859", "rot13")
        # A timeout keeps module import from hanging on a stalled connection.
        with urllib.request.urlopen(sheet_url, timeout=30) as response:
            reader = list(csv.DictReader([line.decode('utf-8') for line in response.readlines()]))

        # Cache only a non-empty result. Opening the file before checking would
        # leave an empty csv behind, and that file would block later downloads.
        if reader:
            with open(csv_path, mode='w', newline='', encoding='utf-8') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=reader[0].keys())
                writer.writeheader()
                writer.writerows(reader)

    for row in reader:
        filename = row['Filename']
        # Use the first string cell that mentions "huggingface" as the URL.
        url = next((value for value in row.values() if isinstance(value, str) and "huggingface" in value), None)

        if url: models[filename] = url
except Exception as e:
    logger.debug("Could not load the model spreadsheet: %s", e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/config.json
DELETED
|
@@ -1,622 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"language": "vi-VN",
|
| 3 |
-
"support_language": [
|
| 4 |
-
"en-US",
|
| 5 |
-
"vi-VN"
|
| 6 |
-
],
|
| 7 |
-
"theme": "NoCrypt/miku",
|
| 8 |
-
"themes": [
|
| 9 |
-
"NoCrypt/miku",
|
| 10 |
-
"gstaff/xkcd",
|
| 11 |
-
"JohnSmith9982/small_and_pretty",
|
| 12 |
-
"ParityError/Interstellar",
|
| 13 |
-
"earneleh/paris",
|
| 14 |
-
"shivi/calm_seafoam",
|
| 15 |
-
"Hev832/Applio",
|
| 16 |
-
"YTheme/Minecraft",
|
| 17 |
-
"gstaff/sketch",
|
| 18 |
-
"SebastianBravo/simci_css",
|
| 19 |
-
"allenai/gradio-theme",
|
| 20 |
-
"Nymbo/Nymbo_Theme_5",
|
| 21 |
-
"lone17/kotaemon",
|
| 22 |
-
"Zarkel/IBM_Carbon_Theme",
|
| 23 |
-
"SherlockRamos/Feliz",
|
| 24 |
-
"freddyaboulton/dracula_revamped",
|
| 25 |
-
"freddyaboulton/bad-theme-space",
|
| 26 |
-
"gradio/dracula_revamped",
|
| 27 |
-
"abidlabs/dracula_revamped",
|
| 28 |
-
"gradio/dracula_test",
|
| 29 |
-
"gradio/seafoam",
|
| 30 |
-
"gradio/glass",
|
| 31 |
-
"gradio/monochrome",
|
| 32 |
-
"gradio/soft",
|
| 33 |
-
"gradio/default",
|
| 34 |
-
"gradio/base",
|
| 35 |
-
"abidlabs/pakistan",
|
| 36 |
-
"dawood/microsoft_windows",
|
| 37 |
-
"ysharma/steampunk",
|
| 38 |
-
"ysharma/huggingface",
|
| 39 |
-
"abidlabs/Lime",
|
| 40 |
-
"freddyaboulton/this-theme-does-not-exist-2",
|
| 41 |
-
"aliabid94/new-theme",
|
| 42 |
-
"aliabid94/test2",
|
| 43 |
-
"aliabid94/test3",
|
| 44 |
-
"aliabid94/test4",
|
| 45 |
-
"abidlabs/banana",
|
| 46 |
-
"freddyaboulton/test-blue",
|
| 47 |
-
"gstaff/whiteboard",
|
| 48 |
-
"ysharma/llamas",
|
| 49 |
-
"abidlabs/font-test",
|
| 50 |
-
"YenLai/Superhuman",
|
| 51 |
-
"bethecloud/storj_theme",
|
| 52 |
-
"sudeepshouche/minimalist",
|
| 53 |
-
"knotdgaf/gradiotest",
|
| 54 |
-
"ParityError/Anime",
|
| 55 |
-
"Ajaxon6255/Emerald_Isle",
|
| 56 |
-
"ParityError/LimeFace",
|
| 57 |
-
"finlaymacklon/smooth_slate",
|
| 58 |
-
"finlaymacklon/boxy_violet",
|
| 59 |
-
"derekzen/stardust",
|
| 60 |
-
"EveryPizza/Cartoony-Gradio-Theme",
|
| 61 |
-
"Ifeanyi/Cyanister",
|
| 62 |
-
"Tshackelton/IBMPlex-DenseReadable",
|
| 63 |
-
"snehilsanyal/scikit-learn",
|
| 64 |
-
"Himhimhim/xkcd",
|
| 65 |
-
"nota-ai/theme",
|
| 66 |
-
"rawrsor1/Everforest",
|
| 67 |
-
"rottenlittlecreature/Moon_Goblin",
|
| 68 |
-
"abidlabs/test-yellow",
|
| 69 |
-
"abidlabs/test-yellow3",
|
| 70 |
-
"idspicQstitho/dracula_revamped",
|
| 71 |
-
"kfahn/AnimalPose",
|
| 72 |
-
"HaleyCH/HaleyCH_Theme",
|
| 73 |
-
"simulKitke/dracula_test",
|
| 74 |
-
"braintacles/CrimsonNight",
|
| 75 |
-
"wentaohe/whiteboardv2",
|
| 76 |
-
"reilnuud/polite",
|
| 77 |
-
"remilia/Ghostly",
|
| 78 |
-
"Franklisi/darkmode",
|
| 79 |
-
"coding-alt/soft",
|
| 80 |
-
"xiaobaiyuan/theme_land",
|
| 81 |
-
"step-3-profit/Midnight-Deep",
|
| 82 |
-
"xiaobaiyuan/theme_demo",
|
| 83 |
-
"Taithrah/Minimal",
|
| 84 |
-
"Insuz/SimpleIndigo",
|
| 85 |
-
"zkunn/Alipay_Gradio_theme",
|
| 86 |
-
"Insuz/Mocha",
|
| 87 |
-
"xiaobaiyuan/theme_brief",
|
| 88 |
-
"Ama434/434-base-Barlow",
|
| 89 |
-
"Ama434/def_barlow",
|
| 90 |
-
"Ama434/neutral-barlow",
|
| 91 |
-
"dawood/dracula_test",
|
| 92 |
-
"nuttea/Softblue",
|
| 93 |
-
"BlueDancer/Alien_Diffusion",
|
| 94 |
-
"naughtondale/monochrome",
|
| 95 |
-
"Dagfinn1962/standard",
|
| 96 |
-
"default"
|
| 97 |
-
],
|
| 98 |
-
"mdx_models": {
|
| 99 |
-
"Main_340": "UVR-MDX-NET_Main_340.onnx",
|
| 100 |
-
"Main_390": "UVR-MDX-NET_Main_390.onnx",
|
| 101 |
-
"Main_406": "UVR-MDX-NET_Main_406.onnx",
|
| 102 |
-
"Main_427": "UVR-MDX-NET_Main_427.onnx",
|
| 103 |
-
"Main_438": "UVR-MDX-NET_Main_438.onnx",
|
| 104 |
-
"Inst_full_292": "UVR-MDX-NET-Inst_full_292.onnx",
|
| 105 |
-
"Inst_HQ_1": "UVR-MDX-NET-Inst_HQ_1.onnx",
|
| 106 |
-
"Inst_HQ_2": "UVR-MDX-NET-Inst_HQ_2.onnx",
|
| 107 |
-
"Inst_HQ_3": "UVR-MDX-NET-Inst_HQ_3.onnx",
|
| 108 |
-
"Inst_HQ_4": "UVR-MDX-NET-Inst_HQ_4.onnx",
|
| 109 |
-
"Inst_HQ_5": "UVR-MDX-NET-Inst_HQ_5.onnx",
|
| 110 |
-
"Kim_Vocal_1": "Kim_Vocal_1.onnx",
|
| 111 |
-
"Kim_Vocal_2": "Kim_Vocal_2.onnx",
|
| 112 |
-
"Kim_Inst": "Kim_Inst.onnx",
|
| 113 |
-
"Inst_187_beta": "UVR-MDX-NET_Inst_187_beta.onnx",
|
| 114 |
-
"Inst_82_beta": "UVR-MDX-NET_Inst_82_beta.onnx",
|
| 115 |
-
"Inst_90_beta": "UVR-MDX-NET_Inst_90_beta.onnx",
|
| 116 |
-
"Voc_FT": "UVR-MDX-NET-Voc_FT.onnx",
|
| 117 |
-
"Crowd_HQ": "UVR-MDX-NET_Crowd_HQ_1.onnx",
|
| 118 |
-
"MDXNET_9482": "UVR_MDXNET_9482.onnx",
|
| 119 |
-
"Inst_1": "UVR-MDX-NET-Inst_1.onnx",
|
| 120 |
-
"Inst_2": "UVR-MDX-NET-Inst_2.onnx",
|
| 121 |
-
"Inst_3": "UVR-MDX-NET-Inst_3.onnx",
|
| 122 |
-
"MDXNET_1_9703": "UVR_MDXNET_1_9703.onnx",
|
| 123 |
-
"MDXNET_2_9682": "UVR_MDXNET_2_9682.onnx",
|
| 124 |
-
"MDXNET_3_9662": "UVR_MDXNET_3_9662.onnx",
|
| 125 |
-
"Inst_Main": "UVR-MDX-NET-Inst_Main.onnx",
|
| 126 |
-
"MDXNET_Main": "UVR_MDXNET_Main.onnx"
|
| 127 |
-
},
|
| 128 |
-
"demucs_models": {
|
| 129 |
-
"HT-Tuned": "htdemucs_ft.yaml",
|
| 130 |
-
"HT-Normal": "htdemucs.yaml",
|
| 131 |
-
"HD_MMI": "hdemucs_mmi.yaml",
|
| 132 |
-
"HT_6S": "htdemucs_6s.yaml"
|
| 133 |
-
},
|
| 134 |
-
"vr_models": {
|
| 135 |
-
"HP-1": "1_HP-UVR.pth",
|
| 136 |
-
"HP-2": "2_HP-UVR.pth",
|
| 137 |
-
"HP-Vocal-1": "3_HP-Vocal-UVR.pth",
|
| 138 |
-
"HP-Vocal-2": "4_HP-Vocal-UVR.pth",
|
| 139 |
-
"HP2-1": "7_HP2-UVR.pth",
|
| 140 |
-
"HP2-2": "8_HP2-UVR.pth",
|
| 141 |
-
"HP2-3": "9_HP2-UVR.pth",
|
| 142 |
-
"SP-2B-1": "10_SP-UVR-2B-32000-1.pth",
|
| 143 |
-
"SP-2B-2": "11_SP-UVR-2B-32000-2.pth",
|
| 144 |
-
"SP-3B-1": "12_SP-UVR-3B-44100.pth",
|
| 145 |
-
"SP-4B-1": "13_SP-UVR-4B-44100-1.pth",
|
| 146 |
-
"SP-4B-2": "14_SP-UVR-4B-44100-2.pth",
|
| 147 |
-
"SP-MID-1": "15_SP-UVR-MID-44100-1.pth",
|
| 148 |
-
"SP-MID-2": "16_SP-UVR-MID-44100-2.pth"
|
| 149 |
-
},
|
| 150 |
-
"karaoke_models": {
|
| 151 |
-
"MDX-Version-1": "UVR_MDXNET_KARA.onnx",
|
| 152 |
-
"MDX-Version-2": "UVR_MDXNET_KARA_2.onnx",
|
| 153 |
-
"VR-Version-1": "5_HP-Karaoke-UVR.pth",
|
| 154 |
-
"VR-Version-2": "6_HP-Karaoke-UVR.pth"
|
| 155 |
-
},
|
| 156 |
-
"reverb_models": {
|
| 157 |
-
"MDX-Reverb": "Reverb_HQ_By_FoxJoy.onnx",
|
| 158 |
-
"VR-Reverb": "UVR-DeEcho-DeReverb.pth",
|
| 159 |
-
"Echo-Aggressive": "UVR-De-Echo-Aggressive.pth",
|
| 160 |
-
"Echo-Normal": "UVR-De-Echo-Normal.pth"
|
| 161 |
-
},
|
| 162 |
-
"denoise_models": {
|
| 163 |
-
"Lite": "UVR-DeNoise-Lite.pth",
|
| 164 |
-
"Normal": "UVR-DeNoise.pth"
|
| 165 |
-
},
|
| 166 |
-
"edge_tts": [
|
| 167 |
-
"af-ZA-AdriNeural",
|
| 168 |
-
"af-ZA-WillemNeural",
|
| 169 |
-
"sq-AL-AnilaNeural",
|
| 170 |
-
"sq-AL-IlirNeural",
|
| 171 |
-
"am-ET-AmehaNeural",
|
| 172 |
-
"am-ET-MekdesNeural",
|
| 173 |
-
"ar-DZ-AminaNeural",
|
| 174 |
-
"ar-DZ-IsmaelNeural",
|
| 175 |
-
"ar-BH-AliNeural",
|
| 176 |
-
"ar-BH-LailaNeural",
|
| 177 |
-
"ar-EG-SalmaNeural",
|
| 178 |
-
"ar-EG-ShakirNeural",
|
| 179 |
-
"ar-IQ-BasselNeural",
|
| 180 |
-
"ar-IQ-RanaNeural",
|
| 181 |
-
"ar-JO-SanaNeural",
|
| 182 |
-
"ar-JO-TaimNeural",
|
| 183 |
-
"ar-KW-FahedNeural",
|
| 184 |
-
"ar-KW-NouraNeural",
|
| 185 |
-
"ar-LB-LaylaNeural",
|
| 186 |
-
"ar-LB-RamiNeural",
|
| 187 |
-
"ar-LY-ImanNeural",
|
| 188 |
-
"ar-LY-OmarNeural",
|
| 189 |
-
"ar-MA-JamalNeural",
|
| 190 |
-
"ar-MA-MounaNeural",
|
| 191 |
-
"ar-OM-AbdullahNeural",
|
| 192 |
-
"ar-OM-AyshaNeural",
|
| 193 |
-
"ar-QA-AmalNeural",
|
| 194 |
-
"ar-QA-MoazNeural",
|
| 195 |
-
"ar-SA-HamedNeural",
|
| 196 |
-
"ar-SA-ZariyahNeural",
|
| 197 |
-
"ar-SY-AmanyNeural",
|
| 198 |
-
"ar-SY-LaithNeural",
|
| 199 |
-
"ar-TN-HediNeural",
|
| 200 |
-
"ar-TN-ReemNeural",
|
| 201 |
-
"ar-AE-FatimaNeural",
|
| 202 |
-
"ar-AE-HamdanNeural",
|
| 203 |
-
"ar-YE-MaryamNeural",
|
| 204 |
-
"ar-YE-SalehNeural",
|
| 205 |
-
"az-AZ-BabekNeural",
|
| 206 |
-
"az-AZ-BanuNeural",
|
| 207 |
-
"bn-BD-NabanitaNeural",
|
| 208 |
-
"bn-BD-PradeepNeural",
|
| 209 |
-
"bn-IN-BashkarNeural",
|
| 210 |
-
"bn-IN-TanishaaNeural",
|
| 211 |
-
"bs-BA-GoranNeural",
|
| 212 |
-
"bs-BA-VesnaNeural",
|
| 213 |
-
"bg-BG-BorislavNeural",
|
| 214 |
-
"bg-BG-KalinaNeural",
|
| 215 |
-
"my-MM-NilarNeural",
|
| 216 |
-
"my-MM-ThihaNeural",
|
| 217 |
-
"ca-ES-EnricNeural",
|
| 218 |
-
"ca-ES-JoanaNeural",
|
| 219 |
-
"zh-HK-HiuGaaiNeural",
|
| 220 |
-
"zh-HK-HiuMaanNeural",
|
| 221 |
-
"zh-HK-WanLungNeural",
|
| 222 |
-
"zh-CN-XiaoxiaoNeural",
|
| 223 |
-
"zh-CN-XiaoyiNeural",
|
| 224 |
-
"zh-CN-YunjianNeural",
|
| 225 |
-
"zh-CN-YunxiNeural",
|
| 226 |
-
"zh-CN-YunxiaNeural",
|
| 227 |
-
"zh-CN-YunyangNeural",
|
| 228 |
-
"zh-CN-liaoning-XiaobeiNeural",
|
| 229 |
-
"zh-TW-HsiaoChenNeural",
|
| 230 |
-
"zh-TW-YunJheNeural",
|
| 231 |
-
"zh-TW-HsiaoYuNeural",
|
| 232 |
-
"zh-CN-shaanxi-XiaoniNeural",
|
| 233 |
-
"hr-HR-GabrijelaNeural",
|
| 234 |
-
"hr-HR-SreckoNeural",
|
| 235 |
-
"cs-CZ-AntoninNeural",
|
| 236 |
-
"cs-CZ-VlastaNeural",
|
| 237 |
-
"da-DK-ChristelNeural",
|
| 238 |
-
"da-DK-JeppeNeural",
|
| 239 |
-
"nl-BE-ArnaudNeural",
|
| 240 |
-
"nl-BE-DenaNeural",
|
| 241 |
-
"nl-NL-ColetteNeural",
|
| 242 |
-
"nl-NL-FennaNeural",
|
| 243 |
-
"nl-NL-MaartenNeural",
|
| 244 |
-
"en-AU-NatashaNeural",
|
| 245 |
-
"en-AU-WilliamNeural",
|
| 246 |
-
"en-CA-ClaraNeural",
|
| 247 |
-
"en-CA-LiamNeural",
|
| 248 |
-
"en-HK-SamNeural",
|
| 249 |
-
"en-HK-YanNeural",
|
| 250 |
-
"en-IN-NeerjaExpressiveNeural",
|
| 251 |
-
"en-IN-NeerjaNeural",
|
| 252 |
-
"en-IN-PrabhatNeural",
|
| 253 |
-
"en-IE-ConnorNeural",
|
| 254 |
-
"en-IE-EmilyNeural",
|
| 255 |
-
"en-KE-AsiliaNeural",
|
| 256 |
-
"en-KE-ChilembaNeural",
|
| 257 |
-
"en-NZ-MitchellNeural",
|
| 258 |
-
"en-NZ-MollyNeural",
|
| 259 |
-
"en-NG-AbeoNeural",
|
| 260 |
-
"en-NG-EzinneNeural",
|
| 261 |
-
"en-PH-JamesNeural",
|
| 262 |
-
"en-PH-RosaNeural",
|
| 263 |
-
"en-SG-LunaNeural",
|
| 264 |
-
"en-SG-WayneNeural",
|
| 265 |
-
"en-ZA-LeahNeural",
|
| 266 |
-
"en-ZA-LukeNeural",
|
| 267 |
-
"en-TZ-ElimuNeural",
|
| 268 |
-
"en-TZ-ImaniNeural",
|
| 269 |
-
"en-GB-LibbyNeural",
|
| 270 |
-
"en-GB-MaisieNeural",
|
| 271 |
-
"en-GB-RyanNeural",
|
| 272 |
-
"en-GB-SoniaNeural",
|
| 273 |
-
"en-GB-ThomasNeural",
|
| 274 |
-
"en-US-AvaMultilingualNeural",
|
| 275 |
-
"en-US-AndrewMultilingualNeural",
|
| 276 |
-
"en-US-EmmaMultilingualNeural",
|
| 277 |
-
"en-US-BrianMultilingualNeural",
|
| 278 |
-
"en-US-AvaNeural",
|
| 279 |
-
"en-US-AndrewNeural",
|
| 280 |
-
"en-US-EmmaNeural",
|
| 281 |
-
"en-US-BrianNeural",
|
| 282 |
-
"en-US-AnaNeural",
|
| 283 |
-
"en-US-AriaNeural",
|
| 284 |
-
"en-US-ChristopherNeural",
|
| 285 |
-
"en-US-EricNeural",
|
| 286 |
-
"en-US-GuyNeural",
|
| 287 |
-
"en-US-JennyNeural",
|
| 288 |
-
"en-US-MichelleNeural",
|
| 289 |
-
"en-US-RogerNeural",
|
| 290 |
-
"en-US-SteffanNeural",
|
| 291 |
-
"et-EE-AnuNeural",
|
| 292 |
-
"et-EE-KertNeural",
|
| 293 |
-
"fil-PH-AngeloNeural",
|
| 294 |
-
"fil-PH-BlessicaNeural",
|
| 295 |
-
"fi-FI-HarriNeural",
|
| 296 |
-
"fi-FI-NooraNeural",
|
| 297 |
-
"fr-BE-CharlineNeural",
|
| 298 |
-
"fr-BE-GerardNeural",
|
| 299 |
-
"fr-CA-ThierryNeural",
|
| 300 |
-
"fr-CA-AntoineNeural",
|
| 301 |
-
"fr-CA-JeanNeural",
|
| 302 |
-
"fr-CA-SylvieNeural",
|
| 303 |
-
"fr-FR-VivienneMultilingualNeural",
|
| 304 |
-
"fr-FR-RemyMultilingualNeural",
|
| 305 |
-
"fr-FR-DeniseNeural",
|
| 306 |
-
"fr-FR-EloiseNeural",
|
| 307 |
-
"fr-FR-HenriNeural",
|
| 308 |
-
"fr-CH-ArianeNeural",
|
| 309 |
-
"fr-CH-FabriceNeural",
|
| 310 |
-
"gl-ES-RoiNeural",
|
| 311 |
-
"gl-ES-SabelaNeural",
|
| 312 |
-
"ka-GE-EkaNeural",
|
| 313 |
-
"ka-GE-GiorgiNeural",
|
| 314 |
-
"de-AT-IngridNeural",
|
| 315 |
-
"de-AT-JonasNeural",
|
| 316 |
-
"de-DE-SeraphinaMultilingualNeural",
|
| 317 |
-
"de-DE-FlorianMultilingualNeural",
|
| 318 |
-
"de-DE-AmalaNeural",
|
| 319 |
-
"de-DE-ConradNeural",
|
| 320 |
-
"de-DE-KatjaNeural",
|
| 321 |
-
"de-DE-KillianNeural",
|
| 322 |
-
"de-CH-JanNeural",
|
| 323 |
-
"de-CH-LeniNeural",
|
| 324 |
-
"el-GR-AthinaNeural",
|
| 325 |
-
"el-GR-NestorasNeural",
|
| 326 |
-
"gu-IN-DhwaniNeural",
|
| 327 |
-
"gu-IN-NiranjanNeural",
|
| 328 |
-
"he-IL-AvriNeural",
|
| 329 |
-
"he-IL-HilaNeural",
|
| 330 |
-
"hi-IN-MadhurNeural",
|
| 331 |
-
"hi-IN-SwaraNeural",
|
| 332 |
-
"hu-HU-NoemiNeural",
|
| 333 |
-
"hu-HU-TamasNeural",
|
| 334 |
-
"is-IS-GudrunNeural",
|
| 335 |
-
"is-IS-GunnarNeural",
|
| 336 |
-
"id-ID-ArdiNeural",
|
| 337 |
-
"id-ID-GadisNeural",
|
| 338 |
-
"ga-IE-ColmNeural",
|
| 339 |
-
"ga-IE-OrlaNeural",
|
| 340 |
-
"it-IT-GiuseppeNeural",
|
| 341 |
-
"it-IT-DiegoNeural",
|
| 342 |
-
"it-IT-ElsaNeural",
|
| 343 |
-
"it-IT-IsabellaNeural",
|
| 344 |
-
"ja-JP-KeitaNeural",
|
| 345 |
-
"ja-JP-NanamiNeural",
|
| 346 |
-
"jv-ID-DimasNeural",
|
| 347 |
-
"jv-ID-SitiNeural",
|
| 348 |
-
"kn-IN-GaganNeural",
|
| 349 |
-
"kn-IN-SapnaNeural",
|
| 350 |
-
"kk-KZ-AigulNeural",
|
| 351 |
-
"kk-KZ-DauletNeural",
|
| 352 |
-
"km-KH-PisethNeural",
|
| 353 |
-
"km-KH-SreymomNeural",
|
| 354 |
-
"ko-KR-HyunsuNeural",
|
| 355 |
-
"ko-KR-InJoonNeural",
|
| 356 |
-
"ko-KR-SunHiNeural",
|
| 357 |
-
"lo-LA-ChanthavongNeural",
|
| 358 |
-
"lo-LA-KeomanyNeural",
|
| 359 |
-
"lv-LV-EveritaNeural",
|
| 360 |
-
"lv-LV-NilsNeural",
|
| 361 |
-
"lt-LT-LeonasNeural",
|
| 362 |
-
"lt-LT-OnaNeural",
|
| 363 |
-
"mk-MK-AleksandarNeural",
|
| 364 |
-
"mk-MK-MarijaNeural",
|
| 365 |
-
"ms-MY-OsmanNeural",
|
| 366 |
-
"ms-MY-YasminNeural",
|
| 367 |
-
"ml-IN-MidhunNeural",
|
| 368 |
-
"ml-IN-SobhanaNeural",
|
| 369 |
-
"mt-MT-GraceNeural",
|
| 370 |
-
"mt-MT-JosephNeural",
|
| 371 |
-
"mr-IN-AarohiNeural",
|
| 372 |
-
"mr-IN-ManoharNeural",
|
| 373 |
-
"mn-MN-BataaNeural",
|
| 374 |
-
"mn-MN-YesuiNeural",
|
| 375 |
-
"ne-NP-HemkalaNeural",
|
| 376 |
-
"ne-NP-SagarNeural",
|
| 377 |
-
"nb-NO-FinnNeural",
|
| 378 |
-
"nb-NO-PernilleNeural",
|
| 379 |
-
"ps-AF-GulNawazNeural",
|
| 380 |
-
"ps-AF-LatifaNeural",
|
| 381 |
-
"fa-IR-DilaraNeural",
|
| 382 |
-
"fa-IR-FaridNeural",
|
| 383 |
-
"pl-PL-MarekNeural",
|
| 384 |
-
"pl-PL-ZofiaNeural",
|
| 385 |
-
"pt-BR-ThalitaNeural",
|
| 386 |
-
"pt-BR-AntonioNeural",
|
| 387 |
-
"pt-BR-FranciscaNeural",
|
| 388 |
-
"pt-PT-DuarteNeural",
|
| 389 |
-
"pt-PT-RaquelNeural",
|
| 390 |
-
"ro-RO-AlinaNeural",
|
| 391 |
-
"ro-RO-EmilNeural",
|
| 392 |
-
"ru-RU-DmitryNeural",
|
| 393 |
-
"ru-RU-SvetlanaNeural",
|
| 394 |
-
"sr-RS-NicholasNeural",
|
| 395 |
-
"sr-RS-SophieNeural",
|
| 396 |
-
"si-LK-SameeraNeural",
|
| 397 |
-
"si-LK-ThiliniNeural",
|
| 398 |
-
"sk-SK-LukasNeural",
|
| 399 |
-
"sk-SK-ViktoriaNeural",
|
| 400 |
-
"sl-SI-PetraNeural",
|
| 401 |
-
"sl-SI-RokNeural",
|
| 402 |
-
"so-SO-MuuseNeural",
|
| 403 |
-
"so-SO-UbaxNeural",
|
| 404 |
-
"es-AR-ElenaNeural",
|
| 405 |
-
"es-AR-TomasNeural",
|
| 406 |
-
"es-BO-MarceloNeural",
|
| 407 |
-
"es-BO-SofiaNeural",
|
| 408 |
-
"es-CL-CatalinaNeural",
|
| 409 |
-
"es-CL-LorenzoNeural",
|
| 410 |
-
"es-ES-XimenaNeural",
|
| 411 |
-
"es-CO-GonzaloNeural",
|
| 412 |
-
"es-CO-SalomeNeural",
|
| 413 |
-
"es-CR-JuanNeural",
|
| 414 |
-
"es-CR-MariaNeural",
|
| 415 |
-
"es-CU-BelkysNeural",
|
| 416 |
-
"es-CU-ManuelNeural",
|
| 417 |
-
"es-DO-EmilioNeural",
|
| 418 |
-
"es-DO-RamonaNeural",
|
| 419 |
-
"es-EC-AndreaNeural",
|
| 420 |
-
"es-EC-LuisNeural",
|
| 421 |
-
"es-SV-LorenaNeural",
|
| 422 |
-
"es-SV-RodrigoNeural",
|
| 423 |
-
"es-GQ-JavierNeural",
|
| 424 |
-
"es-GQ-TeresaNeural",
|
| 425 |
-
"es-GT-AndresNeural",
|
| 426 |
-
"es-GT-MartaNeural",
|
| 427 |
-
"es-HN-CarlosNeural",
|
| 428 |
-
"es-HN-KarlaNeural",
|
| 429 |
-
"es-MX-DaliaNeural",
|
| 430 |
-
"es-MX-JorgeNeural",
|
| 431 |
-
"es-NI-FedericoNeural",
|
| 432 |
-
"es-NI-YolandaNeural",
|
| 433 |
-
"es-PA-MargaritaNeural",
|
| 434 |
-
"es-PA-RobertoNeural",
|
| 435 |
-
"es-PY-MarioNeural",
|
| 436 |
-
"es-PY-TaniaNeural",
|
| 437 |
-
"es-PE-AlexNeural",
|
| 438 |
-
"es-PE-CamilaNeural",
|
| 439 |
-
"es-PR-KarinaNeural",
|
| 440 |
-
"es-PR-VictorNeural",
|
| 441 |
-
"es-ES-AlvaroNeural",
|
| 442 |
-
"es-ES-ElviraNeural",
|
| 443 |
-
"es-US-AlonsoNeural",
|
| 444 |
-
"es-US-PalomaNeural",
|
| 445 |
-
"es-UY-MateoNeural",
|
| 446 |
-
"es-UY-ValentinaNeural",
|
| 447 |
-
"es-VE-PaolaNeural",
|
| 448 |
-
"es-VE-SebastianNeural",
|
| 449 |
-
"su-ID-JajangNeural",
|
| 450 |
-
"su-ID-TutiNeural",
|
| 451 |
-
"sw-KE-RafikiNeural",
|
| 452 |
-
"sw-KE-ZuriNeural",
|
| 453 |
-
"sw-TZ-DaudiNeural",
|
| 454 |
-
"sw-TZ-RehemaNeural",
|
| 455 |
-
"sv-SE-MattiasNeural",
|
| 456 |
-
"sv-SE-SofieNeural",
|
| 457 |
-
"ta-IN-PallaviNeural",
|
| 458 |
-
"ta-IN-ValluvarNeural",
|
| 459 |
-
"ta-MY-KaniNeural",
|
| 460 |
-
"ta-MY-SuryaNeural",
|
| 461 |
-
"ta-SG-AnbuNeural",
|
| 462 |
-
"ta-SG-VenbaNeural",
|
| 463 |
-
"ta-LK-KumarNeural",
|
| 464 |
-
"ta-LK-SaranyaNeural",
|
| 465 |
-
"te-IN-MohanNeural",
|
| 466 |
-
"te-IN-ShrutiNeural",
|
| 467 |
-
"th-TH-NiwatNeural",
|
| 468 |
-
"th-TH-PremwadeeNeural",
|
| 469 |
-
"tr-TR-AhmetNeural",
|
| 470 |
-
"tr-TR-EmelNeural",
|
| 471 |
-
"uk-UA-OstapNeural",
|
| 472 |
-
"uk-UA-PolinaNeural",
|
| 473 |
-
"ur-IN-GulNeural",
|
| 474 |
-
"ur-IN-SalmanNeural",
|
| 475 |
-
"ur-PK-AsadNeural",
|
| 476 |
-
"ur-PK-UzmaNeural",
|
| 477 |
-
"uz-UZ-MadinaNeural",
|
| 478 |
-
"uz-UZ-SardorNeural",
|
| 479 |
-
"vi-VN-HoaiMyNeural",
|
| 480 |
-
"vi-VN-NamMinhNeural",
|
| 481 |
-
"cy-GB-AledNeural",
|
| 482 |
-
"cy-GB-NiaNeural",
|
| 483 |
-
"zu-ZA-ThandoNeural",
|
| 484 |
-
"zu-ZA-ThembaNeural"
|
| 485 |
-
],
|
| 486 |
-
"google_tts_voice": [
|
| 487 |
-
"af",
|
| 488 |
-
"am",
|
| 489 |
-
"ar",
|
| 490 |
-
"bg",
|
| 491 |
-
"bn",
|
| 492 |
-
"bs",
|
| 493 |
-
"ca",
|
| 494 |
-
"cs",
|
| 495 |
-
"cy",
|
| 496 |
-
"da",
|
| 497 |
-
"de",
|
| 498 |
-
"el",
|
| 499 |
-
"en",
|
| 500 |
-
"es",
|
| 501 |
-
"et",
|
| 502 |
-
"eu",
|
| 503 |
-
"fi",
|
| 504 |
-
"fr",
|
| 505 |
-
"fr-CA",
|
| 506 |
-
"gl",
|
| 507 |
-
"gu",
|
| 508 |
-
"ha",
|
| 509 |
-
"hi",
|
| 510 |
-
"hr",
|
| 511 |
-
"hu",
|
| 512 |
-
"id",
|
| 513 |
-
"is",
|
| 514 |
-
"it",
|
| 515 |
-
"iw",
|
| 516 |
-
"ja",
|
| 517 |
-
"jw",
|
| 518 |
-
"km",
|
| 519 |
-
"kn",
|
| 520 |
-
"ko",
|
| 521 |
-
"la",
|
| 522 |
-
"lt",
|
| 523 |
-
"lv",
|
| 524 |
-
"ml",
|
| 525 |
-
"mr",
|
| 526 |
-
"ms",
|
| 527 |
-
"my",
|
| 528 |
-
"ne",
|
| 529 |
-
"nl",
|
| 530 |
-
"no",
|
| 531 |
-
"pa",
|
| 532 |
-
"pl",
|
| 533 |
-
"pt",
|
| 534 |
-
"pt-PT",
|
| 535 |
-
"ro",
|
| 536 |
-
"ru",
|
| 537 |
-
"si",
|
| 538 |
-
"sk",
|
| 539 |
-
"sq",
|
| 540 |
-
"sr",
|
| 541 |
-
"su",
|
| 542 |
-
"sv",
|
| 543 |
-
"sw",
|
| 544 |
-
"ta",
|
| 545 |
-
"te",
|
| 546 |
-
"th",
|
| 547 |
-
"tl",
|
| 548 |
-
"tr",
|
| 549 |
-
"uk",
|
| 550 |
-
"ur",
|
| 551 |
-
"vi",
|
| 552 |
-
"yue",
|
| 553 |
-
"zh-CN",
|
| 554 |
-
"zh-TW",
|
| 555 |
-
"zh"
|
| 556 |
-
],
|
| 557 |
-
"fp16": false,
|
| 558 |
-
"editing_tab": true,
|
| 559 |
-
"inference_tab": true,
|
| 560 |
-
"create_and_training_tab": true,
|
| 561 |
-
"extra_tab": true,
|
| 562 |
-
"separator_tab": true,
|
| 563 |
-
"convert_tab": true,
|
| 564 |
-
"convert_with_whisper": true,
|
| 565 |
-
"tts_tab": true,
|
| 566 |
-
"effects_tab": true,
|
| 567 |
-
"quirk": true,
|
| 568 |
-
"create_dataset_tab": true,
|
| 569 |
-
"training_tab": true,
|
| 570 |
-
"fushion_tab": true,
|
| 571 |
-
"read_tab": true,
|
| 572 |
-
"onnx_tab": true,
|
| 573 |
-
"downloads_tab": true,
|
| 574 |
-
"f0_extractor_tab": true,
|
| 575 |
-
"settings_tab": true,
|
| 576 |
-
"create_srt_tab": true,
|
| 577 |
-
"realtime_tab": true,
|
| 578 |
-
"realtime_client_tab": true,
|
| 579 |
-
"create_reference_tab": true,
|
| 580 |
-
"font": "https://fonts.googleapis.com/css2?family=Roboto&display=swap",
|
| 581 |
-
"app_port": 7860,
|
| 582 |
-
"tensorboard_port": 6870,
|
| 583 |
-
"num_of_restart": 5,
|
| 584 |
-
"server_name": "0.0.0.0",
|
| 585 |
-
"app_show_error": true,
|
| 586 |
-
"delete_exists_file": false,
|
| 587 |
-
"audio_effects_path": "main/inference/audio_effects.py",
|
| 588 |
-
"convert_path": "main/inference/conversion/convert.py",
|
| 589 |
-
"separate_path": "main/inference/separate_music.py",
|
| 590 |
-
"create_dataset_path": "main/inference/create_dataset.py",
|
| 591 |
-
"preprocess_path": "main/inference/preprocess/preprocess.py",
|
| 592 |
-
"extract_path": "main/inference/extracting/extract.py",
|
| 593 |
-
"create_index_path": "main/inference/create_index.py",
|
| 594 |
-
"train_path": "main/inference/training/train.py",
|
| 595 |
-
"create_reference_path": "main/inference/create_reference.py",
|
| 596 |
-
"ico_path": "assets/ico.png",
|
| 597 |
-
"csv_path": "assets/spreadsheet.csv",
|
| 598 |
-
"weights_path": "assets/weights",
|
| 599 |
-
"logs_path": "assets/logs",
|
| 600 |
-
"binary_path": "assets/binary",
|
| 601 |
-
"f0_path": "assets/f0",
|
| 602 |
-
"language_path": "assets/languages",
|
| 603 |
-
"presets_path": "assets/presets",
|
| 604 |
-
"embedders_path": "assets/models/embedders",
|
| 605 |
-
"predictors_path": "assets/models/predictors",
|
| 606 |
-
"pretrained_custom_path": "assets/models/pretrained_custom",
|
| 607 |
-
"pretrained_v1_path": "assets/models/pretrained_v1",
|
| 608 |
-
"pretrained_v2_path": "assets/models/pretrained_v2",
|
| 609 |
-
"speaker_diarization_path": "assets/models/speaker_diarization",
|
| 610 |
-
"uvr5_path": "assets/models/uvr5",
|
| 611 |
-
"audios_path": "audios",
|
| 612 |
-
"reference_path": "assets/logs/reference",
|
| 613 |
-
"demucs_segments_enable": true,
|
| 614 |
-
"demucs_cpu_mode": false,
|
| 615 |
-
"limit_f0": 8,
|
| 616 |
-
"debug_mode": false,
|
| 617 |
-
"pretrain_verify_shape": true,
|
| 618 |
-
"pretrain_strict": true,
|
| 619 |
-
"cpu_mode": false,
|
| 620 |
-
"brain": false,
|
| 621 |
-
"discord_presence": true
|
| 622 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/config.py
DELETED
|
@@ -1,131 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import json
|
| 4 |
-
import torch
|
| 5 |
-
import onnxruntime
|
| 6 |
-
|
| 7 |
-
sys.path.append(os.getcwd())
|
| 8 |
-
|
| 9 |
-
from main.library.backends import directml, opencl, zluda
|
| 10 |
-
|
| 11 |
-
# Relative paths (version directory / sample-rate file) of every bundled
# model configuration, ordered v1 first and then by ascending sample rate.
version_config_paths = [
    os.path.join(version_dir, rate_file)
    for version_dir in ("v1", "v2")
    for rate_file in ("32000.json", "40000.json", "48000.json")
]
|
| 12 |
-
|
| 13 |
-
def singleton(cls):
    """Class decorator that makes ``cls`` yield one shared instance.

    The first call constructs ``cls`` with the supplied arguments; every
    later call returns that same object and ignores its arguments.

    Args:
        cls: The class to wrap.

    Returns:
        A factory function that stands in for ``cls``. It carries the
        class's ``__name__``, ``__qualname__``, ``__doc__`` and
        ``__module__`` so introspection and tracebacks still show the
        decorated class rather than an anonymous wrapper.
    """
    import functools

    instances = {}

    def get_instance(*args, **kwargs):
        if cls not in instances:
            instances[cls] = cls(*args, **kwargs)

        return instances[cls]

    # updated=() copies only the identifying metadata; the class namespace is
    # deliberately not merged into the factory function's __dict__.
    functools.update_wrapper(get_instance, cls, updated=())

    return get_instance
|
| 21 |
-
|
| 22 |
-
@singleton
class Config:
    """Runtime settings loaded from ``main/configs/config.json``.

    Construction reads the JSON settings file, loads the UI translations and
    the per-version model configs, picks a compute device, selects the
    matching ONNX Runtime execution provider and derives the precision flag
    and the ``x_*`` tuning values from them.
    """

    def __init__(self):
        self.configs_path = os.path.join("main", "configs", "config.json")
        # Use a context manager so the file handle is closed right away
        # instead of being left for the garbage collector.
        with open(self.configs_path, "r") as f:
            self.configs = json.load(f)

        self.cpu_mode = self.configs.get("cpu_mode", False)
        self.brain = self.configs.get("brain", False)
        self.debug_mode = self.configs.get("debug_mode", False)

        # Translations are loaded first because load_config_json() reports
        # unreadable files through the translated "empty_json" message.
        self.translations = self.multi_language()
        self.json_config = self.load_config_json()

        self.gpu_mem = None
        self.per_preprocess = 3.7
        self.device = self.get_default_device()
        self.providers = self.get_providers()
        self.is_half = self.is_fp16()
        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()

    def multi_language(self):
        """Load the translation table for the configured language.

        The language comes from the ``language`` setting (``vi-VN`` when
        missing or empty). If the file for that language does not exist the
        ``vi-VN`` pack is loaded instead.

        Returns:
            The parsed content of the selected language JSON file.

        Raises:
            FileNotFoundError: No ``*.json`` file exists in ``language_path``.
            ValueError: The language is not listed in ``support_language``,
                or the selected language file is empty / not valid JSON.
        """
        lang = self.configs.get("language", "vi-VN")
        language_dir = self.configs["language_path"]

        if not any(name.endswith(".json") for name in os.listdir(language_dir)):
            raise FileNotFoundError("Không tìm thấy bất cứ gói ngôn ngữ nào(No package languages found)")

        if not lang:
            lang = "vi-VN"
        if lang not in self.configs["support_language"]:
            raise ValueError("Ngôn ngữ không được hỗ trợ(Language not supported)")

        lang_path = os.path.join(language_dir, f"{lang}.json")
        if not os.path.exists(lang_path):
            lang_path = os.path.join(language_dir, "vi-VN.json")

        try:
            with open(lang_path, encoding="utf-8") as f:
                return json.load(f)
        except json.JSONDecodeError as error:
            # The translation table is what this method produces, so it
            # cannot be used to word the error; there is also nothing
            # meaningful to return. Fail with an explicit message.
            raise ValueError(f"Language file is empty or not valid JSON: {lang_path}") from error

    def is_fp16(self):
        """Return whether half precision is enabled.

        When ``fp16`` is set but the device is ``cpu`` or ``mps``, the flag is
        switched off and the change is written back to the settings file.
        ``per_preprocess`` is lowered to 3.0 whenever the result is false.
        """
        fp16 = self.configs.get("fp16", False)

        if self.device in ["cpu", "mps"] and fp16:
            self.configs["fp16"] = False
            fp16 = False

            # Persist the corrected flag so the next start reads it as off.
            with open(self.configs_path, "w") as f:
                json.dump(self.configs, f, indent=4)

        if not fp16:
            self.per_preprocess = 3.0

        return fp16

    def load_config_json(self):
        """Load every file named in ``version_config_paths``.

        Returns:
            A dict mapping each relative config path to its parsed JSON.
            Files that fail to parse are reported on stdout and left out.
        """
        configs = {}

        for config_file in version_config_paths:
            try:
                with open(os.path.join("main", "configs", config_file), "r") as f:
                    configs[config_file] = json.load(f)
            except json.JSONDecodeError:
                # Translations may be missing (or lack the key) when this
                # runs, so fall back to a plain English message.
                table = getattr(self, "translations", None) or {}
                template = table.get("empty_json", "Empty or invalid JSON file: {file}")
                print(template.format(file=config_file))

        return configs

    def device_config(self):
        """Return the ``(x_pad, x_query, x_center, x_max)`` tuple.

        A GPU reporting 4 GiB or less selects the smallest values and also
        lowers ``per_preprocess`` to 3.0; otherwise the tuple depends on
        ``is_half``.
        """
        if self.gpu_mem is not None and self.gpu_mem <= 4:
            self.per_preprocess = 3.0
            return 1, 5, 30, 32

        return (3, 10, 60, 65) if self.is_half else (1, 6, 38, 41)

    def get_default_device(self):
        """Pick the device string used for computation.

        Probe order when CPU mode is off: CUDA, DirectML, OpenCL, MPS, then
        CPU. For CUDA the total memory of device 0 is stored in ``gpu_mem``
        as whole GiB.

        In CPU mode every availability probe is replaced by a stub returning
        ``False``, so any later call to those probes reports no accelerator.
        """
        if not self.cpu_mode:
            if torch.cuda.is_available():
                device = "cuda:0"
                self.gpu_mem = torch.cuda.get_device_properties(int(device.split(":")[-1])).total_memory // (1024**3)
            elif directml.is_available():
                device = "privateuseone:0"
            elif opencl.is_available():
                device = "ocl:0"
            elif torch.backends.mps.is_available():
                device = "mps"
            else:
                device = "cpu"
        else:
            torch.cuda.is_available = lambda : False
            directml.is_available = lambda : False
            opencl.is_available = lambda : False
            torch.backends.mps.is_available = lambda : False

            device = "cpu"

        return device

    def get_providers(self):
        """Return the ONNX Runtime execution-provider list for ``self.device``.

        The provider must be reported as available by ONNX Runtime and match
        the device prefix; otherwise ``CPUExecutionProvider`` is used.
        """
        ort_providers = onnxruntime.get_available_providers()

        if "CUDAExecutionProvider" in ort_providers and self.device.startswith("cuda"):
            providers = ["CUDAExecutionProvider"]
        elif "ROCMExecutionProvider" in ort_providers and self.device.startswith("cuda"):
            providers = ["ROCMExecutionProvider"]
        elif "DmlExecutionProvider" in ort_providers and self.device.startswith(("ocl", "privateuseone")):
            providers = ["DmlExecutionProvider"]
        elif "CoreMLExecutionProvider" in ort_providers and self.device.startswith("mps"):
            providers = ["CoreMLExecutionProvider"]
        else:
            providers = ["CPUExecutionProvider"]

        return providers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/rpc.py
DELETED
|
@@ -1,78 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import json
|
| 4 |
-
import time
|
| 5 |
-
import struct
|
| 6 |
-
import codecs
|
| 7 |
-
|
| 8 |
-
sys.path.append(os.getcwd())
|
| 9 |
-
|
| 10 |
-
from main.app.variables import translations
|
| 11 |
-
|
| 12 |
-
# Application ID sent as "client_id" in the handshake written by send_discord_rpc().
CLIENT_ID = "1392816674159202396"
|
| 13 |
-
|
| 14 |
-
def create_payload(opcode, payload):
    """Serialize ``payload`` into one IPC frame.

    The frame is two little-endian unsigned 32-bit integers (the opcode and
    the byte length of the body) followed by the UTF-8 encoded JSON body.
    """
    body = json.dumps(payload).encode("utf-8")
    header = struct.pack("<II", opcode, len(body))

    return header + body
|
| 24 |
-
|
| 25 |
-
def connect_discord_ipc():
    """Open the local Discord IPC named pipe.

    Pipe slots ``discord-ipc-0`` through ``discord-ipc-9`` are tried in
    order, so a client listening on a slot other than 0 is still found.

    Returns:
        An unbuffered binary read/write file object for the first pipe that
        opens, or ``None`` when none of them can be opened.
    """
    for slot in range(10):
        try:
            return open(
                rf"\\?\pipe\discord-ipc-{slot}",
                "r+b",
                buffering=0
            )
        except OSError:
            # This slot is missing or busy; try the next one.
            continue

    return None
|
| 34 |
-
|
| 35 |
-
def send_discord_rpc(pipe):
    """Perform the handshake on ``pipe`` and publish the activity.

    Args:
        pipe: Binary read/write object, such as the one returned by
            ``connect_discord_ipc()``.

    Raises:
        struct.error: The handshake reply ended before a full 8-byte frame
            header was received.
    """
    # Handshake frame (opcode 0).
    pipe.write(
        create_payload(
            0, {
                "v": 1,
                "client_id": CLIENT_ID
            }
        )
    )

    # The reply uses the same framing as create_payload(): an 8-byte header
    # (opcode, body length) followed by the body. Take the length from the
    # header itself rather than from the first bytes of the body.
    _, length = struct.unpack("<II", pipe.read(8))

    # Drain the reply body; an unbuffered read may return fewer bytes than
    # requested, so keep reading until the whole body has been consumed.
    remaining = length
    while remaining > 0:
        chunk = pipe.read(remaining)
        if not chunk:
            break
        remaining -= len(chunk)

    # Activity frame (opcode 1).
    pipe.write(
        create_payload(
            1, {
                "cmd": "SET_ACTIVITY",
                "args": {
                    "pid": os.getpid(),
                    "activity": {
                        "buttons": [{
                            "label": "Github",
                            "url": codecs.decode("uggcf://tvguho.pbz/CunzUhlauNau16/Ivrganzrfr-EIP", "rot13")
                        }],
                        "details": translations["details"],
                        "timestamps": {
                            "start": int(time.time())
                        },
                        "state": translations["use"]
                    }
                },
                "nonce": str(time.time())
            }
        )
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/v1/32000.json
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"train": {
|
| 3 |
-
"log_interval": 200,
|
| 4 |
-
"seed": 1234,
|
| 5 |
-
"epochs": 20000,
|
| 6 |
-
"learning_rate": 0.0001,
|
| 7 |
-
"betas": [0.8, 0.99],
|
| 8 |
-
"eps": 1e-09,
|
| 9 |
-
"batch_size": 4,
|
| 10 |
-
"lr_decay": 0.999875,
|
| 11 |
-
"segment_size": 12800,
|
| 12 |
-
"init_lr_ratio": 1,
|
| 13 |
-
"warmup_epochs": 0,
|
| 14 |
-
"c_mel": 45,
|
| 15 |
-
"c_kl": 1.0
|
| 16 |
-
},
|
| 17 |
-
"data": {
|
| 18 |
-
"max_wav_value": 32768.0,
|
| 19 |
-
"sample_rate": 32000,
|
| 20 |
-
"filter_length": 1024,
|
| 21 |
-
"hop_length": 320,
|
| 22 |
-
"win_length": 1024,
|
| 23 |
-
"n_mel_channels": 80,
|
| 24 |
-
"mel_fmin": 0.0,
|
| 25 |
-
"mel_fmax": null
|
| 26 |
-
},
|
| 27 |
-
"model": {
|
| 28 |
-
"inter_channels": 192,
|
| 29 |
-
"hidden_channels": 192,
|
| 30 |
-
"filter_channels": 768,
|
| 31 |
-
"text_enc_hidden_dim": 256,
|
| 32 |
-
"n_heads": 2,
|
| 33 |
-
"n_layers": 6,
|
| 34 |
-
"kernel_size": 3,
|
| 35 |
-
"p_dropout": 0,
|
| 36 |
-
"resblock": "1",
|
| 37 |
-
"resblock_kernel_sizes": [3, 7, 11],
|
| 38 |
-
"resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
|
| 39 |
-
"upsample_rates": [10, 4, 2, 2, 2],
|
| 40 |
-
"upsample_initial_channel": 512,
|
| 41 |
-
"upsample_kernel_sizes": [16, 16, 4, 4, 4],
|
| 42 |
-
"use_spectral_norm": false,
|
| 43 |
-
"gin_channels": 256,
|
| 44 |
-
"spk_embed_dim": 109
|
| 45 |
-
}
|
| 46 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/v1/40000.json
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"train": {
|
| 3 |
-
"log_interval": 200,
|
| 4 |
-
"seed": 1234,
|
| 5 |
-
"epochs": 20000,
|
| 6 |
-
"learning_rate": 0.0001,
|
| 7 |
-
"betas": [0.8, 0.99],
|
| 8 |
-
"eps": 1e-09,
|
| 9 |
-
"batch_size": 4,
|
| 10 |
-
"lr_decay": 0.999875,
|
| 11 |
-
"segment_size": 12800,
|
| 12 |
-
"init_lr_ratio": 1,
|
| 13 |
-
"warmup_epochs": 0,
|
| 14 |
-
"c_mel": 45,
|
| 15 |
-
"c_kl": 1.0
|
| 16 |
-
},
|
| 17 |
-
"data": {
|
| 18 |
-
"max_wav_value": 32768.0,
|
| 19 |
-
"sample_rate": 40000,
|
| 20 |
-
"filter_length": 2048,
|
| 21 |
-
"hop_length": 400,
|
| 22 |
-
"win_length": 2048,
|
| 23 |
-
"n_mel_channels": 125,
|
| 24 |
-
"mel_fmin": 0.0,
|
| 25 |
-
"mel_fmax": null
|
| 26 |
-
},
|
| 27 |
-
"model": {
|
| 28 |
-
"inter_channels": 192,
|
| 29 |
-
"hidden_channels": 192,
|
| 30 |
-
"filter_channels": 768,
|
| 31 |
-
"text_enc_hidden_dim": 256,
|
| 32 |
-
"n_heads": 2,
|
| 33 |
-
"n_layers": 6,
|
| 34 |
-
"kernel_size": 3,
|
| 35 |
-
"p_dropout": 0,
|
| 36 |
-
"resblock": "1",
|
| 37 |
-
"resblock_kernel_sizes": [3, 7, 11],
|
| 38 |
-
"resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
|
| 39 |
-
"upsample_rates": [10, 10, 2, 2],
|
| 40 |
-
"upsample_initial_channel": 512,
|
| 41 |
-
"upsample_kernel_sizes": [16, 16, 4, 4],
|
| 42 |
-
"use_spectral_norm": false,
|
| 43 |
-
"gin_channels": 256,
|
| 44 |
-
"spk_embed_dim": 109
|
| 45 |
-
}
|
| 46 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/v1/48000.json
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"train": {
|
| 3 |
-
"log_interval": 200,
|
| 4 |
-
"seed": 1234,
|
| 5 |
-
"epochs": 20000,
|
| 6 |
-
"learning_rate": 0.0001,
|
| 7 |
-
"betas": [0.8, 0.99],
|
| 8 |
-
"eps": 1e-09,
|
| 9 |
-
"batch_size": 4,
|
| 10 |
-
"lr_decay": 0.999875,
|
| 11 |
-
"segment_size": 11520,
|
| 12 |
-
"init_lr_ratio": 1,
|
| 13 |
-
"warmup_epochs": 0,
|
| 14 |
-
"c_mel": 45,
|
| 15 |
-
"c_kl": 1.0
|
| 16 |
-
},
|
| 17 |
-
"data": {
|
| 18 |
-
"max_wav_value": 32768.0,
|
| 19 |
-
"sample_rate": 48000,
|
| 20 |
-
"filter_length": 2048,
|
| 21 |
-
"hop_length": 480,
|
| 22 |
-
"win_length": 2048,
|
| 23 |
-
"n_mel_channels": 128,
|
| 24 |
-
"mel_fmin": 0.0,
|
| 25 |
-
"mel_fmax": null
|
| 26 |
-
},
|
| 27 |
-
"model": {
|
| 28 |
-
"inter_channels": 192,
|
| 29 |
-
"hidden_channels": 192,
|
| 30 |
-
"filter_channels": 768,
|
| 31 |
-
"text_enc_hidden_dim": 256,
|
| 32 |
-
"n_heads": 2,
|
| 33 |
-
"n_layers": 6,
|
| 34 |
-
"kernel_size": 3,
|
| 35 |
-
"p_dropout": 0,
|
| 36 |
-
"resblock": "1",
|
| 37 |
-
"resblock_kernel_sizes": [3, 7, 11],
|
| 38 |
-
"resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
|
| 39 |
-
"upsample_rates": [10, 6, 2, 2, 2],
|
| 40 |
-
"upsample_initial_channel": 512,
|
| 41 |
-
"upsample_kernel_sizes": [16, 16, 4, 4, 4],
|
| 42 |
-
"use_spectral_norm": false,
|
| 43 |
-
"gin_channels": 256,
|
| 44 |
-
"spk_embed_dim": 109
|
| 45 |
-
}
|
| 46 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/v2/32000.json
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"train": {
|
| 3 |
-
"log_interval": 200,
|
| 4 |
-
"seed": 1234,
|
| 5 |
-
"learning_rate": 0.0001,
|
| 6 |
-
"betas": [0.8, 0.99],
|
| 7 |
-
"eps": 1e-09,
|
| 8 |
-
"lr_decay": 0.999875,
|
| 9 |
-
"segment_size": 12800,
|
| 10 |
-
"c_mel": 45,
|
| 11 |
-
"c_kl": 1.0
|
| 12 |
-
},
|
| 13 |
-
"data": {
|
| 14 |
-
"max_wav_value": 32768.0,
|
| 15 |
-
"sample_rate": 32000,
|
| 16 |
-
"filter_length": 1024,
|
| 17 |
-
"hop_length": 320,
|
| 18 |
-
"win_length": 1024,
|
| 19 |
-
"n_mel_channels": 80,
|
| 20 |
-
"mel_fmin": 0.0,
|
| 21 |
-
"mel_fmax": null
|
| 22 |
-
},
|
| 23 |
-
"model": {
|
| 24 |
-
"inter_channels": 192,
|
| 25 |
-
"hidden_channels": 192,
|
| 26 |
-
"filter_channels": 768,
|
| 27 |
-
"text_enc_hidden_dim": 768,
|
| 28 |
-
"n_heads": 2,
|
| 29 |
-
"n_layers": 6,
|
| 30 |
-
"kernel_size": 3,
|
| 31 |
-
"p_dropout": 0,
|
| 32 |
-
"resblock": "1",
|
| 33 |
-
"resblock_kernel_sizes": [3, 7, 11],
|
| 34 |
-
"resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
|
| 35 |
-
"upsample_rates": [10, 8, 2, 2],
|
| 36 |
-
"upsample_initial_channel": 512,
|
| 37 |
-
"upsample_kernel_sizes": [20, 16, 4, 4],
|
| 38 |
-
"use_spectral_norm": false,
|
| 39 |
-
"gin_channels": 256,
|
| 40 |
-
"spk_embed_dim": 109
|
| 41 |
-
}
|
| 42 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/v2/40000.json
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"train": {
|
| 3 |
-
"log_interval": 200,
|
| 4 |
-
"seed": 1234,
|
| 5 |
-
"learning_rate": 0.0001,
|
| 6 |
-
"betas": [0.8, 0.99],
|
| 7 |
-
"eps": 1e-09,
|
| 8 |
-
"lr_decay": 0.999875,
|
| 9 |
-
"segment_size": 12800,
|
| 10 |
-
"c_mel": 45,
|
| 11 |
-
"c_kl": 1.0
|
| 12 |
-
},
|
| 13 |
-
"data": {
|
| 14 |
-
"max_wav_value": 32768.0,
|
| 15 |
-
"sample_rate": 40000,
|
| 16 |
-
"filter_length": 2048,
|
| 17 |
-
"hop_length": 400,
|
| 18 |
-
"win_length": 2048,
|
| 19 |
-
"n_mel_channels": 125,
|
| 20 |
-
"mel_fmin": 0.0,
|
| 21 |
-
"mel_fmax": null
|
| 22 |
-
},
|
| 23 |
-
"model": {
|
| 24 |
-
"inter_channels": 192,
|
| 25 |
-
"hidden_channels": 192,
|
| 26 |
-
"filter_channels": 768,
|
| 27 |
-
"text_enc_hidden_dim": 768,
|
| 28 |
-
"n_heads": 2,
|
| 29 |
-
"n_layers": 6,
|
| 30 |
-
"kernel_size": 3,
|
| 31 |
-
"p_dropout": 0,
|
| 32 |
-
"resblock": "1",
|
| 33 |
-
"resblock_kernel_sizes": [3, 7, 11],
|
| 34 |
-
"resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
|
| 35 |
-
"upsample_rates": [10, 10, 2, 2],
|
| 36 |
-
"upsample_initial_channel": 512,
|
| 37 |
-
"upsample_kernel_sizes": [16, 16, 4, 4],
|
| 38 |
-
"use_spectral_norm": false,
|
| 39 |
-
"gin_channels": 256,
|
| 40 |
-
"spk_embed_dim": 109
|
| 41 |
-
}
|
| 42 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|