Spaces:

AnhP
/

RVC-GUI

Running

App Files Community

AnhP committed 12 days ago

Commit

464d3e5

verified ·

1 Parent(s): 90c5266

Delete main

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

main/app/app.py +0 -524
main/app/core/csrt.py +0 -72
main/app/core/downloads.py +0 -208
main/app/core/editing.py +0 -92
main/app/core/f0_extract.py +0 -54
main/app/core/inference.py +0 -441
main/app/core/model_utils.py +0 -164
main/app/core/presets.py +0 -166
main/app/core/process.py +0 -135
main/app/core/realtime.py +0 -174
main/app/core/realtime_client.py +0 -114
main/app/core/restart.py +0 -48
main/app/core/separate.py +0 -95
main/app/core/training.py +0 -265
main/app/core/tts.py +0 -100
main/app/core/ui.py +0 -362
main/app/core/utils.py +0 -61
main/app/parser.py +0 -369
main/app/run_tensorboard.py +0 -32
main/app/tabs/downloads/downloads.py +0 -112
main/app/tabs/editing/child/audio_effects.py +0 -393
main/app/tabs/editing/child/quirk.py +0 -48
main/app/tabs/editing/editing.py +0 -20
main/app/tabs/extra/child/convert_model.py +0 -31
main/app/tabs/extra/child/create_srt.py +0 -56
main/app/tabs/extra/child/f0_extract.py +0 -51
main/app/tabs/extra/child/fushion.py +0 -45
main/app/tabs/extra/child/read_model.py +0 -29
main/app/tabs/extra/child/settings.py +0 -61
main/app/tabs/extra/extra.py +0 -40
main/app/tabs/inference/child/convert.py +0 -328
main/app/tabs/inference/child/convert_tts.py +0 -280
main/app/tabs/inference/child/convert_with_whisper.py +0 -164
main/app/tabs/inference/child/separate.py +0 -263
main/app/tabs/inference/inference.py +0 -30
main/app/tabs/realtime/realtime.py +0 -226
main/app/tabs/realtime/realtime_client.py +0 -210
main/app/tabs/training/child/create_dataset.py +0 -282
main/app/tabs/training/child/create_reference.py +0 -97
main/app/tabs/training/child/training.py +0 -259
main/app/tabs/training/training.py +0 -25
main/app/variables.py +0 -117
main/configs/config.json +0 -622
main/configs/config.py +0 -131
main/configs/rpc.py +0 -78
main/configs/v1/32000.json +0 -46
main/configs/v1/40000.json +0 -46
main/configs/v1/48000.json +0 -46
main/configs/v2/32000.json +0 -42
main/configs/v2/40000.json +0 -42

main/app/app.py DELETED Viewed

@@ -1,524 +0,0 @@
-import os
-import io
-import ssl
-import sys
-import time
-import codecs
-import logging
-import warnings
-import gradio as gr
-sys.path.append(os.getcwd())
-start_time = time.time()
-from main.app.tabs.extra.extra import extra_tab
-from main.app.tabs.editing.editing import editing_tab
-from main.app.tabs.training.training import training_tab
-from main.app.tabs.downloads.downloads import download_tab
-from main.app.tabs.inference.inference import inference_tab
-from main.configs.rpc import connect_discord_ipc, send_discord_rpc
-from main.app.variables import logger, config, translations, theme, font, configs, language, allow_disk
-# Make the standard library's default HTTPS context the unverified one.
-# NOTE(review): this turns off TLS certificate verification for code that relies on the
-# default context, process-wide - confirm this is intentional.
-ssl._create_default_https_context = ssl._create_unverified_context
-# Suppress every Python warning for the lifetime of the process.
-warnings.filterwarnings("ignore")
-# Only let ERROR-level (and above) records through for these third-party loggers.
-for l in ["httpx", "gradio", "uvicorn", "httpcore", "urllib3"]:
-    logging.getLogger(l).setLevel(logging.ERROR)
-# JavaScript handed to gr.Blocks(js=...) further down. It defines three browser-side entry
-# points on `window`:
-#   getAudioDevices()        - requests microphone access and returns
-#                              {"inputs": {label: deviceId}, "outputs": {label: deviceId}}
-#   StreamAudioRealtime(...) - captures the microphone through an AudioWorklet, sends the
-#                              chunks over a WebSocket at /api/ws-audio and plays back the
-#                              binary chunks that come back
-#   StopAudioStream()        - closes the socket, stops the tracks and tears down the audio graph
-# The __UPPER_CASE__ placeholders inside the literal are substituted with localized strings
-# by the .replace() chain that follows it.
-# NOTE(review): calls such as setStatus(msg, use_alert=false) are JavaScript assignment
-# expressions, not keyword arguments; they pass `false` positionally and also assign to an
-# undeclared `use_alert` variable.
-# NOTE(review): StopAudioStream removes every <audio> element in the document, not only the
-# ones created by createOutputRoute - confirm this does not affect other players on the page.
-js_code = """
-() => {
-    window._activeStream = null;
-    window._audioCtx = null;
-    window._workletNode = null;
-    window._playbackNode = null;
-    window._ws = null;
-    function setStatus(msg, use_alert = true) {
-        const realtimeStatus = document.querySelector("#realtime-status-info h2.output-class");
-        if (use_alert) alert(msg);
-        if (realtimeStatus) {
-            realtimeStatus.innerText = msg;
-            realtimeStatus.style.whiteSpace = "nowrap";
-            realtimeStatus.style.textAlign = "center";
-        }
-    }
-    async function addModuleFromString(ctx, codeStr) {
-        const blob = new Blob([codeStr], {type: 'application/javascript'});
-        const url = URL.createObjectURL(blob);
-        await ctx.audioWorklet.addModule(url);
-        URL.revokeObjectURL(url);
-    };
-    function createOutputRoute(audioCtx, playbackNode, sinkId, gainValue = 1.0) {
-        const dest = audioCtx.createMediaStreamDestination();
-        const gainNode = audioCtx.createGain();
-        gainNode.gain.value = gainValue;
-        playbackNode.connect(gainNode);
-        gainNode.connect(dest);
-        const el = document.createElement('audio');
-        el.autoplay = true;
-        el.srcObject = dest.stream;
-        el.style.display = 'none';
-        document.body.appendChild(el);
-        if (el.setSinkId) el.setSinkId(sinkId).catch(err => console.error(err));
-        return { dest, gainNode, el };
-    }
-    const inputWorkletSource = `
-        class InputProcessor extends AudioWorkletProcessor {
-            constructor() {
-                super();
-                this.buffer = new Float32Array(0);
-                this.block_frame = 128;
-                this.port.onmessage = (e) => {
-                    if (e.data && e.data.block_frame) this.block_frame = e.data.block_frame;
-                };
-            }
-            process(inputs) {
-                const input = inputs[0];
-                if (!input || !input[0]) return true;
-                const frame = input[0];
-                const newBuf = new Float32Array(this.buffer.length + frame.length);
-                newBuf.set(this.buffer, 0);
-                newBuf.set(frame, this.buffer.length);
-                this.buffer = newBuf;
-                while (this.buffer.length >= this.block_frame) {
-                    const chunk = this.buffer.slice(0, this.block_frame);
-                    this.port.postMessage({chunk}, [chunk.buffer]);
-                    this.buffer = this.buffer.slice(this.block_frame);
-                }
-                return true;
-            }
-        }
-        registerProcessor('input-processor', InputProcessor);
-        `;
-        const playbackWorkletSource = `
-            class PlaybackProcessor extends AudioWorkletProcessor {
-                constructor(options) {
-                    super(options);
-                    const bufferSize = options.processorOptions && options.processorOptions.bufferSize ? options.processorOptions.bufferSize: 98304;
-                    this.buffer = new Float32Array(bufferSize);
-                    this.bufferCapacity = bufferSize;
-                    this.writePointer = 0;
-                    this.readPointer = 0;
-                    this.availableSamples = 0;
-                    this.port.onmessage = (e) => {
-                        if (e.data && e.data.chunk) {
-                            const chunk = new Float32Array(e.data.chunk);
-                            const chunkSize = chunk.length;
-                            if (this.availableSamples + chunkSize > this.bufferCapacity) return;
-                            for (let i = 0; i < chunkSize; i++) {
-                                this.buffer[this.writePointer] = chunk[i];
-                                this.writePointer = (this.writePointer + 1) % this.bufferCapacity;
-                            }
-                            this.availableSamples += chunkSize;
-                        }
-                    };
-                }
-                process(inputs, outputs) {
-                    const output = outputs[0];
-                    if (!output || !output[0]) return true;
-                    const frame = output[0];
-                    const frameSize = frame.length;
-                    if (this.availableSamples >= frameSize) {
-                        for (let i = 0; i < frameSize; i++) {
-                            frame[i] = this.buffer[this.readPointer];
-                            this.readPointer = (this.readPointer + 1) % this.bufferCapacity;
-                        }
-                        this.availableSamples -= frameSize;
-                    } else {
-                        frame.fill(0);
-                    }
-                    if (output.length > 1) output[1].set(output[0]);
-                    return true;
-                }
-            }
-            registerProcessor('playback-processor', PlaybackProcessor);
-            `;
-    window.getAudioDevices = async function() {
-        if (!navigator.mediaDevices) {
-            setStatus("__MEDIA_DEVICES__");
-            return {"inputs": {}, "outputs": {}};
-        }
-        try {
-            await navigator.mediaDevices.getUserMedia({ audio: true });
-        } catch (err) {
-            console.error(err);
-            setStatus("__MIC_INACCESSIBLE__")
-            return {"inputs": {}, "outputs": {}};
-        }
-        const devices = await navigator.mediaDevices.enumerateDevices();
-        const inputs = {};
-        const outputs = {};
-        for (const device of devices) {
-            if (device.kind === "audioinput") {
-                inputs[device.label] = device.deviceId
-            } else if (device.kind === "audiooutput") {
-                outputs[device.label] = device.deviceId
-            }
-        }
-        if (!Object.keys(inputs).length && !Object.keys(outputs).length) return {"inputs": {}, "outputs": {}};
-        return {"inputs": inputs, "outputs": outputs};
-    };
-    window.StreamAudioRealtime = async function(
-        monitor,
-        vad_enabled,
-        input_audio_device,
-        output_audio_device,
-        monitor_output_device,
-        input_audio_gain,
-        output_audio_gain,
-        monitor_audio_gain,
-        chunk_size,
-        pitch,
-        model_pth,
-        model_index,
-        index_strength,
-        onnx_f0_mode,
-        f0_method,
-        hop_length,
-        embed_mode,
-        embedders,
-        custom_embedders,
-        f0_autotune,
-        proposal_pitch,
-        f0_autotune_strength,
-        proposal_pitch_threshold,
-        rms_mix_rate,
-        protect,
-        filter_radius,
-        silent_threshold,
-        extra_convert_size,
-        cross_fade_overlap_size,
-        vad_sensitivity,
-        vad_frame_ms,
-        clean_audio,
-        clean_strength
-    ) {
-        const SampleRate = 48000;
-        const ReadChunkSize = Math.round(chunk_size * SampleRate / 1000 / 128);
-        const block_frame = parseInt(ReadChunkSize) * 128;
-        const ButtonState = { start_button: true, stop_button: false };
-        const devices = await window.getAudioDevices();
-        input_audio_device = devices["inputs"][input_audio_device];
-        output_audio_device = devices["outputs"][output_audio_device];
-        if (monitor && devices["outputs"][monitor_output_device]) monitor_output_device = devices["outputs"][monitor_output_device];
-        try {
-            if (!input_audio_device || !output_audio_device) {
-                setStatus("__PROVIDE_AUDIO_DEVICE__");
-                return ButtonState;
-            }
-            if (monitor && !monitor_output_device) {
-                setStatus("__PROVIDE_MONITOR_DEVICE__");
-                return ButtonState;
-            }
-            if (!model_pth) {
-                setStatus("__PROVIDE_MODEL__")
-                return ButtonState;
-            }
-            setStatus("__START_REALTIME__", use_alert=false)
-            const stream = await navigator.mediaDevices.getUserMedia({
-                audio: {
-                    deviceId: { exact: input_audio_device },
-                    channelCount: 1,
-                    sampleRate: SampleRate,
-                    echoCancellation: false,
-                    noiseSuppression: false,
-                    autoGainControl: false
-                }
-            });
-            window._activeStream = stream;
-            window._audioCtx = new AudioContext({ sampleRate: SampleRate, latencyHint: "interactive" });
-            await addModuleFromString(window._audioCtx, inputWorkletSource);
-            await addModuleFromString(window._audioCtx, playbackWorkletSource);
-            const src = window._audioCtx.createMediaStreamSource(stream);
-            const inputNode = new AudioWorkletNode(window._audioCtx, 'input-processor');
-            const playbackNode = new AudioWorkletNode(window._audioCtx, 'playback-processor', {
-                processorOptions: {
-                    bufferSize: block_frame * 2
-                }
-            });
-            inputNode.port.postMessage({ block_frame: block_frame });
-            src.connect(inputNode);
-            createOutputRoute(window._audioCtx, playbackNode, output_audio_device, output_audio_gain / 100);
-            if (monitor && monitor_output_device) createOutputRoute(window._audioCtx, playbackNode, monitor_output_device, monitor_audio_gain / 100);
-            const protocol = (location.protocol === "https:") ? "wss:" : "ws:";
-            const wsUrl = protocol + '//' + location.hostname + `:${location.port}` + '/api/ws-audio';
-            const ws = new WebSocket(wsUrl);
-            ButtonState.start_button = false;
-            ButtonState.stop_button = true;
-            ws.binaryType = "arraybuffer";
-            window._ws = ws;
-            ws.onopen = () => {
-                console.log("__WS_CONNECTED__")
-                ws.send(
-                    JSON.stringify({
-                        type: 'init',
-                        chunk_size: ReadChunkSize,
-                        embedders: embedders,
-                        model_pth: model_pth,
-                        custom_embedders: custom_embedders,
-                        cross_fade_overlap_size: cross_fade_overlap_size,
-                        extra_convert_size: extra_convert_size,
-                        model_index: model_index,
-                        f0_method: f0_method,
-                        f0_onnx: onnx_f0_mode,
-                        embedders_mode: embed_mode,
-                        hop_length: hop_length,
-                        silent_threshold: silent_threshold,
-                        vad_enabled: vad_enabled,
-                        vad_sensitivity: vad_sensitivity,
-                        vad_frame_ms: vad_frame_ms,
-                        clean_audio: clean_audio,
-                        clean_strength: clean_strength,
-                        f0_up_key: pitch,
-                        index_rate: index_strength,
-                        protect: protect,
-                        filter_radius: filter_radius,
-                        rms_mix_rate: rms_mix_rate,
-                        f0_autotune: f0_autotune,
-                        f0_autotune_strength: f0_autotune_strength,
-                        proposal_pitch: proposal_pitch,
-                        proposal_pitch_threshold: proposal_pitch_threshold,
-                        input_audio_gain: input_audio_gain
-                    })
-                );
-            };
-            inputNode.port.onmessage = (e) => {
-                const chunk = e.data && e.data.chunk;
-                if (!chunk) return;
-                if (ws.readyState === WebSocket.OPEN) ws.send(chunk);
-            };
-            ws.onmessage = (ev) => {
-                if (typeof ev.data === 'string') {
-                    const msg = JSON.parse(ev.data);
-                    if (msg.type === 'latency') setStatus(`__LATENCY__: ${msg.value.toFixed(1)} ms`, use_alert=false)
-                    if (msg.type === 'warnings') {
-                        setStatus(msg.value);
-                        StopAudioStream();
-                    }
-                    return;
-                }
-                const ab = ev.data;
-                playbackNode.port.postMessage({ chunk: ab }, [ab]);
-            };
-            ws.onclose = () => console.log("__WS_CLOSED__");
-            window._workletNode = inputNode;
-            window._playbackNode = playbackNode;
-            if (window._audioCtx.state === 'suspended') await window._audioCtx.resume();
-            console.log("__REALTIME_STARTED__");
-            return ButtonState;
-        } catch (err) {
-            console.error("__ERROR__", err);
-            alert("__ERROR__" + err.message);
-            return StopAudioStream();
-        }
-    };
-    window.StopAudioStream = async function() {
-        try {
-            if (window._ws) {
-                window._ws.close();
-                window._ws = null;
-            }
-            if (window._activeStream) {
-                window._activeStream.getTracks().forEach(t => t.stop());
-                window._activeStream = null;
-            }
-            if (window._workletNode) {
-                window._workletNode.disconnect();
-                window._workletNode = null;
-            }
-            if (window._playbackNode) {
-                window._playbackNode.disconnect();
-                window._playbackNode = null;
-            }
-            if (window._audioCtx) {
-                await window._audioCtx.close();
-                window._audioCtx = null;
-            }
-            document.querySelectorAll('audio').forEach(a => a.remove());
-            setStatus("__REALTIME_HAS_STOP__", use_alert=false);
-            return {"start_button": true, "stop_button": false};
-        } catch (e) {
-            setStatus(`__ERROR__ ${e}`);
-            return {"start_button": false, "stop_button": true}
-        }
-    };
-}
-""".replace(
-    "__MEDIA_DEVICES__", translations["media_devices"]
-).replace(
-    "__MIC_INACCESSIBLE__", translations["mic_inaccessible"]
-).replace(
-    "__PROVIDE_AUDIO_DEVICE__", translations["provide_audio_device"]
-).replace(
-    "__PROVIDE_MONITOR_DEVICE__", translations["provide_monitor_device"]
-).replace(
-    "__START_REALTIME__", translations["start_realtime"]
-).replace(
-    "__LATENCY__", translations['latency']
-).replace(
-    "__WS_CONNECTED__", translations["ws_connected"]
-).replace(
-    "__WS_CLOSED__", translations["ws_closed"]
-).replace(
-    "__REALTIME_STARTED__", translations["realtime_is_ready"]
-).replace(
-    # The error template is rendered with an empty `e`; the JavaScript appends the actual error text.
-    "__ERROR__", translations["error_occurred"].format(e="")
-).replace(
-    "__REALTIME_HAS_STOP__", translations["realtime_has_stop"]
-).replace(
-    "__PROVIDE_MODEL__", translations["provide_file"].format(filename=translations["model"])
-)
-# Selects the realtime tab variant. The command-line check is commented out, so the
-# browser-client variant (and the injected JavaScript) is always used.
-client_mode = True # "--client" in sys.argv
-# Build the Gradio UI: title, the feature tabs, then the footer notices.
-with gr.Blocks(
-    title="📱 Vietnamese-RVC GUI BY ANH",
-    js=js_code if client_mode else None,
-    theme=theme,
-    css="<style> @import url('{fonts}'); * {{font-family: 'Courgette', cursive !important;}} body, html {{font-family: 'Courgette', cursive !important;}} h1, h2, h3, h4, h5, h6, p, button, input, textarea, label, span, div, select {{font-family: 'Courgette', cursive !important;}} </style>".format(fonts=font or "https://fonts.googleapis.com/css2?family=Courgette&display=swap")
-) as app:
-    gr.HTML("<h1 style='text-align: center;'>🎵VIETNAMESE RVC BY ANH🎵</h1>")
-    gr.HTML(f"<h3 style='text-align: center;'>{translations['title']}</h3>")
-    with gr.Tabs():
-        inference_tab()
-        editing_tab()
-        # The realtime tab module is imported lazily so only the selected variant is loaded.
-        if client_mode:
-            from main.app.tabs.realtime.realtime_client import realtime_client_tab
-            realtime_client_tab()
-        else:
-            from main.app.tabs.realtime.realtime import realtime_tab
-            realtime_tab()
-        training_tab()
-        download_tab()
-        extra_tab(app)
-    with gr.Row():
-        gr.Markdown(translations["rick_roll"].format(rickroll=codecs.decode('uggcf://jjj.lbhghor.pbz/jngpu?i=qDj4j9JtKpD', 'rot13')))
-    with gr.Row():
-        gr.Markdown(translations["terms_of_use"])
-    with gr.Row():
-        gr.Markdown(translations["exemption"])
-    if __name__ == "__main__":
-        logger.info(config.device.replace("privateuseone", "dml"))
-        logger.info(translations["start_app"])
-        logger.info(translations["set_lang"].format(lang=language))
-        port = configs.get("app_port", 7860)
-        server_name = configs.get("server_name", "0.0.0.0")
-        share = "--share" in sys.argv
-        # Capture stdout while launching so whatever launch() prints is discarded.
-        original_stdout = sys.stdout
-        sys.stdout = io.StringIO()
-        last_error = None
-        # Try to launch; on OSError step down to the next lower port and retry.
-        for i in range(configs.get("num_of_restart", 5)):
-            try:
-                gradio_app, _, share_url = app.queue().launch(
-                    favicon_path=configs["ico_path"],
-                    server_name=server_name,
-                    server_port=port,
-                    show_error=configs.get("app_show_error", False),
-                    inbrowser="--open" in sys.argv,
-                    share=share,
-                    allowed_paths=allow_disk,
-                    prevent_thread_lock=True,
-                    quiet=True
-                )
-                break
-            except OSError as e:
-                last_error = e
-                logger.debug(translations["port"].format(port=port))
-                port -= 1
-            except Exception as e:
-                # Put stdout back before exiting so nothing printed afterwards is swallowed.
-                sys.stdout = original_stdout
-                logger.error(translations["error_occurred"].format(e=e))
-                sys.exit(1)
-        else:
-            # Every attempt failed: without this branch `gradio_app` and `share_url` would be
-            # unbound below and the script would die with a NameError while stdout is still captured.
-            sys.stdout = original_stdout
-            logger.error(translations["error_occurred"].format(e=last_error))
-            sys.exit(1)
-        if client_mode:
-            # Serve the realtime FastAPI app under /api on the same server as the UI.
-            from main.app.core.realtime_client import app as fastapi_app
-            gradio_app.mount("/api", fastapi_app)
-        sys.stdout = original_stdout
-        if configs.get("discord_presence", True):
-            pipe = connect_discord_ipc()
-            if pipe:
-                try:
-                    logger.info(translations["start_rpc"])
-                    send_discord_rpc(pipe)
-                except KeyboardInterrupt:
-                    logger.info(translations["stop_rpc"])
-                    pipe.close()
-        logger.info(f"{translations['running_local_url']}: {server_name}:{port}")
-        if share: logger.info(f"{translations['running_share_url']}: {share_url}")
-        logger.info(f"{translations['gradio_start']}: {(time.time() - start_time):.2f}s")
-        # launch() was started with prevent_thread_lock=True, so keep the main thread alive here.
-        while 1:
-            time.sleep(5)

main/app/core/csrt.py DELETED Viewed

@@ -1,72 +0,0 @@
-import os
-import sys
-sys.path.append(os.getcwd())
-from main.app.core.inference import whisper_process
-from main.library.utils import check_spk_diarization
-from main.app.core.ui import gr_info, gr_warning, process_output
-from main.app.variables import config, translations, configs, logger
-def create_srt(model_size, input_audio, output_file, word_timestamps):
-    """Transcribe `input_audio` in a child process and write the segments to an .srt file.
-
-    Returns a two-item list: a Gradio update dict pointing at the written file and the SRT
-    text, or [None, None] when the input or output path is not usable.
-    """
-    import multiprocessing as mp
-    if not input_audio or not os.path.exists(input_audio) or os.path.isdir(input_audio):
-        gr_warning(translations["input_not_valid"])
-        return [None]*2
-    # Validate before appending the extension: otherwise an empty value turns into ".srt"
-    # and is never rejected, and None raises on .endswith().
-    if not output_file:
-        gr_warning(translations["output_not_valid"])
-        return [None]*2
-    if not output_file.endswith(".srt"): output_file += ".srt"
-    output_dir = os.path.dirname(output_file)
-    # dirname is "" for a bare file name, and os.makedirs("") would raise.
-    if output_dir and not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
-    output_file = process_output(output_file)
-    check_spk_diarization(model_size, speechbrain=False)
-    gr_info(translations["csrt"])
-    try:
-        mp.set_start_method("spawn")
-    except RuntimeError:
-        # The start method can only be set once per process; a repeated call is harmless.
-        pass
-    whisper_queue = mp.Queue()
-    whisperprocess = mp.Process(target=whisper_process, args=(model_size, input_audio, configs, config.device, whisper_queue, word_timestamps))
-    whisperprocess.start()
-    # Blocks until the child puts its result on the queue.
-    segments = whisper_queue.get()
-    entries = []
-    with open(output_file, "w", encoding="utf-8") as f:
-        for i, segment in enumerate(segments):
-            text = segment["text"].strip()
-            # One SRT cue: 1-based index, time range, text, blank separator line.
-            entry = f"{i+1}\n{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n{text}\n\n"
-            f.write(entry)
-            entries.append(entry)
-    info = "".join(entries)
-    logger.info(info)
-    gr_info(translations["success"])
-    return [{"value": output_file, "visible": True, "__type__": "update"}, info]
-def format_timestamp(seconds):
-    """Format a duration in seconds as an SRT timestamp, HH:MM:SS,mmm.
-
-    The value is rounded to whole milliseconds first and then split, so the fractional
-    part is kept and rounding carries correctly into the seconds/minutes/hours fields.
-    """
-    total_ms = int(round(seconds * 1000))
-    total_seconds, miliseconds = divmod(total_ms, 1000)
-    total_minutes, secs = divmod(total_seconds, 60)
-    hours, minutes = divmod(total_minutes, 60)
-    return f"{hours:02}:{minutes:02}:{secs:02},{miliseconds:03}"

main/app/core/downloads.py DELETED Viewed

@@ -1,208 +0,0 @@
-import os
-import re
-import sys
-import json
-import codecs
-import shutil
-import yt_dlp
-import warnings
-import requests
-from bs4 import BeautifulSoup
-sys.path.append(os.getcwd())
-from main.tools import huggingface, gdown, meganz, mediafire, pixeldrain
-from main.app.variables import logger, translations, model_options, configs
-from main.app.core.process import move_files_from_directory, fetch_pretrained_data, extract_name_model
-from main.app.core.ui import gr_info, gr_warning, gr_error, process_output, replace_url, replace_modelname
-def download_url(url):
-    """Download the audio of `url` with yt-dlp, converted to WAV, into configs["audios_path"].
-
-    Returns a three-item list: the output path twice followed by the success message, or
-    [None, None, None] when no URL is given.
-    """
-    if not url:
-        gr_warning(translations["provide_url"])
-        return [None]*3
-    audios_dir = configs["audios_path"]
-    if not os.path.exists(audios_dir): os.makedirs(audios_dir, exist_ok=True)
-    with warnings.catch_warnings():
-        warnings.filterwarnings("ignore")
-        extract_wav = {
-            "key": "FFmpegExtractAudio",
-            "preferredcodec": "wav",
-            "preferredquality": "192"
-        }
-        ydl_opts = {
-            "format": "bestaudio/best",
-            "postprocessors": [extract_wav],
-            "quiet": True,
-            "no_warnings": True,
-            "noplaylist": True,
-            "verbose": False
-        }
-        gr_info(translations["start"].format(start=translations["download_music"]))
-        # First pass: fetch metadata only, to derive the output name from the title.
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            title = ydl.extract_info(url, download=False).get('title', 'video')
-            # Strip characters outside the allowed set, then join the words with dashes.
-            cleaned_title = re.sub(r'[^\w\s\u4e00-\u9fff\uac00-\ud7af\u0400-\u04FF\u1100-\u11FF]', '', title).strip()
-            audio_output = os.path.join(audios_dir, re.sub(r'\s+', '-', cleaned_title))
-            if os.path.exists(audio_output): shutil.rmtree(audio_output, ignore_errors=True)
-            ydl_opts['outtmpl'] = audio_output
-        # Second pass: a new downloader instance picks up the 'outtmpl' set above.
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            audio_output = process_output(audio_output + ".wav")
-            ydl.download([url])
-        gr_info(translations["success"])
-        return [audio_output, audio_output, translations["success"]]
-def move_file(file, download_dir, model):
-    """Unpack `file` into `download_dir` when it is a .zip, then hand the directory to
-    move_files_from_directory together with the weights and logs folders from `configs`."""
-    weights_dir, logs_dir = configs["weights_path"], configs["logs_path"]
-    # Make sure both destination folders exist before anything is moved.
-    for target_dir in (weights_dir, logs_dir):
-        if not os.path.exists(target_dir): os.makedirs(target_dir, exist_ok=True)
-    if file.endswith(".zip"):
-        shutil.unpack_archive(file, download_dir)
-    move_files_from_directory(download_dir, weights_dir, logs_dir, model)
-def download_model(url=None, model=None):
-    """Download a model from a supported host into a scratch folder and move it into place.
-
-    Returns the success message, the "not supported" message, or the formatted error text;
-    when no URL is given it returns whatever gr_warning returns. The scratch folder
-    "download_model" is removed in every case once it has been created.
-    """
-    if not url: return gr_warning(translations["provide_url"])
-    url = replace_url(url)
-    download_dir = "download_model"
-    os.makedirs(download_dir, exist_ok=True)
-    try:
-        gr_info(translations["start"].format(start=translations["download"]))
-        # Checked in this order; the first host substring found in the URL wins.
-        downloaders = (
-            ("huggingface.co", huggingface.HF_download_file),
-            ("google.com", gdown.gdown_download),
-            ("mediafire.com", mediafire.Mediafire_Download),
-            ("pixeldrain.com", pixeldrain.pixeldrain),
-            ("mega.nz", meganz.mega_download_url),
-        )
-        fetch = next((func for host, func in downloaders if host in url), None)
-        if fetch is None:
-            gr_warning(translations["not_support_url"])
-            return translations["not_support_url"]
-        file = fetch(url, download_dir)
-        if not model:
-            # No name supplied: derive one from the downloaded file name.
-            modelname = os.path.basename(file)
-            stem = os.path.splitext(modelname)[0]
-            if modelname.endswith(".index"):
-                model = extract_name_model(modelname)
-            else:
-                model = stem
-            if model is None: model = stem
-        model = replace_modelname(model)
-        move_file(file, download_dir, model)
-        gr_info(translations["success"])
-        return translations["success"]
-    except Exception as e:
-        message = translations["error_occurred"].format(e=e)
-        gr_error(message=message)
-        return message
-    finally:
-        shutil.rmtree(download_dir, ignore_errors=True)
-def download_pretrained_model(choices, model, sample_rate):
-    """Download pretrained weights into configs["pretrained_custom_path"].
-
-    With `choices` equal to translations["list_model"], `model` and `sample_rate` are keys
-    into fetch_pretrained_data() and one file is fetched; the success message is returned.
-    With `choices` equal to translations["download_url"], `model` and `sample_rate` are used
-    as the two download URLs (the warnings label them "D" and "G"); a pair of messages is
-    returned, or [None, None] when a required URL is missing. Any other `choices` value
-    returns None.
-    """
-    pretraineds_custom_path = configs["pretrained_custom_path"]
-    if choices == translations["list_model"]:
-        paths = fetch_pretrained_data()[model][sample_rate]
-        if not os.path.exists(pretraineds_custom_path): os.makedirs(pretraineds_custom_path, exist_ok=True)
-        url = codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/cergenvarq_phfgbz/", "rot13") + paths
-        gr_info(translations["download_pretrain"])
-        file = huggingface.HF_download_file(replace_url(url), os.path.join(pretraineds_custom_path, paths))
-        if file.endswith(".zip"):
-            shutil.unpack_archive(file, pretraineds_custom_path)
-            os.remove(file)
-        gr_info(translations["success"])
-        return translations["success"]
-    elif choices == translations["download_url"]:
-        # Either field may be empty or None; normalize first so the suffix test cannot raise
-        # before the "missing URL" warnings below get a chance to run.
-        zip_suffixes = (".zip", ".zip?download=true")
-        pretrain_is_zip = (model or "").endswith(zip_suffixes) or (sample_rate or "").endswith(zip_suffixes)
-        urls = []
-        # A single zip archive is accepted in place of the two separate URLs.
-        if not model and not pretrain_is_zip:
-            gr_warning(translations["provide_pretrain"].format(dg="D"))
-            return [None]*2
-        if not sample_rate and not pretrain_is_zip:
-            gr_warning(translations["provide_pretrain"].format(dg="G"))
-            return [None]*2
-        gr_info(translations["download_pretrain"])
-        if model: urls.append(model)
-        if sample_rate: urls.append(sample_rate)
-        for url in urls:
-            url = replace_url(url)
-            if "huggingface.co" in url: file = huggingface.HF_download_file(url, pretraineds_custom_path)
-            elif "google.com" in url: file = gdown.gdown_download(url, pretraineds_custom_path)
-            elif "mediafire.com" in url: file = mediafire.Mediafire_Download(url, pretraineds_custom_path)
-            elif "pixeldrain.com" in url: file = pixeldrain.pixeldrain(url, pretraineds_custom_path)
-            elif "mega.nz" in url: file = meganz.mega_download_url(url, pretraineds_custom_path)
-            else:
-                gr_warning(translations["not_support_url"])
-                return translations["not_support_url"], translations["not_support_url"]
-            if file.endswith(".zip"):
-                shutil.unpack_archive(file, pretraineds_custom_path)
-                if os.path.exists(file): os.remove(file)
-        gr_info(translations["success"])
-        return translations["success"], translations["success"]
-def fetch_models_data(search):
-    """Collect the "table" HTML fragments for `search`, one per result page.
-
-    Pages are requested starting at 1 until a page comes back with an empty table, a
-    non-200 status is received, or a request/JSON error occurs. Returns the list of
-    fragments gathered so far.
-    """
-    endpoint = codecs.decode("uggcf://ibvpr-zbqryf.pbz/srgpu_qngn.cuc", "rot13")
-    collected = []
-    page = 1
-    while True:
-        try:
-            response = requests.post(url=endpoint, data={"page": page, "search": search})
-            if response.status_code != 200:
-                logger.debug(f"{translations['code_error']} {response.status_code}")
-                break
-            table_data = response.json().get("table", "")
-            # A blank table marks the end of the results.
-            if not table_data.strip(): break
-            collected.append(table_data)
-            page += 1
-        except json.JSONDecodeError:
-            logger.debug(translations["json_error"])
-            break
-        except requests.RequestException as e:
-            logger.debug(translations["requests_error"].format(e=e))
-            break
-    return collected
-def search_models(name):
-    """Search for models named `name` and refill `model_options` with the Hugging Face hits.
-
-    Returns two Gradio update dicts (the model dropdown and the download button), or
-    [None, None] when no name is given or nothing is found.
-    """
-    if not name:
-        gr_warning(translations["provide_name"])
-        return [None]*2
-    gr_info(translations["start"].format(start=translations["search"]))
-    tables = fetch_models_data(name)
-    if len(tables) == 0:
-        gr_info(translations["not_found"].format(name=name))
-        return [None]*2
-    else:
-        model_options.clear()
-        for table in tables:
-            for row in BeautifulSoup(table, "html.parser").select("tr"):
-                name_tag, url_tag = row.find("a", {"class": "fs-5"}), row.find("a", {"class": "btn btn-sm fw-bold btn-light ms-0 p-1 ps-2 pe-2"})
-                # find() returns None when a row has no such link; skip the row instead of
-                # failing on the href lookup.
-                if name_tag is None or url_tag is None: continue
-                url = (url_tag.get("href") or "").replace("https://easyaivoice.com/run?url=", "")
-                if "huggingface" in url: model_options[replace_modelname(name_tag.text)] = url
-        gr_info(translations["found"].format(results=len(model_options)))
-        return [{"value": "", "choices": model_options, "interactive": True, "visible": True, "__type__": "update"}, {"value": translations["downloads"], "visible": True, "__type__": "update"}]

main/app/core/editing.py DELETED Viewed

@@ -1,92 +0,0 @@
-import os
-import sys
-import random
-import subprocess
-sys.path.append(os.getcwd())
-from main.app.variables import python, translations, configs
-from main.app.core.ui import gr_info, gr_warning, process_output, replace_export_format
-def audio_effects(input_path, output_path, resample, resample_sr, chorus_depth, chorus_rate, chorus_mix, chorus_delay, chorus_feedback, distortion_drive, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift, delay_seconds, delay_feedback, delay_mix, compressor_threshold, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold, limiter_release, gain_db, bitcrush_bit_depth, clipping_threshold, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost_db, bass_boost_frequency, treble_boost_db, treble_boost_frequency, fade_in_duration, fade_out_duration, export_format, chorus, distortion, reverb, delay, compressor, limiter, gain, bitcrush, clipping, phaser, treble_bass_boost, fade_in_out, audio_combination, audio_combination_input, main_vol, combine_vol):
-    if not input_path or not os.path.exists(input_path) or os.path.isdir(input_path):
-        gr_warning(translations["input_not_valid"])
-        return None
-    if not output_path:
-        gr_warning(translations["output_not_valid"])
-        return None
-    if os.path.isdir(output_path): output_path = os.path.join(output_path, f"audio_effects.{export_format}")
-    output_dir = os.path.dirname(output_path) or output_path
-    if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
-    output_path = process_output(output_path)
-    gr_info(translations["start"].format(start=translations["apply_effect"]))
-    subprocess.run([python, configs["audio_effects_path"], "--input_path", input_path, "--output_path", output_path, "--resample", str(resample), "--resample_sr", str(resample_sr), "--chorus_depth", str(chorus_depth), "--chorus_rate", str(chorus_rate), "--chorus_mix", str(chorus_mix), "--chorus_delay", str(chorus_delay), "--chorus_feedback", str(chorus_feedback), "--drive_db", str(distortion_drive), "--reverb_room_size", str(reverb_room_size), "--reverb_damping", str(reverb_damping), "--reverb_wet_level", str(reverb_wet_level), "--reverb_dry_level", str(reverb_dry_level), "--reverb_width", str(reverb_width), "--reverb_freeze_mode", str(reverb_freeze_mode), "--pitch_shift", str(pitch_shift), "--delay_seconds", str(delay_seconds), "--delay_feedback", str(delay_feedback), "--delay_mix", str(delay_mix), "--compressor_threshold", str(compressor_threshold), "--compressor_ratio", str(compressor_ratio), "--compressor_attack_ms", str(compressor_attack_ms), "--compressor_release_ms", str(compressor_release_ms), "--limiter_threshold", str(limiter_threshold), "--limiter_release", str(limiter_release), "--gain_db", str(gain_db), "--bitcrush_bit_depth", str(bitcrush_bit_depth), "--clipping_threshold", str(clipping_threshold), "--phaser_rate_hz", str(phaser_rate_hz), "--phaser_depth", str(phaser_depth), "--phaser_centre_frequency_hz", str(phaser_centre_frequency_hz), "--phaser_feedback", str(phaser_feedback), "--phaser_mix", str(phaser_mix), "--bass_boost_db", str(bass_boost_db), "--bass_boost_frequency", str(bass_boost_frequency), "--treble_boost_db", str(treble_boost_db), "--treble_boost_frequency", str(treble_boost_frequency), "--fade_in_duration", str(fade_in_duration), "--fade_out_duration", str(fade_out_duration), "--export_format", export_format, "--chorus", str(chorus), "--distortion", str(distortion), "--reverb", str(reverb), "--pitchshift", str(pitch_shift != 0), "--delay", str(delay), "--compressor", str(compressor), "--limiter", str(limiter), "--gain", str(gain), 
"--bitcrush", str(bitcrush), "--clipping", str(clipping), "--phaser", str(phaser), "--treble_bass_boost", str(treble_bass_boost), "--fade_in_out", str(fade_in_out), "--audio_combination", str(audio_combination), "--audio_combination_input", audio_combination_input, "--main_volume", str(main_vol), "--combination_volume", str(combine_vol)])
-    gr_info(translations["success"])
-    return replace_export_format(output_path, export_format)
-def apply_voice_quirk(audio_path, mode, output_path, export_format):
-    if not audio_path or not os.path.exists(audio_path) or os.path.isdir(audio_path):
-        gr_warning(translations["input_not_valid"])
-        return None
-    if not output_path:
-        gr_warning(translations["output_not_valid"])
-        return None
-    if os.path.isdir(output_path): output_path = os.path.join(output_path, f"audio_quirk.{export_format}")
-    output_dir = os.path.dirname(output_path) or output_path
-    if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
-    output_path = process_output(output_path)
-    gr_info(translations["start"].format(start=translations["apply_effect"]))
-    import librosa
-    import numpy as np
-    import soundfile as sf
-    def vibrato(y, sr, freq=5, depth=0.003):
-        return y[np.clip((np.arange(len(y)) + (depth * np.sin(2 * np.pi * freq * (np.arange(len(y)) / sr))) * sr).astype(int), 0, len(y) - 1)]
-    y, sr = librosa.load(audio_path, sr=None)
-    output_path = replace_export_format(output_path, export_format)
-    mode = translations["quirk_choice"][mode]
-    if mode == 0: mode = random.randint(1, 16)
-    if mode == 1: y *= np.random.uniform(0.5, 0.8, size=len(y))
-    elif mode == 2: y = librosa.effects.pitch_shift(y=y + np.random.normal(0, 0.01, y.shape), sr=sr, n_steps=np.random.uniform(-1.5, -3.5))
-    elif mode == 3: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=3), rate=1.2)
-    elif mode == 4: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=8), rate=1.3)
-    elif mode == 5: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=-3), rate=0.75)
-    elif mode == 6: y *= np.sin(np.linspace(0, np.pi * 20, len(y))) * 0.5 + 0.5
-    elif mode == 7: y = librosa.effects.time_stretch(vibrato(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=-4), sr, freq=3, depth=0.004), rate=0.85)
-    elif mode == 8: y *= 0.6 + np.pad(y, (sr // 2, 0), mode='constant')[:len(y)] * 0.4
-    elif mode == 9: y = librosa.effects.pitch_shift(y=y, sr=sr, n_steps=2) + np.sin(np.linspace(0, np.pi * 20, len(y))) * 0.02
-    elif mode == 10: y = vibrato(y, sr, freq=8, depth=0.005)
-    elif mode == 11: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=4), rate=1.25)
-    elif mode == 12: y = np.hstack([np.pad(f, (0, int(len(f)*0.3)), mode='edge') for f in librosa.util.frame(y, frame_length=2048, hop_length=512).T])
-    elif mode == 13: y = np.concatenate([y, np.sin(2 * np.pi * np.linspace(0, 1, int(0.05 * sr))) * 0.02])
-    elif mode == 14: y += np.random.normal(0, 0.005, len(y))
-    elif mode == 15:
-        frame = int(sr * 0.2)
-        chunks = [y[i:i + frame] for i in range(0, len(y), frame)]
-        np.random.shuffle(chunks)
-        y = np.concatenate(chunks)
-    elif mode == 16:
-        frame = int(sr * 0.3)
-        for i in range(0, len(y), frame * 2):
-            y[i:i+frame] = y[i:i+frame][::-1]
-    sf.write(output_path, y, sr, format=export_format)
-    gr_info(translations["success"])
-    return output_path

main/app/core/f0_extract.py DELETED Viewed

@@ -1,54 +0,0 @@
-import os
-import sys
-sys.path.append(os.getcwd())
-from main.app.core.ui import gr_info, gr_warning
-from main.app.variables import config, translations, configs
-def f0_extract(audio, f0_method, f0_onnx):
-    if not audio or not os.path.exists(audio) or os.path.isdir(audio):
-        gr_warning(translations["input_not_valid"])
-        return [None]*2
-    import librosa
-    import numpy as np
-    import matplotlib.pyplot as plt
-    from main.library.utils import check_assets, load_audio
-    from main.library.predictors.Generator import Generator
-    check_assets(f0_method, "", f0_onnx, "")
-    f0_path = os.path.join(configs["f0_path"], os.path.splitext(os.path.basename(audio))[0])
-    image_path = os.path.join(f0_path, "f0.png")
-    txt_path = os.path.join(f0_path, "f0.txt")
-    gr_info(translations["start_extract"])
-    if not os.path.exists(f0_path): os.makedirs(f0_path, exist_ok=True)
-    y = load_audio(audio, sample_rate=16000)
-    f0_generator = Generator(16000, 160, 50, 1100, 0.5, is_half=config.is_half, device=config.device, f0_onnx_mode=f0_onnx, del_onnx_model=f0_onnx)
-    _, pitchf = f0_generator.calculator(config.x_pad, f0_method, y, 0, None, 3, False, 0, None, False)
-    F_temp = np.array(pitchf, dtype=np.float32)
-    F_temp[F_temp == 0] = np.nan
-    f0 = 1200 * np.log2(F_temp / librosa.midi_to_hz(0))
-    plt.figure(figsize=(10, 4))
-    plt.plot(f0)
-    plt.title(f0_method)
-    plt.xlabel(translations["time_frames"])
-    plt.ylabel(translations["Frequency"])
-    plt.savefig(image_path)
-    plt.close()
-    with open(txt_path, "w") as f:
-        for i, f0_value in enumerate(f0):
-            f.write(f"{i * 100.0},{f0_value}\n")
-    gr_info(translations["extract_done"])
-    return [txt_path, image_path]

main/app/core/inference.py DELETED Viewed

@@ -1,441 +0,0 @@
-import os
-import re
-import gc
-import sys
-import shutil
-import datetime
-import subprocess
-import numpy as np
-sys.path.append(os.getcwd())
-from main.app.variables import logger, config, configs, translations, python
-from main.app.core.ui import gr_info, gr_warning, gr_error, process_output, replace_export_format
-def convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0_method, input_path, output_path, pth_path, index_path, f0_autotune, clean_audio, clean_strength, export_format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, f0_onnx, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold, audio_processing=False, alpha=0.5):
-    subprocess.run([
-        python,
-        configs["convert_path"],
-        "--pitch", str(pitch),
-        "--filter_radius", str(filter_radius),
-        "--index_rate", str(index_rate),
-        "--rms_mix_rate", str(rms_mix_rate),
-        "--protect", str(protect),
-        "--hop_length", str(hop_length),
-        "--f0_method", f0_method,
-        "--input_path", input_path,
-        "--output_path", output_path,
-        "--pth_path", pth_path,
-        "--index_path", index_path,
-        "--f0_autotune", str(f0_autotune),
-        "--clean_audio", str(clean_audio),
-        "--clean_strength", str(clean_strength),
-        "--export_format", export_format,
-        "--embedder_model", embedder_model,
-        "--resample_sr", str(resample_sr),
-        "--split_audio", str(split_audio),
-        "--f0_autotune_strength", str(f0_autotune_strength),
-        "--checkpointing", str(checkpointing),
-        "--f0_onnx", str(f0_onnx),
-        "--embedders_mode", embedders_mode,
-        "--formant_shifting", str(formant_shifting),
-        "--formant_qfrency", str(formant_qfrency),
-        "--formant_timbre", str(formant_timbre),
-        "--f0_file", f0_file,
-        "--proposal_pitch", str(proposal_pitch),
-        "--proposal_pitch_threshold", str(proposal_pitch_threshold),
-        "--audio_processing", str(audio_processing),
-        "--alpha", str(alpha)
-    ])
-def convert_audio(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, input_audio_name, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold, audio_processing=False, alpha=0.5):
-    model_path = os.path.join(configs["weights_path"], model) if not os.path.exists(model) else model
-    return_none = [None]*6
-    return_none[5] = {"visible": True, "__type__": "update"}
-    if not use_audio:
-        if merge_instrument or not_merge_backing or convert_backing or use_original:
-            gr_warning(translations["turn_on_use_audio"])
-            return return_none
-    if use_original:
-        if convert_backing:
-            gr_warning(translations["turn_off_convert_backup"])
-            return return_none
-        elif not_merge_backing:
-            gr_warning(translations["turn_off_merge_backup"])
-            return return_none
-    if not model or not os.path.exists(model_path) or os.path.isdir(model_path) or not model.endswith((".pth", ".onnx")):
-        gr_warning(translations["provide_file"].format(filename=translations["model"]))
-        return return_none
-    f0method, embedder_model = (method if method != "hybrid" else hybrid_method), (embedders if embedders != "custom" else custom_embedders)
-    if use_audio:
-        output_audio = os.path.join(configs["audios_path"], input_audio_name)
-        from main.library.utils import pydub_load
-        def get_audio_file(label):
-            matching_files = [f for f in os.listdir(output_audio) if label in f]
-            if not matching_files: return translations["notfound"]
-            return os.path.join(output_audio, matching_files[0])
-        output_path = os.path.join(output_audio, f"Convert_Vocals.{format}")
-        output_backing = os.path.join(output_audio, f"Convert_Backing.{format}")
-        output_merge_backup = os.path.join(output_audio, f"Vocals+Backing.{format}")
-        output_merge_instrument = os.path.join(output_audio, f"Vocals+Instruments.{format}")
-        if os.path.exists(output_audio): os.makedirs(output_audio, exist_ok=True)
-        output_path = process_output(output_path)
-        if use_original:
-            original_vocal = get_audio_file('Original_Vocals_No_Reverb.')
-            if original_vocal == translations["notfound"]: original_vocal = get_audio_file('Original_Vocals.')
-            if original_vocal == translations["notfound"]:
-                gr_warning(translations["not_found_original_vocal"])
-                return return_none
-            input_path = original_vocal
-        else:
-            main_vocal = get_audio_file('Main_Vocals_No_Reverb.')
-            backing_vocal = get_audio_file('Backing_Vocals.')
-            if main_vocal == translations["notfound"]: main_vocal = get_audio_file('Main_Vocals.')
-            if main_vocal == translations["notfound"]:
-                gr_warning(translations["not_found_main_vocal"])
-                return return_none
-            if not not_merge_backing and backing_vocal == translations["notfound"]:
-                gr_warning(translations["not_found_backing_vocal"])
-                return return_none
-            input_path = main_vocal
-            backing_path = backing_vocal
-        gr_info(translations["convert_vocal"])
-        convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input_path, output_path, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)
-        gr_info(translations["convert_success"])
-        if convert_backing:
-            output_backing = process_output(output_backing)
-            gr_info(translations["convert_backup"])
-            convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, backing_path, output_backing, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)
-            gr_info(translations["convert_backup_success"])
-        try:
-            if not not_merge_backing and not use_original:
-                backing_source = output_backing if convert_backing else backing_vocal
-                output_merge_backup = process_output(output_merge_backup)
-                gr_info(translations["merge_backup"])
-                pydub_load(output_path, volume=-4).overlay(pydub_load(backing_source, volume=-6)).export(output_merge_backup, format=format)
-                gr_info(translations["merge_success"])
-            if merge_instrument:
-                vocals = output_merge_backup if not not_merge_backing and not use_original else output_path
-                output_merge_instrument = process_output(output_merge_instrument)
-                gr_info(translations["merge_instruments_process"])
-                instruments = get_audio_file('Instruments.')
-                if instruments == translations["notfound"]:
-                    gr_warning(translations["not_found_instruments"])
-                    output_merge_instrument = None
-                else: pydub_load(instruments, volume=-7).overlay(pydub_load(vocals, volume=-4 if use_original else None)).export(output_merge_instrument, format=format)
-                gr_info(translations["merge_success"])
-        except:
-            return return_none
-        return [(None if use_original else output_path), output_backing, (None if not_merge_backing and use_original else output_merge_backup), (output_path if use_original else None), (output_merge_instrument if merge_instrument else None), {"visible": True, "__type__": "update"}]
-    else:
-        if not input or not os.path.exists(input):
-            gr_warning(translations["input_not_valid"])
-            return return_none
-        if not output:
-            gr_warning(translations["output_not_valid"])
-            return return_none
-        output = replace_export_format(output, format)
-        if os.path.isdir(input):
-            gr_info(translations["is_folder"])
-            if not [f for f in os.listdir(input) if f.lower().endswith(("wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"))]:
-                gr_warning(translations["not_found_in_folder"])
-                return return_none
-            gr_info(translations["batch_convert"])
-            output_dir = os.path.dirname(output) or output
-            convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input, output_dir, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)
-            gr_info(translations["batch_convert_success"])
-            return return_none
-        else:
-            output_dir = os.path.dirname(output) or output
-            if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
-            output = process_output(output)
-            gr_info(translations["convert_vocal"])
-            convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input, output, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)
-            gr_info(translations["convert_success"])
-            return_none[0] = output
-            return return_none
-def convert_selection(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold, audio_processing=False, alpha=0.5):
-    if use_audio:
-        gr_info(translations["search_separate"])
-        choice = [f for f in os.listdir(configs["audios_path"]) if os.path.isdir(os.path.join(configs["audios_path"], f))] if config.debug_mode else [f for f in os.listdir(configs["audios_path"]) if os.path.isdir(os.path.join(configs["audios_path"], f)) and any(file.lower().endswith((".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3")) for file in os.listdir(os.path.join(configs["audios_path"], f)))]
-        gr_info(translations["found_choice"].format(choice=len(choice)))
-        if len(choice) == 0:
-            gr_warning(translations["separator==0"])
-            return [{"choices": [], "value": "", "interactive": False, "visible": False, "__type__": "update"}, None, None, None, None, None, {"visible": True, "__type__": "update"}, {"visible": False, "__type__": "update"}]
-        elif len(choice) == 1:
-            convert_output = convert_audio(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, None, None, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, choice[0], checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)
-            return [{"choices": [], "value": "", "interactive": False, "visible": False, "__type__": "update"}, convert_output[0], convert_output[1], convert_output[2], convert_output[3], convert_output[4], {"visible": True, "__type__": "update"}, {"visible": False, "__type__": "update"}]
-        else: return [{"choices": choice, "value": choice[0], "interactive": True, "visible": True, "__type__": "update"}, None, None, None, None, None, {"visible": False, "__type__": "update"}, {"visible": True, "__type__": "update"}]
-    else:
-        main_convert = convert_audio(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, None, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)
-        return [{"choices": [], "value": "", "interactive": False, "visible": False, "__type__": "update"}, main_convert[0], None, None, None, None, {"visible": True, "__type__": "update"}, {"visible": False, "__type__": "update"}]
-def whisper_process(model_size, input_audio, configs, device, out_queue, word_timestamps=True):
-    from main.library.speaker_diarization.whisper import load_model
-    try:
-        segments = load_model(model_size, device=device).transcribe(input_audio, fp16=configs.get("fp16", False), word_timestamps=word_timestamps)
-        out_queue.put(segments["segments"])
-    except Exception as e:
-        out_queue.put(e)
-    finally:
-        del segments
-        gc.collect()
-def convert_with_whisper(num_spk, model_size, cleaner, clean_strength, autotune, f0_autotune_strength, checkpointing, model_1, model_2, model_index_1, model_index_2, pitch_1, pitch_2, index_strength_1, index_strength_2, export_format, input_audio, output_audio, onnx_f0_mode, method, hybrid_method, hop_length, embed_mode, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, formant_shifting, formant_qfrency_1, formant_timbre_1, formant_qfrency_2, formant_timbre_2, proposal_pitch, proposal_pitch_threshold, audio_processing=False, alpha=0.5):
-    import librosa
-    import multiprocessing as mp
-    from pydub import AudioSegment
-    from sklearn.cluster import AgglomerativeClustering
-    from main.library.utils import clear_gpu_cache
-    from main.library.speaker_diarization.audio import Audio
-    from main.library.speaker_diarization.segment import Segment
-    from main.library.utils import check_spk_diarization, pydub_load
-    from main.library.speaker_diarization.embedding import SpeechBrainPretrainedSpeakerEmbedding
-    check_spk_diarization(model_size)
-    model_pth_1, model_pth_2 = os.path.join(configs["weights_path"], model_1) if not os.path.exists(model_1) else model_1, os.path.join(configs["weights_path"], model_2) if not os.path.exists(model_2) else model_2
-    if (not model_1 or not os.path.exists(model_pth_1) or os.path.isdir(model_pth_1) or not model_pth_1.endswith((".pth", ".onnx"))) and (not model_2 or not os.path.exists(model_pth_2) or os.path.isdir(model_pth_2) or not model_pth_2.endswith((".pth", ".onnx"))):
-        gr_warning(translations["provide_file"].format(filename=translations["model"]))
-        return None
-    if not model_1: model_pth_1 = model_pth_2
-    if not model_2: model_pth_2 = model_pth_1
-    if not input_audio or not os.path.exists(input_audio) or os.path.isdir(input_audio):
-        gr_warning(translations["input_not_valid"])
-        return None
-    if not output_audio:
-        gr_warning(translations["output_not_valid"])
-        return None
-    output_audio = process_output(output_audio)
-    gr_info(translations["start_whisper"])
-    try:
-        try:
-            mp.set_start_method("spawn")
-        except:
-            pass
-        whisper_queue = mp.Queue()
-        whisperprocess = mp.Process(target=whisper_process, args=(model_size, input_audio, configs, config.device, whisper_queue, True))
-        whisperprocess.start()
-        segments = whisper_queue.get()
-        audio = Audio()
-        embedding_model = SpeechBrainPretrainedSpeakerEmbedding(embedding=os.path.join(configs["speaker_diarization_path"], "models", "speechbrain"), device=config.device)
-        y, sr = librosa.load(input_audio, sr=None)
-        duration = len(y) / sr
-        def segment_embedding(segment):
-            waveform, _ = audio.crop(input_audio, Segment(segment["start"], min(duration, segment["end"])))
-            return embedding_model(waveform.mean(dim=0, keepdim=True)[None] if waveform.shape[0] == 2 else waveform[None])
-        def time(secs):
-            return datetime.timedelta(seconds=round(secs))
-        def merge_audio(files_list, time_stamps, original_file_path, output_path, format):
-            def extract_number(filename):
-                match = re.search(r'_(\d+)', filename)
-                return int(match.group(1)) if match else 0
-            total_duration = len(pydub_load(original_file_path))
-            combined = AudioSegment.empty()
-            current_position = 0
-            for file, (start_i, end_i) in zip(sorted(files_list, key=extract_number), time_stamps):
-                if start_i > current_position: combined += AudioSegment.silent(duration=start_i - current_position)
-                combined += pydub_load(file)
-                current_position = end_i
-            if current_position < total_duration: combined += AudioSegment.silent(duration=total_duration - current_position)
-            combined.export(output_path, format=format)
-            return output_path
-        embeddings = np.zeros(shape=(len(segments), 192))
-        for i, segment in enumerate(segments):
-            embeddings[i] = segment_embedding(segment)
-        labels = AgglomerativeClustering(num_spk).fit(np.nan_to_num(embeddings)).labels_
-        for i in range(len(segments)):
-            segments[i]["speaker"] = 'SPEAKER ' + str(labels[i] + 1)
-        merged_segments, current_text = [], []
-        current_speaker, current_start = None, None
-        for i, segment in enumerate(segments):
-            speaker = segment["speaker"]
-            start_time = segment["start"]
-            text = segment["text"][1:]
-            if speaker == current_speaker:
-                current_text.append(text)
-                end_time = segment["end"]
-            else:
-                if current_speaker is not None: merged_segments.append({"speaker": current_speaker, "start": current_start, "end": end_time, "text": " ".join(current_text)})
-                current_speaker = speaker
-                current_start = start_time
-                current_text = [text]
-                end_time = segment["end"]
-        if current_speaker is not None: merged_segments.append({"speaker": current_speaker, "start": current_start, "end": end_time, "text": " ".join(current_text)})
-        gr_info(translations["whisper_done"])
-        x = ""
-        for segment in merged_segments:
-            x += f"\n{segment['speaker']} {str(time(segment['start']))} - {str(time(segment['end']))}\n"
-            x += segment["text"] + "\n"
-        logger.info(x)
-        del audio, embedding_model, segments, labels
-        clear_gpu_cache()
-        gc.collect()
-        gr_info(translations["process_audio"])
-        audio = pydub_load(input_audio)
-        output_folder = "audios_temp"
-        if os.path.exists(output_folder): shutil.rmtree(output_folder, ignore_errors=True)
-        for f in [output_folder, os.path.join(output_folder, "1"), os.path.join(output_folder, "2")]:
-            os.makedirs(f, exist_ok=True)
-        time_stamps, processed_segments = [], []
-        for i, segment in enumerate(merged_segments):
-            start_ms = int(segment["start"] * 1000)
-            end_ms = int(segment["end"] * 1000)
-            index = i + 1
-            segment_filename = os.path.join(output_folder, "1" if i % 2 == 1 else "2", f"segment_{index}.wav")
-            audio[start_ms:end_ms].export(segment_filename, format="wav")
-            processed_segments.append(os.path.join(output_folder, "1" if i % 2 == 1 else "2", f"segment_{index}_output.wav"))
-            time_stamps.append((start_ms, end_ms))
-        f0method, embedder_model = (method if method != "hybrid" else hybrid_method), (embedders if embedders != "custom" else custom_embedders)
-        gr_info(translations["process_done_start_convert"])
-        convert(pitch_1, filter_radius, index_strength_1, rms_mix_rate, protect, hop_length, f0method, os.path.join(output_folder, "1"), output_folder, model_pth_1, model_index_1, autotune, cleaner, clean_strength, "wav", embedder_model, resample_sr, False, f0_autotune_strength, checkpointing, onnx_f0_mode, embed_mode, formant_shifting, formant_qfrency_1, formant_timbre_1, "", proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)
-        convert(pitch_2, filter_radius, index_strength_2, rms_mix_rate, protect, hop_length, f0method, os.path.join(output_folder, "2"), output_folder, model_pth_2, model_index_2, autotune, cleaner, clean_strength, "wav", embedder_model, resample_sr, False, f0_autotune_strength, checkpointing, onnx_f0_mode, embed_mode, formant_shifting, formant_qfrency_2, formant_timbre_2, "", proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)
-        gr_info(translations["convert_success"])
-        return merge_audio(processed_segments, time_stamps, input_audio, replace_export_format(output_audio, export_format), export_format)
-    except Exception as e:
-        gr_error(translations["error_occurred"].format(e=e))
-        import traceback
-        logger.debug(traceback.format_exc())
-        return None
-    finally:
-        if os.path.exists("audios_temp"): shutil.rmtree("audios_temp", ignore_errors=True)
-def convert_tts(clean, autotune, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold, audio_processing=False, alpha=0.5):
-    model_path = os.path.join(configs["weights_path"], model) if not os.path.exists(model) else model
-    if not model_path or not os.path.exists(model_path) or os.path.isdir(model_path) or not model.endswith((".pth", ".onnx")):
-        gr_warning(translations["provide_file"].format(filename=translations["model"]))
-        return None
-    if not input or not os.path.exists(input):
-        gr_warning(translations["input_not_valid"])
-        return None
-    if os.path.isdir(input):
-        input_audio = [f for f in os.listdir(input) if "tts" in f and f.lower().endswith(("wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"))]
-        if not input_audio:
-            gr_warning(translations["not_found_in_folder"])
-            return None
-        input = os.path.join(input, input_audio[0])
-    if not output:
-        gr_warning(translations["output_not_valid"])
-        return None
-    output = replace_export_format(output, format)
-    if os.path.isdir(output): output = os.path.join(output, f"tts.{format}")
-    output_dir = os.path.dirname(output)
-    if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
-    output = process_output(output)
-    f0method = method if method != "hybrid" else hybrid_method
-    embedder_model = embedders if embedders != "custom" else custom_embedders
-    gr_info(translations["convert_vocal"])
-    convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input, output, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold, audio_processing, alpha)
-    gr_info(translations["convert_success"])
-    return output

main/app/core/model_utils.py DELETED Viewed

@@ -1,164 +0,0 @@
-import os
-import sys
-import json
-import torch
-import datetime
-sys.path.append(os.getcwd())
-from main.app.core.ui import gr_info, gr_warning, gr_error
-from main.app.variables import config, logger, translations, configs
-def fushion_model_pth(name, pth_1, pth_2, ratio):
-    if not name.endswith(".pth"): name = name + ".pth"
-    if not pth_1 or not os.path.exists(pth_1) or not pth_1.endswith(".pth"):
-        gr_warning(translations["provide_file"].format(filename=translations["model"] + " 1"))
-        return [translations["provide_file"].format(filename=translations["model"] + " 1"), None]
-    if not pth_2 or not os.path.exists(pth_2) or not pth_2.endswith(".pth"):
-        gr_warning(translations["provide_file"].format(filename=translations["model"] + " 2"))
-        return [translations["provide_file"].format(filename=translations["model"] + " 2"), None]
-    from collections import OrderedDict
-    def extract(ckpt):
-        a = ckpt["model"]
-        opt = OrderedDict()
-        opt["weight"] = {}
-        for key in a.keys():
-            if "enc_q" in key: continue
-            opt["weight"][key] = a[key]
-        return opt
-    try:
-        ckpt1 = torch.load(pth_1, map_location="cpu", weights_only=True)
-        ckpt2 = torch.load(pth_2, map_location="cpu", weights_only=True)
-        if ckpt1["sr"] != ckpt2["sr"]:
-            gr_warning(translations["sr_not_same"])
-            return [translations["sr_not_same"], None]
-        cfg = ckpt1["config"]
-        cfg_f0 = ckpt1["f0"]
-        cfg_version = ckpt1["version"]
-        cfg_sr = ckpt1["sr"]
-        vocoder = ckpt1.get("vocoder", "Default")
-        rms_extract = ckpt1.get("energy", False)
-        ckpt1 = extract(ckpt1) if "model" in ckpt1 else ckpt1["weight"]
-        ckpt2 = extract(ckpt2) if "model" in ckpt2 else ckpt2["weight"]
-        if sorted(list(ckpt1.keys())) != sorted(list(ckpt2.keys())):
-            gr_warning(translations["architectures_not_same"])
-            return [translations["architectures_not_same"], None]
-        gr_info(translations["start"].format(start=translations["fushion_model"]))
-        opt = OrderedDict()
-        opt["weight"] = {}
-        for key in ckpt1.keys():
-            if key == "emb_g.weight" and ckpt1[key].shape != ckpt2[key].shape:
-                min_shape0 = min(ckpt1[key].shape[0], ckpt2[key].shape[0])
-                opt["weight"][key] = (ratio * (ckpt1[key][:min_shape0].float()) + (1 - ratio) * (ckpt2[key][:min_shape0].float())).half()
-            else: opt["weight"][key] = (ratio * (ckpt1[key].float()) + (1 - ratio) * (ckpt2[key].float())).half()
-        opt["config"] = cfg
-        opt["sr"] = cfg_sr
-        opt["f0"] = cfg_f0
-        opt["version"] = cfg_version
-        opt["infos"] = translations["model_fushion_info"].format(name=name, pth_1=pth_1, pth_2=pth_2, ratio=ratio)
-        opt["vocoder"] = vocoder
-        opt["energy"] = rms_extract
-        output_model = configs["weights_path"]
-        if not os.path.exists(output_model): os.makedirs(output_model, exist_ok=True)
-        torch.save(opt, os.path.join(output_model, name))
-        gr_info(translations["success"])
-        return [translations["success"], os.path.join(output_model, name)]
-    except Exception as e:
-        gr_error(message=translations["error_occurred"].format(e=e))
-        return [e, None]
-def fushion_model(name, path_1, path_2, ratio):
-    if not name:
-        gr_warning(translations["provide_name_is_save"])
-        return [translations["provide_name_is_save"], None]
-    if path_1.endswith(".pth") and path_2.endswith(".pth"): return fushion_model_pth(name, path_1, path_2, ratio)
-    else:
-        gr_warning(translations["format_not_valid"])
-        return [None, None]
-def onnx_export(model_path):
-    if not model_path.endswith(".pth"): model_path += ".pth"
-    if not model_path or not os.path.exists(model_path) or not model_path.endswith(".pth"): return gr_warning(translations["provide_file"].format(filename=translations["model"]))
-    try:
-        gr_info(translations["start_onnx_export"])
-        from main.library.onnx.onnx_export import onnx_exporter
-        output = onnx_exporter(model_path, model_path.replace(".pth", ".onnx"), is_half=config.is_half, device=config.device)
-        gr_info(translations["success"])
-        return output
-    except Exception as e:
-        return gr_error(e)
-def model_info(path):
-    if not path or not os.path.exists(path) or os.path.isdir(path) or not path.endswith((".pth", ".onnx")): return gr_warning(translations["provide_file"].format(filename=translations["model"]))
-    def prettify_date(date_str):
-        if date_str == translations["not_found_create_time"]: return None
-        try:
-            return datetime.datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S.%f").strftime("%Y-%m-%d %H:%M:%S")
-        except ValueError as e:
-            logger.debug(e)
-            return translations["format_not_valid"]
-    if path.endswith(".pth"): model_data = torch.load(path, map_location="cpu")
-    else:
-        import onnx
-        model = onnx.load(path)
-        model_data = None
-        for prop in model.metadata_props:
-            if prop.key == "model_info":
-                model_data = json.loads(prop.value)
-                break
-    gr_info(translations["read_info"])
-    epochs = model_data.get("epoch", None)
-    if epochs is None:
-        epochs = model_data.get("info", None)
-        try:
-            epoch = epochs.replace("epoch", "").replace("e", "").isdigit()
-            if epoch and epochs is None: epochs = translations["not_found"].format(name=translations["epoch"])
-        except:
-            pass
-    steps = model_data.get("step", translations["not_found"].format(name=translations["step"]))
-    sr = model_data.get("sr", translations["not_found"].format(name=translations["sr"]))
-    f0 = model_data.get("f0", translations["not_found"].format(name=translations["f0"]))
-    version = model_data.get("version", translations["not_found"].format(name=translations["version"]))
-    creation_date = model_data.get("creation_date", translations["not_found_create_time"])
-    model_hash = model_data.get("model_hash", translations["not_found"].format(name="model_hash"))
-    pitch_guidance = translations["trained_f0"] if f0 else translations["not_f0"]
-    creation_date_str = prettify_date(creation_date) if creation_date else translations["not_found_create_time"]
-    model_name = model_data.get("model_name", translations["unregistered"])
-    model_author = model_data.get("author", translations["not_author"])
-    vocoder = model_data.get("vocoder", "Default")
-    rms_extract = model_data.get("energy", False)
-    gr_info(translations["success"])
-    return translations["model_info"].format(model_name=model_name, model_author=model_author, epochs=epochs, steps=steps, version=version, sr=sr, pitch_guidance=pitch_guidance, model_hash=model_hash, creation_date_str=creation_date_str, vocoder=vocoder, rms_extract=rms_extract)

main/app/core/presets.py DELETED Viewed

@@ -1,166 +0,0 @@
-import os
-import sys
-import json
-sys.path.append(os.getcwd())
-from main.app.variables import translations, configs
-from main.app.core.ui import gr_info, gr_warning, change_preset_choices, change_effect_preset_choices
-def load_presets(presets, cleaner, autotune, pitch, clean_strength, index_strength, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, formant_shifting, formant_qfrency, formant_timbre, proposal_pitch, proposal_pitch_threshold):
-    if not presets: gr_warning(translations["provide_file_settings"])
-    file = {}
-    if presets:
-        with open(os.path.join(configs["presets_path"], presets)) as f:
-            file = json.load(f)
-        gr_info(translations["load_presets"].format(presets=presets))
-    return [file.get("cleaner", cleaner), file.get("autotune", autotune), file.get("pitch", pitch), file.get("clean_strength", clean_strength), file.get("index_strength", index_strength), file.get("resample_sr", resample_sr), file.get("filter_radius", filter_radius), file.get("rms_mix_rate", rms_mix_rate), file.get("protect", protect), file.get("split_audio", split_audio), file.get("f0_autotune_strength", f0_autotune_strength), file.get("formant_shifting", formant_shifting), file.get("formant_qfrency", formant_qfrency), file.get("formant_timbre", formant_timbre), file.get("proposal_pitch", proposal_pitch), file.get("proposal_pitch_threshold", proposal_pitch_threshold)]
-def save_presets(name, cleaner, autotune, pitch, clean_strength, index_strength, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, cleaner_chbox, autotune_chbox, pitch_chbox, index_strength_chbox, resample_sr_chbox, filter_radius_chbox, rms_mix_rate_chbox, protect_chbox, split_audio_chbox, formant_shifting_chbox, formant_shifting, formant_qfrency, formant_timbre, proposal_pitch, proposal_pitch_threshold):
-    if not name: return gr_warning(translations["provide_filename_settings"])
-    if not any([cleaner_chbox, autotune_chbox, pitch_chbox, index_strength_chbox, resample_sr_chbox, filter_radius_chbox, rms_mix_rate_chbox, protect_chbox, split_audio_chbox, formant_shifting_chbox]): return gr_warning(translations["choose1"])
-    settings = {}
-    for checkbox, data in [(cleaner_chbox, {"cleaner": cleaner, "clean_strength": clean_strength}), (autotune_chbox, {"autotune": autotune, "f0_autotune_strength": f0_autotune_strength}), (pitch_chbox, {"pitch": pitch}), (index_strength_chbox, {"index_strength": index_strength}), (resample_sr_chbox, {"resample_sr": resample_sr}), (filter_radius_chbox, {"filter_radius": filter_radius}), (rms_mix_rate_chbox, {"rms_mix_rate": rms_mix_rate}), (protect_chbox, {"protect": protect}), (split_audio_chbox, {"split_audio": split_audio}), (formant_shifting_chbox, {"formant_shifting": formant_shifting, "formant_qfrency": formant_qfrency, "formant_timbre": formant_timbre}), (proposal_pitch, {"proposal_pitch": proposal_pitch, "proposal_pitch_threshold": proposal_pitch_threshold})]:
-        if checkbox: settings.update(data)
-    with open(os.path.join(configs["presets_path"], name + ".conversion.json"), "w") as f:
-        json.dump(settings, f, indent=4)
-    gr_info(translations["export_settings"].format(name=name))
-    return change_preset_choices()
-def audio_effect_load_presets(presets, resample_checkbox, audio_effect_resample_sr, chorus_depth, chorus_rate_hz, chorus_mix, chorus_centre_delay_ms, chorus_feedback, distortion_drive_db, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift_semitones, delay_second, delay_feedback, delay_mix, compressor_threshold_db, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold_db, limiter_release_ms, gain_db, bitcrush_bit_depth, clipping_threshold_db, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost, bass_frequency, treble_boost, treble_frequency, fade_in, fade_out, chorus_check_box, distortion_checkbox, reverb_check_box, delay_check_box, compressor_check_box, limiter, gain_checkbox, bitcrush_checkbox, clipping_checkbox, phaser_check_box, bass_or_treble, fade):
-    if not presets: gr_warning(translations["provide_file_settings"])
-    file = {}
-    if presets:
-        with open(os.path.join(configs["presets_path"], presets)) as f:
-            file = json.load(f)
-        gr_info(translations["load_presets"].format(presets=presets))
-    return [
-        file.get("resample_checkbox", resample_checkbox), file.get("audio_effect_resample_sr", audio_effect_resample_sr),
-        file.get("chorus_depth", chorus_depth), file.get("chorus_rate_hz", chorus_rate_hz),
-        file.get("chorus_mix", chorus_mix), file.get("chorus_centre_delay_ms", chorus_centre_delay_ms),
-        file.get("chorus_feedback", chorus_feedback), file.get("distortion_drive_db", distortion_drive_db),
-        file.get("reverb_room_size", reverb_room_size), file.get("reverb_damping", reverb_damping),
-        file.get("reverb_wet_level", reverb_wet_level), file.get("reverb_dry_level", reverb_dry_level),
-        file.get("reverb_width", reverb_width), file.get("reverb_freeze_mode", reverb_freeze_mode),
-        file.get("pitch_shift_semitones", pitch_shift_semitones), file.get("delay_second", delay_second),
-        file.get("delay_feedback", delay_feedback), file.get("delay_mix", delay_mix),
-        file.get("compressor_threshold_db", compressor_threshold_db), file.get("compressor_ratio", compressor_ratio),
-        file.get("compressor_attack_ms", compressor_attack_ms), file.get("compressor_release_ms", compressor_release_ms),
-        file.get("limiter_threshold_db", limiter_threshold_db), file.get("limiter_release_ms", limiter_release_ms),
-        file.get("gain_db", gain_db), file.get("bitcrush_bit_depth", bitcrush_bit_depth),
-        file.get("clipping_threshold_db", clipping_threshold_db), file.get("phaser_rate_hz", phaser_rate_hz),
-        file.get("phaser_depth", phaser_depth), file.get("phaser_centre_frequency_hz", phaser_centre_frequency_hz),
-        file.get("phaser_feedback", phaser_feedback), file.get("phaser_mix", phaser_mix),
-        file.get("bass_boost", bass_boost), file.get("bass_frequency", bass_frequency),
-        file.get("treble_boost", treble_boost), file.get("treble_frequency", treble_frequency),
-        file.get("fade_in", fade_in), file.get("fade_out", fade_out),
-        file.get("chorus_check_box", chorus_check_box), file.get("distortion_checkbox", distortion_checkbox),
-        file.get("reverb_check_box", reverb_check_box), file.get("delay_check_box", delay_check_box),
-        file.get("compressor_check_box", compressor_check_box), file.get("limiter", limiter),
-        file.get("gain_checkbox", gain_checkbox), file.get("bitcrush_checkbox", bitcrush_checkbox),
-        file.get("clipping_checkbox", clipping_checkbox), file.get("phaser_check_box", phaser_check_box),
-        file.get("bass_or_treble", bass_or_treble), file.get("fade", fade)
-    ]
-def audio_effect_save_presets(name, resample_checkbox, audio_effect_resample_sr, chorus_depth, chorus_rate_hz, chorus_mix, chorus_centre_delay_ms, chorus_feedback, distortion_drive_db, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift_semitones, delay_second, delay_feedback, delay_mix, compressor_threshold_db, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold_db, limiter_release_ms, gain_db, bitcrush_bit_depth, clipping_threshold_db, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost, bass_frequency, treble_boost, treble_frequency, fade_in, fade_out, chorus_check_box, distortion_checkbox, reverb_check_box, delay_check_box, compressor_check_box, limiter, gain_checkbox, bitcrush_checkbox, clipping_checkbox, phaser_check_box, bass_or_treble, fade):
-    if not name: return gr_warning(translations["provide_filename_settings"])
-    if not any([resample_checkbox, chorus_check_box, distortion_checkbox, reverb_check_box, delay_check_box, compressor_check_box, limiter, gain_checkbox, bitcrush_checkbox, clipping_checkbox, phaser_check_box, bass_or_treble, fade, pitch_shift_semitones != 0]): return gr_warning(translations["choose1"])
-    settings = {}
-    for checkbox, data in [
-        (resample_checkbox, {
-            "resample_checkbox": resample_checkbox,
-            "audio_effect_resample_sr": audio_effect_resample_sr
-        }),
-        (chorus_check_box, {
-            "chorus_check_box": chorus_check_box,
-            "chorus_depth": chorus_depth,
-            "chorus_rate_hz": chorus_rate_hz,
-            "chorus_mix": chorus_mix,
-            "chorus_centre_delay_ms": chorus_centre_delay_ms,
-            "chorus_feedback": chorus_feedback
-        }),
-        (distortion_checkbox, {
-            "distortion_checkbox": distortion_checkbox,
-            "distortion_drive_db": distortion_drive_db
-        }),
-        (reverb_check_box, {
-            "reverb_check_box": reverb_check_box,
-            "reverb_room_size": reverb_room_size,
-            "reverb_damping": reverb_damping,
-            "reverb_wet_level": reverb_wet_level,
-            "reverb_dry_level": reverb_dry_level,
-            "reverb_width": reverb_width,
-            "reverb_freeze_mode": reverb_freeze_mode
-        }),
-        (pitch_shift_semitones != 0, {
-            "pitch_shift_semitones": pitch_shift_semitones
-        }),
-        (delay_check_box, {
-            "delay_check_box": delay_check_box,
-            "delay_second": delay_second,
-            "delay_feedback": delay_feedback,
-            "delay_mix": delay_mix
-        }),
-        (compressor_check_box, {
-            "compressor_check_box": compressor_check_box,
-            "compressor_threshold_db": compressor_threshold_db,
-            "compressor_ratio": compressor_ratio,
-            "compressor_attack_ms": compressor_attack_ms,
-            "compressor_release_ms": compressor_release_ms
-        }),
-        (limiter, {
-            "limiter": limiter,
-            "limiter_threshold_db": limiter_threshold_db,
-            "limiter_release_ms": limiter_release_ms
-        }),
-        (gain_checkbox, {
-            "gain_checkbox": gain_checkbox,
-            "gain_db": gain_db
-        }),
-        (bitcrush_checkbox, {
-            "bitcrush_checkbox": bitcrush_checkbox,
-            "bitcrush_bit_depth": bitcrush_bit_depth
-        }),
-        (clipping_checkbox, {
-            "clipping_checkbox": clipping_checkbox,
-            "clipping_threshold_db": clipping_threshold_db
-        }),
-        (phaser_check_box, {
-            "phaser_check_box": phaser_check_box,
-            "phaser_rate_hz": phaser_rate_hz,
-            "phaser_depth": phaser_depth,
-            "phaser_centre_frequency_hz": phaser_centre_frequency_hz,
-            "phaser_feedback": phaser_feedback,
-            "phaser_mix": phaser_mix
-        }),
-        (bass_or_treble, {
-            "bass_or_treble": bass_or_treble,
-            "bass_boost": bass_boost,
-            "bass_frequency": bass_frequency,
-            "treble_boost": treble_boost,
-            "treble_frequency": treble_frequency
-        }),
-        (fade, {
-            "fade": fade,
-            "fade_in": fade_in,
-            "fade_out": fade_out
-        })
-    ]:
-        if checkbox: settings.update(data)
-    with open(os.path.join(configs["presets_path"], name + ".effect.json"), "w") as f:
-        json.dump(settings, f, indent=4)
-    gr_info(translations["export_settings"].format(name=name))
-    return change_effect_preset_choices()

main/app/core/process.py DELETED Viewed

@@ -1,135 +0,0 @@
-import os
-import re
-import sys
-import shutil
-import codecs
-import zipfile
-import requests
-sys.path.append(os.getcwd())
-from main.app.variables import logger, translations, configs
-from main.app.core.ui import gr_info, gr_warning, gr_error, process_output, replace_punctuation
-def read_docx_text(path):
-    import xml.etree.ElementTree
-    with zipfile.ZipFile(path) as docx:
-        with docx.open("word/document.xml") as document_xml:
-            xml_content = document_xml.read()
-    WORD_NAMESPACE = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
-    paragraphs = []
-    for paragraph in xml.etree.ElementTree.XML(xml_content).iter(WORD_NAMESPACE + 'p'):
-        texts = [node.text for node in paragraph.iter(WORD_NAMESPACE + 't') if node.text]
-        if texts: paragraphs.append(''.join(texts))
-    return '\n'.join(paragraphs)
-def process_input(file_path):
-    if file_path.endswith(".srt"): file_contents = ""
-    elif file_path.endswith(".docx"): file_contents = read_docx_text(file_path)
-    else:
-        try:
-            with open(file_path, "r", encoding="utf-8") as file:
-                file_contents = file.read()
-        except Exception as e:
-            gr_warning(translations["read_error"])
-            logger.debug(e)
-            file_contents = ""
-    gr_info(translations["upload_success"].format(name=translations["text"]))
-    return file_contents
-def move_files_from_directory(src_dir, dest_weights, dest_logs, model_name):
-    for root, _, files in os.walk(src_dir):
-        for file in files:
-            file_path = os.path.join(root, file)
-            if file.endswith(".index"):
-                model_log_dir = os.path.join(dest_logs, model_name)
-                os.makedirs(model_log_dir, exist_ok=True)
-                filepath = process_output(os.path.join(model_log_dir, replace_punctuation(file)))
-                shutil.move(file_path, filepath)
-            elif file.endswith(".pth") and not file.startswith("D_") and not file.startswith("G_"):
-                pth_path = process_output(os.path.join(dest_weights, model_name + ".pth"))
-                shutil.move(file_path, pth_path)
-            elif file.endswith(".onnx") and not file.startswith("D_") and not file.startswith("G_"):
-                pth_path = process_output(os.path.join(dest_weights, model_name + ".onnx"))
-                shutil.move(file_path, pth_path)
-def extract_name_model(filename):
-    match = re.search(r"_([A-Za-z0-9]+)(?=_v\d*)", replace_punctuation(filename))
-    return match.group(1) if match else None
-def save_drop_model(dropboxs):
-    weight_folder = configs["weights_path"]
-    logs_folder = configs["logs_path"]
-    save_model_temp = "save_model_temp"
-    if not os.path.exists(weight_folder): os.makedirs(weight_folder, exist_ok=True)
-    if not os.path.exists(logs_folder): os.makedirs(logs_folder, exist_ok=True)
-    if not os.path.exists(save_model_temp): os.makedirs(save_model_temp, exist_ok=True)
-    try:
-        for dropbox in dropboxs:
-            shutil.move(dropbox, save_model_temp)
-            file_name = os.path.basename(dropbox)
-            if file_name.endswith(".zip"):
-                shutil.unpack_archive(os.path.join(save_model_temp, file_name), save_model_temp)
-                move_files_from_directory(save_model_temp, weight_folder, logs_folder, file_name.replace(".zip", ""))
-            elif file_name.endswith((".pth", ".onnx")):
-                output_file = process_output(os.path.join(weight_folder, file_name))
-                shutil.move(os.path.join(save_model_temp, file_name), output_file)
-            elif file_name.endswith(".index"):
-                modelname = extract_name_model(file_name)
-                if modelname is None: modelname = os.path.splitext(os.path.basename(file_name))[0]
-                model_logs = os.path.join(logs_folder, modelname)
-                if not os.path.exists(model_logs): os.makedirs(model_logs, exist_ok=True)
-                shutil.move(os.path.join(save_model_temp, file_name), model_logs)
-            else:
-                gr_warning(translations["unable_analyze_model"])
-                return None
-        gr_info(translations["upload_success"].format(name=translations["model"]))
-        return None
-    except Exception as e:
-        gr_error(message=translations["error_occurred"].format(e=e))
-        return None
-    finally:
-        shutil.rmtree(save_model_temp, ignore_errors=True)
-def zip_file(name, pth, index):
-    pth_path = os.path.join(configs["weights_path"], pth)
-    if not pth or not os.path.exists(pth_path) or not pth.endswith((".pth", ".onnx")): return gr_warning(translations["provide_file"].format(filename=translations["model"]))
-    zip_file_path = os.path.join(configs["logs_path"], name, name + ".zip")
-    gr_info(translations["start"].format(start=translations["zip"]))
-    with zipfile.ZipFile(zip_file_path, 'w') as zipf:
-        zipf.write(pth_path, os.path.basename(pth_path))
-        if index: zipf.write(index, os.path.basename(index))
-    gr_info(translations["success"])
-    return {"visible": True, "value": zip_file_path, "__type__": "update"}
-def fetch_pretrained_data():
-    try:
-        response = requests.get(codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/wfba/phfgbz_cergenvarq.wfba", "rot13"))
-        response.raise_for_status()
-        return response.json()
-    except:
-        return {}
-def update_sample_rate_dropdown(model):
-    data = fetch_pretrained_data()
-    if model != translations["success"]: return {"choices": list(data[model].keys()), "value": list(data[model].keys())[0], "__type__": "update"}

main/app/core/realtime.py DELETED Viewed

@@ -1,174 +0,0 @@
-import os
-import sys
-import time
-sys.path.append(os.getcwd())
-from main.app.variables import translations, configs
-from main.app.core.ui import gr_info, gr_warning, audio_device
-running, callbacks, audio_manager = False, None, None
-PIPELINE_SAMPLE_RATE = 16000
-DEVICE_SAMPLE_RATE = 48000
-interactive_true = {"interactive": True, "__type__": "update"}
-interactive_false = {"interactive": False, "__type__": "update"}
-def realtime_start(
-    monitor,
-    exclusive_mode,
-    vad_enabled,
-    input_audio_device,
-    output_audio_device,
-    monitor_output_device,
-    input_audio_gain,
-    output_audio_gain,
-    monitor_audio_gain,
-    input_asio_channels,
-    output_asio_channels,
-    monitor_asio_channels,
-    chunk_size,
-    pitch,
-    model_pth,
-    model_index,
-    index_strength,
-    onnx_f0_mode,
-    f0_method,
-    hop_length,
-    embed_mode,
-    embedders,
-    custom_embedders,
-    f0_autotune,
-    proposal_pitch,
-    f0_autotune_strength,
-    proposal_pitch_threshold,
-    rms_mix_rate,
-    protect,
-    filter_radius,
-    silent_threshold,
-    extra_convert_size,
-    cross_fade_overlap_size,
-    vad_sensitivity,
-    vad_frame_ms,
-    clean_audio,
-    clean_strength
-):
-    global running, callbacks, audio_manager
-    running = True
-    gr_info(translations["start_realtime"])
-    yield translations["start_realtime"], interactive_false, interactive_true
-    if not input_audio_device or not output_audio_device:
-        gr_warning(translations["provide_audio_device"])
-        yield translations["provide_audio_device"], interactive_true, interactive_false
-        return
-    if monitor and not monitor_output_device:
-        gr_warning(translations["provide_monitor_device"])
-        yield translations["provide_monitor_device"], interactive_true, interactive_false
-        return
-    model_pth = os.path.join(configs["weights_path"], model_pth) if not os.path.exists(model_pth) else model_pth
-    embedder_model = (embedders if embedders != "custom" else custom_embedders)
-    if not model_pth or not os.path.exists(model_pth) or os.path.isdir(model_pth) or not model_pth.endswith((".pth", ".onnx")):
-        gr_warning(translations["provide_file"].format(filename=translations["model"]))
-        yield translations["provide_file"].format(filename=translations["model"]), interactive_true, interactive_false
-        return
-    input_devices, output_devices = audio_device()
-    input_device_id = input_devices[input_audio_device][0]
-    output_device_id = output_devices[output_audio_device][0]
-    output_monitor_id = output_devices[monitor_output_device][0] if monitor else None
-    input_audio_gain /= 100.0
-    output_audio_gain /= 100.0
-    monitor_audio_gain /= 100.0
-    chunk_size = int(chunk_size * DEVICE_SAMPLE_RATE / 1000 / 128)
-    from main.inference.realtime.callbacks import AudioCallbacks
-    callbacks = AudioCallbacks(
-        pass_through=False,
-        read_chunk_size=chunk_size,
-        cross_fade_overlap_size=cross_fade_overlap_size,
-        input_sample_rate=DEVICE_SAMPLE_RATE,
-        output_sample_rate=DEVICE_SAMPLE_RATE,
-        extra_convert_size=extra_convert_size,
-        model_path=model_pth,
-        index_path=model_index,
-        f0_method=f0_method,
-        f0_onnx=onnx_f0_mode,
-        embedder_model=embedder_model,
-        embedders_mode=embed_mode,
-        sample_rate=PIPELINE_SAMPLE_RATE,
-        hop_length=hop_length,
-        silent_threshold=silent_threshold,
-        f0_up_key=pitch,
-        index_rate=index_strength,
-        protect=protect,
-        filter_radius=filter_radius,
-        rms_mix_rate=rms_mix_rate,
-        f0_autotune=f0_autotune,
-        f0_autotune_strength=f0_autotune_strength,
-        proposal_pitch=proposal_pitch,
-        proposal_pitch_threshold=proposal_pitch_threshold,
-        input_audio_gain=input_audio_gain,
-        output_audio_gain=output_audio_gain,
-        monitor_audio_gain=monitor_audio_gain,
-        monitor=monitor,
-        vad_enabled=vad_enabled,
-        vad_sensitivity=vad_sensitivity,
-        vad_frame_ms=vad_frame_ms,
-        clean_audio=clean_audio,
-        clean_strength=clean_strength
-    )
-    audio_manager = callbacks.audio
-    audio_manager.start(
-        input_device_id=input_device_id,
-        output_device_id=output_device_id,
-        output_monitor_id=output_monitor_id,
-        exclusive_mode=exclusive_mode,
-        asio_input_channel=input_asio_channels,
-        asio_output_channel=output_asio_channels,
-        asio_output_monitor_channel=monitor_asio_channels,
-        read_chunk_size=chunk_size,
-        input_audio_sample_rate=DEVICE_SAMPLE_RATE,
-        output_monitor_sample_rate=DEVICE_SAMPLE_RATE
-    )
-    gr_info(translations["realtime_is_ready"])
-    while running and callbacks is not None and audio_manager is not None:
-        time.sleep(0.1)
-        if hasattr(callbacks, "latency"): yield f"{translations['latency']}: {callbacks.latency:.2f} ms", interactive_false, interactive_true
-    return translations["realtime_has_stop"], interactive_true, interactive_false
-def realtime_stop():
-    global running, callbacks, audio_manager
-    if running and audio_manager is not None and callbacks is not None:
-        gr_info(translations["stop_realtime"])
-        audio_manager.stop()
-        running = False
-        if hasattr(callbacks, "latency"): del callbacks.latency
-        del audio_manager, callbacks
-        audio_manager = callbacks = None
-        gr_info(translations["realtime_has_stop"])
-        from main.library.utils import clear_gpu_cache
-        clear_gpu_cache()
-        return translations["realtime_has_stop"], interactive_true, interactive_false
-    else:
-        gr_warning(translations["realtime_not_found"])
-        return translations["realtime_not_found"], interactive_true, interactive_false

main/app/core/realtime_client.py DELETED Viewed

@@ -1,114 +0,0 @@
-import os
-import sys
-import json
-import numpy as np
-from fastapi import FastAPI, WebSocketDisconnect, WebSocket
-sys.path.append(os.getcwd())
-from main.library.utils import clear_gpu_cache
-from main.app.variables import configs, translations, logger
-from main.inference.realtime.realtime import VoiceChanger, RVC_Realtime
# ASGI application exposing the realtime voice-conversion websocket endpoint.
app = FastAPI()
# Module-level VoiceChanger; created on demand by the websocket handler and
# reset to None when that handler exits.
vc_instance = None
# Passed as `sample_rate` to RVC_Realtime.
PIPELINE_SAMPLE_RATE = 16000
# Passed as the input/output sample rate to VoiceChanger and RVC_Realtime.
DEVICE_SAMPLE_RATE = 48000
@app.websocket("/ws-audio")
async def websocket_audio(ws: WebSocket):
    """Serve one realtime voice-conversion session over a websocket.

    Protocol, as implemented below:
      1. The first message is a JSON text frame holding the session
         parameters (model path, f0 settings, chunk size, ...).
      2. Every following message is a binary frame of float32 samples.
         Each frame is padded or truncated to ``chunk_size * 128`` samples,
         converted, and answered with a JSON latency message followed by
         the converted audio as raw bytes.

    The shared ``vc_instance`` is created on demand and always released in
    the ``finally`` block, whatever ended the session.
    """
    global vc_instance

    await ws.accept()
    logger.info(translations["ws_connected"])

    try:
        text = await ws.receive_text()
        params = json.loads(text)

        read_chunk_size = int(params["chunk_size"])
        block_frame = read_chunk_size * 128
        embedders = params["embedders"]

        # A model name that is not an existing path is resolved against the
        # configured weights directory.
        model_pth = params["model_pth"]
        model_pth = os.path.join(configs["weights_path"], model_pth) if not os.path.exists(model_pth) else model_pth

        if not model_pth or not os.path.exists(model_pth) or os.path.isdir(model_pth) or not model_pth.endswith((".pth", ".onnx")):
            logger.warning(translations["provide_file"].format(filename=translations["model"]))
            await ws.send_text(json.dumps({"type": "warnings", "value": translations["provide_file"].format(filename=translations["model"])}))
            return

        logger.info(translations["start_realtime"])

        if vc_instance is None:
            vc_instance = VoiceChanger(
                read_chunk_size=read_chunk_size,
                cross_fade_overlap_size=params["cross_fade_overlap_size"],
                input_sample_rate=DEVICE_SAMPLE_RATE,
                extra_convert_size=params["extra_convert_size"]
            )
            vc_instance.initialize(vc_model=RVC_Realtime(
                model_path=model_pth,
                index_path=params["model_index"],
                f0_method=params["f0_method"],
                f0_onnx=params["f0_onnx"],
                embedder_model=(embedders if embedders != "custom" else params["custom_embedders"]),
                embedders_mode=params["embedders_mode"],
                sample_rate=PIPELINE_SAMPLE_RATE,
                hop_length=params["hop_length"],
                silent_threshold=params["silent_threshold"],
                input_sample_rate=DEVICE_SAMPLE_RATE,
                output_sample_rate=DEVICE_SAMPLE_RATE,
                vad_enabled=params["vad_enabled"],
                vad_sensitivity=params["vad_sensitivity"],
                vad_frame_ms=params["vad_frame_ms"],
                clean_audio=params["clean_audio"],
                clean_strength=params["clean_strength"]
            ))

        logger.info(translations["realtime_is_ready"])

        while 1:
            audio = await ws.receive_bytes()
            arr = np.frombuffer(audio, dtype=np.float32)

            # Force every frame to exactly `block_frame` samples.
            if arr.size != block_frame:
                arr = np.pad(arr, (0, block_frame - arr.size)).astype(np.float32) if arr.size < block_frame else arr[:block_frame].astype(np.float32)

            audio_output, _, perf = vc_instance.on_request(
                arr * (params["input_audio_gain"] / 100.0),
                f0_up_key=params["f0_up_key"],
                index_rate=params["index_rate"],
                protect=params["protect"],
                filter_radius=params["filter_radius"],
                rms_mix_rate=params["rms_mix_rate"],
                f0_autotune=params["f0_autotune"],
                f0_autotune_strength=params["f0_autotune_strength"],
                proposal_pitch=params["proposal_pitch"],
                proposal_pitch_threshold=params["proposal_pitch_threshold"]
            )

            await ws.send_text(json.dumps({"type": "latency", "value": perf[1]}))
            await ws.send_bytes(audio_output.tobytes())
    except WebSocketDisconnect:
        logger.info(translations["ws_disconnected"])
    except Exception as e:
        import traceback
        logger.debug(traceback.format_exc())
        logger.info(translations["error_occurred"].format(e=e))
    finally:
        # Release the converter before clearing the GPU cache.
        vc_instance = None
        clear_gpu_cache()

        try:
            await ws.close()
        except Exception as close_error:
            # Closing is best-effort (the peer may already be gone), but the
            # reason is logged instead of being swallowed by a bare except.
            logger.debug(close_error)

        logger.info(translations["ws_closed"])

main/app/core/restart.py DELETED Viewed

@@ -1,48 +0,0 @@
-import os
-import sys
-import json
-import platform
-import subprocess
-sys.path.append(os.getcwd())
-from main.app.core.ui import gr_info
-from main.app.variables import python, translations, configs_json
def restart_app(app):
    """Close the running UI and launch the application again.

    Announces the restart, clears the terminal, closes ``app`` and then runs
    ``main/app/app.py`` with the current command-line arguments, minus
    ``--open``.
    """
    gr_info(translations["30s"])

    clear_command = "cls" if platform.system() == "Windows" else "clear"
    os.system(clear_command)

    app.close()

    forwarded_args = [arg for arg in sys.argv[1:] if arg != "--open"]
    entry_point = os.path.join("main", "app", "app.py")
    subprocess.run([python, entry_point] + forwarded_args)
def _update_setting(key, value, app):
    """Persist ``value`` under ``key`` in the config file and restart the app.

    Nothing is written and no restart happens when the stored value already
    equals ``value``. Files are opened with ``with`` so the handles are closed
    deterministically.
    """
    with open(configs_json, "r") as f:
        configs = json.load(f)

    if value == configs[key]:
        return

    configs[key] = value
    with open(configs_json, "w") as f:
        json.dump(configs, f, indent=4)

    restart_app(app)


def change_language(lang, app):
    """Store a new UI language and restart the app if it changed."""
    _update_setting("language", lang, app)


def change_theme(theme, app):
    """Store a new UI theme and restart the app if it changed."""
    _update_setting("theme", theme, app)


def change_font(font, app):
    """Store a new UI font and restart the app if it changed."""
    _update_setting("font", font, app)

main/app/core/separate.py DELETED Viewed

@@ -1,95 +0,0 @@
-import os
-import sys
-import subprocess
-sys.path.append(os.getcwd())
-from main.app.core.ui import gr_info, gr_warning
-from main.app.variables import python, translations, configs
def separate_music(
    input_path,
    output_dirs,
    export_format,
    model_name,
    karaoke_model,
    reverb_model,
    denoise_model,
    sample_rate,
    shifts,
    batch_size,
    overlap,
    aggression,
    hop_length,
    window_size,
    segments_size,
    post_process_threshold,
    enable_tta,
    enable_denoise,
    high_end_process,
    enable_post_process,
    separate_backing,
    separate_reverb
):
    """Run the separation script on one audio file and return the stem paths.

    Returns:
        A 4-item list ``[vocals, instruments, main_vocals, backing_vocals]``
        of expected output paths. The last two are ``None`` unless
        ``separate_backing`` is set. ``[None] * 4`` is returned when the
        input or output location is invalid.
    """
    output_dirs = os.path.dirname(output_dirs) or output_dirs

    if not input_path or not os.path.exists(input_path) or os.path.isdir(input_path):
        gr_warning(translations["input_not_valid"])
        return [None]*4

    # Only an empty output location is invalid. A missing directory is
    # created below; previously the early return made that line unreachable.
    if not output_dirs:
        gr_warning(translations["output_not_valid"])
        return [None]*4

    os.makedirs(output_dirs, exist_ok=True)

    gr_info(translations["start"].format(start=translations["separator_music"]))

    subprocess.run([
        python, configs["separate_path"],
        "--input_path", input_path,
        "--output_dirs", output_dirs,
        "--export_format", export_format,
        "--model_name", model_name,
        "--karaoke_model", karaoke_model,
        "--reverb_model", reverb_model,
        "--denoise_model", denoise_model,
        "--sample_rate", str(sample_rate),
        "--shifts", str(shifts),
        "--batch_size", str(batch_size),
        "--overlap", str(overlap),
        "--aggression", str(aggression),
        "--hop_length", str(hop_length),
        "--window_size", str(window_size),
        "--segments_size", str(segments_size),
        "--post_process_threshold", str(post_process_threshold),
        "--enable_tta", str(enable_tta),
        "--enable_denoise", str(enable_denoise),
        "--high_end_process", str(high_end_process),
        "--enable_post_process", str(enable_post_process),
        "--separate_backing", str(separate_backing),
        "--separate_reverb", str(separate_reverb),
    ])

    gr_info(translations["success"])

    # Stems are expected in a sub-folder named after the input file.
    filename, _ = os.path.splitext(os.path.basename(input_path))
    output_dirs = os.path.join(output_dirs, filename)

    return [
        os.path.join(
            output_dirs,
            f"Original_Vocals_No_Reverb.{export_format}" if separate_reverb else f"Original_Vocals.{export_format}"
        ),
        os.path.join(
            output_dirs,
            f"Instruments.{export_format}"
        ),
        os.path.join(
            output_dirs,
            f"Main_Vocals_No_Reverb.{export_format}" if separate_reverb else f"Main_Vocals.{export_format}"
        ) if separate_backing else None,
        os.path.join(
            output_dirs,
            f"Backing_Vocals.{export_format}"
        ) if separate_backing else None
    ] if os.path.isfile(input_path) else [None]*4

main/app/core/training.py DELETED Viewed

@@ -1,265 +0,0 @@
-import os
-import sys
-import time
-import shutil
-import codecs
-import threading
-import subprocess
-sys.path.append(os.getcwd())
-from main.tools import huggingface
-from main.app.core.ui import gr_info, gr_warning
-from main.app.variables import python, translations, configs
def if_done(done, p):
    """Block until process ``p`` exits, then set ``done[0]`` to True.

    Args:
        done: One-element list used as a flag shared with the log reader.
        p: A ``subprocess.Popen``-like object exposing ``wait()``.

    The flag is set in ``finally`` so the reader polling ``done[0]`` always
    terminates, even if waiting on the process raises.
    """
    try:
        p.wait()
    finally:
        done[0] = True
def log_read(done, name):
    """Yield filtered snapshots of ``app.log`` until ``done[0]`` becomes true.

    The log file is truncated first. While the job runs, each snapshot keeps
    only non-blank, non-DEBUG lines containing ``name``. The final snapshot,
    yielded after the job has finished, keeps every non-blank, non-DEBUG
    line regardless of ``name``.
    """
    log_file = os.path.join(configs["logs_path"], "app.log")

    # Start from an empty log.
    with open(log_file, "w", encoding="utf-8"):
        pass

    def snapshot(only_named):
        with open(log_file, "r", encoding="utf-8") as handle:
            kept = [
                line for line in handle
                if "DEBUG" not in line
                and (not only_named or name in line)
                and line.strip() != ""
            ]
        return "".join(kept)

    while True:
        yield snapshot(True)
        time.sleep(1)
        if done[0]:
            break

    yield snapshot(False)
def create_dataset(
    input_data,
    output_dirs,
    skip_seconds,
    skip_start_audios,
    skip_end_audios,
    separate,
    model_name,
    reverb_model,
    denoise_model,
    sample_rate,
    shifts,
    batch_size,
    overlap,
    aggression,
    hop_length,
    window_size,
    segments_size,
    post_process_threshold,
    enable_tta,
    enable_denoise,
    high_end_process,
    enable_post_process,
    separate_reverb,
    clean_dataset,
    clean_strength
):
    """Launch the dataset-creation script and stream its log output.

    Yields:
        Filtered ``app.log`` snapshots produced by ``log_read`` while the
        child process runs.
    """
    gr_info(translations["start"].format(start=translations["create"]))

    options = {
        "input_data": input_data,
        "output_dirs": output_dirs,
        "skip_seconds": skip_seconds,
        "skip_start_audios": skip_start_audios,
        "skip_end_audios": skip_end_audios,
        "separate": separate,
        "model_name": model_name,
        "reverb_model": reverb_model,
        "denoise_model": denoise_model,
        "sample_rate": sample_rate,
        "shifts": shifts,
        "batch_size": batch_size,
        "overlap": overlap,
        "aggression": aggression,
        "hop_length": hop_length,
        "window_size": window_size,
        "segments_size": segments_size,
        "post_process_threshold": post_process_threshold,
        "enable_tta": enable_tta,
        "enable_denoise": enable_denoise,
        "high_end_process": high_end_process,
        "enable_post_process": enable_post_process,
        "separate_reverb": separate_reverb,
        "clean_dataset": clean_dataset,
        "clean_strength": clean_strength,
    }

    # Pass an argument list instead of a shell string so user-supplied values
    # (paths, names) cannot be interpreted by the shell or break on quotes.
    command = [python, configs["create_dataset_path"]]
    for flag, value in options.items():
        command += [f"--{flag}", str(value)]

    p = subprocess.Popen(command)
    done = [False]
    threading.Thread(target=if_done, args=(done, p)).start()

    for log in log_read(done, "create_dataset"):
        yield log
def create_reference(audio_path, reference_name, pitch_guidance, use_energy, version, embedder_model, embedders_mode, f0_method, f0_onnx, f0_up_key, filter_radius, f0_autotune, f0_autotune_strength, proposal_pitch, proposal_pitch_threshold, alpha=0.5):
    """Launch the reference-creation script and stream its log output.

    Yields:
        Filtered ``app.log`` snapshots produced by ``log_read`` while the
        child process runs.
    """
    gr_info(translations["start"].format(start=translations["create_reference"]))

    options = {
        "audio_path": audio_path,
        "reference_name": reference_name,
        "pitch_guidance": pitch_guidance,
        "use_energy": use_energy,
        "version": version,
        "embedder_model": embedder_model,
        "embedders_mode": embedders_mode,
        "f0_method": f0_method,
        "f0_onnx": f0_onnx,
        "f0_up_key": f0_up_key,
        "filter_radius": filter_radius,
        "f0_autotune": f0_autotune,
        "f0_autotune_strength": f0_autotune_strength,
        "proposal_pitch": proposal_pitch,
        "proposal_pitch_threshold": proposal_pitch_threshold,
        "alpha": alpha,
    }

    # Argument list instead of a shell string: values are passed verbatim and
    # are never interpreted by a shell.
    command = [python, configs["create_reference_path"]]
    for flag, value in options.items():
        command += [f"--{flag}", str(value)]

    p = subprocess.Popen(command)
    done = [False]
    threading.Thread(target=if_done, args=(done, p)).start()

    for log in log_read(done, "create_reference"):
        yield log
def preprocess(model_name, sample_rate, cpu_core, cut_preprocess, process_effects, dataset, clean_dataset, clean_strength, chunk_len=3.0, overlap_len=0.3, normalization_mode="none"):
    """Launch the preprocessing script for a model and stream its log output.

    ``sample_rate`` is a string such as ``"40k"`` and is converted to Hz.
    Any existing log directory for ``model_name`` is removed before the run.
    A warning is shown and nothing is yielded when the model name is empty
    or ``dataset`` contains no file with a supported audio extension.
    """
    sr = int(float(sample_rate.rstrip("k")) * 1000)

    if not model_name: return gr_warning(translations["provide_name"])
    if not os.path.exists(dataset) or not any(f.lower().endswith(("wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3")) for f in os.listdir(dataset) if os.path.isfile(os.path.join(dataset, f))): return gr_warning(translations["not_found_data"])

    model_dir = os.path.join(configs["logs_path"], model_name)
    if os.path.exists(model_dir): shutil.rmtree(model_dir, ignore_errors=True)

    options = {
        "model_name": model_name,
        "dataset_path": dataset,
        "sample_rate": sr,
        "cpu_cores": cpu_core,
        "cut_preprocess": cut_preprocess,
        "process_effects": process_effects,
        "clean_dataset": clean_dataset,
        "clean_strength": clean_strength,
        "chunk_len": chunk_len,
        "overlap_len": overlap_len,
        "normalization_mode": normalization_mode,
    }

    # Argument list instead of a shell string so the model name and dataset
    # path cannot be interpreted by a shell.
    command = [python, configs["preprocess_path"]]
    for flag, value in options.items():
        command += [f"--{flag}", str(value)]

    p = subprocess.Popen(command)
    done = [False]
    threading.Thread(target=if_done, args=(done, p)).start()
    os.makedirs(model_dir, exist_ok=True)

    for log in log_read(done, "preprocess"):
        yield log
def extract(model_name, version, method, pitch_guidance, hop_length, cpu_cores, gpu, sample_rate, embedders, custom_embedders, onnx_f0_mode, embedders_mode, f0_autotune, f0_autotune_strength, hybrid_method, rms_extract, alpha=0.5):
    """Launch the feature-extraction script for a model and stream its logs.

    ``method == "hybrid"`` selects ``hybrid_method`` as the f0 method, and
    ``embedders == "custom"`` selects ``custom_embedders`` as the embedder.
    A warning is shown and nothing is yielded when the model name is empty
    or the preprocessed ``sliced_audios`` / ``sliced_audios_16k`` folders are
    missing or contain no files.
    """
    f0method = method if method != "hybrid" else hybrid_method
    embedder_model = embedders if embedders != "custom" else custom_embedders
    sr = int(float(sample_rate.rstrip("k")) * 1000)

    if not model_name: return gr_warning(translations["provide_name"])
    model_dir = os.path.join(configs["logs_path"], model_name)

    def has_files(folder):
        return any(os.path.isfile(os.path.join(folder, f)) for f in os.listdir(folder))

    try:
        preprocessed = has_files(os.path.join(model_dir, "sliced_audios")) and has_files(os.path.join(model_dir, "sliced_audios_16k"))
    except OSError:
        # A missing or unreadable folder means preprocessing has not run.
        preprocessed = False

    if not preprocessed: return gr_warning(translations["not_found_data_preprocess"])

    options = {
        "model_name": model_name,
        "rvc_version": version,
        "f0_method": f0method,
        "pitch_guidance": pitch_guidance,
        "hop_length": hop_length,
        "cpu_cores": cpu_cores,
        "gpu": gpu,
        "sample_rate": sr,
        "embedder_model": embedder_model,
        "f0_onnx": onnx_f0_mode,
        "embedders_mode": embedders_mode,
        "f0_autotune": f0_autotune,
        "f0_autotune_strength": f0_autotune_strength,
        "rms_extract": rms_extract,
        "alpha": alpha,
    }

    # Argument list instead of a shell string: values are passed verbatim and
    # are never interpreted by a shell.
    command = [python, configs["extract_path"]]
    for flag, value in options.items():
        command += [f"--{flag}", str(value)]

    p = subprocess.Popen(command)
    done = [False]
    threading.Thread(target=if_done, args=(done, p)).start()
    os.makedirs(model_dir, exist_ok=True)

    for log in log_read(done, "extract"):
        yield log
def create_index(model_name, rvc_version, index_algorithm):
    """Launch the index-creation script for a model and stream its logs.

    A warning is shown and nothing is yielded when the model name is empty
    or the ``<rvc_version>_extracted`` folder is missing or has no files.
    """
    if not model_name: return gr_warning(translations["provide_name"])

    model_dir = os.path.join(configs["logs_path"], model_name)
    extracted_dir = os.path.join(model_dir, f"{rvc_version}_extracted")

    try:
        has_features = any(os.path.isfile(os.path.join(extracted_dir, f)) for f in os.listdir(extracted_dir))
    except OSError:
        # A missing or unreadable folder means extraction has not run.
        has_features = False

    if not has_features: return gr_warning(translations["not_found_data_extract"])

    # Argument list instead of a shell string so the model name cannot be
    # interpreted by a shell.
    command = [
        python, configs["create_index_path"],
        "--model_name", str(model_name),
        "--rvc_version", str(rvc_version),
        "--index_algorithm", str(index_algorithm),
    ]

    p = subprocess.Popen(command)
    done = [False]
    threading.Thread(target=if_done, args=(done, p)).start()
    os.makedirs(model_dir, exist_ok=True)

    for log in log_read(done, "create_index"):
        yield log
def training(model_name, rvc_version, save_every_epoch, save_only_latest, save_every_weights, total_epoch, sample_rate, batch_size, gpu, pitch_guidance, not_pretrain, custom_pretrained, pretrain_g, pretrain_d, detector, threshold, clean_up, cache, model_author, vocoder, checkpointing, deterministic, benchmark, optimizer, energy_use, custom_reference=False, reference_name="", multiscale_mel_loss=False):
    """Resolve pretrained weights, launch the training script, stream its logs.

    Pretrained weights are chosen in one of three ways:
      * ``not_pretrain``: none are used (a warning is shown).
      * ``custom_pretrained``: ``pretrain_g`` / ``pretrain_d`` are used, looked
        up in the custom pretrained folder when they are not existing paths.
      * otherwise: the default files for the sample rate, pitch guidance,
        energy and vocoder settings are used and downloaded when missing.

    The child PID is written to ``train_pid.txt`` in the model's log folder.

    Yields:
        The last 50 lines of each filtered log snapshot.
    """
    sr = int(float(sample_rate.rstrip("k")) * 1000)
    if not model_name: return gr_warning(translations["provide_name"])

    model_dir = os.path.join(configs["logs_path"], model_name)
    pid_path = os.path.join(model_dir, "train_pid.txt")
    if os.path.exists(pid_path): os.remove(pid_path)

    extracted_dir = os.path.join(model_dir, f"{rvc_version}_extracted")
    try:
        has_features = any(os.path.isfile(os.path.join(extracted_dir, f)) for f in os.listdir(extracted_dir))
    except OSError:
        # A missing or unreadable folder means extraction has not run.
        has_features = False
    if not has_features: return gr_warning(translations["not_found_data_extract"])

    pretrained_G = pretrained_D = None

    if not_pretrain:
        gr_warning(translations["not_use_pretrain"])
    elif custom_pretrained:
        if not pretrain_g: return gr_warning(translations["provide_pretrained"].format(dg="G"))
        if not pretrain_d: return gr_warning(translations["provide_pretrained"].format(dg="D"))

        pretrained_G = pretrain_g if os.path.exists(pretrain_g) else os.path.join(configs["pretrained_custom_path"], pretrain_g)
        pretrained_D = pretrain_d if os.path.exists(pretrain_d) else os.path.join(configs["pretrained_custom_path"], pretrain_d)

        if not os.path.exists(pretrained_G): return gr_warning(translations["not_found_pretrain"].format(dg="G"))
        if not os.path.exists(pretrained_D): return gr_warning(translations["not_found_pretrain"].format(dg="D"))
    else:
        pretrain_dir = configs["pretrained_v2_path"] if rvc_version == 'v2' else configs["pretrained_v1_path"]
        download_version = codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/cergenvarq_", "rot13") + f"{rvc_version}/"

        # Default generator/discriminator files, keyed by pitch guidance and
        # sample rate.
        pretrained_selector = {
            True: {
                32000: ("f0G32k.pth", "f0D32k.pth"),
                40000: ("f0G40k.pth", "f0D40k.pth"),
                48000: ("f0G48k.pth", "f0D48k.pth")
            },
            False: {
                32000: ("G32k.pth", "D32k.pth"),
                40000: ("G40k.pth", "D40k.pth"),
                48000: ("G48k.pth", "D48k.pth")
            }
        }
        pg, pd = pretrained_selector[pitch_guidance][sr]

        # File names are prefixed with "ENERGY_" and/or "<vocoder>_".
        prefix = ""
        if energy_use: prefix += "ENERGY_"
        if vocoder != 'Default': prefix += vocoder + "_"
        pg2, pd2 = prefix + pg, prefix + pd

        pretrained_G = os.path.join(pretrain_dir, pg2)
        pretrained_D = os.path.join(pretrain_dir, pd2)

        try:
            for kind, filename, target in (("G", pg2, pretrained_G), ("D", pd2, pretrained_D)):
                if not os.path.exists(target):
                    gr_info(translations["download_pretrained"].format(dg=kind, rvc_version=rvc_version))
                    huggingface.HF_download_file(download_version + filename, target)
        except Exception:
            # A failed download falls back to training without pretrained
            # weights instead of aborting.
            gr_warning(translations["not_use_pretrain_error_download"])
            pretrained_G = pretrained_D = None

    reference_path = None
    if custom_reference:
        reference_path = os.path.join(configs["reference_path"], reference_name)
        if not os.path.exists(reference_path):
            gr_warning(translations["not_found_reference"])
            custom_reference = False
            reference_path = None

    gr_info(translations["start"].format(start=translations["training"]))

    options = {
        "model_name": model_name,
        "rvc_version": rvc_version,
        "save_every_epoch": save_every_epoch,
        "save_only_latest": save_only_latest,
        "save_every_weights": save_every_weights,
        "total_epoch": total_epoch,
        "batch_size": batch_size,
        "gpu": gpu,
        "pitch_guidance": pitch_guidance,
        "overtraining_detector": detector,
        "overtraining_threshold": threshold,
        "cleanup": clean_up,
        "cache_data_in_gpu": cache,
        "g_pretrained_path": pretrained_G,
        "d_pretrained_path": pretrained_D,
        "model_author": model_author,
        "vocoder": vocoder,
        "checkpointing": checkpointing,
        "deterministic": deterministic,
        "benchmark": benchmark,
        "optimizer": optimizer,
        "energy_use": energy_use,
        "use_custom_reference": custom_reference,
        "reference_path": reference_path,
        "multiscale_mel_loss": multiscale_mel_loss,
    }

    # Argument list instead of a shell string: user-supplied values (model
    # name, author, paths) are never interpreted by a shell, and `p.pid` is
    # the training process itself rather than an intermediate shell.
    command = [python, configs["train_path"]]
    for flag, value in options.items():
        command += [f"--{flag}", str(value)]

    p = subprocess.Popen(command)
    done = [False]

    with open(pid_path, "w") as pid_file:
        pid_file.write(str(p.pid))

    threading.Thread(target=if_done, args=(done, p)).start()

    for log in log_read(done, "train"):
        lines = log.splitlines()
        if len(lines) > 50: log = "\n".join(lines[-50:])
        yield log

main/app/core/tts.py DELETED Viewed

@@ -1,100 +0,0 @@
-import os
-import sys
-import pysrt
-import codecs
-import librosa
-import asyncio
-import requests
-import tempfile
-sys.path.append(os.getcwd())
-from main.app.variables import translations
-from main.app.core.ui import gr_info, gr_warning, gr_error
def synthesize_tts(prompt, voice, speed, output, pitch, google):
    """Synthesize ``prompt`` to the file ``output``.

    With ``google`` false the edge-tts backend is used; ``speed`` and
    ``pitch`` are sent as signed ``%`` and ``Hz`` strings. With ``google``
    true the audio is fetched over HTTP and, when ``pitch`` or ``speed`` is
    non-zero, post-processed with librosa and rewritten in place.
    """
    if not google:
        from edge_tts import Communicate

        rate_text = f"+{speed}%" if speed >= 0 else f"{speed}%"
        pitch_text = f"+{pitch}Hz" if pitch >= 0 else f"{pitch}Hz"
        communicate = Communicate(text=prompt, voice=voice, rate=rate_text, pitch=pitch_text)
        asyncio.run(communicate.save(output))
        return

    endpoint = codecs.decode("uggcf://genafyngr.tbbtyr.pbz/genafyngr_ggf", "rot13")
    query = {"ie": "UTF-8", "q": prompt, "tl": voice, "ttsspeed": speed, "client": "tw-ob"}
    request_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
    response = requests.get(endpoint, params=query, headers=request_headers)

    if response.status_code != 200:
        gr_error(f"{response.status_code}, {response.text}")
        return

    with open(output, "wb") as f:
        f.write(response.content)

    if pitch == 0 and speed == 0:
        return

    y, sr = librosa.load(output, sr=None)
    # NOTE(review): `pitch` is used here as semitone steps and `speed` as a
    # raw stretch factor, while the edge-tts branch treats them as Hz and
    # percent - confirm the intended units with the caller.
    if pitch != 0: y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch)
    if speed != 0: y = librosa.effects.time_stretch(y, rate=speed)

    import soundfile as sf
    extension = os.path.splitext(os.path.basename(output))[-1].lower().replace('.', '')
    sf.write(file=output, data=y, samplerate=sr, format=extension)
def srt_tts(srt_file, out_file, voice, rate = 0, sr = 24000, google = False):
    """Render an ``.srt`` subtitle file to one audio track aligned to its cues.

    Each subtitle is synthesized on its own, resampled to ``sr`` when needed,
    stretched or trimmed to the cue's duration, and mixed into the output at
    the cue's start time. ``rate`` is forwarded to ``synthesize_tts`` in the
    ``pitch`` position, with speed fixed at 0.

    Raises:
        ValueError: if the subtitle file contains no entries.
    """
    import numpy as np
    import soundfile as sf

    def to_seconds(stamp):
        return (stamp.hours * 60 + stamp.minutes) * 60 + stamp.seconds + stamp.milliseconds / 1000

    def fit_duration(samples, sample_rate, target_duration):
        # Stretch to the target length, then pad or cut to the exact count.
        stretch = (len(samples) / sample_rate) / target_duration
        if stretch != 1.0:
            samples = librosa.effects.time_stretch(y=samples.astype(np.float32), rate=stretch)
        wanted = int(round(target_duration * sample_rate))
        if len(samples) < wanted:
            return np.pad(samples, (0, wanted - len(samples)))
        return samples[:wanted]

    subs = pysrt.open(srt_file)
    if not subs: raise ValueError(translations["srt"])

    total_samples = int(round(to_seconds(subs[-1].end) * sr))
    final_audio = np.zeros(total_samples, dtype=np.float32)

    with tempfile.TemporaryDirectory() as tempdir:
        for idx, seg in enumerate(subs):
            wav_path = os.path.join(tempdir, f"seg_{idx}.wav")
            spoken_text = " ".join(seg.text.splitlines())
            synthesize_tts(spoken_text, voice, 0, wav_path, rate, google)

            audio, file_sr = sf.read(wav_path, dtype=np.float32)
            if file_sr != sr:
                # Linear-interpolation resample to the target rate.
                new_positions = np.linspace(0, len(audio) - 1, int(len(audio) * sr / file_sr))
                audio = np.interp(new_positions, np.arange(len(audio)), audio)

            adjusted = fit_duration(audio, sr, to_seconds(seg.duration))
            start_sample = int(round(to_seconds(seg.start) * sr))
            end_sample = start_sample + adjusted.shape[0]

            # Clip a segment that would run past the end of the track.
            if end_sample > final_audio.shape[0]:
                adjusted = adjusted[: final_audio.shape[0] - start_sample]
                end_sample = final_audio.shape[0]

            final_audio[start_sample:end_sample] += adjusted

    sf.write(out_file, final_audio, sr)
def TTS(prompt, voice, speed, output, pitch, google, srt_input):
    """Validate the request and synthesize speech from text or an ``.srt`` file.

    Returns:
        The output file path on success, or ``None`` (after a UI warning)
        when the text, voice or output location is missing.
    """
    if not srt_input: srt_input = ""

    if not prompt and not srt_input.endswith(".srt"):
        gr_warning(translations["enter_the_text"])
        return None

    if not voice:
        gr_warning(translations["choose_voice"])
        return None

    if not output:
        gr_warning(translations["output_not_valid"])
        return None

    if os.path.isdir(output): output = os.path.join(output, "tts.wav")

    gr_info(translations["convert"].format(name=translations["text"]))

    # Only create the parent directory when there is one. For a bare filename
    # such as "out.wav" the old `dirname(output) or output` fallback created
    # a directory named after the output file, which then broke the write.
    output_dir = os.path.dirname(output)
    if output_dir and not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)

    if srt_input.endswith(".srt"): srt_tts(srt_input, output, voice, 0, 24000, google)
    else: synthesize_tts(prompt, voice, speed, output, pitch, google)

    gr_info(translations["success"])
    return output

main/app/core/ui.py DELETED Viewed

@@ -1,362 +0,0 @@
-import os
-import re
-import sys
-import json
-import torch
-import shutil
-import gradio as gr
-import sounddevice as sd
-sys.path.append(os.getcwd())
-from main.library.backends import directml, opencl
-from main.inference.realtime.audio import list_audio_device
-from main.app.variables import config, configs, configs_json, logger, translations, edgetts, google_tts_voice, method_f0, method_f0_full, vr_models, mdx_models, demucs_models, embedders_model, spin_model, whisper_model
-def gr_info(message):
-    """Show ``message`` through ``gr.Info`` (duration=2) and log it at info level."""
-    gr.Info(message=message, duration=2)
-    logger.info(message)
-def gr_warning(message):
-    """Show ``message`` through ``gr.Warning`` (duration=2) and log it at warning level."""
-    gr.Warning(message=message, duration=2)
-    logger.warning(message)
-def gr_error(message):
-    """Build a ``gr.Error`` for ``message`` (duration=6) and log it at error level."""
-    # NOTE(review): the gr.Error instance is created but not raised here -
-    # confirm this is intended before relying on it to notify the user.
-    gr.Error(message, duration=6)
-    logger.error(message)
-def get_gpu_info():
-    """Return one line per detected GPU, or the "no_support_gpu" translation.
-
-    CUDA devices are listed first as "<index>: <name> (<memory> GB)". When none
-    are found, DirectML and then OpenCL devices are listed as "<index>: <name>".
-    The fallback text is returned when nothing was found or ``config.cpu_mode`` is set.
-    """
-    cuda_count = torch.cuda.device_count()
-    lines = []
-    for idx in range(cuda_count):
-        if not (torch.cuda.is_available() or cuda_count != 0):
-            continue
-        name = torch.cuda.get_device_name(idx)
-        # Bytes -> GiB, nudged by 0.4 before truncation.
-        memory_gb = int(torch.cuda.get_device_properties(idx).total_memory / 1024 / 1024 / 1024 + 0.4)
-        lines.append(f"{idx}: {name} ({memory_gb} GB)")
-    if not lines:
-        if directml.torch_available:
-            count = directml.device_count()
-            lines = [f"{idx}: {directml.device_name(idx)}" for idx in range(count) if directml.is_available() or count != 0]
-        elif opencl.torch_available:
-            count = opencl.device_count()
-            lines = [f"{idx}: {opencl.device_name(idx)}" for idx in range(count) if opencl.is_available() or count != 0]
-    if lines and not config.cpu_mode:
-        return "\n".join(lines)
-    return translations["no_support_gpu"]
-def gpu_number_str():
-    """Return the device indices joined by "-" (e.g. "0-1"), or "-" when no GPU is usable.
-
-    "-" is returned in CPU mode and when neither CUDA, DirectML nor OpenCL
-    reports itself as available.
-    """
-    if config.cpu_mode:
-        return "-"
-    count = torch.cuda.device_count()
-    if count == 0:
-        count = directml.device_count() if directml.torch_available else opencl.device_count()
-    if not (torch.cuda.is_available() or directml.is_available() or opencl.is_available()):
-        return "-"
-    return "-".join(str(idx) for idx in range(count))
-def change_f0_choices():
-    """Return an update dict listing every ".txt" file found under ``configs["f0_path"]``.
-
-    Paths are absolute and sorted; the first one (or "") becomes the selected value.
-    """
-    found = []
-    for folder, _, names in os.walk(configs["f0_path"]):
-        for name in names:
-            if name.endswith(".txt"):
-                found.append(os.path.abspath(os.path.join(folder, name)))
-    found.sort()
-    selected = found[0] if found else ""
-    return {"value": selected, "choices": found, "__type__": "update"}
-def change_audios_choices(input_audio):
-    """Return an update dict listing the audio files found under ``configs["audios_path"]``.
-
-    ``input_audio`` stays selected unless it is ""; in that case the first
-    sorted file (or "") is selected.
-    """
-    extensions = (".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3")
-    found = []
-    for folder, _, names in os.walk(configs["audios_path"]):
-        for name in names:
-            if os.path.splitext(name)[1].lower() in extensions:
-                found.append(os.path.abspath(os.path.join(folder, name)))
-    found.sort()
-    if input_audio != "":
-        selected = input_audio
-    else:
-        selected = found[0] if found else ""
-    return {"value": selected, "choices": found, "__type__": "update"}
-def change_reference_choices():
-    """Return an update dict listing the sub-directories of ``configs["reference_path"]``.
-
-    A trailing "_v<digits>_<text>_<True|False>_<True|False>" part is stripped
-    from each directory name before sorting.
-    """
-    root = configs["reference_path"]
-    names = []
-    for entry in os.listdir(root):
-        full_path = os.path.join(root, entry)
-        if os.path.exists(full_path) and os.path.isdir(full_path):
-            names.append(re.sub(r'_v\d+_(?:[A-Za-z0-9_]+?)_(True|False)_(True|False)$', '', entry))
-    names.sort()
-    selected = names[0] if names else ""
-    return {"value": selected, "choices": names, "__type__": "update"}
-def change_models_choices():
-    """Return two update dicts: the model files, then the ".index" files.
-
-    Models are ".pth"/".onnx" files in ``configs["weights_path"]`` that do not
-    start with "G_" or "D_". Index files are collected under
-    ``configs["logs_path"]``, skipping names that contain "trained".
-    """
-    models = sorted(
-        name for name in os.listdir(configs["weights_path"])
-        if name.endswith((".pth", ".onnx")) and not name.startswith(("G_", "D_"))
-    )
-    indexes = []
-    for folder, _, names in os.walk(configs["logs_path"], topdown=False):
-        for name in names:
-            if name.endswith(".index") and "trained" not in name:
-                indexes.append(os.path.join(folder, name))
-    indexes.sort()
-    return [
-        {"value": models[0] if models else "", "choices": models, "__type__": "update"},
-        {"value": indexes[0] if indexes else "", "choices": indexes, "__type__": "update"},
-    ]
-def change_pretrained_choices():
-    """Return update dicts for the custom pretrained "D" and "G" checkpoints, in that order.
-
-    A ".pth" file in ``configs["pretrained_custom_path"]`` is listed in the
-    first dict when its name contains "D" and in the second when it contains "G".
-    """
-    def as_update(options):
-        return {"choices": options, "value": options[0] if options else "", "__type__": "update"}
-    folder = configs["pretrained_custom_path"]
-    discriminators = sorted(name for name in os.listdir(folder) if name.endswith(".pth") and "D" in name)
-    generators = sorted(name for name in os.listdir(folder) if name.endswith(".pth") and "G" in name)
-    return [as_update(discriminators), as_update(generators)]
-def change_choices_del():
-    """Return update dicts with the deletable ".pth" models and the model log folders.
-
-    Models starting with "G_" or "D_" are left out, as are the "mute" and
-    "reference" folders of ``configs["logs_path"]``.
-    """
-    weights = sorted(
-        name for name in os.listdir(configs["weights_path"])
-        if name.endswith(".pth") and not name.startswith(("G_", "D_"))
-    )
-    logs_root = configs["logs_path"]
-    folders = []
-    for entry in os.listdir(logs_root):
-        if entry not in ["mute", "reference"] and os.path.isdir(os.path.join(logs_root, entry)):
-            folders.append(os.path.join(logs_root, entry))
-    folders.sort()
-    return [{"choices": weights, "__type__": "update"}, {"choices": folders, "__type__": "update"}]
-def change_preset_choices():
-    """Return an update dict listing the ".conversion.json" presets, with an empty selection."""
-    presets = [name for name in os.listdir(configs["presets_path"]) if name.endswith(".conversion.json")]
-    presets.sort()
-    return {"value": "", "choices": presets, "__type__": "update"}
-def change_effect_preset_choices():
-    """Return an update dict listing the ".effect.json" presets, with an empty selection."""
-    presets = [name for name in os.listdir(configs["presets_path"]) if name.endswith(".effect.json")]
-    presets.sort()
-    return {"value": "", "choices": presets, "__type__": "update"}
-def change_tts_voice_choices(google):
-    """Return an update dict with the Google voices when ``google`` is truthy, else the ``edgetts`` voices.
-
-    The first voice of the chosen list becomes the selected value.
-    """
-    voices = google_tts_voice if google else edgetts
-    return {"choices": voices, "value": voices[0], "__type__": "update"}
-def change_backing_choices(backing, merge):
-    """Return the update dict for the option that depends on ``backing`` and ``merge``.
-
-    When either flag is set the option is cleared and locked; otherwise it is
-    made interactive again without touching its value.
-    """
-    if backing or merge:
-        return {"value": False, "interactive": False, "__type__": "update"}
-    # Both flags are falsy here; the former "option not valid" branch could never run.
-    return {"interactive": True, "__type__": "update"}
-def change_download_choices(select):
-    """Return ten visibility updates for the download mode named by ``select``.
-
-    The URL mode shows slots 0-2, the CSV mode 3-4, the search mode 5-6 and the
-    upload mode slot 9. An unknown mode triggers a warning and hides every slot.
-    """
-    if select == translations["download_url"]:
-        shown = (0, 1, 2)
-    elif select == translations["download_from_csv"]:
-        shown = (3, 4)
-    elif select == translations["search_models"]:
-        shown = (5, 6)
-    elif select == translations["upload"]:
-        shown = (9,)
-    else:
-        shown = ()
-        gr_warning(translations["option_not_valid"])
-    return [{"visible": slot in shown, "__type__": "update"} for slot in range(10)]
-def change_download_pretrained_choices(select):
-    """Return seven visibility updates for the pretrained-download mode named by ``select``.
-
-    The URL mode shows slots 0-2, the model-list mode 3-5 and the upload mode
-    slot 6. An unknown mode triggers a warning and hides every slot.
-    """
-    if select == translations["download_url"]:
-        shown = (0, 1, 2)
-    elif select == translations["list_model"]:
-        shown = (3, 4, 5)
-    elif select == translations["upload"]:
-        shown = (6,)
-    else:
-        shown = ()
-        gr_warning(translations["option_not_valid"])
-    return [{"visible": slot in shown, "__type__": "update"} for slot in range(7)]
-def get_index(model):
-    """Return an update dict selecting the ".index" file that matches ``model``.
-
-    The search key is the model's base name up to the first "_" and then up to
-    the first ".". The first ".index" path under ``configs["logs_path"]`` that
-    contains the key (skipping names containing "trained") is selected; "" is
-    selected when nothing matches.
-
-    Returns:
-        The update dict, or ``None`` when ``model`` is empty/``None`` or has an
-        empty name before the first "_".
-    """
-    # Guard before touching os.path: an empty selection may arrive as None.
-    if not model:
-        return None
-    prefix = os.path.basename(model).split("_")[0]
-    if not prefix:
-        return None
-    key = prefix.split(".")[0]
-    for folder, _, names in os.walk(configs["logs_path"], topdown=False):
-        for name in names:
-            if name.endswith(".index") and "trained" not in name:
-                candidate = os.path.join(folder, name)
-                if key in candidate:
-                    return {"value": candidate, "__type__": "update"}
-    return {"value": "", "__type__": "update"}
-def index_strength_show(index):
-    """Return an update dict that shows the control only when ``index`` is an existing file.
-
-    The value is always reset to 0.5.
-    """
-    is_file = index not in ("", None) and os.path.exists(index) and os.path.isfile(index)
-    return {"visible": is_file, "value": 0.5, "__type__": "update"}
-def hoplength_show(method, hybrid_method=None):
-    """Return a visibility update that is shown when a listed method name occurs in the inputs.
-
-    ``method`` and, when given, ``hybrid_method`` are searched for the
-    substrings "mangio-crepe", "fcpe", "yin", "piptrack" and "mangio-penn".
-    """
-    markers = ("mangio-crepe", "fcpe", "yin", "piptrack", "mangio-penn")
-    shown = any(
-        marker in method or (hybrid_method is not None and marker in hybrid_method)
-        for marker in markers
-    )
-    return {"visible": shown, "__type__": "update"}
-def visible(value):
-    """Return an update dict that sets "visible" to ``value``."""
-    return dict(visible=value, __type__="update")
-def valueFalse_interactive(value):
-    """Return an update dict that resets the value to False and sets "interactive" to ``value``."""
-    return dict(value=False, interactive=value, __type__="update")
-def valueEmpty_visible1(value):
-    """Return an update dict that clears the value to "" and sets "visible" to ``value``."""
-    return dict(value="", visible=value, __type__="update")
-def pitch_guidance_lock(vocoders):
-    """Return an update dict that sets the value to True and is editable only for the "Default" vocoder."""
-    editable = vocoders == "Default"
-    return {"value": True, "interactive": editable, "__type__": "update"}
-def vocoders_lock(pitch, vocoders):
-    """Return an update dict that keeps ``vocoders`` while ``pitch`` is truthy, else falls back to "Default".
-
-    "interactive" is set to ``pitch`` itself.
-    """
-    if pitch:
-        chosen = vocoders
-    else:
-        chosen = "Default"
-    return {"value": chosen, "interactive": pitch, "__type__": "update"}
-def unlock_f0(value):
-    """Return an update dict offering ``method_f0_full`` when ``value`` is truthy, else ``method_f0``.
-
-    The selected value is reset to "rmvpe" in both cases.
-    """
-    methods = method_f0_full if value else method_f0
-    return {"choices": methods, "value": "rmvpe", "__type__": "update"}
-def unlock_vocoder(value, vocoder):
-    """Return an update dict that keeps ``vocoder`` and stays editable only when ``value`` is "v2".
-
-    For any other ``value`` the vocoder is reset to "Default" and locked.
-    """
-    is_v2 = value == "v2"
-    chosen = vocoder if is_v2 else "Default"
-    return {"value": chosen, "interactive": is_v2, "__type__": "update"}
-def unlock_ver(value, vocoder):
-    """Return an update dict for the version control based on the chosen ``vocoder``.
-
-    With the "Default" vocoder the value becomes "v2" and the control is
-    editable; with any other vocoder ``value`` is kept and the control is locked.
-    """
-    is_default = vocoder == "Default"
-    chosen = "v2" if is_default else value
-    return {"value": chosen, "interactive": is_default, "__type__": "update"}
-def change_embedders_mode(value):
-    """Return an update dict with the model list that belongs to the embedder mode ``value``.
-
-    "spin" selects ``spin_model``, "whisper" selects ``whisper_model`` and any
-    other mode selects ``embedders_model``; the first entry becomes the value.
-    """
-    if value == "spin":
-        models = spin_model
-    elif value == "whisper":
-        models = whisper_model
-    else:
-        models = embedders_model
-    return {"value": models[0], "choices": models, "__type__": "update"}
-def change_fp(fp):
-    """Switch between fp16 and fp32 and persist the choice in the JSON config file.
-
-    fp16 is refused with a warning when ``config.device`` is "cpu", "mps" or
-    "ocl:0". Otherwise ``config.is_half`` and the "fp16" key of ``configs_json``
-    are updated.
-
-    Returns:
-        "fp16" or "fp32", the precision that is in effect afterwards.
-    """
-    use_fp16 = fp == "fp16"
-    if use_fp16 and config.device in ["cpu", "mps", "ocl:0"]:
-        gr_warning(translations["fp16_not_support"])
-        return "fp32"
-    gr_info(translations["start_update_precision"])
-    # Read through a context manager so the file handle is closed before rewriting.
-    with open(configs_json, "r") as f:
-        settings = json.load(f)
-    settings["fp16"] = config.is_half = use_fp16
-    with open(configs_json, "w") as f:
-        json.dump(settings, f, indent=4)
-    gr_info(translations["success"])
-    return "fp16" if use_fp16 else "fp32"
-def process_output(file_path):
-    """Return a path that is safe to write to, based on the "delete_exists_file" setting.
-
-    When the setting is on (the default) an existing file at ``file_path`` is
-    removed and the same path is returned. When it is off, the first free
-    "<name>_<n><ext>" path in the same directory is returned, counting from 1.
-    """
-    overwrite = config.configs.get("delete_exists_file", True)
-    if overwrite:
-        if os.path.exists(file_path) and os.path.isfile(file_path):
-            os.remove(file_path)
-        return file_path
-    if not os.path.exists(file_path):
-        return file_path
-    folder = os.path.dirname(file_path)
-    stem, extension = os.path.splitext(os.path.basename(file_path))
-    counter = 1
-    candidate = os.path.join(folder, f"{stem}_{counter}{extension}")
-    while os.path.exists(candidate):
-        counter += 1
-        candidate = os.path.join(folder, f"{stem}_{counter}{extension}")
-    return candidate
-def shutil_move(input_path, output_path):
-    """Move ``input_path`` to ``output_path`` and return the result of ``shutil.move``.
-
-    A directory target gets the source's base name appended. When the final
-    target already exists, ``process_output`` decides the path to move to.
-    """
-    if os.path.isdir(output_path):
-        output_path = os.path.join(output_path, os.path.basename(input_path))
-    if os.path.exists(output_path):
-        destination = process_output(output_path)
-    else:
-        destination = output_path
-    return shutil.move(input_path, destination)
-def separate_change(model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise):
-    """Return the fourteen UI updates for the separation tab.
-
-    The model families in use ("vr", "mdx", "demucs") are derived from the main
-    model, from the karaoke model when ``separate_backing`` is set and from the
-    reverb model when ``separate_reverb`` is set. Each returned update depends
-    on those families and on the post-process/denoise switches.
-    """
-    if model_name in vr_models.keys():
-        model_type = "vr"
-    elif model_name in mdx_models.keys():
-        model_type = "mdx"
-    elif model_name in demucs_models.keys():
-        model_type = "demucs"
-    else:
-        model_type = ""
-    families = {model_type}
-    # The helper models only count while their switch is on.
-    if separate_backing:
-        families.add("vr" if karaoke_model.startswith("VR") else "mdx")
-    if separate_reverb:
-        families.add("mdx" if reverb_model.startswith("MDX") else "vr")
-    uses_vr = "vr" in families
-    uses_mdx = "mdx" in families
-    uses_demucs = "demucs" in families
-    updates = [
-        visible(separate_backing),
-        visible(separate_reverb),
-        visible(uses_mdx or uses_demucs),
-        visible(uses_mdx or uses_demucs),
-        visible(uses_mdx),
-        visible(uses_mdx or uses_vr),
-        visible(uses_demucs),
-        visible(uses_vr),
-        visible(uses_vr),
-        visible(uses_vr and enable_post_process),
-        visible(uses_vr and enable_denoise),
-    ]
-    updates.extend(valueFalse_interactive(uses_vr) for _ in range(3))
-    return updates
-def create_dataset_change(model_name, reverb_model, enable_post_process, separate_reverb, enable_denoise):
-    """Return the thirteen UI updates for the dataset-creation tab.
-
-    The model families in use ("vr", "mdx", "demucs") are derived from the main
-    model and, when ``separate_reverb`` is set, from the reverb model. Each
-    returned update depends on those families and on the post-process/denoise switches.
-    """
-    if model_name in vr_models.keys():
-        model_type = "vr"
-    elif model_name in mdx_models.keys():
-        model_type = "mdx"
-    elif model_name in demucs_models.keys():
-        model_type = "demucs"
-    else:
-        model_type = ""
-    families = {model_type}
-    # The reverb model only counts while reverb separation is on.
-    if separate_reverb:
-        families.add("mdx" if reverb_model.startswith("MDX") else "vr")
-    uses_vr = "vr" in families
-    uses_mdx = "mdx" in families
-    uses_demucs = "demucs" in families
-    updates = [
-        visible(separate_reverb),
-        visible(uses_mdx or uses_demucs),
-        visible(uses_mdx or uses_demucs),
-        visible(uses_mdx),
-        visible(uses_mdx or uses_vr),
-        visible(uses_demucs),
-        visible(uses_vr),
-        visible(uses_vr),
-        visible(uses_vr and enable_post_process),
-        visible(uses_vr and enable_denoise),
-    ]
-    updates.extend(valueFalse_interactive(uses_vr) for _ in range(3))
-    return updates
-def audio_device():
-    """Return two dicts mapping a display label to ``[device index, max channels]``.
-
-    The first dict covers input devices, the second output devices. Labels look
-    like "<position>: <name> (<host api>)". Output devices whose name contains
-    "virtual" come first, then those containing "vb", then the rest; input
-    devices use the reverse order.
-
-    Returns:
-        ``(input_devices, output_devices)``; two empty dicts when the devices
-        cannot be listed, so callers can still use ``.get`` and ``.keys()``.
-    """
-    def priority(name):
-        lowered = name.lower()
-        if "virtual" in lowered:
-            return 0
-        if "vb" in lowered:
-            return 1
-        return 2
-    try:
-        input_devices, output_devices = list_audio_device()
-        output_sorted = sorted(output_devices, key=lambda d: priority(d.name))
-        input_sorted = sorted(input_devices, key=lambda d: priority(d.name), reverse=True)
-        # enumerate() numbers each entry by position, so two devices that
-        # compare equal no longer end up under the same label.
-        input_device_list = {
-            f"{position}: {d.name} ({d.host_api})": [d.index, d.max_input_channels]
-            for position, d in enumerate(input_sorted, start=1)
-        }
-        output_device_list = {
-            f"{position}: {d.name} ({d.host_api})": [d.index, d.max_output_channels]
-            for position, d in enumerate(output_sorted, start=1)
-        }
-        return input_device_list, output_device_list
-    except Exception:
-        # Best effort: an unusable audio backend yields empty device tables.
-        return {}, {}
-def update_audio_device(input_device, output_device, monitor_device, monitor):
-    """Return the seven UI updates that follow a change of the selected audio devices.
-
-    A device counts as ASIO when its label contains "ASIO". The channel limits
-    come from the tables returned by ``audio_device()``; the monitor limit is
-    128 when ``monitor`` is off. When any lookup fails all three limits are -1.
-    """
-    input_channels_map, output_channels_map = audio_device()
-    input_is_asio = "ASIO" in input_device if input_device else False
-    output_is_asio = "ASIO" in output_device if output_device else False
-    monitor_is_asio = "ASIO" in monitor_device if monitor_device else False
-    try:
-        input_max_ch = input_channels_map.get(input_device, [])[1]
-        output_max_ch = output_channels_map.get(output_device, [])[1]
-        monitor_max_ch = output_channels_map.get(monitor_device, [])[1] if monitor else 128
-    except Exception:
-        # Unknown label or unusable device table. Catching Exception rather than
-        # everything keeps KeyboardInterrupt/SystemExit working.
-        input_max_ch = output_max_ch = monitor_max_ch = -1
-    return [
-        visible(monitor),
-        visible(monitor),
-        visible(monitor_is_asio),
-        visible(input_is_asio or output_is_asio or monitor_is_asio),
-        gr.update(visible=input_is_asio, maximum=input_max_ch),
-        gr.update(visible=output_is_asio, maximum=output_max_ch),
-        gr.update(visible=monitor_is_asio, maximum=monitor_max_ch)
-    ]
-def change_audio_device_choices():
-    """Re-initialize ``sounddevice`` and return three dropdown updates with the device labels.
-
-    The first update lists input devices; the second and third both list output
-    devices. Each selects its first label, or "" when there is none.
-    """
-    # Calls sounddevice's private terminate/initialize pair before listing devices.
-    sd._terminate()
-    sd._initialize()
-    input_table, output_table = audio_device()
-    # list(table) yields the labels of a dict and also accepts the empty lists
-    # that audio_device() returns on failure, where .keys() would raise.
-    input_labels, output_labels = list(input_table), list(output_table)
-    def as_update(labels):
-        return {"value": labels[0] if labels else "", "choices": labels, "__type__": "update"}
-    return [as_update(input_labels), as_update(output_labels), as_update(output_labels)]
-def replace_punctuation(filename):
-    """Return ``filename`` with spaces and punctuation normalized.
-
-    Spaces and "|" become "_"; "-", brackets, braces, commas and quotes are
-    removed; "___" is then collapsed to "_" in a single pass and surrounding
-    whitespace is stripped.
-    """
-    to_underscore = " |"
-    to_remove = "-()[],\"'{}"
-    table = str.maketrans(to_underscore, "_" * len(to_underscore), to_remove)
-    # No "-" survives the translation, so only the "___" collapse is still needed.
-    return filename.translate(table).replace("___", "_").strip()
-def replace_url(url):
-    """Return ``url`` with "/blob/" turned into "/resolve/", "?download=true" removed and whitespace stripped."""
-    resolved = url.replace("/blob/", "/resolve/")
-    without_query = resolved.replace("?download=true", "")
-    return without_query.strip()
-def replace_modelname(modelname):
-    """Return ``modelname`` without ".onnx", ".pth", ".index" and ".zip", passed through ``replace_punctuation``."""
-    cleaned = modelname
-    for extension in (".onnx", ".pth", ".index", ".zip"):
-        cleaned = cleaned.replace(extension, "")
-    return replace_punctuation(cleaned)
-def replace_export_format(audio_path, export_format = "wav"):
-    """Return ``audio_path`` with its file extension replaced by ``export_format``.
-
-    Only the extension of the last path component is replaced; the same text
-    elsewhere in the path (for example in a directory name) is left alone.
-    A path that already ends with the format, or that has no extension, is
-    returned unchanged.
-    """
-    suffix = f".{export_format}"
-    if audio_path.endswith(suffix):
-        return audio_path
-    root, extension = os.path.splitext(audio_path)
-    return root + suffix if extension else audio_path
-def update_dropdowns_from_json(data):
-    """Return three dropdown updates built from the "inputs" and "outputs" keys of ``data``.
-
-    The first update lists the input names, the second and third both list the
-    output names; each selects its first entry or ``None``. Falsy ``data``
-    yields three empty updates.
-    """
-    def as_dropdown(options):
-        return gr.update(choices=options, value=options[0] if len(options) > 0 else None)
-    if not data:
-        return [gr.update(choices=[], value=None) for _ in range(3)]
-    input_names = list(data.get("inputs", {}).keys())
-    output_names = list(data.get("outputs", {}).keys())
-    return [as_dropdown(input_names), as_dropdown(output_names), as_dropdown(output_names)]
-def update_button_from_json(data):
-    """Return interactivity updates for the start and stop buttons, in that order.
-
-    The flags come from the "start_button" and "stop_button" keys of ``data``;
-    missing keys or falsy ``data`` default to start enabled and stop disabled.
-    """
-    if data:
-        start_enabled = data.get("start_button", True)
-        stop_enabled = data.get("stop_button", False)
-    else:
-        start_enabled, stop_enabled = True, False
-    return [gr.update(interactive=start_enabled), gr.update(interactive=stop_enabled)]

main/app/core/utils.py DELETED Viewed

@@ -1,61 +0,0 @@
-import os
-import sys
-import json
-import codecs
-import requests
-sys.path.append(os.getcwd())
-from main.app.core.ui import gr_info, gr_warning
-from main.app.variables import translations, configs
-def stop_pid(pid_file, model_name=None, train=False):
-    """Kill the processes recorded in a PID file and remove that file.
-
-    The PID file is ``assets/<pid_file>.txt`` when ``model_name`` is None,
-    otherwise ``<logs_path>/<model_name>/<pid_file>.txt``. With ``train`` set,
-    the PIDs stored under "process_pids" in the model's ``config.json`` are
-    killed as well and that key is removed from the file.
-
-    Failures are logged instead of raised. A missing PID file only produces a
-    warning, whose result is returned.
-    """
-    import logging
-    def kill_all(pids):
-        # Keep going when one process is already gone so the others are still killed.
-        for pid in pids:
-            try:
-                os.kill(pid, 9)
-            except OSError:
-                continue
-    try:
-        pid_file_path = os.path.join("assets", f"{pid_file}.txt") if model_name is None else os.path.join(configs["logs_path"], model_name, f"{pid_file}.txt")
-        if not os.path.exists(pid_file_path): return gr_warning(translations["not_found_pid"])
-        with open(pid_file_path, "r") as handle:
-            # Skip blank lines, which int() cannot parse.
-            pids = [int(line) for line in handle if line.strip()]
-        kill_all(pids)
-        if os.path.exists(pid_file_path): os.remove(pid_file_path)
-        # The config.json lookup needs a model folder, so it is skipped without one.
-        if train and model_name is not None:
-            config_path = os.path.join(configs["logs_path"], model_name, "config.json")
-            if os.path.exists(config_path):
-                with open(config_path, "r") as handle:
-                    pid_data = json.load(handle)
-                pids = pid_data.pop("process_pids", [])
-                with open(config_path, "w") as handle:
-                    json.dump(pid_data, handle, indent=4)
-                kill_all(pids)
-                gr_info(translations["end_pid"])
-    except (OSError, ValueError, KeyError, TypeError) as error:
-        # Stopping is best effort, but the reason for a failure should not vanish.
-        logging.getLogger(__name__).warning("stop_pid failed: %s", error)
-def google_translate(text, source='auto', target='vi'):
-    """Translate ``text`` from ``source`` to ``target`` through the rot13-encoded HTTP endpoint.
-
-    The text is split into chunks of at most 5000 characters. A chunk that
-    cannot be translated (network error, non-200 status, unexpected payload) is
-    kept untranslated.
-
-    Returns:
-        The translated text; the result of the warning call when ``text`` is
-        empty; ``text`` itself when it cannot be split into chunks.
-    """
-    if not text: return gr_warning(translations["prompt_warning"])
-    import textwrap
-    endpoint = codecs.decode("uggcf://genafyngr.tbbtyrncvf.pbz/genafyngr_n/fvatyr", "rot13")
-    def translate_chunk(chunk):
-        try:
-            # The timeout keeps a stalled connection from blocking the caller forever.
-            response = requests.get(endpoint, params={'client': 'gtx', 'sl': source, 'tl': target, 'dt': 't', 'q': chunk}, timeout=10)
-            if response.status_code != 200: return chunk
-            # Drop segments without text so join() never sees None.
-            return ''.join(part[0] for part in response.json()[0] if part[0])
-        except (requests.RequestException, ValueError, LookupError, TypeError):
-            return chunk
-    try:
-        chunks = textwrap.wrap(text, 5000, break_long_words=False, break_on_hyphens=False)
-    except (TypeError, AttributeError):
-        return text
-    # textwrap.wrap removes the whitespace at each break, so put one space back between chunks.
-    return " ".join(translate_chunk(chunk) for chunk in chunks)

main/app/parser.py DELETED Viewed

@@ -1,369 +0,0 @@
-import os
-import sys
-sys.path.append(os.getcwd())
-try:
-    argv = sys.argv[1]
-except IndexError:
-    argv = None
-argv_is_allows = ["--audio_effects", "--convert", "--create_dataset", "--create_index", "--extract", "--preprocess", "--separator_music", "--train", "--help_audio_effects", "--help_convert", "--help_create_dataset", "--help_create_index", "--help_extract", "--help_preprocess", "--help_separate_music",  "--help_train", "--help", "--create_reference", "help_create_reference"]
-if argv not in argv_is_allows:
-    print("Cú pháp không hợp lệ! Sử dụng --help để biết thêm")
-    quit()
-if argv_is_allows[0] in argv: from main.inference.audio_effects import main
-elif argv_is_allows[1] in argv: from main.inference.conversion.convert import main
-elif argv_is_allows[2] in argv: from main.inference.create_dataset import main
-elif argv_is_allows[3] in argv: from main.inference.create_index import main
-elif argv_is_allows[4] in argv: from main.inference.extracting.extract import main
-elif argv_is_allows[5] in argv: from main.inference.preprocess.preprocess import main
-elif argv_is_allows[6] in argv: from main.inference.separate_music import main
-elif argv_is_allows[7] in argv: from main.inference.training.train import main
-elif argv_is_allows[17] in argv: from main.inference.create_reference import main
-elif argv_is_allows[8] in argv:
-    print("""Các tham số của `--audio_effects`:
-        1. Đường dẫn tệp:
-            - `--input_path` (bắt buộc): Đường dẫn đến tệp âm thanh đầu vào.
-            - `--output_path` (mặc định: `./audios/apply_effects.wav`): Đường dẫn lưu tệp đầu ra.
-            - `--export_format` (mặc định: `wav`): Định dạng xuất tệp (`wav`, `mp3`, ...).
-        2. Lấy mẫu lại:
-            - `--resample` (mặc định: `False`): Có lấy mẫu lại hay không.
-            - `--resample_sr` (mặc định: `0`): Tần số lấy mẫu mới (Hz).
-        3. Hiệu ứng chorus:
-            - `--chorus`: Bật/tắt chorus.
-            - `--chorus_depth`, `--chorus_rate`, `--chorus_mix`, `--chorus_delay`, `--chorus_feedback`: Các thông số điều chỉnh chorus.
-        4. Hiệu ứng distortion:
-            - `--distortion`: Bật/tắt distortion.
-            - `--drive_db`: Mức độ méo âm thanh.
-        5. Hiệu ứng reverb:
-            - `--reverb`: Bật/tắt hồi âm.
-            - `--reverb_room_size`, `--reverb_damping`, `--reverb_wet_level`, `--reverb_dry_level`, `--reverb_width`, `--reverb_freeze_mode`: Điều chỉnh hồi âm.
-        6. Hiệu ứng pitch shift:
-            - `--pitchshift`: Bật/tắt thay đổi cao độ.
-            - `--pitch_shift`: Giá trị dịch cao độ.
-        7. Hiệu ứng delay:
-            - `--delay`: Bật/tắt delay.
-            - `--delay_seconds`, `--delay_feedback`, `--delay_mix`: Điều chỉnh thời gian trễ, phản hồi và hòa trộn.
-        8. Compressor:
-            - `--compressor`: Bật/tắt compressor.
-            - `--compressor_threshold`, `--compressor_ratio`, `--compressor_attack_ms`, `--compressor_release_ms`: Các thông số nén.
-        9. Limiter:
-            - `--limiter`: Bật/tắt giới hạn mức âm thanh.
-            - `--limiter_threshold`, `--limiter_release`: Ngưỡng giới hạn và thời gian nhả.
-        10. Gain (Khuếch đại):
-            - `--gain`: Bật/tắt gain.
-            - `--gain_db`: Mức gain (dB).
-        11. Bitcrush:
-            - `--bitcrush`: Bật/tắt hiệu ứng giảm độ phân giải.
-            - `--bitcrush_bit_depth`: Số bit của bitcrush.
-        12. Clipping:
-            - `--clipping`: Bật/tắt cắt âm thanh.
-            - `--clipping_threshold`: Ngưỡng clipping.
-        13. Phaser:
-            - `--phaser`: Bật/tắt hiệu ứng phaser.
-            - `--phaser_rate_hz`, `--phaser_depth`, `--phaser_centre_frequency_hz`, `--phaser_feedback`, `--phaser_mix`: Điều chỉnh hiệu ứng phaser.
-        14. Boost bass & treble:
-            - `--treble_bass_boost`: Bật/tắt tăng cường âm bass và treble.
-            - `--bass_boost_db`, `--bass_boost_frequency`, `--treble_boost_db`, `--treble_boost_frequency`: Các thông số tăng bass và treble.
-        15. Fade in & fade out:
-            - `--fade_in_out`: Bật/tắt hiệu ứng fade.
-            - `--fade_in_duration`, `--fade_out_duration`: Thời gian fade vào/ra.
-        16. Kết hợp âm thanh:
-            - `--audio_combination`: Bật/tắt ghép nhiều tệp âm thanh.
-            - `--audio_combination_input`: Đường dẫn tệp âm thanh bổ sung.
-            - `--main_volume`: Âm lượng của âm thanh chính.
-            - `--combination_volume`:: Âm lượng của âm thanh cần kết hợp.
-    """)
-    quit()
-elif argv_is_allows[9] in argv:
-    print("""Các tham số của --convert:
-        1. Cấu hình xử lí giọng nói:
-            - `--pitch` (mặc định: `0`): Điều chỉnh cao độ.
-            - `--filter_radius` (mặc định: `3`): Độ mượt của đường F0.
-            - `--index_rate` (mặc định: `0.5`): Tỷ lệ sử dụng chỉ mục giọng nói.
-            - `--rms_mix_rate` (mặc định: `1`): Hệ số điều chỉnh biên độ âm lượng.
-            - `--protect` (mặc định: `0.33`): Bảo vệ phụ âm.
-            - `--hop_length` (mặc định: `64`): Bước nhảy khi xử lí âm thanh.
-        2. Cấu hình F0:
-            - `--f0_method` (mặc định: `rmvpe`): Phương pháp dự đoán F0 (`pm`, `dio`, `mangio-crepe-tiny`, `mangio-crepe-small`, `mangio-crepe-medium`, `mangio-crepe-large`, `mangio-crepe-full`, `crepe-tiny`, `crepe-small`, `crepe-medium`, `crepe-large`, `crepe-full`, `fcpe`, `fcpe-legacy`, `rmvpe`, `rmvpe-legacy`, `harvest`, `yin`, `pyin`, `swipe`).
-            - `--f0_autotune` (mặc định: `False`): Có tự động điều chỉnh F0 hay không.
-            - `--f0_autotune_strength` (mặc định: `1`): Cường độ hiệu chỉnh tự động F0.
-            - `--f0_file` (mặc định: ``): Đường dẫn tệp F0 có sẵn.
-            - `--f0_onnx` (mặc định: `False`): Có sử dụng phiên bản ONNX của F0 hay không.
-            - `--proposal_pitch` (mặc định: `False`): Đề xuất cao độ thay vì điều chỉnh thủ công.
-            - `--proposal_pitch_threshold` (mặc định: `0.0`): Ngưỡng tần số ước tính cao độ.
-            - `--alpha` (mặc định: `0.5`): Ngưỡng trộn cao độ khi ước tính cao độ hybrid.
-        3. Mô hình nhúng:
-            - `--embedder_model` (mặc định: `hubert_base`): Mô hình nhúng sử dụng.
-            - `--embedders_mode` (mặc định: `fairseq`): Chế độ nhúng (`fairseq`, `transformers`, `onnx`, `whisper`).
-        4. Đường dẫn tệp:
-            - `--input_path` (bắt buộc): Đường dẫn tệp âm thanh đầu vào.
-            - `--output_path` (mặc định: `./audios/output.wav`): Đường dẫn lưu tệp đầu ra.
-            - `--export_format` (mặc định: `wav`): Định dạng xuất tệp.
-            - `--pth_path` (bắt buộc): Đường dẫn đến tệp mô hình `.pth`.
-            - `--index_path` (mặc định: `None`): Đường dẫn tệp chỉ mục (nếu có).
-        5. Làm sạch âm thanh:
-            - `--clean_audio` (mặc định: `False`): Có áp dụng làm sạch âm thanh không.
-            - `--clean_strength` (mặc định: `0.7`): Mức độ làm sạch.
-        6. Resampling & chia nhỏ âm thanh:
-            - `--resample_sr` (mặc định: `0`): Tần số lấy mẫu mới (0 nghĩa là giữ nguyên).
-            - `--split_audio` (mặc định: `False`): Có chia nhỏ audio trước khi xử lí không.
-        7. Kiểm tra & tối ưu hóa:
-            - `--checkpointing` (mặc định: `False`): Bật/tắt checkpointing để tiết kiệm RAM.
-        8. Dịch formant:
-            - `--formant_shifting` (mặc định: `False`): Có bật hiệu ứng dịch formant không.
-            - `--formant_qfrency` (mặc định: `0.8`): Hệ số dịch formant theo tần số.
-            - `--formant_timbre` (mặc định: `0.8`): Hệ số thay đổi màu sắc giọng.
-    """)
-    quit()
-elif argv_is_allows[10] in argv:
-    print("""Các tham số của --create_dataset:
-        1. Đường dẫn & cấu hình dataset:
-            - `--input_data` (bắt buộc): Đường dẫn liên kết đến âm thanh (Liên kết Youtube, có thể dùng dấu `,` để dùng nhiều liên kết).
-            - `--output_dirs` (mặc định: `./dataset`): Thư mục xuất dữ liệu đầu ra.
-            - `--sample_rate` (mặc định: `48000`): Tần số lấy mẫu cho âm thanh.
-        2. Làm sạch dữ liệu:
-            - `--clean_dataset` (mặc định: `False`): Có áp dụng làm sạch dữ liệu hay không.
-            - `--clean_strength` (mặc định: `0.7`): Mức độ làm sạch dữ liệu.
-        3. Tách giọng & hiệu ứng:
-            - `--separate` (mặc định: `True`): có tách nhạc hay không.
-            - `--separator_reverb` (mặc định: `False`): Có tách vang giọng không.
-            - `--model_name` (mặc định: `MDXNET_Main`): Mô hình tách nhạc ('Main_340', 'Main_390', 'Main_406', 'Main_427', 'Main_438', 'Inst_full_292', 'Inst_HQ_1', 'Inst_HQ_2', 'Inst_HQ_3', 'Inst_HQ_4', 'Inst_HQ_5', 'Kim_Vocal_1', 'Kim_Vocal_2', 'Kim_Inst', 'Inst_187_beta', 'Inst_82_beta', 'Inst_90_beta', 'Voc_FT', 'Crowd_HQ', 'MDXNET_9482', 'Inst_1', 'Inst_2', 'Inst_3', 'MDXNET_1_9703', 'MDXNET_2_9682', 'MDXNET_3_9662', 'Inst_Main', 'MDXNET_Main', 'HT-Tuned', 'HT-Normal', 'HD_MMI', 'HT_6S', 'HP-1', 'HP-2', 'HP-Vocal-1', 'HP-Vocal-2', 'HP2-1', 'HP2-2', 'HP2-3', 'SP-2B-1', 'SP-2B-2', 'SP-3B-1', 'SP-4B-1', 'SP-4B-2', 'SP-MID-1', 'SP-MID-2').
-            - `--reverb_model` (mặc định: `MDX-Reverb`): Mô hình tách nhạc ("MDX-Reverb", 'VR-Reverb', 'Echo-Aggressive', 'Echo-Normal').
-            - `--denoise_model` (mặc định: `Normal`): Mô hình tách nhạc ('Lite', 'Normal').
-        4. Cấu hình xử lí âm thanh:
-            - `--shifts` (mặc định: `2`): Số lượng dự đoán.
-            - `--batch_size` (mặc định: `1`): Kích thước lô.
-            - `--overlap` (mặc định: `0.25`): Mức độ chồng lấn giữa các đoạn.
-            - `--aggression` (mặc định: `5`): Cường độ chiết xuất thân chính.
-            - `--hop_length` (mặc định: `1024`): Bước nhảy MDX khi xử lí.
-            - `--window_size` (mặc định: `512`): Kích thước cửa sổ.
-            - `--segments_size` (mặc định: `256`): Kích thước phân đoạn âm thanh.
-            - `--post_process_threshold` (mặc định: `0.2`): Mức độ xử lí hậu kỳ sau khi tách nhạc.
-        5. Cấu hình xử lí âm thanh khác:
-            - `--enable_tta` (mặc định: `False`): Tăng cường suy luận.
-            - `--enable_denoise` (mặc định: `False`): Khữ tách nhạc.
-            - `--high_end_process` (mặc định: `False`): Xử lí dải cao.
-            - `--enable_post_process` (mặc định: `False`): Hậu xử lí.
-        6. Bỏ qua phần âm thanh:
-            - `--skip_seconds` (mặc định: `False`): Có bỏ qua giây âm thanh nào không.
-            - `--skip_start_audios` (mặc định: `0`): Thời gian (giây) cần bỏ qua ở đầu audio.
-            - `--skip_end_audios` (mặc định: `0`): Thời gian (giây) cần bỏ qua ở cuối audio.
-    """)
-    quit()
-elif argv_is_allows[11] in argv:
-    print("""Các tham số của --create_index:
-        1. Thông tin mô hình:
-            - `--model_name` (bắt buộc): Tên mô hình.
-            - `--rvc_version` (mặc định: `v2`): Phiên bản (`v1`, `v2`).
-            - `--index_algorithm` (mặc định: `Auto`): Thuật toán index sử dụng (`Auto`, `Faiss`, `KMeans`).
-    """)
-    quit()
-elif argv_is_allows[12] in argv:
-    print("""Các tham số của --extract:
-        1. Thông tin mô hình:
-            - `--model_name` (bắt buộc): Tên mô hình.
-            - `--rvc_version` (mặc định: `v2`): Phiên bản RVC (`v1`, `v2`).
-        2. Cấu hình F0:
-            - `--f0_method` (mặc định: `rmvpe`): Phương pháp dự đoán F0 (`pm`, `dio`, `mangio-crepe-tiny`, `mangio-crepe-small`, `mangio-crepe-medium`, `mangio-crepe-large`, `mangio-crepe-full`, `crepe-tiny`, `crepe-small`, `crepe-medium`, `crepe-large`, `crepe-full`, `fcpe`, `fcpe-legacy`, `rmvpe`, `rmvpe-legacy`, `harvest`, `yin`, `pyin`, `swipe`).
-            - `--f0_onnx` (mặc định: `False`): Có sử dụng phiên bản ONNX của F0 hay không.
-            - `--pitch_guidance` (mặc định: `True`): Có sử dụng hướng dẫn cao độ hay không.
-            - `--f0_autotune` (mặc định: `False`): Có tự động điều chỉnh F0 hay không.
-            - `--f0_autotune_strength` (mặc định: `1`): Cường độ hiệu chỉnh tự động F0.
-            - `--alpha` (mặc định: `0.5`): Ngưỡng trộn cao độ khi ước tính cao độ hybrid.
-        3. Cấu hình xử lí:
-            - `--hop_length` (mặc định: `128`): Độ dài bước nhảy trong quá trình xử lí.
-            - `--cpu_cores` (mặc định: `2`): Số lượng luồng CPU sử dụng.
-            - `--gpu` (mặc định: `-`): Chỉ định GPU sử dụng (ví dụ: `0` cho GPU đầu tiên, `-` để tắt GPU).
-            - `--sample_rate` (bắt buộc): Tần số lấy mẫu của âm thanh đầu vào.
-        4. Cấu hình nhúng:
-            - `--embedder_model` (mặc định: `hubert_base`): Tên mô hình nhúng.
-            - `--embedders_mode` (mặc định: `fairseq`): Chế độ nhúng (`fairseq`, `transformers`, `onnx`, `whisper`).
-        4. RMS:
-            - `--rms_extract` (mặc định: False): Trích xuất thêm năng lượng rms.
-    """)
-    quit()
-elif argv_is_allows[13] in argv:
-    print("""Các tham số của --preprocess:
-        1. Thông tin mô hình:
-            - `--model_name` (bắt buộc): Tên mô hình.
-        2. Cấu hình dữ liệu:
-            - `--dataset_path` (mặc định: `./dataset`): Đường dẫn thư mục chứa tệp dữ liệu.
-            - `--sample_rate` (bắt buộc): Tần số lấy mẫu của dữ liệu âm thanh.
-        3. Cấu hình xử lí:
-            - `--cpu_cores` (mặc định: `2`): Số lượng luồng CPU sử dụng.
-            - `--cut_preprocess` (mặc định: `Automatic`): Cách cắt dữ liệu tiền xử lí (`Automatic`, `Simple`, `Skip`).
-            - `--process_effects` (mặc định: `False`): Có áp dụng tiền xử lí hay không.
-            - `--clean_dataset` (mặc định: `False`): Có làm sạch tệp dữ liệu hay không.
-            - `--clean_strength` (mặc định: `0.7`): Độ mạnh của quá trình làm sạch dữ liệu.
-        4. Cấu hình khác:
-            - `--chunk_len` (mặc định: `3.0`): Độ dài của đoạn âm thanh cho phương pháp 'Simple'.
-            - `--overlap_len` (mặc định: `0.3`): Độ dài của phần chồng chéo giữa các lát cắt đối với phương pháp 'Simple'.
-            - `--normalization_mode` (mặc định: `none`): Có xử lí chuẩn hóa âm thanh không (`none`, `pre`, `post`)
-    """)
-    quit()
-elif argv_is_allows[14] in argv:
-    print("""Các tham số của --separate_music:
-        1. Cấu hình đầu vào, đầu ra:
-            - `--input_path` (bắt buộc): Đường dẫn tệp âm thanh đầu vào.
-            - `--output_dirs` (mặc định: `./audios`): Thư mục lưu tệp đầu ra.
-            - `--export_format` (mặc định: `wav`): Định dạng xuất tệp (`wav`, `mp3`,...).
-            - `--sample_rate` (mặc định: `44100`): Tần số lấy mẫu của âm thanh đầu ra.
-        2. Cấu hình mô hình:
-            - `--model_name` (mặc định: `MDXNET_Main`): Mô hình tách nhạc ('Main_340', 'Main_390', 'Main_406', 'Main_427', 'Main_438', 'Inst_full_292', 'Inst_HQ_1', 'Inst_HQ_2', 'Inst_HQ_3', 'Inst_HQ_4', 'Inst_HQ_5', 'Kim_Vocal_1', 'Kim_Vocal_2', 'Kim_Inst', 'Inst_187_beta', 'Inst_82_beta', 'Inst_90_beta', 'Voc_FT', 'Crowd_HQ', 'MDXNET_9482', 'Inst_1', 'Inst_2', 'Inst_3', 'MDXNET_1_9703', 'MDXNET_2_9682', 'MDXNET_3_9662', 'Inst_Main', 'MDXNET_Main', 'HT-Tuned', 'HT-Normal', 'HD_MMI', 'HT_6S', 'HP-1', 'HP-2', 'HP-Vocal-1', 'HP-Vocal-2', 'HP2-1', 'HP2-2', 'HP2-3', 'SP-2B-1', 'SP-2B-2', 'SP-3B-1', 'SP-4B-1', 'SP-4B-2', 'SP-MID-1', 'SP-MID-2').
-            - `--karaoke_model` (mặc định: `MDX-Version-1`): Mô hình tách nhạc ('MDX-Version-1', 'MDX-Version-2', 'VR-Version-1', 'VR-Version-2').
-            - `--reverb_model` (mặc định: `MDX-Reverb`): Mô hình tách nhạc ("MDX-Reverb", 'VR-Reverb', 'Echo-Aggressive', 'Echo-Normal').
-            - `--denoise_model` (mặc định: `Normal`): Mô hình tách nhạc ('Lite', 'Normal').
-        3. Cấu hình xử lí âm thanh:
-            - `--shifts` (mặc định: `2`): Số lượng dự đoán.
-            - `--batch_size` (mặc định: `1`): Kích thước lô.
-            - `--overlap` (mặc định: `0.25`): Mức độ chồng lấn giữa các đoạn.
-            - `--aggression` (mặc định: `5`): Cường độ chiết xuất thân chính.
-            - `--hop_length` (mặc định: `1024`): Bước nhảy MDX khi xử lí.
-            - `--window_size` (mặc định: `512`): Kích thước cửa sổ.
-            - `--segments_size` (mặc định: `256`): Kích thước phân đoạn âm thanh.
-            - `--post_process_threshold` (mặc định: `0.2`): Mức độ xử lí hậu kỳ sau khi tách nhạc.
-        4. Cấu hình xử lí âm thanh khác:
-            - `--enable_tta` (mặc định: `False`): Tăng cường suy luận.
-            - `--enable_denoise` (mặc định: `False`): Khữ tách nhạc.
-            - `--high_end_process` (mặc định: `False`): Xử lí dải cao.
-            - `--enable_post_process` (mặc định: `False`): Hậu xử lí.
-            - `--separate_backing` (mặc định: `False`): Tách bè giọng.
-            - `--separate_reverb` (mặc định: `False`): Tách vang giọng.
-    """)
-    quit()
-elif argv_is_allows[15] in argv:
-    print("""Các tham số của --train:
-        1. Cấu hình mô hình:
-            - `--model_name` (bắt buộc): Tên mô hình.
-            - `--rvc_version` (mặc định: `v2`): Phiên bản RVC (`v1`, `v2`).
-            - `--model_author` (tùy chọn): Tác giả của mô hình.
-        2. Cấu hình lưu:
-            - `--save_every_epoch` (bắt buộc): Số kỷ nguyên giữa mỗi lần lưu.
-            - `--save_only_latest` (mặc định: `True`): Chỉ lưu điểm mới nhất.
-            - `--save_every_weights` (mặc định: `True`): Lưu tất cả trọng số của mô hình.
-        3. Cấu hình huấn luyện:
-            - `--total_epoch` (mặc định: `300`): Tổng số kỷ nguyên huấn luyện.
-            - `--batch_size` (mặc định: `8`): Kích thước lô trong quá trình huấn luyện.
-        4. Cấu hình thiết bị:
-            - `--gpu` (mặc định: `0`): Chỉ định GPU để sử dụng (số hoặc `-` nếu không dùng GPU).
-            - `--cache_data_in_gpu` (mặc định: `False`): Lưu dữ liệu vào GPU để tăng tốc.
-        5. Cấu hình huấn luyện nâng cao:
-            - `--pitch_guidance` (mặc định: `True`): Sử dụng hướng dẫn cao độ.
-            - `--g_pretrained_path` (mặc định: ``): Đường dẫn đến trọng số G đã huấn luyện trước.
-            - `--d_pretrained_path` (mặc định: ``): Đường dẫn đến trọng số D đã huấn luyện trước.
-            - `--vocoder` (mặc định: `Default`): Bộ mã hóa được sử dụng (`Default`, `MRF-HiFi-GAN`, `RefineGAN`).
-            - `--energy_use` (mặc định: `False`): Sử dụng năng lượng rms.
-        6. Phát hiện huấn luyện quá mức:
-            - `--overtraining_detector` (mặc định: `False`): Bật/tắt chế độ phát hiện huấn luyện quá mức.
-            - `--overtraining_threshold` (mặc định: `50`): Ngưỡng để xác định huấn luyện quá mức.
-        7. Xử lí dữ liệu:
-            - `--cleanup` (mặc định: `False`): Dọn dẹp tệp huấn luyện cũ để tiến hành huấn luyện lại từ đầu.
-        8. Tối ưu:
-            - `--checkpointing` (mặc định: `False`): Bật/tắt checkpointing để tiết kiệm RAM.
-            - `--deterministic` (mặc định: `False`): Khi bật sẽ sử dụng các thuật toán có tính xác định cao, đảm bảo rằng mỗi lần chạy cùng một dữ liệu đầu vào sẽ cho kết quả giống nhau.
-            - `--benchmark` (mặc định: `False`): Khi bật sẽ thử nghiệm và chọn thuật toán tối ưu nhất cho phần cứng và kích thước cụ thể.
-            - `--optimizer` (mặc định: `AdamW`): Trình tối ưu hóa được sử dụng (`AdamW`, `RAdam`, `AnyPrecisionAdamW`).
-            - `--multiscale_mel_loss` (mặc định: `False`): So sánh phổ Mel của âm thanh thật và âm thanh giả ở nhiều thang độ khác nhau. Giúp mô hình học được chi tiết âm sắc, độ sáng và cấu trúc tần số tốt hơn, từ đó cải thiện chất lượng và độ tự nhiên của giọng nói đầu ra.
-        9. Bộ tham chiếu:
-            - `--use_custom_reference` (mặc định: `False`): Có tùy chỉnh bộ tham chiếu hay không.
-            - `--reference_path` (mặc định: `False`): Đường dẫn đến bộ tham chiếu.
-    """)
-    quit()
-elif argv_is_allows[18] in argv:
-    print("""Các tham số của --create_reference:
-        1. Đường dẫn tệp:
-            - `--audio_path` (bắt buộc): Đường dẫn tệp âm thanh đầu vào.
-            - `--reference_name` (mặc định: `reference`): Đường dẫn lưu bộ tham chiếu đầu ra.
-        2. Cấu hình bộ tham chiếu:
-            - `--pitch_guidance` (mặc định: `True`): Sử dụng hướng dẫn cao độ.
-            - `--energy_use` (mặc định: `False`): Sử dụng năng lượng rms.
-            - `--version` (mặc định: `v2`): Phiên bản RVC (`v1`, `v2`).
-        3. Cấu hình nhúng:
-            - `--embedder_model` (mặc định: `hubert_base`): Tên mô hình nhúng.
-            - `--embedders_mode` (mặc định: `fairseq`): Chế độ nhúng (`fairseq`, `transformers`, `onnx`, `whisper`).
-        4. Cấu hình F0:
-            - `--f0_method` (mặc định: `rmvpe`): Phương pháp dự đoán F0 (`pm`, `dio`, `mangio-crepe-tiny`, `mangio-crepe-small`, `mangio-crepe-medium`, `mangio-crepe-large`, `mangio-crepe-full`, `crepe-tiny`, `crepe-small`, `crepe-medium`, `crepe-large`, `crepe-full`, `fcpe`, `fcpe-legacy`, `rmvpe`, `rmvpe-legacy`, `harvest`, `yin`, `pyin`, `swipe`).
-            - `--f0_onnx` (mặc định: `False`): Có sử dụng phiên bản ONNX của F0 hay không.
-            - `--f0_up_key` (mặc định: `0`): Điều chỉnh cao độ.
-            - `--filter_radius` (mặc định: `3`): Độ mượt của đường F0.
-            - `--f0_autotune` (mặc định: `False`): Có tự động điều chỉnh F0 hay không.
-            - `--f0_autotune_strength` (mặc định: `1`): Cường độ hiệu chỉnh tự động F0.
-            - `--f0_file` (mặc định: ``): Đường dẫn tệp F0 có sẵn.
-            - `--proposal_pitch` (mặc định: `False`): Đề xuất cao độ thay vì điều chỉnh thủ công.
-            - `--proposal_pitch_threshold` (mặc định: `0.0`): Ngưỡng tần số ước tính cao độ.
-            - `--alpha` (mặc định: `0.5`): Ngưỡng trộn cao độ khi ước tính cao độ hybrid.
-    """)
-    quit()
-elif argv_is_allows[16] in argv:
-    print("""Sử dụng:
-        1. `--help_audio_effects`: Trợ giúp về phần thêm hiệu ứng âm thanh.
-        2. `--help_convert`: Trợ giúp về chuyển đổi âm thanh.
-        3. `--help_create_dataset`: Trợ giúp về tạo dữ liệu huấn luyện.
-        4. `--help_create_index`: Trợ giúp về tạo chỉ mục.
-        5. `--help_extract`: Trợ giúp về trích xuất dữ liệu huấn luyện.
-        6. `--help_preprocess`: Trợ giúp về xử lí trước dữ liệu.
-        7. `--help_separate_music`: Trợ giúp về tách nhạc.
-        8. `--help_train`: Trợ giúp về huấn luyện mô hình.
-        9. `--help_create_reference`: Trợ giúp về tạo bộ tham chiếu.
-    """)
-    quit()
-if __name__ == "__main__":
-    # Script entry point: pick the multiprocessing start method for the
-    # requested mode, then hand control to main().
-    import torch.multiprocessing as mp
-
-    if "--train" in argv:
-        mp.set_start_method("spawn")
-    # Preprocess/extract pass force=True, overriding any start method already set.
-    if any(flag in argv for flag in ("--preprocess", "--extract")):
-        mp.set_start_method("spawn", force=True)
-
-    main()

main/app/run_tensorboard.py DELETED Viewed

@@ -1,32 +0,0 @@
-import os
-import sys
-import time
-import logging
-import warnings
-import webbrowser
-from tensorboard import program
-sys.path.append(os.getcwd())
-from main.app.variables import config, translations, logger
-def launch_tensorboard():
-    """Launch TensorBoard and return a message containing its URL.
-
-    Python warnings are silenced and the "root" and "tensorboard" loggers are
-    set to ERROR level before launching. The log directory and port are read
-    from ``config.configs``. When ``--open`` is present in ``sys.argv`` the URL
-    is also passed to ``webbrowser.open``.
-
-    Returns:
-        The ``translations['tensorboard_url']`` label followed by the URL
-        returned by ``TensorBoard.launch()``.
-    """
-    warnings.filterwarnings("ignore")
-    for logger_name in ("root", "tensorboard"):
-        logging.getLogger(logger_name).setLevel(logging.ERROR)
-
-    logs_dir = config.configs["logs_path"]
-    port_flag = f"--port={config.configs['tensorboard_port']}"
-
-    board = program.TensorBoard()
-    board.configure(argv=[None, "--logdir", logs_dir, port_flag])
-    url = board.launch()
-
-    # The same text is logged and handed back to the caller.
-    message = f"{translations['tensorboard_url']}: {url}"
-    logger.info(message)
-
-    if "--open" in sys.argv:
-        webbrowser.open(url)
-    return message
-if __name__ == "__main__":
-    launch_tensorboard()
-    # Keep the process alive after launching (presumably the server runs in
-    # the background -- confirm); wake up every 5 seconds and loop forever.
-    while True:
-        time.sleep(5)

main/app/tabs/downloads/downloads.py DELETED Viewed

@@ -1,112 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.variables import translations, configs, models, model_options
-from main.app.core.downloads import download_model, search_models, download_pretrained_model
-from main.app.core.ui import change_download_choices, change_download_pretrained_choices, shutil_move
-from main.app.core.process import fetch_pretrained_data, save_drop_model, update_sample_rate_dropdown
-def download_tab():
-    """Build the "downloads" Gradio tab and wire up its event handlers.
-
-    The tab contains two accordions: one for downloading voice models (by URL,
-    from the ``models`` mapping, by search, or by upload) and one for
-    downloading pretrained weights (by URL, from a list, or by upload).
-    Tab visibility is taken from ``configs.get("downloads_tab", True)``.
-    Must be called inside an active Gradio layout context.
-    """
-    with gr.TabItem(translations["downloads"], visible=configs.get("downloads_tab", True)):
-        gr.Markdown(translations["download_markdown"])
-        with gr.Row():
-            gr.Markdown(translations["download_markdown_2"])
-        # --- Model download section ---
-        with gr.Row():
-            with gr.Accordion(translations["model_download"], open=True):
-                with gr.Row():
-                    # Selects which group of controls below is shown (see downloadmodel.change).
-                    downloadmodel = gr.Radio(label=translations["model_download_select"], choices=[translations["download_url"], translations["download_from_csv"], translations["search_models"], translations["upload"]], interactive=True, value=translations["download_url"])
-                with gr.Row():
-                    gr.Markdown("___")
-                # URL download controls: the only group visible initially.
-                with gr.Column():
-                    with gr.Row():
-                        url_input = gr.Textbox(label=translations["model_url"], value="", placeholder="https://...", scale=6)
-                        download_model_name = gr.Textbox(label=translations["modelname"], value="", placeholder=translations["modelname"], scale=2)
-                    url_download = gr.Button(value=translations["downloads"], scale=2)
-                # Pick-from-list controls, populated from the keys of `models` (start hidden).
-                with gr.Column():
-                    model_browser = gr.Dropdown(choices=models.keys(), label=translations["model_warehouse"], scale=8, allow_custom_value=True, visible=False)
-                    download_from_browser = gr.Button(value=translations["get_model"], scale=2, variant="primary", visible=False)
-                # Search controls (start hidden).
-                with gr.Column():
-                    search_name = gr.Textbox(label=translations["name_to_search"], placeholder=translations["modelname"], interactive=True, scale=8, visible=False)
-                    search = gr.Button(translations["search_2"], scale=2, visible=False)
-                    search_dropdown = gr.Dropdown(label=translations["select_download_model"], value="", choices=[], allow_custom_value=True, interactive=False, visible=False)
-                    download = gr.Button(translations["downloads"], variant="primary", visible=False)
-                # Upload control (starts hidden).
-                with gr.Column():
-                    model_upload = gr.Files(label=translations["drop_model"], file_types=[".pth", ".onnx", ".index", ".zip"], visible=False)
-        # --- Pretrained model download section ---
-        with gr.Row():
-            with gr.Accordion(translations["download_pretrained_2"], open=False):
-                with gr.Row():
-                    # Selects which group of pretrained controls is shown (see pretrain_download_choices.change).
-                    pretrain_download_choices = gr.Radio(label=translations["model_download_select"], choices=[translations["download_url"], translations["list_model"], translations["upload"]], value=translations["download_url"], interactive=True)
-                with gr.Row():
-                    gr.Markdown("___")
-                # URL inputs for the D and G pretrained weights: visible initially.
-                with gr.Column():
-                    with gr.Row():
-                        pretrainD = gr.Textbox(label=translations["pretrained_url"].format(dg="D"), value="", placeholder="https://...", interactive=True, scale=4)
-                        pretrainG = gr.Textbox(label=translations["pretrained_url"].format(dg="G"), value="", placeholder="https://...", interactive=True, scale=4)
-                    download_pretrain_button = gr.Button(translations["downloads"], scale=2)
-                # Pick-from-list controls; choices come from fetch_pretrained_data() at build time (start hidden).
-                with gr.Column():
-                    with gr.Row():
-                        pretrain_choices = gr.Dropdown(label=translations["select_pretrain"], info=translations["select_pretrain_info"], choices=list(fetch_pretrained_data().keys()), value="Titan_Medium", allow_custom_value=True, interactive=True, scale=6, visible=False)
-                        sample_rate_pretrain = gr.Dropdown(label=translations["pretrain_sr"], info=translations["pretrain_sr"], choices=["48k", "40k", "32k"], value="48k", interactive=True, visible=False)
-                    download_pretrain_choices_button = gr.Button(translations["downloads"], scale=2, variant="primary", visible=False)
-                # Upload control for pretrained .pth files (starts hidden).
-                with gr.Row():
-                    pretrain_upload = gr.Files(label=translations["drop_pretrain"].format(dg="G, D"), file_types=[".pth"], visible=False)
-        # --- Event wiring: model downloads ---
-        with gr.Row():
-            url_download.click(
-                fn=download_model,
-                inputs=[
-                    url_input,
-                    download_model_name
-                ],
-                outputs=[url_input],
-                api_name="download_model"
-            )
-            # Look up the selected name in `models` and pass the mapped value plus the name to download_model.
-            download_from_browser.click(
-                fn=lambda model: download_model(models[model], model),
-                inputs=[model_browser],
-                outputs=[model_browser],
-                api_name="download_browser"
-            )
-        with gr.Row():
-            # change_download_choices receives the radio value and updates all ten model-download controls.
-            downloadmodel.change(fn=change_download_choices, inputs=[downloadmodel], outputs=[url_input, download_model_name, url_download, model_browser, download_from_browser, search_name, search, search_dropdown, download, model_upload])
-            search.click(fn=search_models, inputs=[search_name], outputs=[search_dropdown, download])
-            model_upload.upload(fn=save_drop_model, inputs=[model_upload], outputs=[model_upload])
-            # Look up the selected search result in `model_options` (presumably filled by search_models -- confirm).
-            download.click(
-                fn=lambda model: download_model(model_options[model], model),
-                inputs=[search_dropdown],
-                outputs=[search_dropdown],
-                api_name="search_models"
-            )
-        # --- Event wiring: pretrained downloads ---
-        with gr.Row():
-            pretrain_download_choices.change(fn=change_download_pretrained_choices, inputs=[pretrain_download_choices], outputs=[pretrainD, pretrainG, download_pretrain_button, pretrain_choices, sample_rate_pretrain, download_pretrain_choices_button, pretrain_upload])
-            pretrain_choices.change(fn=update_sample_rate_dropdown, inputs=[pretrain_choices], outputs=[sample_rate_pretrain])
-        with gr.Row():
-            # Both buttons call download_pretrained_model; the first argument is the selected radio value,
-            # followed by either the D/G URLs or the chosen pretrained name and sample rate.
-            download_pretrain_button.click(
-                fn=download_pretrained_model,
-                inputs=[
-                    pretrain_download_choices,
-                    pretrainD,
-                    pretrainG
-                ],
-                outputs=[pretrainD, pretrainG],
-                api_name="download_pretrain_link"
-            )
-            download_pretrain_choices_button.click(
-                fn=download_pretrained_model,
-                inputs=[
-                    pretrain_download_choices,
-                    pretrain_choices,
-                    sample_rate_pretrain
-                ],
-                outputs=[pretrain_choices],
-                api_name="download_pretrain_choices"
-            )
-            # Pass each uploaded file's path to shutil_move together with configs["pretrained_custom_path"];
-            # no components are updated (outputs is empty).
-            pretrain_upload.upload(
-                fn=lambda pretrain_upload: [shutil_move(pretrain.name, configs["pretrained_custom_path"]) for pretrain in pretrain_upload],
-                inputs=[pretrain_upload],
-                outputs=[],
-                api_name="upload_pretrain"
-            )

main/app/tabs/editing/child/audio_effects.py DELETED Viewed

@@ -1,393 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.editing import audio_effects
-from main.app.core.presets import audio_effect_load_presets, audio_effect_save_presets
-from main.app.core.ui import visible, change_audios_choices, change_effect_preset_choices, shutil_move
-from main.app.variables import translations, paths_for_files, sample_rate_choice, audio_effect_presets_file, configs, file_types, export_format_choices
-def audio_effects_tab():
-    with gr.Row():
-        gr.Markdown(translations["audio_effects_edit"])
-    with gr.Row():
-        with gr.Column():
-            with gr.Row():
-                reverb_check_box = gr.Checkbox(label=translations["reverb"], value=False, interactive=True)
-                chorus_check_box = gr.Checkbox(label=translations["chorus"], value=False, interactive=True)
-                delay_check_box = gr.Checkbox(label=translations["delay"], value=False, interactive=True)
-                phaser_check_box = gr.Checkbox(label=translations["phaser"], value=False, interactive=True)
-                compressor_check_box = gr.Checkbox(label=translations["compressor"], value=False, interactive=True)
-                more_options = gr.Checkbox(label=translations["more_option"], value=False, interactive=True)
-    with gr.Row():
-        with gr.Accordion(translations["input_output"], open=False):
-            with gr.Row():
-                upload_audio = gr.Files(label=translations["drop_audio"], file_types=file_types)
-            with gr.Row():
-                audio_in_path = gr.Dropdown(label=translations["input_audio"], value="", choices=paths_for_files, info=translations["provide_audio"], interactive=True, allow_custom_value=True)
-                audio_out_path = gr.Textbox(label=translations["output_audio"], value="audios/audio_effects.wav", placeholder="audios/audio_effects.wav", info=translations["provide_output"], interactive=True)
-            with gr.Row():
-                with gr.Column():
-                    audio_combination = gr.Checkbox(label=translations["merge_instruments"], value=False, interactive=True)
-                    audio_combination_input = gr.Dropdown(label=translations["input_audio"], value="", choices=paths_for_files, info=translations["provide_audio"], interactive=True, allow_custom_value=True, visible=audio_combination.value)
-            with gr.Row():
-                main_vol = gr.Slider(minimum=-80, maximum=80, label=translations["main_volume"], info=translations["main_volume_info"], value=-4, step=1, interactive=True, visible=audio_combination.value)
-                combine_vol = gr.Slider(minimum=-80, maximum=80, label=translations["combination_volume"], info=translations["combination_volume_info"], value=-7, step=1, interactive=True, visible=audio_combination.value)
-            with gr.Row():
-                audio_effects_refresh = gr.Button(translations["refresh"])
-            with gr.Row():
-                audio_output_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
-    with gr.Row():
-        with gr.Accordion(translations["use_presets"], open=False):
-            with gr.Row():
-                presets_name = gr.Dropdown(label=translations["file_preset"], choices=audio_effect_presets_file, value=audio_effect_presets_file[0] if len(audio_effect_presets_file) > 0 else '', interactive=True, allow_custom_value=True)
-            with gr.Row():
-                load_click = gr.Button(translations["load_file"], variant="primary")
-                refresh_click = gr.Button(translations["refresh"])
-            with gr.Accordion(translations["export_file"], open=False):
-                with gr.Row():
-                    with gr.Column():
-                        name_to_save_file = gr.Textbox(label=translations["filename_to_save"])
-                        save_file_button = gr.Button(translations["export_file"])
-            with gr.Row():
-                upload_presets = gr.Files(label=translations["upload_presets"], file_types=[".effect.json"])
-    with gr.Row():
-        apply_effects_button = gr.Button(translations["apply"], variant="primary", scale=2)
-    with gr.Row():
-        with gr.Column():
-            with gr.Row():
-                with gr.Accordion(translations["reverb"], open=False, visible=reverb_check_box.value) as reverb_accordion:
-                    reverb_freeze_mode = gr.Checkbox(label=translations["reverb_freeze"], info=translations["reverb_freeze_info"], value=False, interactive=True)
-                    reverb_room_size = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.15, label=translations["room_size"], info=translations["room_size_info"], interactive=True)
-                    reverb_damping = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label=translations["damping"], info=translations["damping_info"], interactive=True)
-                    reverb_wet_level = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.2, label=translations["wet_level"], info=translations["wet_level_info"], interactive=True)
-                    reverb_dry_level = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.8, label=translations["dry_level"], info=translations["dry_level_info"], interactive=True)
-                    reverb_width = gr.Slider(minimum=0, maximum=1, step=0.01, value=1, label=translations["width"], info=translations["width_info"], interactive=True)
-            with gr.Row():
-                with gr.Accordion(translations["chorus"], open=False, visible=chorus_check_box.value) as chorus_accordion:
-                    chorus_depth = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["chorus_depth"], info=translations["chorus_depth_info"], interactive=True)
-                    chorus_rate_hz = gr.Slider(minimum=0.1, maximum=10, step=0.1, value=1.5, label=translations["chorus_rate_hz"], info=translations["chorus_rate_hz_info"], interactive=True)
-                    chorus_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["chorus_mix"], info=translations["chorus_mix_info"], interactive=True)
-                    chorus_centre_delay_ms = gr.Slider(minimum=0, maximum=50, step=1, value=10, label=translations["chorus_centre_delay_ms"], info=translations["chorus_centre_delay_ms_info"], interactive=True)
-                    chorus_feedback = gr.Slider(minimum=-1, maximum=1, step=0.01, value=0, label=translations["chorus_feedback"], info=translations["chorus_feedback_info"], interactive=True)
-            with gr.Row():
-                with gr.Accordion(translations["delay"], open=False, visible=delay_check_box.value) as delay_accordion:
-                    delay_second = gr.Slider(minimum=0, maximum=5, step=0.01, value=0.5, label=translations["delay_seconds"], info=translations["delay_seconds_info"], interactive=True)
-                    delay_feedback = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["delay_feedback"], info=translations["delay_feedback_info"], interactive=True)
-                    delay_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["delay_mix"], info=translations["delay_mix_info"], interactive=True)
-        with gr.Column():
-            with gr.Row():
-                with gr.Accordion(translations["more_option"], open=False, visible=more_options.value) as more_accordion:
-                    with gr.Row():
-                        fade = gr.Checkbox(label=translations["fade"], value=False, interactive=True)
-                        bass_or_treble = gr.Checkbox(label=translations["bass_or_treble"], value=False, interactive=True)
-                        limiter = gr.Checkbox(label=translations["limiter"], value=False, interactive=True)
-                        resample_checkbox = gr.Checkbox(label=translations["resample"], value=False, interactive=True)
-                    with gr.Row():
-                        distortion_checkbox = gr.Checkbox(label=translations["distortion"], value=False, interactive=True)
-                        gain_checkbox = gr.Checkbox(label=translations["gain"], value=False, interactive=True)
-                        bitcrush_checkbox = gr.Checkbox(label=translations["bitcrush"], value=False, interactive=True)
-                        clipping_checkbox = gr.Checkbox(label=translations["clipping"], value=False, interactive=True)
-                    with gr.Accordion(translations["fade"], open=True, visible=fade.value) as fade_accordion:
-                        with gr.Row():
-                            fade_in = gr.Slider(minimum=0, maximum=10000, step=100, value=0, label=translations["fade_in"], info=translations["fade_in_info"], interactive=True)
-                            fade_out = gr.Slider(minimum=0, maximum=10000, step=100, value=0, label=translations["fade_out"], info=translations["fade_out_info"], interactive=True)
-                    with gr.Accordion(translations["bass_or_treble"], open=True, visible=bass_or_treble.value) as bass_treble_accordion:
-                        with gr.Row():
-                            bass_boost = gr.Slider(minimum=0, maximum=20, step=1, value=0, label=translations["bass_boost"], info=translations["bass_boost_info"], interactive=True)
-                            bass_frequency = gr.Slider(minimum=20, maximum=200, step=10, value=100, label=translations["bass_frequency"], info=translations["bass_frequency_info"], interactive=True)
-                        with gr.Row():
-                            treble_boost = gr.Slider(minimum=0, maximum=20, step=1, value=0, label=translations["treble_boost"], info=translations["treble_boost_info"], interactive=True)
-                            treble_frequency = gr.Slider(minimum=1000, maximum=10000, step=500, value=3000, label=translations["treble_frequency"], info=translations["treble_frequency_info"], interactive=True)
-                    with gr.Accordion(translations["limiter"], open=True, visible=limiter.value) as limiter_accordion:
-                        with gr.Row():
-                            limiter_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-1, label=translations["limiter_threshold_db"], info=translations["limiter_threshold_db_info"], interactive=True)
-                            limiter_release_ms = gr.Slider(minimum=10, maximum=1000, step=1, value=100, label=translations["limiter_release_ms"], info=translations["limiter_release_ms_info"], interactive=True)
-                    with gr.Column():
-                        pitch_shift_semitones = gr.Slider(minimum=-20, maximum=20, step=1, value=0, label=translations["pitch"], info=translations["pitch_info"], interactive=True)
-                        audio_effect_resample_sr = gr.Radio(choices=[0]+sample_rate_choice, value=0, label=translations["resample"], info=translations["resample_info"], interactive=True, visible=resample_checkbox.value)
-                        distortion_drive_db = gr.Slider(minimum=0, maximum=50, step=1, value=20, label=translations["distortion"], info=translations["distortion_info"], interactive=True, visible=distortion_checkbox.value)
-                        gain_db = gr.Slider(minimum=-60, maximum=60, step=1, value=0, label=translations["gain"], info=translations["gain_info"], interactive=True, visible=gain_checkbox.value)
-                        clipping_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-1, label=translations["clipping_threshold_db"], info=translations["clipping_threshold_db_info"], interactive=True, visible=clipping_checkbox.value)
-                        bitcrush_bit_depth = gr.Slider(minimum=1, maximum=24, step=1, value=16, label=translations["bitcrush_bit_depth"], info=translations["bitcrush_bit_depth_info"], interactive=True, visible=bitcrush_checkbox.value)
-            with gr.Row():
-                with gr.Accordion(translations["phaser"], open=False, visible=phaser_check_box.value) as phaser_accordion:
-                    phaser_depth = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["phaser_depth"], info=translations["phaser_depth_info"], interactive=True)
-                    phaser_rate_hz = gr.Slider(minimum=0.1, maximum=10, step=0.1, value=1, label=translations["phaser_rate_hz"], info=translations["phaser_rate_hz_info"], interactive=True)
-                    phaser_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["phaser_mix"], info=translations["phaser_mix_info"], interactive=True)
-                    phaser_centre_frequency_hz = gr.Slider(minimum=50, maximum=5000, step=10, value=1000, label=translations["phaser_centre_frequency_hz"], info=translations["phaser_centre_frequency_hz_info"], interactive=True)
-                    phaser_feedback = gr.Slider(minimum=-1, maximum=1, step=0.01, value=0, label=translations["phaser_feedback"], info=translations["phaser_feedback_info"], interactive=True)
-            with gr.Row():
-                with gr.Accordion(translations["compressor"], open=False, visible=compressor_check_box.value) as compressor_accordion:
-                    compressor_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-20, label=translations["compressor_threshold_db"], info=translations["compressor_threshold_db_info"], interactive=True)
-                    compressor_ratio = gr.Slider(minimum=1, maximum=20, step=0.1, value=1, label=translations["compressor_ratio"], info=translations["compressor_ratio_info"], interactive=True)
-                    compressor_attack_ms = gr.Slider(minimum=0.1, maximum=100, step=0.1, value=10, label=translations["compressor_attack_ms"], info=translations["compressor_attack_ms_info"], interactive=True)
-                    compressor_release_ms = gr.Slider(minimum=10, maximum=1000, step=1, value=100, label=translations["compressor_release_ms"], info=translations["compressor_release_ms_info"], interactive=True)
-    with gr.Row():
-        gr.Markdown(translations["output_audio"])
-    with gr.Row():
-        audio_play_input = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
-        audio_play_output = gr.Audio(show_download_button=True, interactive=False, label=translations["output_audio"])
-    with gr.Row():
-        reverb_check_box.change(fn=visible, inputs=[reverb_check_box], outputs=[reverb_accordion])
-        chorus_check_box.change(fn=visible, inputs=[chorus_check_box], outputs=[chorus_accordion])
-        delay_check_box.change(fn=visible, inputs=[delay_check_box], outputs=[delay_accordion])
-    with gr.Row():
-        compressor_check_box.change(fn=visible, inputs=[compressor_check_box], outputs=[compressor_accordion])
-        phaser_check_box.change(fn=visible, inputs=[phaser_check_box], outputs=[phaser_accordion])
-        more_options.change(fn=visible, inputs=[more_options], outputs=[more_accordion])
-    with gr.Row():
-        fade.change(fn=visible, inputs=[fade], outputs=[fade_accordion])
-        bass_or_treble.change(fn=visible, inputs=[bass_or_treble], outputs=[bass_treble_accordion])
-        limiter.change(fn=visible, inputs=[limiter], outputs=[limiter_accordion])
-        resample_checkbox.change(fn=visible, inputs=[resample_checkbox], outputs=[audio_effect_resample_sr])
-    with gr.Row():
-        distortion_checkbox.change(fn=visible, inputs=[distortion_checkbox], outputs=[distortion_drive_db])
-        gain_checkbox.change(fn=visible, inputs=[gain_checkbox], outputs=[gain_db])
-        clipping_checkbox.change(fn=visible, inputs=[clipping_checkbox], outputs=[clipping_threshold_db])
-        bitcrush_checkbox.change(fn=visible, inputs=[bitcrush_checkbox], outputs=[bitcrush_bit_depth])
-    with gr.Row():
-        upload_audio.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[upload_audio], outputs=[audio_in_path])
-        audio_in_path.change(fn=lambda audio: audio if audio else None, inputs=[audio_in_path], outputs=[audio_play_input])
-        audio_effects_refresh.click(fn=lambda a, b: [change_audios_choices(a), change_audios_choices(b)], inputs=[audio_in_path, audio_combination_input], outputs=[audio_in_path, audio_combination_input])
-    with gr.Row():
-        more_options.change(fn=lambda: [False]*8, inputs=[], outputs=[fade, bass_or_treble, limiter, resample_checkbox, distortion_checkbox, gain_checkbox, clipping_checkbox, bitcrush_checkbox])
-        audio_combination.change(fn=visible, inputs=[audio_combination], outputs=[audio_combination_input])
-        audio_combination.change(fn=lambda a: [visible(a) for _ in range(2)], inputs=[audio_combination], outputs=[main_vol, combine_vol])
-    with gr.Row():
-        upload_presets.upload(fn=lambda presets_in: [shutil_move(preset.name, configs["presets_path"]) for preset in presets_in][0], inputs=[upload_presets], outputs=[presets_name])
-        refresh_click.click(fn=change_effect_preset_choices, inputs=[], outputs=[presets_name])
-    with gr.Row():
-        load_click.click(
-            fn=audio_effect_load_presets,
-            inputs=[
-                presets_name,
-                resample_checkbox,
-                audio_effect_resample_sr,
-                chorus_depth,
-                chorus_rate_hz,
-                chorus_mix,
-                chorus_centre_delay_ms,
-                chorus_feedback,
-                distortion_drive_db,
-                reverb_room_size,
-                reverb_damping,
-                reverb_wet_level,
-                reverb_dry_level,
-                reverb_width,
-                reverb_freeze_mode,
-                pitch_shift_semitones,
-                delay_second,
-                delay_feedback,
-                delay_mix,
-                compressor_threshold_db,
-                compressor_ratio,
-                compressor_attack_ms,
-                compressor_release_ms,
-                limiter_threshold_db,
-                limiter_release_ms,
-                gain_db,
-                bitcrush_bit_depth,
-                clipping_threshold_db,
-                phaser_rate_hz,
-                phaser_depth,
-                phaser_centre_frequency_hz,
-                phaser_feedback,
-                phaser_mix,
-                bass_boost,
-                bass_frequency,
-                treble_boost,
-                treble_frequency,
-                fade_in,
-                fade_out,
-                chorus_check_box,
-                distortion_checkbox,
-                reverb_check_box,
-                delay_check_box,
-                compressor_check_box,
-                limiter,
-                gain_checkbox,
-                bitcrush_checkbox,
-                clipping_checkbox,
-                phaser_check_box,
-                bass_or_treble,
-                fade
-            ],
-            outputs=[
-                resample_checkbox,
-                audio_effect_resample_sr,
-                chorus_depth,
-                chorus_rate_hz,
-                chorus_mix,
-                chorus_centre_delay_ms,
-                chorus_feedback,
-                distortion_drive_db,
-                reverb_room_size,
-                reverb_damping,
-                reverb_wet_level,
-                reverb_dry_level,
-                reverb_width,
-                reverb_freeze_mode,
-                pitch_shift_semitones,
-                delay_second,
-                delay_feedback,
-                delay_mix,
-                compressor_threshold_db,
-                compressor_ratio,
-                compressor_attack_ms,
-                compressor_release_ms,
-                limiter_threshold_db,
-                limiter_release_ms,
-                gain_db,
-                bitcrush_bit_depth,
-                clipping_threshold_db,
-                phaser_rate_hz,
-                phaser_depth,
-                phaser_centre_frequency_hz,
-                phaser_feedback,
-                phaser_mix,
-                bass_boost,
-                bass_frequency,
-                treble_boost,
-                treble_frequency,
-                fade_in,
-                fade_out,
-                chorus_check_box,
-                distortion_checkbox,
-                reverb_check_box,
-                delay_check_box,
-                compressor_check_box,
-                limiter,
-                gain_checkbox,
-                bitcrush_checkbox,
-                clipping_checkbox,
-                phaser_check_box,
-                bass_or_treble,
-                fade
-            ],
-        )
-        save_file_button.click(
-            fn=audio_effect_save_presets,
-            inputs=[
-                name_to_save_file,
-                resample_checkbox,
-                audio_effect_resample_sr,
-                chorus_depth,
-                chorus_rate_hz,
-                chorus_mix,
-                chorus_centre_delay_ms,
-                chorus_feedback,
-                distortion_drive_db,
-                reverb_room_size,
-                reverb_damping,
-                reverb_wet_level,
-                reverb_dry_level,
-                reverb_width,
-                reverb_freeze_mode,
-                pitch_shift_semitones,
-                delay_second,
-                delay_feedback,
-                delay_mix,
-                compressor_threshold_db,
-                compressor_ratio,
-                compressor_attack_ms,
-                compressor_release_ms,
-                limiter_threshold_db,
-                limiter_release_ms,
-                gain_db,
-                bitcrush_bit_depth,
-                clipping_threshold_db,
-                phaser_rate_hz,
-                phaser_depth,
-                phaser_centre_frequency_hz,
-                phaser_feedback,
-                phaser_mix,
-                bass_boost,
-                bass_frequency,
-                treble_boost,
-                treble_frequency,
-                fade_in,
-                fade_out,
-                chorus_check_box,
-                distortion_checkbox,
-                reverb_check_box,
-                delay_check_box,
-                compressor_check_box,
-                limiter,
-                gain_checkbox,
-                bitcrush_checkbox,
-                clipping_checkbox,
-                phaser_check_box,
-                bass_or_treble,
-                fade
-            ],
-            outputs=[presets_name]
-        )
-    with gr.Row():
-        apply_effects_button.click(
-            fn=audio_effects,
-            inputs=[
-                audio_in_path,
-                audio_out_path,
-                resample_checkbox,
-                audio_effect_resample_sr,
-                chorus_depth,
-                chorus_rate_hz,
-                chorus_mix,
-                chorus_centre_delay_ms,
-                chorus_feedback,
-                distortion_drive_db,
-                reverb_room_size,
-                reverb_damping,
-                reverb_wet_level,
-                reverb_dry_level,
-                reverb_width,
-                reverb_freeze_mode,
-                pitch_shift_semitones,
-                delay_second,
-                delay_feedback,
-                delay_mix,
-                compressor_threshold_db,
-                compressor_ratio,
-                compressor_attack_ms,
-                compressor_release_ms,
-                limiter_threshold_db,
-                limiter_release_ms,
-                gain_db,
-                bitcrush_bit_depth,
-                clipping_threshold_db,
-                phaser_rate_hz,
-                phaser_depth,
-                phaser_centre_frequency_hz,
-                phaser_feedback,
-                phaser_mix,
-                bass_boost,
-                bass_frequency,
-                treble_boost,
-                treble_frequency,
-                fade_in,
-                fade_out,
-                audio_output_format,
-                chorus_check_box,
-                distortion_checkbox,
-                reverb_check_box,
-                delay_check_box,
-                compressor_check_box,
-                limiter,
-                gain_checkbox,
-                bitcrush_checkbox,
-                clipping_checkbox,
-                phaser_check_box,
-                bass_or_treble,
-                fade,
-                audio_combination,
-                audio_combination_input,
-                main_vol,
-                combine_vol
-            ],
-            outputs=[audio_play_output],
-            api_name="audio_effects"
-        )

main/app/tabs/editing/child/quirk.py DELETED Viewed

@@ -1,48 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.editing import apply_voice_quirk
-from main.app.core.ui import change_audios_choices, shutil_move
-from main.app.variables import translations, paths_for_files, configs, file_types, export_format_choices
-def quirk_tab():
-    with gr.Row():
-        gr.Markdown(translations["quirk_markdown"])
-    with gr.Row():
-        input_audio_play = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
-    with gr.Row():
-        quirk_choice = gr.Radio(label=translations["quirk_label"], info=translations["quirk_label_info"], choices=list(translations["quirk_choice"].keys()), interactive=True, value=list(translations["quirk_choice"].keys())[0])
-    with gr.Row():
-        apply_quirk_button = gr.Button(translations["apply"], variant="primary")
-    with gr.Row():
-        with gr.Accordion(translations["input_output"], open=False):
-            with gr.Row():
-                quirk_upload_audio = gr.Files(label=translations["drop_audio"], file_types=file_types)
-            with gr.Column():
-                quirk_export_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
-                quirk_input_path = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
-                quirk_output_path = gr.Textbox(label=translations["output_path"], value="audios/output.wav", placeholder="audios/output.wav", info=translations["output_path_info"], interactive=True)
-            with gr.Column():
-                quirk_refresh = gr.Button(translations["refresh"])
-    with gr.Row():
-        output_audio_play = gr.Audio(show_download_button=True, interactive=False, label=translations["output_audio"])
-    with gr.Row():
-        quirk_upload_audio.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[quirk_upload_audio], outputs=[quirk_input_path])
-        quirk_input_path.change(fn=lambda audio: audio if audio else None, inputs=[quirk_input_path], outputs=[input_audio_play])
-        quirk_refresh.click(fn=change_audios_choices, inputs=[quirk_input_path], outputs=[quirk_input_path])
-    with gr.Row():
-        apply_quirk_button.click(
-            fn=apply_voice_quirk,
-            inputs=[
-                quirk_input_path,
-                quirk_choice,
-                quirk_output_path,
-                quirk_export_format
-            ],
-            outputs=[output_audio_play],
-            api_name="quirk"
-        )

main/app/tabs/editing/editing.py DELETED Viewed

@@ -1,20 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.variables import configs, translations
-from main.app.tabs.editing.child.quirk import quirk_tab
-from main.app.tabs.editing.child.audio_effects import audio_effects_tab
-def editing_tab():
-    with gr.TabItem(translations["editing"], visible=configs.get("editing_tab", True)):
-        with gr.TabItem(translations["audio_effects"], visible=configs.get("effects_tab", True)):
-            gr.Markdown(translations["apply_audio_effects"])
-            audio_effects_tab()
-        with gr.TabItem(translations["quirk"], visible=configs.get("quirk", True)):
-            gr.Markdown(translations["quirk_info"])
-            quirk_tab()

main/app/tabs/extra/child/convert_model.py DELETED Viewed

@@ -1,31 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.ui import visible, shutil_move
-from main.app.core.model_utils import onnx_export
-from main.app.variables import translations, configs
-def convert_model_tab():
-    with gr.Row():
-        gr.Markdown(translations["pytorch2onnx_markdown"])
-    with gr.Row():
-        model_pth_upload = gr.File(label=translations["drop_model"], file_types=[".pth"])
-    with gr.Row():
-        convert_onnx = gr.Button(translations["convert_model"], variant="primary", scale=2)
-    with gr.Row():
-        model_pth_path = gr.Textbox(label=translations["model_path"], value="", placeholder="assets/weights/Model.pth", info=translations["model_path_info"], interactive=True)
-    with gr.Row():
-        output_model2 = gr.File(label=translations["output_model_path"], file_types=[".pth", ".onnx"], interactive=False, visible=False)
-    with gr.Row():
-        model_pth_upload.upload(fn=lambda model_pth_upload: shutil_move(model_pth_upload.name, configs["weights_path"]), inputs=[model_pth_upload], outputs=[model_pth_path])
-        convert_onnx.click(
-            fn=onnx_export,
-            inputs=[model_pth_path],
-            outputs=[output_model2],
-            api_name="model_onnx_export"
-        )
-        convert_onnx.click(fn=lambda: visible(True), inputs=[], outputs=[output_model2])

main/app/tabs/extra/child/create_srt.py DELETED Viewed

@@ -1,56 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.csrt import create_srt
-from main.app.core.ui import shutil_move, change_audios_choices
-from main.app.variables import translations, file_types, configs, paths_for_files
-def create_srt_tab():
-    with gr.Row():
-        gr.Markdown(translations["create_srt_markdown_2"])
-    with gr.Row():
-        with gr.Column():
-            srt_content = gr.Textbox(label=translations["srt_content"], value="", lines=9, max_lines=9, interactive=False)
-        with gr.Column():
-            word_timestamps = gr.Checkbox(label=translations["word_timestamps"], info=translations["word_timestamps_info"], value=False, interactive=True)
-            model_size = gr.Radio(label=translations["model_size"], info=translations["model_size_info"], choices=["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"], value="medium", interactive=True)
-    with gr.Row():
-        convert_button = gr.Button(translations["convert_audio"], variant="primary")
-    with gr.Row():
-        with gr.Accordion(translations["input_output"], open=False):
-            with gr.Column():
-                input_audio = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
-                output_file = gr.Textbox(label=translations["srt_output_file"], value="srt/output.srt", placeholder="srt/output.srt", interactive=True)
-            with gr.Column():
-                refresh = gr.Button(translations["refresh"])
-            with gr.Row():
-                input_file = gr.Files(label=translations["drop_audio"], file_types=file_types)
-    with gr.Row():
-        play_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
-    with gr.Row():
-        output_srt = gr.File(label=translations["srt_output_file"], file_types=[".srt"], interactive=False, visible=False)
-    with gr.Row():
-        input_file.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[input_file], outputs=[input_audio])
-        input_audio.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio], outputs=[play_audio])
-        refresh.click(fn=change_audios_choices, inputs=[input_audio], outputs=[input_audio])
-    with gr.Row():
-        convert_button.click(
-            fn=create_srt,
-            inputs=[
-                model_size,
-                input_audio,
-                output_file,
-                word_timestamps
-            ],
-            outputs=[
-                output_srt,
-                srt_content
-            ],
-            api_name="create_srt"
-        )

main/app/tabs/extra/child/f0_extract.py DELETED Viewed

@@ -1,51 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.f0_extract import f0_extract
-from main.app.core.ui import change_audios_choices, unlock_f0, shutil_move
-from main.app.variables import translations, paths_for_files, method_f0, configs, file_types
-def f0_extract_tab():
-    with gr.Row():
-        gr.Markdown(translations["f0_extractor_markdown_2"])
-    with gr.Row():
-        extractor_button = gr.Button(translations["extract_button"].replace("2. ", ""), variant="primary")
-    with gr.Row():
-        with gr.Column():
-            upload_audio_file = gr.Files(label=translations["drop_audio"], file_types=file_types)
-            audioplay = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
-        with gr.Column():
-            with gr.Accordion(translations["f0_method"], open=False):
-                with gr.Group():
-                    with gr.Row():
-                        onnx_f0_mode3 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
-                        unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
-                    f0_method_extract = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=[m for m in method_f0 if m != "hybrid"], value="rmvpe", interactive=True)
-            with gr.Accordion(translations["audio_path"], open=True):
-                input_audio_path = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, allow_custom_value=True, interactive=True)
-                refresh_audio_button = gr.Button(translations["refresh"])
-    with gr.Row():
-        gr.Markdown("___")
-    with gr.Row():
-        file_output = gr.File(label="", file_types=[".txt"], interactive=False)
-        image_output = gr.Image(label="", interactive=False, show_download_button=True)
-    with gr.Row():
-        upload_audio_file.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[upload_audio_file], outputs=[input_audio_path])
-        input_audio_path.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio_path], outputs=[audioplay])
-        refresh_audio_button.click(fn=change_audios_choices, inputs=[input_audio_path], outputs=[input_audio_path])
-    with gr.Row():
-        unlock_full_method.change(fn=lambda method: {"choices": [m for m in unlock_f0(method)["choices"] if m != "hybrid"], "value": "rmvpe", "__type__": "update"}, inputs=[unlock_full_method], outputs=[f0_method_extract])
-        extractor_button.click(
-            fn=f0_extract,
-            inputs=[
-                input_audio_path,
-                f0_method_extract,
-                onnx_f0_mode3
-            ],
-            outputs=[file_output, image_output],
-            api_name="f0_extract"
-        )

main/app/tabs/extra/child/fushion.py DELETED Viewed

@@ -1,45 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.ui import visible, shutil_move
-from main.app.core.model_utils import fushion_model
-from main.app.variables import translations, configs
-def fushion_tab():
-    with gr.Row():
-        gr.Markdown(translations["fushion_markdown_2"])
-    with gr.Row():
-        name_to_save = gr.Textbox(label=translations["modelname"], placeholder="Model.pth", value="", max_lines=1, interactive=True)
-    with gr.Row():
-        fushion_button = gr.Button(translations["fushion"], variant="primary", scale=4)
-    with gr.Column():
-        with gr.Row():
-            model_a = gr.File(label=f"{translations['model_name']} 1", file_types=[".pth", ".onnx"])
-            model_b = gr.File(label=f"{translations['model_name']} 2", file_types=[".pth", ".onnx"])
-        with gr.Row():
-            model_path_a = gr.Textbox(label=f"{translations['model_path']} 1", value="", placeholder="assets/weights/Model_1.pth")
-            model_path_b = gr.Textbox(label=f"{translations['model_path']} 2", value="", placeholder="assets/weights/Model_2.pth")
-    with gr.Row():
-        ratio = gr.Slider(minimum=0, maximum=1, label=translations["model_ratio"], info=translations["model_ratio_info"], value=0.5, interactive=True)
-    with gr.Row():
-        output_model = gr.File(label=translations["output_model_path"], file_types=[".pth", ".onnx"], interactive=False, visible=False)
-    with gr.Row():
-        model_a.upload(fn=lambda model: shutil_move(model.name, configs["weights_path"]), inputs=[model_a], outputs=[model_path_a])
-        model_b.upload(fn=lambda model: shutil_move(model.name, configs["weights_path"]), inputs=[model_b], outputs=[model_path_b])
-    with gr.Row():
-        fushion_button.click(
-            fn=fushion_model,
-            inputs=[
-                name_to_save,
-                model_path_a,
-                model_path_b,
-                ratio
-            ],
-            outputs=[name_to_save, output_model],
-            api_name="fushion_model"
-        )
-        fushion_button.click(fn=lambda: visible(True), inputs=[], outputs=[output_model])

main/app/tabs/extra/child/read_model.py DELETED Viewed

@@ -1,29 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.ui import shutil_move
-from main.app.core.model_utils import model_info
-from main.app.variables import translations, configs
-def read_model_tab():
-    with gr.Row():
-        gr.Markdown(translations["read_model_markdown_2"])
-    with gr.Row():
-        model = gr.File(label=translations["drop_model"], file_types=[".pth", ".onnx"])
-    with gr.Row():
-        read_button = gr.Button(translations["readmodel"], variant="primary", scale=2)
-    with gr.Column():
-        model_path = gr.Textbox(label=translations["model_path"], value="", placeholder="assets/weights/Model.pth", info=translations["model_path_info"], interactive=True)
-        output_info = gr.Textbox(label=translations["modelinfo"], value="", interactive=False, scale=6)
-    with gr.Row():
-        model.upload(fn=lambda model: shutil_move(model.name, configs["weights_path"]), inputs=[model], outputs=[model_path])
-        read_button.click(
-            fn=model_info,
-            inputs=[model_path],
-            outputs=[output_info],
-            api_name="read_model"
-        )

main/app/tabs/extra/child/settings.py DELETED Viewed

@@ -1,61 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.ui import change_fp
-from main.app.core.utils import stop_pid
-from main.app.core.restart import change_font, change_language, change_theme
-from main.app.variables import translations, theme, font, configs, language, config
-def settings_tab(app):
-    with gr.Row():
-        gr.Markdown(translations["settings_markdown_2"])
-    with gr.Row():
-        toggle_button = gr.Button(translations["change_light_dark"], variant="secondary", scale=2)
-    with gr.Row():
-        with gr.Column():
-            language_dropdown = gr.Dropdown(label=translations["lang"], interactive=True, info=translations["lang_restart"], choices=configs.get("support_language", "vi-VN"), value=language)
-            change_lang = gr.Button(translations["change_lang"], variant="primary", scale=2)
-        with gr.Column():
-            theme_dropdown = gr.Dropdown(label=translations["theme"], interactive=True, info=translations["theme_restart"], choices=configs.get("themes", theme), value=theme, allow_custom_value=True)
-            changetheme = gr.Button(translations["theme_button"], variant="primary", scale=2)
-    with gr.Row():
-        with gr.Column():
-            fp_choice = gr.Radio(choices=["fp16","fp32"], value="fp16" if configs.get("fp16", False) else "fp32", label=translations["precision"], info=translations["precision_info"], interactive=config.device not in ["cpu", "mps", "ocl:0"])
-            fp_button = gr.Button(translations["update_precision"], variant="secondary", scale=2)
-        with gr.Column():
-            font_choice = gr.Textbox(label=translations["font"], info=translations["font_info"], value=font, interactive=True)
-            font_button = gr.Button(translations["change_font"])
-    with gr.Row():
-        with gr.Column():
-            with gr.Accordion(translations["stop"], open=False, visible=True):
-                separate_stop = gr.Button(translations["stop_separate"])
-                convert_stop = gr.Button(translations["stop_convert"])
-                create_dataset_stop = gr.Button(translations["stop_create_dataset"])
-                with gr.Accordion(translations["stop_training"], open=False):
-                    model_name_stop = gr.Textbox(label=translations["modelname"], info=translations["training_model_name"], value="", placeholder=translations["modelname"], interactive=True)
-                    preprocess_stop = gr.Button(translations["stop_preprocess"])
-                    extract_stop = gr.Button(translations["stop_extract"])
-                    train_stop = gr.Button(translations["stop_training"])
-    with gr.Row():
-        toggle_button.click(fn=None, js="() => {document.body.classList.toggle('dark')}")
-        fp_button.click(fn=change_fp, inputs=[fp_choice], outputs=[fp_choice])
-    with gr.Row():
-        change_lang.click(fn=lambda a: change_language(a, app), inputs=[language_dropdown], outputs=[])
-        changetheme.click(fn=lambda a: change_theme(a, app) , inputs=[theme_dropdown], outputs=[])
-        font_button.click(fn=lambda a: change_font(a, app), inputs=[font_choice], outputs=[])
-    with gr.Row():
-        change_lang.click(fn=None, js="setTimeout(function() {location.reload()}, 30000)", inputs=[], outputs=[])
-        changetheme.click(fn=None, js="setTimeout(function() {location.reload()}, 30000)", inputs=[], outputs=[])
-        font_button.click(fn=None, js="setTimeout(function() {location.reload()}, 30000)", inputs=[], outputs=[])
-    with gr.Row():
-        separate_stop.click(fn=lambda: stop_pid("separate_pid", None, False), inputs=[], outputs=[])
-        convert_stop.click(fn=lambda: stop_pid("convert_pid", None, False), inputs=[], outputs=[])
-        create_dataset_stop.click(fn=lambda: stop_pid("create_dataset_pid", None, False), inputs=[], outputs=[])
-    with gr.Row():
-        preprocess_stop.click(fn=lambda model_name_stop: stop_pid("preprocess_pid", model_name_stop, False), inputs=[model_name_stop], outputs=[])
-        extract_stop.click(fn=lambda model_name_stop: stop_pid("extract_pid", model_name_stop, False), inputs=[model_name_stop], outputs=[])
-        train_stop.click(fn=lambda model_name_stop: stop_pid("train_pid", model_name_stop, True), inputs=[model_name_stop], outputs=[])

main/app/tabs/extra/extra.py DELETED Viewed

@@ -1,40 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.variables import translations, configs
-from main.app.tabs.extra.child.fushion import fushion_tab
-from main.app.tabs.extra.child.settings import settings_tab
-from main.app.tabs.extra.child.read_model import read_model_tab
-from main.app.tabs.extra.child.f0_extract import f0_extract_tab
-from main.app.tabs.extra.child.create_srt import create_srt_tab
-from main.app.tabs.extra.child.convert_model import convert_model_tab
-def extra_tab(app):
-    with gr.TabItem(translations["extra"], visible=configs.get("extra_tab", True)):
-        with gr.TabItem(translations["fushion"], visible=configs.get("fushion_tab", True)):
-            gr.Markdown(translations["fushion_markdown"])
-            fushion_tab()
-        with gr.TabItem(translations["read_model"], visible=configs.get("read_tab", True)):
-            gr.Markdown(translations["read_model_markdown"])
-            read_model_tab()
-        with gr.TabItem(translations["convert_model"], visible=configs.get("onnx_tab", True)):
-            gr.Markdown(translations["pytorch2onnx"])
-            convert_model_tab()
-        with gr.TabItem(translations["f0_extractor_tab"], visible=configs.get("f0_extractor_tab", True)):
-            gr.Markdown(translations["f0_extractor_markdown"])
-            f0_extract_tab()
-        with gr.TabItem(translations["create_srt_tab"], visible=configs.get("create_srt_tab", True)):
-            gr.Markdown(translations["create_srt_markdown"])
-            create_srt_tab()
-        with gr.TabItem(translations["settings"], visible=configs.get("settings_tab", True)):
-            gr.Markdown(translations["settings_markdown"])
-            settings_tab(app)

main/app/tabs/inference/child/convert.py DELETED Viewed

@@ -1,328 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.presets import load_presets, save_presets
-from main.app.core.inference import convert_audio, convert_selection
-from main.app.variables import translations, paths_for_files, sample_rate_choice, model_name, index_path, method_f0, f0_file, embedders_mode, embedders_model, presets_file, configs, file_types, export_format_choices, hybrid_f0_method
-from main.app.core.ui import visible, valueFalse_interactive, change_audios_choices, change_f0_choices, unlock_f0, change_preset_choices, change_backing_choices, hoplength_show, change_models_choices, get_index, index_strength_show, change_embedders_mode, shutil_move
-def convert_tab():
-    with gr.Row():
-        gr.Markdown(translations["convert_info"])
-    with gr.Row():
-        with gr.Column():
-            with gr.Group():
-                with gr.Row():
-                    cleaner0 = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
-                    autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
-                    use_audio = gr.Checkbox(label=translations["use_audio"], value=False, interactive=True)
-                    checkpointing = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
-                with gr.Row():
-                    use_original = gr.Checkbox(label=translations["convert_original"], value=False, interactive=True, visible=use_audio.value)
-                    convert_backing = gr.Checkbox(label=translations["convert_backing"], value=False, interactive=True, visible=use_audio.value)
-                    not_merge_backing = gr.Checkbox(label=translations["not_merge_backing"], value=False, interactive=True, visible=use_audio.value)
-                    merge_instrument = gr.Checkbox(label=translations["merge_instruments"], value=False, interactive=True, visible=use_audio.value)
-            with gr.Row():
-                pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
-                clean_strength0 = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner0.value)
-            with gr.Row():
-                with gr.Column():
-                    audio_select = gr.Dropdown(label=translations["select_separate"], choices=[], value="", interactive=True, allow_custom_value=True, visible=False)
-                    convert_button_2 = gr.Button(translations["convert_audio"], visible=False)
-    with gr.Row():
-        with gr.Column():
-            convert_button = gr.Button(translations["convert_audio"], variant="primary")
-    with gr.Row():
-        with gr.Column():
-            input0 = gr.Files(label=translations["drop_audio"], file_types=file_types)
-            play_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
-        with gr.Column():
-            with gr.Accordion(translations["model_accordion"], open=True):
-                with gr.Row():
-                    model_pth = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
-                    model_index = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
-                with gr.Row():
-                    refresh = gr.Button(translations["refresh"])
-                with gr.Row():
-                    index_strength = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index.value != "")
-            with gr.Accordion(translations["input_output"], open=False):
-                with gr.Column():
-                    export_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
-                    input_audio0 = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
-                    output_audio = gr.Textbox(label=translations["output_path"], value="audios/output.wav", placeholder="audios/output.wav", info=translations["output_path_info"], interactive=True)
-                with gr.Column():
-                    refresh0 = gr.Button(translations["refresh"])
-            with gr.Accordion(translations["setting"], open=False):
-                with gr.Accordion(translations["f0_method"], open=False):
-                    with gr.Group():
-                        with gr.Row():
-                            onnx_f0_mode = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
-                            unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
-                        method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
-                        hybrid_method = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=method.value == "hybrid")
-                    hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
-                    alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
-                with gr.Accordion(translations["f0_file"], open=False):
-                    upload_f0_file = gr.File(label=translations["upload_f0"], file_types=[".txt"])
-                    f0_file_dropdown = gr.Dropdown(label=translations["f0_file_2"], value="", choices=f0_file, allow_custom_value=True, interactive=True)
-                    refresh_f0_file = gr.Button(translations["refresh"])
-                with gr.Accordion(translations["hubert_model"], open=False):
-                    embed_mode = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
-                    embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
-                    custom_embedders = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders.value == "custom")
-                with gr.Accordion(translations["use_presets"], open=False):
-                    with gr.Row():
-                        presets_name = gr.Dropdown(label=translations["file_preset"], choices=presets_file, value=presets_file[0] if len(presets_file) > 0 else '', interactive=True, allow_custom_value=True)
-                    with gr.Row():
-                        load_click = gr.Button(translations["load_file"], variant="primary")
-                        refresh_click = gr.Button(translations["refresh"])
-                    with gr.Accordion(translations["export_file"], open=False):
-                        with gr.Row():
-                            with gr.Column():
-                                with gr.Group():
-                                    with gr.Row():
-                                        cleaner_chbox = gr.Checkbox(label=translations["save_clean"], value=True, interactive=True)
-                                        autotune_chbox = gr.Checkbox(label=translations["save_autotune"], value=True, interactive=True)
-                                        pitch_chbox = gr.Checkbox(label=translations["save_pitch"], value=True, interactive=True)
-                                        index_strength_chbox = gr.Checkbox(label=translations["save_index_2"], value=True, interactive=True)
-                                        resample_sr_chbox = gr.Checkbox(label=translations["save_resample"], value=True, interactive=True)
-                                        filter_radius_chbox = gr.Checkbox(label=translations["save_filter"], value=True, interactive=True)
-                                        rms_mix_rate_chbox = gr.Checkbox(label=translations["save_envelope"], value=True, interactive=True)
-                                        protect_chbox = gr.Checkbox(label=translations["save_protect"], value=True, interactive=True)
-                                        split_audio_chbox = gr.Checkbox(label=translations["save_split"], value=True, interactive=True)
-                                        formant_shifting_chbox = gr.Checkbox(label=translations["formantshift"], value=True, interactive=True)
-                        with gr.Row():
-                            with gr.Column():
-                                name_to_save_file = gr.Textbox(label=translations["filename_to_save"])
-                                save_file_button = gr.Button(translations["export_file"])
-                    with gr.Row():
-                        upload_presets = gr.Files(label=translations["upload_presets"], file_types=[".conversion.json"])
-                with gr.Column():
-                    with gr.Group():
-                        with gr.Row():
-                            split_audio = gr.Checkbox(label=translations["split_audio"], value=False, interactive=True)
-                            formant_shifting = gr.Checkbox(label=translations["formantshift"], value=False, interactive=True)
-                        with gr.Row():
-                            proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
-                            audio_processing = gr.Checkbox(label=translations["audio_processing"], value=False, interactive=True)
-                    resample_sr = gr.Radio(choices=[0]+sample_rate_choice, label=translations["resample"], info=translations["resample_info"], value=0, interactive=True)
-                    proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
-                    f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune.value)
-                    filter_radius = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
-                    rms_mix_rate = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
-                    protect = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
-                with gr.Row():
-                    formant_qfrency = gr.Slider(value=1.0, label=translations["formant_qfrency"], info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
-                    formant_timbre = gr.Slider(value=1.0, label=translations["formant_timbre"], info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
-    with gr.Row():
-        gr.Markdown(translations["output_convert"])
-    with gr.Row():
-        main_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["main_convert"])
-        backing_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["convert_backing"], visible=convert_backing.value)
-        main_backing = gr.Audio(show_download_button=True, interactive=False, label=translations["main_or_backing"], visible=convert_backing.value)
-    with gr.Row():
-        original_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["convert_original"], visible=use_original.value)
-        vocal_instrument = gr.Audio(show_download_button=True, interactive=False, label=translations["voice_or_instruments"], visible=merge_instrument.value)
-    with gr.Row():
-        upload_f0_file.upload(fn=lambda inp: shutil_move(inp.name, configs["f0_path"]), inputs=[upload_f0_file], outputs=[f0_file_dropdown])
-        refresh_f0_file.click(fn=change_f0_choices, inputs=[], outputs=[f0_file_dropdown])
-        unlock_full_method.change(fn=unlock_f0, inputs=[unlock_full_method], outputs=[method])
-    with gr.Row():
-        load_click.click(
-            fn=load_presets,
-            inputs=[
-                presets_name,
-                cleaner0,
-                autotune,
-                pitch,
-                clean_strength0,
-                index_strength,
-                resample_sr,
-                filter_radius,
-                rms_mix_rate,
-                protect,
-                split_audio,
-                f0_autotune_strength,
-                formant_shifting,
-                formant_qfrency,
-                formant_timbre,
-                proposal_pitch,
-                proposal_pitch_threshold
-            ],
-            outputs=[
-                cleaner0,
-                autotune,
-                pitch,
-                clean_strength0,
-                index_strength,
-                resample_sr,
-                filter_radius,
-                rms_mix_rate,
-                protect,
-                split_audio,
-                f0_autotune_strength,
-                formant_shifting,
-                formant_qfrency,
-                formant_timbre,
-                proposal_pitch,
-                proposal_pitch_threshold
-            ]
-        )
-        refresh_click.click(fn=change_preset_choices, inputs=[], outputs=[presets_name])
-        save_file_button.click(
-            fn=save_presets,
-            inputs=[
-                name_to_save_file,
-                cleaner0,
-                autotune,
-                pitch,
-                clean_strength0,
-                index_strength,
-                resample_sr,
-                filter_radius,
-                rms_mix_rate,
-                protect,
-                split_audio,
-                f0_autotune_strength,
-                cleaner_chbox,
-                autotune_chbox,
-                pitch_chbox,
-                index_strength_chbox,
-                resample_sr_chbox,
-                filter_radius_chbox,
-                rms_mix_rate_chbox,
-                protect_chbox,
-                split_audio_chbox,
-                formant_shifting_chbox,
-                formant_shifting,
-                formant_qfrency,
-                formant_timbre,
-                proposal_pitch,
-                proposal_pitch_threshold
-            ],
-            outputs=[presets_name]
-        )
-    with gr.Row():
-        upload_presets.upload(fn=lambda presets_in: [shutil_move(preset.name, configs["presets_path"]) for preset in presets_in][0], inputs=[upload_presets], outputs=[presets_name])
-        autotune.change(fn=visible, inputs=[autotune], outputs=[f0_autotune_strength])
-        use_audio.change(fn=lambda a: [visible(a), visible(a), visible(a), visible(a), visible(a), valueFalse_interactive(a), valueFalse_interactive(a), valueFalse_interactive(a), valueFalse_interactive(a), visible(not a), visible(not a), visible(not a), visible(not a)], inputs=[use_audio], outputs=[main_backing, use_original, convert_backing, not_merge_backing, merge_instrument, use_original, convert_backing, not_merge_backing, merge_instrument, input_audio0, output_audio, input0, play_audio])
-    with gr.Row():
-        convert_backing.change(fn=lambda a,b: [change_backing_choices(a, b), visible(a)], inputs=[convert_backing, not_merge_backing], outputs=[use_original, backing_convert])
-        use_original.change(fn=lambda audio, original: [visible(original), visible(not original), visible(audio and not original), valueFalse_interactive(not original), valueFalse_interactive(not original)], inputs=[use_audio, use_original], outputs=[original_convert, main_convert, main_backing, convert_backing, not_merge_backing])
-        cleaner0.change(fn=visible, inputs=[cleaner0], outputs=[clean_strength0])
-    with gr.Row():
-        merge_instrument.change(fn=visible, inputs=[merge_instrument], outputs=[vocal_instrument])
-        not_merge_backing.change(fn=lambda audio, merge, cvb: [visible(audio and not merge), change_backing_choices(cvb, merge)], inputs=[use_audio, not_merge_backing, convert_backing], outputs=[main_backing, use_original])
-        method.change(fn=lambda method, hybrid: [visible(method == "hybrid"), visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[method, hybrid_method], outputs=[hybrid_method, alpha, hop_length])
-    with gr.Row():
-        hybrid_method.change(fn=hoplength_show, inputs=[method, hybrid_method], outputs=[hop_length])
-        refresh.click(fn=change_models_choices, inputs=[], outputs=[model_pth, model_index])
-        model_pth.change(fn=get_index, inputs=[model_pth], outputs=[model_index])
-    with gr.Row():
-        input0.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[input0], outputs=[input_audio0])
-        input_audio0.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio0], outputs=[play_audio])
-        formant_shifting.change(fn=lambda a: [visible(a) for _ in range(2)], inputs=[formant_shifting], outputs=[formant_qfrency, formant_timbre])
-    with gr.Row():
-        embedders.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders], outputs=[custom_embedders])
-        refresh0.click(fn=change_audios_choices, inputs=[input_audio0], outputs=[input_audio0])
-        model_index.change(fn=index_strength_show, inputs=[model_index], outputs=[index_strength])
-    with gr.Row():
-        convert_button.click(fn=lambda: visible(False), inputs=[], outputs=[convert_button])
-        convert_button_2.click(fn=lambda: [visible(False), visible(False)], inputs=[], outputs=[audio_select, convert_button_2])
-    with gr.Row():
-        proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
-        embed_mode.change(fn=change_embedders_mode, inputs=[embed_mode], outputs=[embedders])
-    with gr.Row():
-        convert_button.click(
-            fn=convert_selection,
-            inputs=[
-                cleaner0,
-                autotune,
-                use_audio,
-                use_original,
-                convert_backing,
-                not_merge_backing,
-                merge_instrument,
-                pitch,
-                clean_strength0,
-                model_pth,
-                model_index,
-                index_strength,
-                input_audio0,
-                output_audio,
-                export_format,
-                method,
-                hybrid_method,
-                hop_length,
-                embedders,
-                custom_embedders,
-                resample_sr,
-                filter_radius,
-                rms_mix_rate,
-                protect,
-                split_audio,
-                f0_autotune_strength,
-                checkpointing,
-                onnx_f0_mode,
-                formant_shifting,
-                formant_qfrency,
-                formant_timbre,
-                f0_file_dropdown,
-                embed_mode,
-                proposal_pitch,
-                proposal_pitch_threshold,
-                audio_processing,
-                alpha
-            ],
-            outputs=[audio_select, main_convert, backing_convert, main_backing, original_convert, vocal_instrument, convert_button, convert_button_2],
-            api_name="convert_selection"
-        )
-        convert_button_2.click(
-            fn=convert_audio,
-            inputs=[
-                cleaner0,
-                autotune,
-                use_audio,
-                use_original,
-                convert_backing,
-                not_merge_backing,
-                merge_instrument,
-                pitch,
-                clean_strength0,
-                model_pth,
-                model_index,
-                index_strength,
-                input_audio0,
-                output_audio,
-                export_format,
-                method,
-                hybrid_method,
-                hop_length,
-                embedders,
-                custom_embedders,
-                resample_sr,
-                filter_radius,
-                rms_mix_rate,
-                protect,
-                split_audio,
-                f0_autotune_strength,
-                audio_select,
-                checkpointing,
-                onnx_f0_mode,
-                formant_shifting,
-                formant_qfrency,
-                formant_timbre,
-                f0_file_dropdown,
-                embed_mode,
-                proposal_pitch,
-                proposal_pitch_threshold,
-                audio_processing,
-                alpha
-            ],
-            outputs=[main_convert, backing_convert, main_backing, original_convert, vocal_instrument, convert_button],
-            api_name="convert_audio"
-        )

main/app/tabs/inference/child/convert_tts.py DELETED Viewed

@@ -1,280 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.tts import TTS
-from main.app.core.process import process_input
-from main.app.core.inference import convert_tts
-from main.app.core.utils import google_translate
-from main.app.core.presets import save_presets, load_presets
-from main.app.core.ui import visible, change_f0_choices, unlock_f0, hoplength_show, change_models_choices, get_index, index_strength_show, change_embedders_mode, change_tts_voice_choices, shutil_move, change_preset_choices
-from main.app.variables import translations, sample_rate_choice, model_name, index_path, method_f0, f0_file, embedders_mode, embedders_model, edgetts, google_tts_voice, configs, presets_file, export_format_choices, hybrid_f0_method
-def convert_tts_tab():
-    with gr.Row():
-        gr.Markdown(translations["convert_text_markdown_2"])
-    with gr.Row():
-        with gr.Column():
-            with gr.Group():
-                with gr.Row():
-                    use_txt = gr.Checkbox(label=translations["input_txt"], value=False, interactive=True)
-                    google_tts_check_box = gr.Checkbox(label=translations["googletts"], value=False, interactive=True)
-                prompt = gr.Textbox(label=translations["text_to_speech"], value="", placeholder="Hello Words", lines=3)
-        with gr.Column():
-            speed = gr.Slider(label=translations["voice_speed"], info=translations["voice_speed_info"], minimum=-100, maximum=100, value=0, step=1)
-            pitch0 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
-    with gr.Row():
-        tts_button = gr.Button(translations["tts_1"], variant="primary", scale=2)
-        convert_button0 = gr.Button(translations["tts_2"], variant="secondary", scale=2)
-    with gr.Row():
-        with gr.Column():
-            txt_input = gr.File(label=translations["drop_text"], file_types=[".txt", ".srt", ".docx"], visible=use_txt.value)
-            tts_voice = gr.Dropdown(label=translations["voice"], choices=edgetts, interactive=True, value="vi-VN-NamMinhNeural")
-            tts_pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info_2"], label=translations["pitch"], value=0, interactive=True)
-            with gr.Accordion(translations["translate"], open=False):
-                with gr.Row():
-                    source_lang = gr.Dropdown(label=translations["source_lang"], choices=["auto"]+google_tts_voice, interactive=True, value="auto")
-                    target_lang = gr.Dropdown(label=translations["target_lang"], choices=google_tts_voice, interactive=True, value="en")
-                translate_button = gr.Button(translations["translate"])
-        with gr.Column():
-            with gr.Accordion(translations["model_accordion"], open=True):
-                with gr.Row():
-                    model_pth0 = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
-                    model_index0 = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
-                with gr.Row():
-                    refresh1 = gr.Button(translations["refresh"])
-                with gr.Row():
-                    index_strength0 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index0.value != "")
-            with gr.Accordion(translations["output_path"], open=False):
-                export_format0 = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
-                output_audio0 = gr.Textbox(label=translations["output_tts"], value="audios/tts.wav", placeholder="audios/tts.wav", info=translations["tts_output"], interactive=True)
-                output_audio1 = gr.Textbox(label=translations["output_tts_convert"], value="audios/tts-convert.wav", placeholder="audios/tts-convert.wav", info=translations["tts_output"], interactive=True)
-            with gr.Accordion(translations["setting"], open=False):
-                with gr.Accordion(translations["f0_method"], open=False):
-                    with gr.Group():
-                        with gr.Row():
-                            onnx_f0_mode1 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
-                            unlock_full_method3 = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
-                        method0 = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
-                        hybrid_method0 = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=method0.value == "hybrid")
-                    hop_length0 = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
-                    alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
-                with gr.Accordion(translations["f0_file"], open=False):
-                    upload_f0_file0 = gr.File(label=translations["upload_f0"], file_types=[".txt"])
-                    f0_file_dropdown0 = gr.Dropdown(label=translations["f0_file_2"], value="", choices=f0_file, allow_custom_value=True, interactive=True)
-                    refresh_f0_file0 = gr.Button(translations["refresh"])
-                with gr.Accordion(translations["hubert_model"], open=False):
-                    embed_mode1 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
-                    embedders0 = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
-                    custom_embedders0 = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders0.value == "custom")
-                with gr.Accordion(translations["use_presets"], open=False):
-                    with gr.Row():
-                        presets_name = gr.Dropdown(label=translations["file_preset"], choices=presets_file, value=presets_file[0] if len(presets_file) > 0 else '', interactive=True, allow_custom_value=True)
-                    with gr.Row():
-                        load_click = gr.Button(translations["load_file"], variant="primary")
-                        refresh_click = gr.Button(translations["refresh"])
-                    with gr.Accordion(translations["export_file"], open=False):
-                        with gr.Row():
-                            with gr.Column():
-                                with gr.Group():
-                                    with gr.Row():
-                                        cleaner_chbox = gr.Checkbox(label=translations["save_clean"], value=True, interactive=True)
-                                        autotune_chbox = gr.Checkbox(label=translations["save_autotune"], value=True, interactive=True)
-                                        pitch_chbox = gr.Checkbox(label=translations["save_pitch"], value=True, interactive=True)
-                                        index_strength_chbox = gr.Checkbox(label=translations["save_index_2"], value=True, interactive=True)
-                                        resample_sr_chbox = gr.Checkbox(label=translations["save_resample"], value=True, interactive=True)
-                                        filter_radius_chbox = gr.Checkbox(label=translations["save_filter"], value=True, interactive=True)
-                                        rms_mix_rate_chbox = gr.Checkbox(label=translations["save_envelope"], value=True, interactive=True)
-                                        protect_chbox = gr.Checkbox(label=translations["save_protect"], value=True, interactive=True)
-                                        split_audio_chbox = gr.Checkbox(label=translations["save_split"], value=True, interactive=True)
-                                        formant_shifting_chbox = gr.Checkbox(label=translations["formantshift"], value=True, interactive=True)
-                        with gr.Row():
-                            with gr.Column():
-                                name_to_save_file = gr.Textbox(label=translations["filename_to_save"])
-                                save_file_button = gr.Button(translations["export_file"])
-                    with gr.Row():
-                        upload_presets = gr.Files(label=translations["upload_presets"], file_types=[".conversion.json"])
-                with gr.Group():
-                    with gr.Row():
-                        audio_processing = gr.Checkbox(label=translations["audio_processing"], value=False, interactive=True)
-                    with gr.Row():
-                        formant_shifting1 = gr.Checkbox(label=translations["formantshift"], value=False, interactive=True)
-                        split_audio0 = gr.Checkbox(label=translations["split_audio"], value=False, interactive=True)
-                        cleaner1 = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
-                    with gr.Row():
-                        autotune3 = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
-                        checkpointing0 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
-                        proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
-                with gr.Column():
-                    resample_sr0 = gr.Radio(choices=[0]+sample_rate_choice, label=translations["resample"], info=translations["resample_info"], value=0, interactive=True)
-                    proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
-                    f0_autotune_strength0 = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune3.value)
-                    clean_strength1 = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner1.value)
-                    filter_radius0 = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
-                    rms_mix_rate0 = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
-                    protect0 = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
-                with gr.Row():
-                    formant_qfrency1 = gr.Slider(value=1.0, label=translations["formant_qfrency"], info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
-                    formant_timbre1 = gr.Slider(value=1.0, label=translations["formant_timbre"], info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
-    with gr.Row():
-        gr.Markdown(translations["output_tts_markdown"])
-    with gr.Row():
-        tts_voice_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["output_text_to_speech"])
-        tts_voice_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["output_file_tts_convert"])
-    with gr.Row():
-        load_click.click(
-            fn=load_presets,
-            inputs=[
-                presets_name,
-                cleaner1,
-                autotune3,
-                pitch0,
-                clean_strength1,
-                index_strength0,
-                resample_sr0,
-                filter_radius0,
-                rms_mix_rate0,
-                protect0,
-                split_audio0,
-                f0_autotune_strength0,
-                formant_shifting1,
-                formant_qfrency1,
-                formant_timbre1,
-                proposal_pitch,
-                proposal_pitch_threshold
-            ],
-            outputs=[
-                cleaner1,
-                autotune3,
-                pitch0,
-                clean_strength1,
-                index_strength0,
-                resample_sr0,
-                filter_radius0,
-                rms_mix_rate0,
-                protect0,
-                split_audio0,
-                f0_autotune_strength0,
-                formant_shifting1,
-                formant_qfrency1,
-                formant_timbre1,
-                proposal_pitch,
-                proposal_pitch_threshold
-            ]
-        )
-        refresh_click.click(fn=change_preset_choices, inputs=[], outputs=[presets_name])
-        save_file_button.click(
-            fn=save_presets,
-            inputs=[
-                name_to_save_file,
-                cleaner1,
-                autotune3,
-                pitch0,
-                clean_strength1,
-                index_strength0,
-                resample_sr0,
-                filter_radius0,
-                rms_mix_rate0,
-                protect0,
-                split_audio0,
-                f0_autotune_strength0,
-                cleaner_chbox,
-                autotune_chbox,
-                pitch_chbox,
-                index_strength_chbox,
-                resample_sr_chbox,
-                filter_radius_chbox,
-                rms_mix_rate_chbox,
-                protect_chbox,
-                split_audio_chbox,
-                formant_shifting_chbox,
-                formant_shifting1,
-                formant_qfrency1,
-                formant_timbre1,
-                proposal_pitch,
-                proposal_pitch_threshold
-            ],
-            outputs=[presets_name]
-        )
-    with gr.Row():
-        proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
-        upload_presets.upload(fn=lambda presets_in: [shutil_move(preset.name, configs["presets_path"]) for preset in presets_in][0], inputs=[upload_presets], outputs=[presets_name])
-        translate_button.click(fn=google_translate, inputs=[prompt, source_lang, target_lang], outputs=[prompt], api_name="google_translate")
-    with gr.Row():
-        unlock_full_method3.change(fn=unlock_f0, inputs=[unlock_full_method3], outputs=[method0])
-        upload_f0_file0.upload(fn=lambda inp: shutil_move(inp.name, configs["f0_path"]), inputs=[upload_f0_file0], outputs=[f0_file_dropdown0])
-        refresh_f0_file0.click(fn=change_f0_choices, inputs=[], outputs=[f0_file_dropdown0])
-    with gr.Row():
-        embed_mode1.change(fn=change_embedders_mode, inputs=[embed_mode1], outputs=[embedders0])
-        autotune3.change(fn=visible, inputs=[autotune3], outputs=[f0_autotune_strength0])
-        model_pth0.change(fn=get_index, inputs=[model_pth0], outputs=[model_index0])
-    with gr.Row():
-        cleaner1.change(fn=visible, inputs=[cleaner1], outputs=[clean_strength1])
-        method0.change(fn=lambda method, hybrid: [visible(method == "hybrid"), visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[method0, hybrid_method0], outputs=[hybrid_method0, alpha, hop_length0])
-        hybrid_method0.change(fn=hoplength_show, inputs=[method0, hybrid_method0], outputs=[hop_length0])
-    with gr.Row():
-        refresh1.click(fn=change_models_choices, inputs=[], outputs=[model_pth0, model_index0])
-        embedders0.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders0], outputs=[custom_embedders0])
-        formant_shifting1.change(fn=lambda a: [visible(a) for _ in range(2)], inputs=[formant_shifting1], outputs=[formant_qfrency1, formant_timbre1])
-    with gr.Row():
-        model_index0.change(fn=index_strength_show, inputs=[model_index0], outputs=[index_strength0])
-        txt_input.upload(fn=process_input, inputs=[txt_input], outputs=[prompt])
-        use_txt.change(fn=visible, inputs=[use_txt], outputs=[txt_input])
-    with gr.Row():
-        google_tts_check_box.change(fn=change_tts_voice_choices, inputs=[google_tts_check_box], outputs=[tts_voice])
-        tts_button.click(
-            fn=TTS,
-            inputs=[
-                prompt,
-                tts_voice,
-                speed,
-                output_audio0,
-                tts_pitch,
-                google_tts_check_box,
-                txt_input
-            ],
-            outputs=[tts_voice_audio],
-            api_name="text-to-speech"
-        )
-        convert_button0.click(
-            fn=convert_tts,
-            inputs=[
-                cleaner1,
-                autotune3,
-                pitch0,
-                clean_strength1,
-                model_pth0,
-                model_index0,
-                index_strength0,
-                output_audio0,
-                output_audio1,
-                export_format0,
-                method0,
-                hybrid_method0,
-                hop_length0,
-                embedders0,
-                custom_embedders0,
-                resample_sr0,
-                filter_radius0,
-                rms_mix_rate0,
-                protect0,
-                split_audio0,
-                f0_autotune_strength0,
-                checkpointing0,
-                onnx_f0_mode1,
-                formant_shifting1,
-                formant_qfrency1,
-                formant_timbre1,
-                f0_file_dropdown0,
-                embed_mode1,
-                proposal_pitch,
-                proposal_pitch_threshold,
-                audio_processing,
-                alpha
-            ],
-            outputs=[tts_voice_convert],
-            api_name="convert_tts"
-        )

main/app/tabs/inference/child/convert_with_whisper.py DELETED Viewed

@@ -1,164 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.inference import convert_with_whisper
-from main.app.core.ui import visible, change_audios_choices, unlock_f0, hoplength_show, change_models_choices, get_index, index_strength_show, change_embedders_mode, shutil_move
-from main.app.variables import translations, paths_for_files, sample_rate_choice, model_name, index_path, method_f0, embedders_mode, embedders_model, configs, file_types, export_format_choices, whisper_model, hybrid_f0_method
-def convert_with_whisper_tab():
-    with gr.Row():
-        gr.Markdown(translations["convert_with_whisper_info"])
-    with gr.Row():
-        with gr.Column():
-            with gr.Group():
-                with gr.Row():
-                    cleaner2 = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
-                    autotune2 = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
-                    checkpointing2 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
-                    formant_shifting2 = gr.Checkbox(label=translations["formantshift"], value=False, interactive=True)
-                    proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
-                    audio_processing = gr.Checkbox(label=translations["audio_processing"], value=False, interactive=True)
-                with gr.Row():
-                    num_spk = gr.Slider(minimum=2, maximum=8, step=1, info=translations["num_spk_info"], label=translations["num_spk"], value=2, interactive=True)
-    with gr.Row():
-        with gr.Column():
-            convert_button3 = gr.Button(translations["convert_audio"], variant="primary")
-    with gr.Row():
-        with gr.Column():
-            with gr.Accordion(translations["model_accordion"] + " 1", open=True):
-                with gr.Row():
-                    model_pth2 = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
-                    model_index2 = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
-                with gr.Row():
-                    refresh2 = gr.Button(translations["refresh"])
-                with gr.Row():
-                    pitch3 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
-                    index_strength2 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index2.value != "")
-            with gr.Accordion(translations["input_output"], open=False):
-                with gr.Column():
-                    export_format2 = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
-                    input_audio1 = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
-                    output_audio2 = gr.Textbox(label=translations["output_path"], value="audios/output.wav", placeholder="audios/output.wav", info=translations["output_path_info"], interactive=True)
-                with gr.Column():
-                    refresh4 = gr.Button(translations["refresh"])
-                with gr.Row():
-                    input2 = gr.Files(label=translations["drop_audio"], file_types=file_types)
-        with gr.Column():
-            with gr.Accordion(translations["model_accordion"] + " 2", open=True):
-                with gr.Row():
-                    model_pth3 = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
-                    model_index3 = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
-                with gr.Row():
-                    refresh3 = gr.Button(translations["refresh"])
-                with gr.Row():
-                    pitch4 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
-                    index_strength3 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index3.value != "")
-            with gr.Accordion(translations["setting"], open=False):
-                with gr.Row():
-                    model_size = gr.Radio(label=translations["model_size"], info=translations["model_size_info"], choices=whisper_model, value="medium", interactive=True)
-                with gr.Accordion(translations["f0_method"], open=False):
-                    with gr.Group():
-                        with gr.Row():
-                            onnx_f0_mode4 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
-                            unlock_full_method2 = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
-                        method3 = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
-                        hybrid_method3 = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=method3.value == "hybrid")
-                    hop_length3 = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
-                    alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
-                with gr.Accordion(translations["hubert_model"], open=False):
-                    embed_mode3 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
-                    embedders3 = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
-                    custom_embedders3 = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders3.value == "custom")
-                with gr.Column():
-                    resample_sr3 = gr.Radio(choices=[0]+sample_rate_choice, label=translations["resample"], info=translations["resample_info"], value=0, interactive=True)
-                    proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
-                    clean_strength3 = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner2.value)
-                    f0_autotune_strength3 = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune2.value)
-                    filter_radius3 = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
-                    rms_mix_rate3 = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
-                    protect3 = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
-                with gr.Row():
-                    formant_qfrency3 = gr.Slider(value=1.0, label=translations["formant_qfrency"] + " 1", info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
-                    formant_timbre3 = gr.Slider(value=1.0, label=translations["formant_timbre"] + " 1", info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
-                with gr.Row():
-                    formant_qfrency4 = gr.Slider(value=1.0, label=translations["formant_qfrency"] + " 2", info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
-                    formant_timbre4 = gr.Slider(value=1.0, label=translations["formant_timbre"] + " 2", info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
-    with gr.Row():
-        gr.Markdown(translations["input_output"])
-    with gr.Row():
-        play_audio2 = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
-        play_audio3 = gr.Audio(show_download_button=True, interactive=False, label=translations["output_file_tts_convert"])
-    with gr.Row():
-        autotune2.change(fn=visible, inputs=[autotune2], outputs=[f0_autotune_strength3])
-        cleaner2.change(fn=visible, inputs=[cleaner2], outputs=[clean_strength3])
-        method3.change(fn=lambda method, hybrid: [visible(method == "hybrid"), visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[method3, hybrid_method3], outputs=[hybrid_method3, alpha, hop_length3])
-    with gr.Row():
-        hybrid_method3.change(fn=hoplength_show, inputs=[method3, hybrid_method3], outputs=[hop_length3])
-        refresh2.click(fn=change_models_choices, inputs=[], outputs=[model_pth2, model_index2])
-        model_pth2.change(fn=get_index, inputs=[model_pth2], outputs=[model_index2])
-    with gr.Row():
-        refresh3.click(fn=change_models_choices, inputs=[], outputs=[model_pth3, model_index3])
-        model_pth3.change(fn=get_index, inputs=[model_pth3], outputs=[model_index3])
-        input2.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[input2], outputs=[input_audio1])
-    with gr.Row():
-        input_audio1.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio1], outputs=[play_audio2])
-        formant_shifting2.change(fn=lambda a: [visible(a) for _ in range(4)], inputs=[formant_shifting2], outputs=[formant_qfrency3, formant_timbre3, formant_qfrency4, formant_timbre4])
-        embedders3.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders3], outputs=[custom_embedders3])
-    with gr.Row():
-        refresh4.click(fn=change_audios_choices, inputs=[input_audio1], outputs=[input_audio1])
-        model_index2.change(fn=index_strength_show, inputs=[model_index2], outputs=[index_strength2])
-        model_index3.change(fn=index_strength_show, inputs=[model_index3], outputs=[index_strength3])
-    with gr.Row():
-        unlock_full_method2.change(fn=unlock_f0, inputs=[unlock_full_method2], outputs=[method3])
-        embed_mode3.change(fn=change_embedders_mode, inputs=[embed_mode3], outputs=[embedders3])
-        proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
-    with gr.Row():
-        convert_button3.click(
-            fn=convert_with_whisper,
-            inputs=[
-                num_spk,
-                model_size,
-                cleaner2,
-                clean_strength3,
-                autotune2,
-                f0_autotune_strength3,
-                checkpointing2,
-                model_pth2,
-                model_pth3,
-                model_index2,
-                model_index3,
-                pitch3,
-                pitch4,
-                index_strength2,
-                index_strength3,
-                export_format2,
-                input_audio1,
-                output_audio2,
-                onnx_f0_mode4,
-                method3,
-                hybrid_method3,
-                hop_length3,
-                embed_mode3,
-                embedders3,
-                custom_embedders3,
-                resample_sr3,
-                filter_radius3,
-                rms_mix_rate3,
-                protect3,
-                formant_shifting2,
-                formant_qfrency3,
-                formant_timbre3,
-                formant_qfrency4,
-                formant_timbre4,
-                proposal_pitch,
-                proposal_pitch_threshold,
-                audio_processing,
-                alpha
-            ],
-            outputs=[play_audio3],
-            api_name="convert_with_whisper"
-        )

main/app/tabs/inference/child/separate.py DELETED Viewed

@@ -1,263 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.downloads import download_url
-from main.app.core.separate import separate_music
-from main.app.core.ui import visible, valueFalse_interactive, change_audios_choices, shutil_move, separate_change
-from main.app.variables import translations, uvr_model, karaoke_models, reverb_models, vr_models, denoise_models, mdx_models, paths_for_files, sample_rate_choice, configs, file_types, export_format_choices
-def separate_tab():
-    with gr.Row():
-        gr.Markdown(translations["4_part"])
-    with gr.Row():
-        with gr.Column():
-            with gr.Group():
-                with gr.Row():
-                    enable_denoise = gr.Checkbox(label=translations["denoise_mdx"], value=False, interactive=False)
-                    separate_backing = gr.Checkbox(label=translations["separator_backing"], value=False, interactive=True)
-                    separate_reverb = gr.Checkbox(label=translations["dereveb_audio"], value=False, interactive=True)
-                    enable_tta = gr.Checkbox(label=translations["enable_tta"], value=False, interactive=False)
-                    high_end_process = gr.Checkbox(label=translations["high_end_process"], value=False, interactive=False)
-                    enable_post_process = gr.Checkbox(label=translations["enable_post_process"], value=False, interactive=False)
-                with gr.Row():
-                    model_name = gr.Dropdown(label=translations["separator_model"], value=uvr_model[0], choices=uvr_model, interactive=True)
-                    karaoke_model = gr.Dropdown(label=translations["separator_backing_model"], value=list(karaoke_models.keys())[0], choices=list(karaoke_models.keys()), interactive=True, visible=separate_backing.value)
-                    reverb_model = gr.Dropdown(label=translations["dereveb_model"], value=list(reverb_models.keys())[0], choices=list(reverb_models.keys()), interactive=True, visible=separate_reverb.value)
-                    denoise_model = gr.Dropdown(label=translations["denoise_model"], value=list(denoise_models.keys())[0], choices=list(denoise_models.keys()), interactive=True, visible=enable_denoise.value and model_name.value in list(vr_models.keys()))
-    with gr.Row():
-        with gr.Column():
-            separate_button = gr.Button(translations["separator_tab"], variant="primary")
-    with gr.Row():
-        with gr.Column():
-            with gr.Group():
-                with gr.Row():
-                    shifts = gr.Slider(label=translations["shift"], info=translations["shift_info"], minimum=1, maximum=20, value=2, step=1, interactive=True)
-                    batch_size = gr.Slider(label=translations["batch_size"], info=translations["mdx_batch_size_info"], minimum=1, maximum=64, value=1, step=1, interactive=True, visible=False)
-                with gr.Row():
-                    segments_size = gr.Slider(label=translations["segments_size"], info=translations["segments_size_info"], minimum=32, maximum=3072, value=256, step=32, interactive=True)
-                    aggression = gr.Slider(label=translations['aggression'], info=translations["aggression_info"], minimum=1, maximum=50, value=5, step=1, interactive=True, visible=False)
-            drop_audio = gr.Files(label=translations["drop_audio"], file_types=file_types)
-            with gr.Accordion(translations["use_url"], open=False):
-                url = gr.Textbox(label=translations["url_audio"], value="", placeholder="https://www.youtube.com/...", scale=6)
-                download_button = gr.Button(translations["downloads"])
-        with gr.Column():
-            with gr.Group():
-                with gr.Row():
-                    overlap = gr.Radio(label=translations["overlap"], info=translations["overlap_info"], choices=["0.25", "0.5", "0.75", "0.99"], value="0.25", interactive=True)
-                with gr.Row():
-                    window_size = gr.Slider(label=translations["window_size"], info=translations["window_size_info"], minimum=320, maximum=1024, value=512, step=32, interactive=True, visible=False)
-                    hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=8192, value=1024, step=1, interactive=True, visible=False)
-                    post_process_threshold = gr.Slider(label=translations['post_process_threshold'], info=translations["post_process_threshold_info"], minimum=0.1, maximum=0.3, value=0.2, step=0.1, interactive=True, visible=False)
-            sample_rate = gr.Radio(choices=sample_rate_choice, value=44100, label=translations["sr"], info=translations["sr_info"], interactive=True)
-            with gr.Accordion(translations["input_output"], open=False):
-                export_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
-                input_audio = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, allow_custom_value=True, interactive=True)
-                refresh_audio = gr.Button(translations["refresh"])
-                output_dirs = gr.Textbox(label=translations["output_folder"], value="audios", placeholder="audios", info=translations["output_folder_info"], interactive=True)
-            audio_input = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
-    with gr.Row():
-        gr.Markdown(translations["output_separator"])
-    with gr.Row():
-        instruments_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["instruments"])
-        original_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["original_vocal"])
-        main_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["main_vocal"], visible=separate_backing.value)
-        backing_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["backing_vocal"], visible=separate_backing.value)
-    with gr.Row():
-        model_name.change(fn=lambda a: valueFalse_interactive(a in list(mdx_models.keys()) + list(vr_models.keys())), inputs=[model_name], outputs=[enable_denoise])
-        separate_backing.change(fn=lambda a, b: valueFalse_interactive(a or b), inputs=[separate_backing, separate_reverb], outputs=[enable_denoise])
-        separate_reverb.change(fn=lambda a, b: valueFalse_interactive(a or b), inputs=[separate_backing, separate_reverb], outputs=[enable_denoise])
-    with gr.Row():
-        input_audio.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio], outputs=[audio_input])
-        drop_audio.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[drop_audio], outputs=[input_audio])
-        refresh_audio.click(fn=change_audios_choices, inputs=[input_audio], outputs=[input_audio])
-    with gr.Row():
-        separate_backing.change(fn=lambda a: [visible(a) for _ in range(2)], inputs=[separate_backing], outputs=[main_vocals, backing_vocals])
-        download_button.click(
-            fn=download_url,
-            inputs=[url],
-            outputs=[input_audio, audio_input, url],
-            api_name='download_url'
-        )
-    with gr.Row():
-        model_name.change(
-            fn=separate_change,
-            inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
-            outputs=[
-                karaoke_model,
-                reverb_model,
-                overlap,
-                segments_size,
-                hop_length,
-                batch_size,
-                shifts,
-                window_size,
-                aggression,
-                post_process_threshold,
-                denoise_model,
-                enable_tta,
-                high_end_process,
-                enable_post_process,
-            ]
-        )
-    with gr.Row():
-        karaoke_model.change(
-            fn=separate_change,
-            inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
-            outputs=[
-                karaoke_model,
-                reverb_model,
-                overlap,
-                segments_size,
-                hop_length,
-                batch_size,
-                shifts,
-                window_size,
-                aggression,
-                post_process_threshold,
-                denoise_model,
-                enable_tta,
-                high_end_process,
-                enable_post_process,
-            ]
-        )
-        separate_backing.change(
-            fn=separate_change,
-            inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
-            outputs=[
-                karaoke_model,
-                reverb_model,
-                overlap,
-                segments_size,
-                hop_length,
-                batch_size,
-                shifts,
-                window_size,
-                aggression,
-                post_process_threshold,
-                denoise_model,
-                enable_tta,
-                high_end_process,
-                enable_post_process,
-            ]
-        )
-    with gr.Row():
-        reverb_model.change(
-            fn=separate_change,
-            inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
-            outputs=[
-                karaoke_model,
-                reverb_model,
-                overlap,
-                segments_size,
-                hop_length,
-                batch_size,
-                shifts,
-                window_size,
-                aggression,
-                post_process_threshold,
-                denoise_model,
-                enable_tta,
-                high_end_process,
-                enable_post_process,
-            ]
-        )
-        separate_reverb.change(
-            fn=separate_change,
-            inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
-            outputs=[
-                karaoke_model,
-                reverb_model,
-                overlap,
-                segments_size,
-                hop_length,
-                batch_size,
-                shifts,
-                window_size,
-                aggression,
-                post_process_threshold,
-                denoise_model,
-                enable_tta,
-                high_end_process,
-                enable_post_process,
-            ]
-        )
-    with gr.Row():
-        enable_denoise.change(
-            fn=separate_change,
-            inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
-            outputs=[
-                karaoke_model,
-                reverb_model,
-                overlap,
-                segments_size,
-                hop_length,
-                batch_size,
-                shifts,
-                window_size,
-                aggression,
-                post_process_threshold,
-                denoise_model,
-                enable_tta,
-                high_end_process,
-                enable_post_process,
-            ]
-        )
-        enable_post_process.change(
-            fn=separate_change,
-            inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
-            outputs=[
-                karaoke_model,
-                reverb_model,
-                overlap,
-                segments_size,
-                hop_length,
-                batch_size,
-                shifts,
-                window_size,
-                aggression,
-                post_process_threshold,
-                denoise_model,
-                enable_tta,
-                high_end_process,
-                enable_post_process,
-            ]
-        )
-    with gr.Row():
-        separate_button.click(
-            fn=separate_music,
-            inputs=[
-                input_audio,
-                output_dirs,
-                export_format,
-                model_name,
-                karaoke_model,
-                reverb_model,
-                denoise_model,
-                sample_rate,
-                shifts,
-                batch_size,
-                overlap,
-                aggression,
-                hop_length,
-                window_size,
-                segments_size,
-                post_process_threshold,
-                enable_tta,
-                enable_denoise,
-                high_end_process,
-                enable_post_process,
-                separate_backing,
-                separate_reverb
-            ],
-            outputs=[
-                original_vocals,
-                instruments_audio,
-                main_vocals,
-                backing_vocals
-            ],
-            api_name="separate_music"
-        )

main/app/tabs/inference/inference.py DELETED Viewed

@@ -1,30 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.variables import translations, configs
-from main.app.tabs.inference.child.convert import convert_tab
-from main.app.tabs.inference.child.separate import separate_tab
-from main.app.tabs.inference.child.convert_tts import convert_tts_tab
-from main.app.tabs.inference.child.convert_with_whisper import convert_with_whisper_tab
-def inference_tab():
-    with gr.TabItem(translations["inference"], visible=configs.get("inference_tab", True)):
-        with gr.TabItem(translations["separator_tab"], visible=configs.get("separator_tab", True)):
-            gr.Markdown(f"## {translations['separator_tab']}")
-            separate_tab()
-        with gr.TabItem(translations["convert_audio"], visible=configs.get("convert_tab", True)):
-            gr.Markdown(f"## {translations['convert_audio']}")
-            convert_tab()
-        with gr.TabItem(translations["convert_with_whisper"], visible=configs.get("convert_with_whisper", True)):
-            gr.Markdown(f"## {translations['convert_with_whisper']}")
-            convert_with_whisper_tab()
-        with gr.TabItem(translations["convert_text"], visible=configs.get("tts_tab", True)):
-            gr.Markdown(translations["convert_text_markdown"])
-            convert_tts_tab()

main/app/tabs/realtime/realtime.py DELETED Viewed

@@ -1,226 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.realtime import realtime_start, realtime_stop
-from main.app.variables import translations, configs, model_name, index_path, method_f0, embedders_mode, embedders_model
-from main.app.core.ui import change_models_choices, get_index, index_strength_show, unlock_f0, hoplength_show, change_embedders_mode, visible, audio_device, change_audio_device_choices, update_audio_device
-# Runs once when this module is imported. The keys of the two returned
-# mappings are the initial choices of the device dropdowns in realtime_tab.
-input_channels_map, output_channels_map = audio_device()
-def realtime_tab():
-    with gr.TabItem(translations["realtime"], visible=configs.get("realtime_tab", True)):
-        gr.Markdown(translations["realtime_markdown"])
-        with gr.Row():
-            gr.Markdown(translations["realtime_markdown_2"])
-        with gr.Row():
-            status = gr.Label(label=translations["realtime_latency"], value=translations["realtime_not_startup"])
-        with gr.Row():
-            monitor = gr.Checkbox(label=translations["monitor"], value=False, interactive=True)
-            exclusive_mode = gr.Checkbox(label=translations["exclusive_mode"], value=False, interactive=True)
-            vad_enabled = gr.Checkbox(label=translations["vad_enabled"], value=False, interactive=True)
-            clean_audio = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
-        with gr.Row():
-            with gr.Accordion(translations["audio_device"], open=True):
-                with gr.Row():
-                    input_audio_device = gr.Dropdown(label=translations["input_audio_device_label"], info=translations["input_audio_device_info"], choices=list(input_channels_map.keys()), value=list(input_channels_map.keys())[0] if len(list(input_channels_map.keys())) >= 1 else "", interactive=True)
-                    output_audio_device = gr.Dropdown(label=translations["output_audio_device_label"], info=translations["output_audio_device_info"], choices=list(output_channels_map.keys()), value=list(output_channels_map.keys())[0] if len(list(output_channels_map.keys())) >= 1 else "", interactive=True)
-                    monitor_output_device = gr.Dropdown(label=translations["monitor_output_device_label"], info=translations["monitor_output_device_info"], choices=list(output_channels_map.keys()), value=list(output_channels_map.keys())[0] if len(list(output_channels_map.keys())) >= 1 else "", interactive=True, visible=False)
-                with gr.Row():
-                    input_audio_gain = gr.Slider(minimum=0, maximum=2500, label=translations["input_audio_gain_label"], info=translations["input_audio_gain_info"], value=100, step=1, interactive=True)
-                    output_audio_gain = gr.Slider(minimum=0, maximum=4000, label=translations["output_audio_gain_label"], info=translations["output_audio_gain_info"], value=100, step=1, interactive=True)
-                    monitor_audio_gain = gr.Slider(minimum=0, maximum=4000, label=translations["monitor_audio_gain_label"], info=translations["monitor_audio_gain_info"], value=100, step=1, interactive=True, visible=False)
-                with gr.Row(visible=False) as asio_row:
-                    input_asio_channels = gr.Slider(minimum=-1, maximum=128, label=translations["input_asio_channels_label"], info=translations["input_asio_channels_info"], value=-1, step=1, interactive=True, visible=False)
-                    output_asio_channels = gr.Slider(minimum=-1, maximum=128, label=translations["output_asio_channels_label"], info=translations["output_asio_channels_info"], value=-1, step=1, interactive=True, visible=False)
-                    monitor_asio_channels = gr.Slider(minimum=-1, maximum=128, label=translations["monitor_asio_channels_label"], info=translations["monitor_asio_channels_info"], value=-1, step=1, interactive=True, visible=False)
-                with gr.Row():
-                    refresh_audio_device = gr.Button(value=translations["refresh_audio_device"], variant="secondary")
-        with gr.Row():
-            start_realtime = gr.Button(value=translations["start_realtime_button"], variant="primary", interactive=True)
-            stop_realtime = gr.Button(value=translations["stop_realtime_button"], variant="stop", interactive=False)
-        with gr.Row():
-            chunk_size = gr.Slider(minimum=2.7, maximum=2730.7, step=0.1, label=translations["chunk_size"], info=translations["chunk_size_info"], value=1024, interactive=True)
-            pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
-        with gr.Row():
-            with gr.Column():
-                with gr.Accordion(translations["model_accordion"], open=True):
-                    with gr.Row():
-                        model_pth = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
-                        model_index = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
-                    with gr.Row():
-                        model_refresh = gr.Button(translations["refresh"])
-                    with gr.Row():
-                        index_strength = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index.value != "")
-            with gr.Column():
-                with gr.Accordion(translations["f0_method"], open=True):
-                    with gr.Group():
-                        with gr.Row():
-                            onnx_f0_mode = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
-                            unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
-                        f0_method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=[m for m in method_f0 if m != "hybrid"], value="rmvpe", interactive=True)
-                    hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
-            with gr.Column():
-                with gr.Accordion(translations["hubert_model"], open=True):
-                    embed_mode = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
-                    embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
-                    custom_embedders = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders.value == "custom")
-        with gr.Row():
-                with gr.Accordion(translations["setting"], open=True):
-                    with gr.Row():
-                        f0_autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
-                        proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
-                    with gr.Group():
-                        with gr.Row():
-                            f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=f0_autotune.value)
-                            proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
-                        with gr.Row():
-                            rms_mix_rate = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
-                            protect = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
-                        with gr.Row():
-                            clean_strength = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
-                            filter_radius = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
-                    with gr.Column():
-                        silent_threshold = gr.Slider(minimum=-90, maximum=-60, label=translations["silent_threshold_label"], info=translations["silent_threshold_info"], value=-90, step=1, interactive=True)
-                        extra_convert_size = gr.Slider(minimum=0.1, maximum=5, label=translations["extra_convert_size_label"], info=translations["extra_convert_size_info"], value=0.5, step=0.1, interactive=True)
-                        cross_fade_overlap_size = gr.Slider(minimum=0.05, maximum=0.2, label=translations["cross_fade_overlap_size_label"], info=translations["cross_fade_overlap_size_info"], value=0.1, step=0.01, interactive=True)
-                    with gr.Row():
-                        vad_sensitivity = gr.Slider(minimum=0, maximum=3, label=translations["vad_sensitivity_label"], info=translations["vad_sensitivity_info"], value=3, step=1, interactive=True, visible=vad_enabled.value)
-                        vad_frame_ms = gr.Slider(minimum=10, maximum=30, label=translations["vad_frame_ms_label"], info=translations["vad_frame_ms_info"], value=30, step=10, interactive=True, visible=vad_enabled.value)
-        with gr.Row():
-            model_pth.change(
-                fn=get_index,
-                inputs=[model_pth],
-                outputs=[model_index]
-            )
-            model_index.change(
-                fn=index_strength_show,
-                inputs=[model_index],
-                outputs=[index_strength]
-            )
-            model_refresh.click(
-                fn=change_models_choices,
-                inputs=[],
-                outputs=[model_pth, model_index]
-            )
-        with gr.Row():
-            unlock_full_method.change(
-                fn=lambda f0_method: {"choices": [m for m in unlock_f0(f0_method)["choices"] if m != "hybrid"], "value": "rmvpe", "__type__": "update"},
-                inputs=[unlock_full_method],
-                outputs=[f0_method]
-            )
-            f0_method.change(
-                fn=lambda f0_method: hoplength_show(f0_method, None),
-                inputs=[f0_method],
-                outputs=[hop_length]
-            )
-            embed_mode.change(
-                fn=change_embedders_mode,
-                inputs=[embed_mode],
-                outputs=[embedders]
-            )
-        with gr.Row():
-            embedders.change(
-                fn=lambda embedders: visible(embedders == "custom"),
-                inputs=[embedders],
-                outputs=[custom_embedders]
-            )
-            input_audio_device.change(
-                fn=update_audio_device,
-                inputs=[input_audio_device, output_audio_device, monitor_output_device, monitor],
-                outputs=[monitor_output_device, monitor_audio_gain, monitor_asio_channels, asio_row, input_asio_channels, output_asio_channels, monitor_asio_channels]
-            )
-            output_audio_device.change(
-                fn=update_audio_device,
-                inputs=[input_audio_device, output_audio_device, monitor_output_device, monitor],
-                outputs=[monitor_output_device, monitor_audio_gain, monitor_asio_channels, asio_row, input_asio_channels, output_asio_channels, monitor_asio_channels]
-            )
-        with gr.Row():
-            monitor_output_device.change(
-                fn=update_audio_device,
-                inputs=[input_audio_device, output_audio_device, monitor_output_device, monitor],
-                outputs=[monitor_output_device, monitor_audio_gain, monitor_asio_channels, asio_row, input_asio_channels, output_asio_channels, monitor_asio_channels]
-            )
-            monitor.change(
-                fn=update_audio_device,
-                inputs=[input_audio_device, output_audio_device, monitor_output_device, monitor],
-                outputs=[monitor_output_device, monitor_audio_gain, monitor_asio_channels, asio_row, input_asio_channels, output_asio_channels, monitor_asio_channels]
-            )
-            f0_autotune.change(
-                fn=visible,
-                inputs=[f0_autotune],
-                outputs=[f0_autotune_strength]
-            )
-        with gr.Row():
-            proposal_pitch.change(
-                fn=visible,
-                inputs=[proposal_pitch],
-                outputs=[proposal_pitch_threshold]
-            )
-            vad_enabled.change(
-                fn=lambda a: [visible(a) for _ in range(2)],
-                inputs=[vad_enabled],
-                outputs=[vad_sensitivity, vad_frame_ms]
-            )
-            refresh_audio_device.click(
-                fn=change_audio_device_choices,
-                inputs=[],
-                outputs=[input_audio_device, output_audio_device, monitor_output_device]
-            )
-        with gr.Row():
-            clean_audio.change(
-                fn=visible,
-                inputs=[clean_audio],
-                outputs=[clean_strength]
-            )
-            start_realtime.click(
-                fn=realtime_start,
-                inputs=[
-                    monitor,
-                    exclusive_mode,
-                    vad_enabled,
-                    input_audio_device,
-                    output_audio_device,
-                    monitor_output_device,
-                    input_audio_gain,
-                    output_audio_gain,
-                    monitor_audio_gain,
-                    input_asio_channels,
-                    output_asio_channels,
-                    monitor_asio_channels,
-                    chunk_size,
-                    pitch,
-                    model_pth,
-                    model_index,
-                    index_strength,
-                    onnx_f0_mode,
-                    f0_method,
-                    hop_length,
-                    embed_mode,
-                    embedders,
-                    custom_embedders,
-                    f0_autotune,
-                    proposal_pitch,
-                    f0_autotune_strength,
-                    proposal_pitch_threshold,
-                    rms_mix_rate,
-                    protect,
-                    filter_radius,
-                    silent_threshold,
-                    extra_convert_size,
-                    cross_fade_overlap_size,
-                    vad_sensitivity,
-                    vad_frame_ms,
-                    clean_audio,
-                    clean_strength
-                ],
-                outputs=[status, start_realtime, stop_realtime]
-            )
-        stop_realtime.click(
-            fn=realtime_stop,
-            inputs=[],
-            outputs=[status, start_realtime, stop_realtime]
-        )

main/app/tabs/realtime/realtime_client.py DELETED Viewed

@@ -1,210 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.variables import translations, configs, model_name, index_path, method_f0, embedders_mode, embedders_model
-from main.app.core.ui import change_models_choices, get_index, index_strength_show, unlock_f0, hoplength_show, change_embedders_mode, visible, update_dropdowns_from_json, update_button_from_json
-def realtime_client_tab():
-    """Build the "realtime client" tab and wire its events.
-
-    Creates a ``gr.TabItem`` (visible unless ``configs["realtime_client_tab"]``
-    is falsy) containing the audio-device, model, F0, embedder and conversion
-    settings. The Start, Stop and Refresh-devices buttons are bound with
-    ``fn=None`` and a ``js=`` function name, so no Python callback is attached
-    to them directly; their results are written into two hidden ``gr.JSON``
-    components whose ``change`` events call the Python helpers
-    ``update_dropdowns_from_json`` and ``update_button_from_json``.
-
-    Takes no arguments and returns nothing.
-    """
-    with gr.TabItem(translations["realtime_client"], visible=configs.get("realtime_client_tab", True)):
-        gr.Markdown(translations["realtime_markdown"])
-        with gr.Row():
-            gr.Markdown(translations["realtime_markdown_2"])
-        with gr.Row():
-            # Status readout with a fixed elem_id (presumably targeted by the client-side script - TODO confirm).
-            gr.Label(label=translations["realtime_latency"], value=translations["realtime_not_startup"], elem_id="realtime-status-info")
-        with gr.Row():
-            monitor = gr.Checkbox(label=translations["monitor"], value=False, interactive=True)
-            vad_enabled = gr.Checkbox(label=translations["vad_enabled"], value=False, interactive=True)
-            clean_audio = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
-        with gr.Row():
-            with gr.Accordion(translations["audio_device"], open=True):
-                with gr.Row():
-                    # Device dropdowns start with no choices; they are filled by the json_audio_hidden change handler below.
-                    input_audio_device = gr.Dropdown(label=translations["input_audio_device_label"], info=translations["input_audio_device_info"], choices=[], value=None, interactive=True)
-                    output_audio_device = gr.Dropdown(label=translations["output_audio_device_label"], info=translations["output_audio_device_info"], choices=[], value=None, interactive=True)
-                    monitor_output_device = gr.Dropdown(label=translations["monitor_output_device_label"], info=translations["monitor_output_device_info"], choices=[], value=None, interactive=True, visible=False)
-                with gr.Row():
-                    input_audio_gain = gr.Slider(minimum=0, maximum=2500, label=translations["input_audio_gain_label"], info=translations["input_audio_gain_info"], value=100, step=1, interactive=True)
-                    output_audio_gain = gr.Slider(minimum=0, maximum=4000, label=translations["output_audio_gain_label"], info=translations["output_audio_gain_info"], value=100, step=1, interactive=True)
-                    monitor_audio_gain = gr.Slider(minimum=0, maximum=4000, label=translations["monitor_audio_gain_label"], info=translations["monitor_audio_gain_info"], value=100, step=1, interactive=True, visible=False)
-                with gr.Row():
-                    refresh_audio_device = gr.Button(value=translations["refresh_audio_device"], variant="secondary")
-        with gr.Row():
-            # Stop starts disabled; both buttons are updated from json_button_hidden (see below).
-            start_realtime = gr.Button(value=translations["start_realtime_button"], variant="primary", interactive=True)
-            stop_realtime = gr.Button(value=translations["stop_realtime_button"], variant="stop", interactive=False)
-        with gr.Row():
-            chunk_size = gr.Slider(minimum=2.7, maximum=2730.7, step=0.1, label=translations["chunk_size"], info=translations["chunk_size_info"], value=1024, interactive=True)
-            pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
-        with gr.Row():
-            with gr.Column():
-                with gr.Accordion(translations["model_accordion"], open=True):
-                    with gr.Row():
-                        model_pth = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
-                        model_index = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
-                    with gr.Row():
-                        model_refresh = gr.Button(translations["refresh"])
-                    with gr.Row():
-                        # The strength slider is only shown when an index file is preselected.
-                        index_strength = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index.value != "")
-            with gr.Column():
-                with gr.Accordion(translations["f0_method"], open=True):
-                    with gr.Group():
-                        with gr.Row():
-                            onnx_f0_mode = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
-                            unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
-                        # "hybrid" is excluded from the selectable F0 methods in this tab.
-                        f0_method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=[m for m in method_f0 if m != "hybrid"], value="rmvpe", interactive=True)
-                    hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
-            with gr.Column():
-                with gr.Accordion(translations["hubert_model"], open=True):
-                    embed_mode = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
-                    embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
-                    custom_embedders = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders.value == "custom")
-        with gr.Row():
-                # NOTE: this accordion is indented eight spaces deeper than its parent `with`; valid Python, left untouched.
-                with gr.Accordion(translations["setting"], open=True):
-                    with gr.Row():
-                        f0_autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
-                        proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
-                    with gr.Group():
-                        with gr.Row():
-                            # Both sliders start hidden because their controlling checkboxes default to False.
-                            f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=f0_autotune.value)
-                            proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
-                        with gr.Row():
-                            rms_mix_rate = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
-                            protect = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
-                        with gr.Row():
-                            clean_strength = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
-                            filter_radius = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
-                    with gr.Column():
-                        silent_threshold = gr.Slider(minimum=-90, maximum=-60, label=translations["silent_threshold_label"], info=translations["silent_threshold_info"], value=-90, step=1, interactive=True)
-                        extra_convert_size = gr.Slider(minimum=0.1, maximum=5, label=translations["extra_convert_size_label"], info=translations["extra_convert_size_info"], value=0.5, step=0.1, interactive=True)
-                        cross_fade_overlap_size = gr.Slider(minimum=0.05, maximum=0.2, label=translations["cross_fade_overlap_size_label"], info=translations["cross_fade_overlap_size_info"], value=0.1, step=0.01, interactive=True)
-                    with gr.Row():
-                        vad_sensitivity = gr.Slider(minimum=0, maximum=3, label=translations["vad_sensitivity_label"], info=translations["vad_sensitivity_info"], value=3, step=1, interactive=True, visible=vad_enabled.value)
-                        vad_frame_ms = gr.Slider(minimum=10, maximum=30, label=translations["vad_frame_ms_label"], info=translations["vad_frame_ms_info"], value=30, step=10, interactive=True, visible=vad_enabled.value)
-        with gr.Row():
-            # Invisible carriers: the JS-bound buttons write their results here, and the
-            # change events of these components call the Python helpers further down.
-            json_audio_hidden = gr.JSON(visible=False)
-            json_button_hidden = gr.JSON(visible=False)
-        with gr.Row():
-            model_pth.change(
-                fn=get_index,
-                inputs=[model_pth],
-                outputs=[model_index]
-            )
-            model_index.change(
-                fn=index_strength_show,
-                inputs=[model_index],
-                outputs=[index_strength]
-            )
-            model_refresh.click(
-                fn=change_models_choices,
-                inputs=[],
-                outputs=[model_pth, model_index]
-            )
-        with gr.Row():
-            # The lambda parameter is named f0_method but receives the value of the
-            # unlock_full_method checkbox. "hybrid" is filtered out of the returned
-            # choices and the selection is reset to "rmvpe".
-            unlock_full_method.change(
-                fn=lambda f0_method: {"choices": [m for m in unlock_f0(f0_method)["choices"] if m != "hybrid"], "value": "rmvpe", "__type__": "update"},
-                inputs=[unlock_full_method],
-                outputs=[f0_method]
-            )
-            # hoplength_show is called with None as its second argument (this tab has no
-            # hybrid-method selector; presumably that is what the slot is for - verify).
-            f0_method.change(
-                fn=lambda f0_method: hoplength_show(f0_method, None),
-                inputs=[f0_method],
-                outputs=[hop_length]
-            )
-            embed_mode.change(
-                fn=change_embedders_mode,
-                inputs=[embed_mode],
-                outputs=[embedders]
-            )
-        with gr.Row():
-            embedders.change(
-                fn=lambda embedders: visible(embedders == "custom"),
-                inputs=[embedders],
-                outputs=[custom_embedders]
-            )
-            f0_autotune.change(
-                fn=visible,
-                inputs=[f0_autotune],
-                outputs=[f0_autotune_strength]
-            )
-            clean_audio.change(
-                fn=visible,
-                inputs=[clean_audio],
-                outputs=[clean_strength]
-            )
-        with gr.Row():
-            proposal_pitch.change(
-                fn=visible,
-                inputs=[proposal_pitch],
-                outputs=[proposal_pitch_threshold]
-            )
-            # One visibility update per output component (two VAD sliders).
-            vad_enabled.change(
-                fn=lambda a: [visible(a) for _ in range(2)],
-                inputs=[vad_enabled],
-                outputs=[vad_sensitivity, vad_frame_ms]
-            )
-            # fn=None with js=...: the named JavaScript function is bound instead of a
-            # Python callback, and its result is stored in the hidden JSON component.
-            refresh_audio_device.click(
-                fn=None,
-                js="getAudioDevices",
-                inputs=[],
-                outputs=json_audio_hidden
-            )
-        with gr.Row():
-            json_audio_hidden.change(
-                fn=update_dropdowns_from_json,
-                inputs=[json_audio_hidden],
-                outputs=[input_audio_device, output_audio_device, monitor_output_device]
-            )
-            json_button_hidden.change(
-                fn=update_button_from_json,
-                inputs=[json_button_hidden],
-                outputs=[start_realtime, stop_realtime]
-            )
-        with gr.Row():
-            # NOTE(review): the input order below presumably has to match the positional
-            # parameters of the JS function StreamAudioRealtime - confirm before reordering.
-            start_realtime.click(
-                fn=None,
-                js="StreamAudioRealtime",
-                inputs=[
-                    monitor,
-                    vad_enabled,
-                    input_audio_device,
-                    output_audio_device,
-                    monitor_output_device,
-                    input_audio_gain,
-                    output_audio_gain,
-                    monitor_audio_gain,
-                    chunk_size,
-                    pitch,
-                    model_pth,
-                    model_index,
-                    index_strength,
-                    onnx_f0_mode,
-                    f0_method,
-                    hop_length,
-                    embed_mode,
-                    embedders,
-                    custom_embedders,
-                    f0_autotune,
-                    proposal_pitch,
-                    f0_autotune_strength,
-                    proposal_pitch_threshold,
-                    rms_mix_rate,
-                    protect,
-                    filter_radius,
-                    silent_threshold,
-                    extra_convert_size,
-                    cross_fade_overlap_size,
-                    vad_sensitivity,
-                    vad_frame_ms,
-                    clean_audio,
-                    clean_strength
-                ],
-                outputs=[json_button_hidden]
-            )
-        stop_realtime.click(
-            fn=None,
-            js="StopAudioStream",
-            inputs=[],
-            outputs=[json_button_hidden]
-        )

main/app/tabs/training/child/create_dataset.py DELETED Viewed

@@ -1,282 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.training import create_dataset
-from main.app.core.ui import visible, valueFalse_interactive, create_dataset_change
-from main.app.variables import translations, sample_rate_choice, uvr_model, reverb_models, denoise_models, vr_models, mdx_models
-def create_dataset_tab():
-    """Build the dataset-creation panel and wire its events.
-
-    Lays out the source URL and output folder fields plus the optional
-    separation, de-reverb, denoise, cleaning and skip controls, and binds the
-    main button to ``create_dataset`` (registered under the API name
-    ``create_dataset``).
-
-    Six controls refresh the same group of separation widgets through
-    ``create_dataset_change`` with identical inputs and outputs. That wiring
-    is declared once and applied through a small local helper instead of
-    being repeated verbatim; the registration order and the number of
-    ``gr.Row`` containers are the same as before.
-
-    Takes no arguments and returns nothing.
-    """
-    with gr.Row():
-        gr.Markdown(translations["create_dataset_markdown_2"])
-    with gr.Group():
-        with gr.Row():
-            separate = gr.Checkbox(label=translations["separator_tab"], value=False, interactive=True)
-            clean_dataset = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
-            skip_seconds = gr.Checkbox(label=translations["skip"], value=False, interactive=True)
-            separate_reverb = gr.Checkbox(label=translations["dereveb_audio"], value=False, interactive=False)
-        # Separation-only switches; the whole row stays hidden until `separate` is ticked.
-        with gr.Row(visible=False) as row:
-            enable_tta = gr.Checkbox(label=translations["enable_tta"], value=False, interactive=False)
-            high_end_process = gr.Checkbox(label=translations["high_end_process"], value=False, interactive=False)
-            enable_post_process = gr.Checkbox(label=translations["enable_post_process"], value=False, interactive=False)
-            enable_denoise = gr.Checkbox(label=translations["denoise_mdx"], value=False, interactive=False)
-    with gr.Row():
-        dataset_url = gr.Textbox(label=translations["url_audio"], info=translations["create_dataset_url"], value="", placeholder="https://www.youtube.com/...", interactive=True, scale=5)
-        output_dataset = gr.Textbox(label=translations["output_data"], info=translations["output_data_info"], value="dataset", placeholder="dataset", interactive=True)
-    with gr.Row():
-        create_dataset_button = gr.Button(translations["createdataset"], variant="primary", scale=2, min_width=4000)
-    with gr.Row(visible=False) as row_2:
-        model_name = gr.Dropdown(label=translations["separator_model"], value=uvr_model[0], choices=uvr_model, interactive=True)
-        reverb_model = gr.Dropdown(label=translations["dereveb_model"], value=list(reverb_models.keys())[0], choices=list(reverb_models.keys()), interactive=True)
-        denoise_model = gr.Dropdown(label=translations["denoise_model"], value=list(denoise_models.keys())[0], choices=list(denoise_models.keys()), interactive=True, visible=False)
-    with gr.Row():
-        with gr.Column(visible=False) as row_3:
-            with gr.Group():
-                with gr.Row():
-                    overlap = gr.Radio(label=translations["overlap"], info=translations["overlap_info"], choices=["0.25", "0.5", "0.75", "0.99"], value="0.25", interactive=True)
-                with gr.Row():
-                    window_size = gr.Slider(label=translations["window_size"], info=translations["window_size_info"], minimum=320, maximum=1024, value=512, step=32, interactive=True, visible=False)
-                with gr.Row():
-                    shifts = gr.Slider(label=translations["shift"], info=translations["shift_info"], minimum=1, maximum=20, value=2, step=1, interactive=True)
-                    segments_size = gr.Slider(label=translations["segments_size"], info=translations["segments_size_info"], minimum=32, maximum=3072, value=256, step=32, interactive=True)
-                with gr.Row():
-                    batch_size = gr.Slider(label=translations["batch_size"], info=translations["mdx_batch_size_info"], minimum=1, maximum=64, value=1, step=1, interactive=True, visible=False)
-                    hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=8192, value=1024, step=1, interactive=True, visible=False)
-                with gr.Row():
-                    post_process_threshold = gr.Slider(label=translations['post_process_threshold'], info=translations["post_process_threshold_info"], minimum=0.1, maximum=0.3, value=0.2, step=0.1, interactive=True, visible=False)
-                    aggression = gr.Slider(label=translations['aggression'], info=translations["aggression_info"], minimum=1, maximum=50, value=5, step=1, interactive=True, visible=False)
-        with gr.Column():
-            sample_rate = gr.Radio(choices=sample_rate_choice, value=48000, label=translations["sr"], info=translations["sr_info"], interactive=True)
-            clean_strength = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.5, label=translations["clean_strength"], info=translations["clean_strength_info"], interactive=True, visible=False)
-            with gr.Row():
-                skip_start = gr.Textbox(label=translations["skip_start"], info=translations["skip_start_info"], value="", placeholder="0,...", interactive=True, visible=skip_seconds.value)
-                skip_end = gr.Textbox(label=translations["skip_end"], info=translations["skip_end_info"], value="", placeholder="0,...", interactive=True, visible=skip_seconds.value)
-            create_dataset_info = gr.Textbox(label=translations["create_dataset_info"], value="", interactive=False, lines=2)
-    # Inputs and outputs shared by every control that re-evaluates the separation options.
-    separation_inputs = [
-        model_name,
-        reverb_model,
-        enable_post_process,
-        separate_reverb,
-        enable_denoise,
-    ]
-    separation_outputs = [
-        reverb_model,
-        overlap,
-        segments_size,
-        hop_length,
-        batch_size,
-        shifts,
-        window_size,
-        aggression,
-        post_process_threshold,
-        denoise_model,
-        enable_tta,
-        high_end_process,
-        enable_post_process,
-    ]
-    def refresh_separation_options(*components):
-        # Register create_dataset_change on each component, in the order given.
-        # Fresh list copies are passed so no two listeners share a list object.
-        for component in components:
-            component.change(
-                fn=create_dataset_change,
-                inputs=list(separation_inputs),
-                outputs=list(separation_outputs),
-            )
-    with gr.Row():
-        # Ticking `separate` reveals the three separation containers at once...
-        separate.change(
-            fn=lambda a: [visible(a) for _ in range(3)],
-            inputs=[separate],
-            outputs=[row, row_2, row_3],
-        )
-        # ...passes its value to valueFalse_interactive to update the de-reverb checkbox...
-        separate.change(
-            fn=valueFalse_interactive,
-            inputs=[separate],
-            outputs=[separate_reverb],
-        )
-        # ...and refreshes the separation widgets.
-        refresh_separation_options(separate)
-    with gr.Row():
-        refresh_separation_options(model_name, reverb_model, denoise_model)
-    with gr.Row():
-        refresh_separation_options(separate_reverb, enable_denoise)
-    with gr.Row():
-        skip_seconds.change(
-            fn=lambda a: [visible(a) for _ in range(2)],
-            inputs=[skip_seconds],
-            outputs=[skip_start, skip_end],
-        )
-        clean_dataset.change(
-            fn=visible,
-            inputs=[clean_dataset],
-            outputs=[clean_strength],
-        )
-    with gr.Row():
-        # valueFalse_interactive receives True only when the chosen model is a key of mdx_models or vr_models.
-        model_name.change(
-            fn=lambda a: valueFalse_interactive(a in list(mdx_models.keys()) + list(vr_models.keys())),
-            inputs=[model_name],
-            outputs=[enable_denoise],
-        )
-        separate_reverb.change(
-            fn=valueFalse_interactive,
-            inputs=[separate_reverb],
-            outputs=[enable_denoise],
-        )
-    with gr.Row():
-        # NOTE(review): the order below presumably mirrors create_dataset's positional parameters - confirm before reordering.
-        create_dataset_button.click(
-            fn=create_dataset,
-            inputs=[
-                dataset_url,
-                output_dataset,
-                skip_seconds,
-                skip_start,
-                skip_end,
-                separate,
-                model_name,
-                reverb_model,
-                denoise_model,
-                sample_rate,
-                shifts,
-                batch_size,
-                overlap,
-                aggression,
-                hop_length,
-                window_size,
-                segments_size,
-                post_process_threshold,
-                enable_tta,
-                enable_denoise,
-                high_end_process,
-                enable_post_process,
-                separate_reverb,
-                clean_dataset,
-                clean_strength
-            ],
-            outputs=[create_dataset_info],
-            api_name="create_dataset"
-        )

main/app/tabs/training/child/create_reference.py DELETED Viewed

@@ -1,97 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.training import create_reference
-from main.app.core.ui import visible, change_audios_choices, unlock_f0, shutil_move, change_embedders_mode
-from main.app.variables import translations, paths_for_files, method_f0, hybrid_f0_method, file_types, configs, embedders_model, embedders_mode
-def create_reference_tab():
-    """Build the reference-creation panel and wire its events.
-
-    Lays out the pitch, autotune, F0-method, embedder and audio-input
-    controls and binds the main button to ``create_reference`` (registered
-    under the API name ``create_reference``).
-
-    Uploaded files are moved into ``configs["audios_path"]`` and the first
-    moved file is selected in the audio dropdown. An empty upload payload
-    leaves the dropdown unchanged instead of raising.
-
-    Takes no arguments and returns nothing.
-    """
-    with gr.Row():
-        gr.Markdown(translations["create_reference_markdown_2"])
-    with gr.Row():
-        pitch_guidance = gr.Checkbox(label=translations["training_pitch"], value=True, interactive=True)
-        use_energy = gr.Checkbox(label=translations["train&energy"], value=False, interactive=True)
-        f0_autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
-        proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
-    with gr.Row():
-        create_reference_button = gr.Button(translations["create_reference"], variant="primary")
-    with gr.Row():
-        f0_up_key = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
-        proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
-    with gr.Row():
-        filter_radius = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
-        f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=f0_autotune.value)
-    with gr.Row():
-        with gr.Column():
-            with gr.Accordion(translations["input_output"], open=False):
-                with gr.Column():
-                    input_audio = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
-                    reference_name = gr.Textbox(label=translations["reference_name"], value="reference", placeholder="reference", info=translations["reference_name_info"], interactive=True)
-                with gr.Column():
-                    refresh_audio = gr.Button(translations["refresh"])
-                with gr.Column():
-                    upload_audio = gr.Files(label=translations["drop_audio"], file_types=file_types)
-                with gr.Column():
-                    play_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
-        # The whole F0 column is shown or hidden by the pitch_guidance checkbox (see below).
-        with gr.Column() as f0_method_column:
-            with gr.Accordion(label=translations["f0_method"], open=False):
-                with gr.Group():
-                    with gr.Row():
-                        onnx_f0 = gr.Checkbox(label=translations["f0_onnx_mode"], value=False, interactive=True)
-                        unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], value=False, interactive=True)
-                    f0_method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
-                    f0_hybrid_method = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=f0_method.value == "hybrid")
-                    with gr.Row():
-                        alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
-        with gr.Column():
-            with gr.Accordion(label=translations["hubert_model"], open=False):
-                with gr.Row():
-                    version = gr.Radio(label=translations["training_version"], info=translations["training_version_info"], choices=["v1", "v2"], value="v2", interactive=True)
-                with gr.Group():
-                    embedder_mode = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
-                    embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
-                with gr.Row():
-                    embedders_custom = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders.value == "custom")
-    with gr.Row():
-        create_reference_info = gr.Textbox(label=translations["reference_info"], value="", interactive=False, lines=2)
-    def store_uploads(audio_in):
-        # Move every uploaded file into the audio folder and return the first moved
-        # path for the dropdown. With nothing uploaded, return a bare update so the
-        # dropdown keeps its current value (previously this raised on `[0]`).
-        if not audio_in:
-            return {"__type__": "update"}
-        moved = [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in]
-        return moved[0]
-    with gr.Row():
-        f0_autotune.change(fn=visible, inputs=[f0_autotune], outputs=[f0_autotune_strength])
-        proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
-        unlock_full_method.change(fn=unlock_f0, inputs=[unlock_full_method], outputs=[f0_method])
-    with gr.Row():
-        # Selecting a path in the dropdown loads the same value into the preview player.
-        input_audio.change(fn=lambda audio: audio, inputs=[input_audio], outputs=[play_audio])
-        refresh_audio.click(fn=change_audios_choices, inputs=[input_audio], outputs=[input_audio])
-        # The hybrid selector and alpha slider are only shown for the "hybrid" method.
-        f0_method.change(fn=lambda method: [visible(method == "hybrid") for _ in range(2)], inputs=[f0_method], outputs=[f0_hybrid_method, alpha])
-    with gr.Row():
-        upload_audio.upload(fn=store_uploads, inputs=[upload_audio], outputs=[input_audio])
-        embedder_mode.change(fn=change_embedders_mode, inputs=[embedder_mode], outputs=[embedders])
-        embedders.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders], outputs=[embedders_custom])
-    with gr.Row():
-        pitch_guidance.change(fn=visible, inputs=[pitch_guidance], outputs=[f0_method_column])
-        # NOTE(review): embedders_custom and f0_hybrid_method are created and toggled
-        # above but are not passed to create_reference - confirm this is intentional.
-        create_reference_button.click(
-            fn=create_reference,
-            inputs=[
-                input_audio,
-                reference_name,
-                pitch_guidance,
-                use_energy,
-                version,
-                embedders,
-                embedder_mode,
-                f0_method,
-                onnx_f0,
-                f0_up_key,
-                filter_radius,
-                f0_autotune,
-                f0_autotune_strength,
-                proposal_pitch,
-                proposal_pitch_threshold,
-                alpha
-            ],
-            outputs=[create_reference_info],
-            api_name="create_reference"
-        )

main/app/tabs/training/child/training.py DELETED Viewed

@@ -1,259 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.core.process import zip_file
-from main.app.core.training import preprocess, extract, create_index, training
-from main.app.variables import translations, model_name, index_path, method_f0, embedders_mode, embedders_model, pretrainedD, pretrainedG, config, file_types, hybrid_f0_method, reference_list
-from main.app.core.ui import gr_warning, visible, unlock_f0, hoplength_show, change_models_choices, get_gpu_info, change_embedders_mode, pitch_guidance_lock, vocoders_lock, unlock_ver, unlock_vocoder, change_pretrained_choices, gpu_number_str, shutil_move, change_reference_choices
-def training_model_tab():
-    with gr.Row():
-        gr.Markdown(translations["training_markdown"])
-    with gr.Row():
-        with gr.Column():
-            with gr.Row():
-                with gr.Column():
-                    training_name = gr.Textbox(label=translations["modelname"], info=translations["training_model_name"], value="", placeholder=translations["modelname"], interactive=True)
-                    training_sr = gr.Radio(label=translations["sample_rate"], info=translations["sample_rate_info"], choices=["32k", "40k", "48k"], value="48k", interactive=True)
-                    training_ver = gr.Radio(label=translations["training_version"], info=translations["training_version_info"], choices=["v1", "v2"], value="v2", interactive=True)
-                    with gr.Row():
-                        clean_dataset = gr.Checkbox(label=translations["clear_dataset"], value=False, interactive=True)
-                        process_effects = gr.Checkbox(label=translations["preprocess_effect"], value=False, interactive=True)
-                        training_f0 = gr.Checkbox(label=translations["training_pitch"], value=True, interactive=True)
-                        custom_reference = gr.Checkbox(label=translations["custom_reference"], value=False, interactive=True)
-                        checkpointing1 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
-                        upload = gr.Checkbox(label=translations["upload_dataset"], value=False, interactive=True)
-                    with gr.Row():
-                        preprocess_split_audio_mode = gr.Radio(label=translations["split_audio_mode"], info=translations["split_audio_mode_info"], value="Automatic", choices=["Automatic", "Simple", "Skip"], interactive=True)
-                        preprocess_normalization_mode = gr.Radio(label=translations["normalization_mode"], info=translations["normalization_mode_info"], value="none", choices=["none", "pre", "post"], interactive=True)
-                    with gr.Row(visible=custom_reference.value) as custom_reference_row:
-                        with gr.Accordion(translations["custom_reference"], open=True):
-                            reference_name = gr.Dropdown(label=translations["reference_name"], info=translations["reference_name_info"], choices=reference_list, value=reference_list[0] if len(reference_list) >= 1 else "", allow_custom_value=True, interactive=True)
-                            reference_refresh = gr.Button(translations["refresh"], scale=2)
-                    with gr.Row(visible=clean_dataset.value) as clean_dataset_row:
-                        clean_dataset_strength = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.7, step=0.1, interactive=True)
-                with gr.Column():
-                    preprocess_button = gr.Button(translations["preprocess_button"], scale=2)
-                    upload_dataset = gr.Files(label=translations["drop_audio"], file_types=file_types, visible=upload.value)
-                    preprocess_info = gr.Textbox(label=translations["preprocess_info"], value="", interactive=False, container=True, lines=2)
-        with gr.Column():
-            with gr.Row():
-                with gr.Column():
-                    with gr.Accordion(label=translations["f0_method"], open=False):
-                        with gr.Group():
-                            with gr.Row():
-                                onnx_f0_mode2 = gr.Checkbox(label=translations["f0_onnx_mode"], value=False, interactive=True)
-                                unlock_full_method4 = gr.Checkbox(label=translations["f0_unlock"], value=False, interactive=True)
-                                autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
-                            extract_method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
-                            extract_hybrid_method = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=extract_method.value == "hybrid")
-                        extract_hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
-                        f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune.value)
-                        alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
-                    with gr.Accordion(label=translations["hubert_model"], open=False):
-                        with gr.Group():
-                            embed_mode2 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
-                            extract_embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
-                        with gr.Row():
-                            extract_embedders_custom = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=extract_embedders.value == "custom")
-                with gr.Column():
-                    extract_button = gr.Button(translations["extract_button"], scale=2)
-                    extract_info = gr.Textbox(label=translations["extract_info"], value="", interactive=False, lines=2)
-        with gr.Column():
-            with gr.Row():
-                with gr.Column():
-                    total_epochs = gr.Slider(label=translations["total_epoch"], info=translations["total_epoch_info"], minimum=1, maximum=10000, value=300, step=1, interactive=True)
-                    save_epochs = gr.Slider(label=translations["save_epoch"], info=translations["save_epoch_info"], minimum=1, maximum=10000, value=50, step=1, interactive=True)
-                with gr.Column():
-                    index_button = gr.Button(f"3. {translations['create_index']}", variant="primary", scale=2)
-                    training_button = gr.Button(f"4. {translations['training_model']}", variant="primary", scale=2)
-            with gr.Row():
-                with gr.Accordion(label=translations["setting"], open=False):
-                    with gr.Row():
-                        index_algorithm = gr.Radio(label=translations["index_algorithm"], info=translations["index_algorithm_info"], choices=["Auto", "Faiss", "KMeans"], value="Auto", interactive=True)
-                    with gr.Row():
-                        cache_in_gpu = gr.Checkbox(label=translations["cache_in_gpu"], info=translations["cache_in_gpu_info"], value=True, interactive=True)
-                        rms_extract = gr.Checkbox(label=translations["train&energy"], info=translations["train&energy_info"], value=False, interactive=True)
-                        overtraining_detector = gr.Checkbox(label=translations["overtraining_detector"], info=translations["overtraining_detector_info"], value=False, interactive=True)
-                    with gr.Row():
-                        custom_dataset = gr.Checkbox(label=translations["custom_dataset"], info=translations["custom_dataset_info"], value=False, interactive=True)
-                        save_only_latest = gr.Checkbox(label=translations["save_only_latest"], info=translations["save_only_latest_info"], value=True, interactive=True)
-                        save_every_weights = gr.Checkbox(label=translations["save_every_weights"], info=translations["save_every_weights_info"], value=True, interactive=True)
-                    with gr.Row():
-                        clean_up = gr.Checkbox(label=translations["cleanup_training"], info=translations["cleanup_training_info"], value=False, interactive=True)
-                        not_use_pretrain = gr.Checkbox(label=translations["not_use_pretrain_2"], info=translations["not_use_pretrain_info"], value=False, interactive=True)
-                        custom_pretrain = gr.Checkbox(label=translations["custom_pretrain"], info=translations["custom_pretrain_info"], value=False, interactive=True)
-                    with gr.Column():
-                        dataset_path = gr.Textbox(label=translations["dataset_folder"], value="dataset", interactive=True, visible=custom_dataset.value)
-                    with gr.Column():
-                        with gr.Row(visible=False) as simple_option:
-                            chunk_len = gr.Slider(minimum=0.5, maximum=5.0, value=3.0, step=0.1, label=translations["chunk_length"], info=translations["chunk_length_info"], interactive=True)
-                            overlap_len = gr.Slider(minimum=0.0, maximum=0.4, value=0.3, step=0.1, label=translations["overlap_length"], info=translations["overlap_length_info"], interactive=True)
-                        threshold = gr.Slider(minimum=1, maximum=100, value=50, step=1, label=translations["threshold"], interactive=True, visible=overtraining_detector.value)
-                        with gr.Accordion(translations["setting_cpu_gpu"], open=False):
-                            with gr.Column():
-                                gpu_number = gr.Textbox(label=translations["gpu_number"], value=gpu_number_str(), info=translations["gpu_number_info"], interactive=True)
-                                gpu_info = gr.Textbox(label=translations["gpu_info"], value=get_gpu_info(), info=translations["gpu_info_2"], interactive=False)
-                                cpu_core = gr.Slider(label=translations["cpu_core"], info=translations["cpu_core_info"], minimum=1, maximum=os.cpu_count(), value=os.cpu_count(), step=1, interactive=True)
-                                train_batch_size = gr.Slider(label=translations["batch_size"], info=translations["batch_size_info"], minimum=1, maximum=64, value=8, step=1, interactive=True)
-                    with gr.Group():
-                        multiscale_mel_loss = gr.Checkbox(label=translations["multiscale_mel_loss"], info=translations["multiscale_mel_loss_info"], value=False, interactive=True)
-                        vocoders = gr.Radio(label=translations["vocoder"], info=translations["vocoder_info"], choices=["Default", "MRF-HiFi-GAN", "RefineGAN"], value="Default", interactive=True)
-                    with gr.Row():
-                        deterministic = gr.Checkbox(label=translations["deterministic"], info=translations["deterministic_info"], value=False, interactive=config.device.startswith("cuda"))
-                        benchmark = gr.Checkbox(label=translations["benchmark"], info=translations["benchmark_info"], value=False, interactive=config.device.startswith("cuda"))
-                    with gr.Row():
-                        optimizer = gr.Radio(label=translations["optimizer"], info=translations["optimizer_info"], value="AdamW", choices=["AdamW", "RAdam", "AnyPrecisionAdamW"], interactive=True)
-                    with gr.Row():
-                        model_author = gr.Textbox(label=translations["training_author"], info=translations["training_author_info"], value="", placeholder=translations["training_author"], interactive=True)
-                    with gr.Row():
-                        with gr.Column():
-                            with gr.Accordion(translations["custom_pretrain_info"], open=False, visible=custom_pretrain.value and not not_use_pretrain.value) as pretrain_setting:
-                                pretrained_D = gr.Dropdown(label=translations["pretrain_file"].format(dg="D"), choices=pretrainedD, value=pretrainedD[0] if len(pretrainedD) > 0 else '', interactive=True, allow_custom_value=True)
-                                pretrained_G = gr.Dropdown(label=translations["pretrain_file"].format(dg="G"), choices=pretrainedG, value=pretrainedG[0] if len(pretrainedG) > 0 else '', interactive=True, allow_custom_value=True)
-                                refresh_pretrain = gr.Button(translations["refresh"], scale=2)
-            with gr.Row():
-                training_info = gr.Textbox(label=translations["train_info"], value="", interactive=False, lines=3)
-            with gr.Row():
-                with gr.Column():
-                    with gr.Accordion(translations["export_model"], open=False):
-                        with gr.Row():
-                            model_file = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
-                            index_file = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
-                        with gr.Row():
-                            refresh_file = gr.Button(f"1. {translations['refresh']}", scale=2)
-                            zip_model = gr.Button(translations["zip_model"], variant="primary", scale=2)
-                        with gr.Row():
-                            zip_output = gr.File(label=translations["output_zip"], file_types=[".zip"], interactive=False, visible=False)
-    with gr.Row():
-        vocoders.change(fn=pitch_guidance_lock, inputs=[vocoders], outputs=[training_f0])
-        training_f0.change(fn=vocoders_lock, inputs=[training_f0, vocoders], outputs=[vocoders])
-        unlock_full_method4.change(fn=unlock_f0, inputs=[unlock_full_method4], outputs=[extract_method])
-    with gr.Row():
-        refresh_file.click(fn=change_models_choices, inputs=[], outputs=[model_file, index_file])
-        zip_model.click(fn=zip_file, inputs=[training_name, model_file, index_file], outputs=[zip_output])
-        dataset_path.change(fn=lambda folder: os.makedirs(folder, exist_ok=True), inputs=[dataset_path], outputs=[])
-    with gr.Row():
-        upload.change(fn=visible, inputs=[upload], outputs=[upload_dataset])
-        overtraining_detector.change(fn=visible, inputs=[overtraining_detector], outputs=[threshold])
-        clean_dataset.change(fn=visible, inputs=[clean_dataset], outputs=[clean_dataset_row])
-    with gr.Row():
-        custom_dataset.change(fn=lambda custom_dataset: [visible(custom_dataset), "dataset"],inputs=[custom_dataset], outputs=[dataset_path, dataset_path])
-        training_ver.change(fn=unlock_vocoder, inputs=[training_ver, vocoders], outputs=[vocoders])
-        vocoders.change(fn=unlock_ver, inputs=[training_ver, vocoders], outputs=[training_ver])
-    with gr.Row():
-        custom_reference.change(fn=visible, inputs=[custom_reference], outputs=[custom_reference_row])
-        extract_method.change(fn=lambda method, hybrid: [visible(method == "hybrid"), visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[extract_method, extract_hybrid_method], outputs=[extract_hybrid_method, alpha, extract_hop_length])
-        extract_hybrid_method.change(fn=hoplength_show, inputs=[extract_method, extract_hybrid_method], outputs=[extract_hop_length])
-    with gr.Row():
-        autotune.change(fn=visible, inputs=[autotune], outputs=[f0_autotune_strength])
-        preprocess_split_audio_mode.change(fn=lambda a: visible(a == "Simple"), inputs=[preprocess_split_audio_mode], outputs=[simple_option])
-        upload_dataset.upload(
-            fn=lambda files, folder: [shutil_move(f.name, os.path.join(folder, os.path.split(f.name)[1])) for f in files] if folder != "" else gr_warning(translations["dataset_folder1"]),
-            inputs=[upload_dataset, dataset_path],
-            outputs=[],
-            api_name="upload_dataset"
-        )
-    with gr.Row():
-        not_use_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
-        custom_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
-        refresh_pretrain.click(fn=change_pretrained_choices, inputs=[], outputs=[pretrained_D, pretrained_G])
-    with gr.Row():
-        preprocess_button.click(
-            fn=preprocess,
-            inputs=[
-                training_name,
-                training_sr,
-                cpu_core,
-                preprocess_split_audio_mode,
-                process_effects,
-                dataset_path,
-                clean_dataset,
-                clean_dataset_strength,
-                chunk_len,
-                overlap_len,
-                preprocess_normalization_mode
-            ],
-            outputs=[preprocess_info],
-            api_name="preprocess"
-        )
-    with gr.Row():
-        embed_mode2.change(fn=change_embedders_mode, inputs=[embed_mode2], outputs=[extract_embedders])
-        extract_embedders.change(fn=lambda extract_embedders: visible(extract_embedders == "custom"), inputs=[extract_embedders], outputs=[extract_embedders_custom])
-        reference_refresh.click(fn=change_reference_choices, inputs=[], outputs=[reference_name])
-    with gr.Row():
-        extract_button.click(
-            fn=extract,
-            inputs=[
-                training_name,
-                training_ver,
-                extract_method,
-                training_f0,
-                extract_hop_length,
-                cpu_core,
-                gpu_number,
-                training_sr,
-                extract_embedders,
-                extract_embedders_custom,
-                onnx_f0_mode2,
-                embed_mode2,
-                autotune,
-                f0_autotune_strength,
-                extract_hybrid_method,
-                rms_extract,
-                alpha
-            ],
-            outputs=[extract_info],
-            api_name="extract"
-        )
-    with gr.Row():
-        index_button.click(
-            fn=create_index,
-            inputs=[
-                training_name,
-                training_ver,
-                index_algorithm
-            ],
-            outputs=[training_info],
-            api_name="create_index"
-        )
-    with gr.Row():
-        training_button.click(
-            fn=training,
-            inputs=[
-                training_name,
-                training_ver,
-                save_epochs,
-                save_only_latest,
-                save_every_weights,
-                total_epochs,
-                training_sr,
-                train_batch_size,
-                gpu_number,
-                training_f0,
-                not_use_pretrain,
-                custom_pretrain,
-                pretrained_G,
-                pretrained_D,
-                overtraining_detector,
-                threshold,
-                clean_up,
-                cache_in_gpu,
-                model_author,
-                vocoders,
-                checkpointing1,
-                deterministic,
-                benchmark,
-                optimizer,
-                rms_extract,
-                custom_reference,
-                reference_name,
-                multiscale_mel_loss
-            ],
-            outputs=[training_info],
-            api_name="training_model"
-        )

main/app/tabs/training/training.py DELETED Viewed

@@ -1,25 +0,0 @@
-import os
-import sys
-import gradio as gr
-sys.path.append(os.getcwd())
-from main.app.variables import translations, configs
-from main.app.tabs.training.child.training import training_model_tab
-from main.app.tabs.training.child.create_dataset import create_dataset_tab
-from main.app.tabs.training.child.create_reference import create_reference_tab
-def training_tab():
-    with gr.TabItem(translations["training_model"], visible=configs.get("create_and_training_tab", True)):
-        with gr.TabItem(translations["createdataset"], visible=configs.get("create_dataset_tab", True)):
-            gr.Markdown(translations["create_dataset_markdown"])
-            create_dataset_tab()
-        with gr.TabItem(translations["create_reference"], visible=configs.get("create_reference_tab", True)):
-            gr.Markdown(translations["create_reference_markdown"])
-            create_reference_tab()
-        with gr.TabItem(translations["training_model"], visible=configs.get("training_tab", True)):
-            gr.Markdown(f"## {translations['training_model']}")
-            training_model_tab()

main/app/variables.py DELETED Viewed

@@ -1,117 +0,0 @@
-import os
-import sys
-import csv
-import json
-import codecs
-import logging
-import urllib.request
-import logging.handlers
-sys.path.append(os.getcwd())
-from main.configs.config import Config
-logger = logging.getLogger(__name__)
-logger.propagate = False
-config = Config()
-python = sys.executable
-translations = config.translations
-configs_json = os.path.join("main", "configs", "config.json")
-configs = json.load(open(configs_json, "r"))
-if not logger.hasHandlers():
-    console_handler = logging.StreamHandler()
-    console_formatter = logging.Formatter(fmt="\n%(asctime)s.%(msecs)03d | %(levelname)s | %(module)s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
-    console_handler.setFormatter(console_formatter)
-    console_handler.setLevel(logging.DEBUG if config.debug_mode else logging.INFO)
-    file_handler = logging.handlers.RotatingFileHandler(os.path.join(configs["logs_path"], "app.log"), maxBytes=5*1024*1024, backupCount=3, encoding='utf-8')
-    file_formatter = logging.Formatter(fmt="\n%(asctime)s.%(msecs)03d | %(levelname)s | %(module)s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
-    file_handler.setFormatter(file_formatter)
-    file_handler.setLevel(logging.DEBUG)
-    logger.addHandler(console_handler)
-    logger.addHandler(file_handler)
-    logger.setLevel(logging.DEBUG)
-if config.device in ["cpu", "mps", "ocl:0"] and configs.get("fp16", False):
-    logger.warning(translations["fp16_not_support"])
-    configs["fp16"] = config.is_half = False
-    with open(configs_json, "w") as f:
-        json.dump(configs, f, indent=4)
-models = {}
-model_options = {}
-method_f0 = ["mangio-crepe-full", "crepe-full", "fcpe", "rmvpe", "harvest", "pyin", "hybrid"]
-method_f0_full = ["pm-ac", "pm-cc", "pm-shs", "dio", "mangio-crepe-tiny", "mangio-crepe-small", "mangio-crepe-medium", "mangio-crepe-large", "mangio-crepe-full", "crepe-tiny", "crepe-small", "crepe-medium", "crepe-large", "crepe-full", "fcpe", "fcpe-legacy", "fcpe-previous", "rmvpe", "rmvpe-clipping", "rmvpe-medfilt", "rmvpe-clipping-medfilt", "harvest", "yin", "pyin", "swipe", "piptrack", "penn", "mangio-penn", "djcm", "djcm-clipping", "djcm-medfilt", "djcm-clipping-medfilt", "swift", "pesto", "hybrid"]
-hybrid_f0_method = ["hybrid[pm+dio]", "hybrid[pm+crepe-tiny]", "hybrid[pm+crepe]", "hybrid[pm+fcpe]", "hybrid[pm+rmvpe]", "hybrid[pm+harvest]", "hybrid[pm+yin]", "hybrid[dio+crepe-tiny]", "hybrid[dio+crepe]", "hybrid[dio+fcpe]", "hybrid[dio+rmvpe]", "hybrid[dio+harvest]", "hybrid[dio+yin]", "hybrid[crepe-tiny+crepe]", "hybrid[crepe-tiny+fcpe]", "hybrid[crepe-tiny+rmvpe]", "hybrid[crepe-tiny+harvest]", "hybrid[crepe+fcpe]", "hybrid[crepe+rmvpe]", "hybrid[crepe+harvest]", "hybrid[crepe+yin]", "hybrid[fcpe+rmvpe]", "hybrid[fcpe+harvest]", "hybrid[fcpe+yin]", "hybrid[rmvpe+harvest]", "hybrid[rmvpe+yin]", "hybrid[harvest+yin]"]
-embedders_mode = ["fairseq", "onnx", "transformers", "spin", "whisper"]
-embedders_model = ["contentvec_base", "hubert_base", "vietnamese_hubert_base", "japanese_hubert_base", "korean_hubert_base", "chinese_hubert_base", "portuguese_hubert_base", "custom"]
-spin_model = ["spin-v1", "spin-v2"]
-whisper_model = ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"]
-paths_for_files = sorted([os.path.abspath(os.path.join(root, f)) for root, _, files in os.walk(configs["audios_path"]) for f in files if os.path.splitext(f)[1].lower() in (".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3")])
-reference_list = sorted([name for name in os.listdir(configs["reference_path"]) if os.path.exists(os.path.join(configs["reference_path"], name)) and os.path.isdir(os.path.join(configs["reference_path"], name))])
-model_name = sorted(list(model for model in os.listdir(configs["weights_path"]) if model.endswith((".pth", ".onnx")) and not model.startswith("G_") and not model.startswith("D_")))
-index_path = sorted([os.path.join(root, name) for root, _, files in os.walk(configs["logs_path"], topdown=False) for name in files if name.endswith(".index") and "trained" not in name])
-pretrainedD = [model for model in os.listdir(configs["pretrained_custom_path"]) if model.endswith(".pth") and "D" in model]
-pretrainedG = [model for model in os.listdir(configs["pretrained_custom_path"]) if model.endswith(".pth") and "G" in model]
-presets_file = sorted(list(f for f in os.listdir(configs["presets_path"]) if f.endswith(".conversion.json")))
-audio_effect_presets_file = sorted(list(f for f in os.listdir(configs["presets_path"]) if f.endswith(".effect.json")))
-f0_file = sorted([os.path.abspath(os.path.join(root, f)) for root, _, files in os.walk(configs["f0_path"]) for f in files if f.endswith(".txt")])
-file_types = [".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"]
-export_format_choices = ["wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"]
-language = configs.get("language", "vi-VN")
-theme = configs.get("theme", "NoCrypt/miku")
-edgetts = configs.get("edge_tts", ["vi-VN-HoaiMyNeural", "vi-VN-NamMinhNeural"])
-google_tts_voice = configs.get("google_tts_voice", ["vi", "en"])
-vr_models = configs.get("vr_models", "")
-demucs_models = configs.get("demucs_models", "")
-mdx_models = configs.get("mdx_models", "")
-karaoke_models = configs.get("karaoke_models", "")
-reverb_models = configs.get("reverb_models", "")
-denoise_models = configs.get("denoise_models", "")
-uvr_model = list(demucs_models.keys()) + list(vr_models.keys()) + list(mdx_models.keys())
-font = configs.get("font", "https://fonts.googleapis.com/css2?family=Courgette&display=swap")
-sample_rate_choice = [8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000, 96000]
-csv_path = configs["csv_path"]
-if "--allow_all_disk" in sys.argv and sys.platform == "win32":
-    try:
-        import win32api
-    except:
-        os.system(f"{python} -m pip install pywin32")
-        import win32api
-    allow_disk = win32api.GetLogicalDriveStrings().split('\x00')[:-1]
-else: allow_disk = []
-try:
-    if os.path.exists(csv_path): reader = list(csv.DictReader(open(csv_path, newline='', encoding='utf-8')))
-    else:
-        reader = list(csv.DictReader([line.decode('utf-8') for line in urllib.request.urlopen(codecs.decode("uggcf://qbpf.tbbtyr.pbz/fcernqfurrgf/q/1gNHnDeRULtEfz1Yieaw14USUQjWJy0Oq9k0DrCrjApb/rkcbeg?sbezng=pfi&tvq=1977693859", "rot13")).readlines()]))
-        writer = csv.DictWriter(open(csv_path, mode='w', newline='', encoding='utf-8'), fieldnames=reader[0].keys())
-        writer.writeheader()
-        writer.writerows(reader)
-    for row in reader:
-        filename = row['Filename']
-        url = None
-        for value in row.values():
-            if isinstance(value, str) and "huggingface" in value:
-                url = value
-                break
-        if url: models[filename] = url
-except:
-    pass

main/configs/config.json DELETED Viewed

@@ -1,622 +0,0 @@
-{
-    "language": "vi-VN",
-    "support_language": [
-        "en-US",
-        "vi-VN"
-    ],
-    "theme": "NoCrypt/miku",
-    "themes": [
-        "NoCrypt/miku",
-        "gstaff/xkcd",
-        "JohnSmith9982/small_and_pretty",
-        "ParityError/Interstellar",
-        "earneleh/paris",
-        "shivi/calm_seafoam",
-        "Hev832/Applio",
-        "YTheme/Minecraft",
-        "gstaff/sketch",
-        "SebastianBravo/simci_css",
-        "allenai/gradio-theme",
-        "Nymbo/Nymbo_Theme_5",
-        "lone17/kotaemon",
-        "Zarkel/IBM_Carbon_Theme",
-        "SherlockRamos/Feliz",
-        "freddyaboulton/dracula_revamped",
-        "freddyaboulton/bad-theme-space",
-        "gradio/dracula_revamped",
-        "abidlabs/dracula_revamped",
-        "gradio/dracula_test",
-        "gradio/seafoam",
-        "gradio/glass",
-        "gradio/monochrome",
-        "gradio/soft",
-        "gradio/default",
-        "gradio/base",
-        "abidlabs/pakistan",
-        "dawood/microsoft_windows",
-        "ysharma/steampunk",
-        "ysharma/huggingface",
-        "abidlabs/Lime",
-        "freddyaboulton/this-theme-does-not-exist-2",
-        "aliabid94/new-theme",
-        "aliabid94/test2",
-        "aliabid94/test3",
-        "aliabid94/test4",
-        "abidlabs/banana",
-        "freddyaboulton/test-blue",
-        "gstaff/whiteboard",
-        "ysharma/llamas",
-        "abidlabs/font-test",
-        "YenLai/Superhuman",
-        "bethecloud/storj_theme",
-        "sudeepshouche/minimalist",
-        "knotdgaf/gradiotest",
-        "ParityError/Anime",
-        "Ajaxon6255/Emerald_Isle",
-        "ParityError/LimeFace",
-        "finlaymacklon/smooth_slate",
-        "finlaymacklon/boxy_violet",
-        "derekzen/stardust",
-        "EveryPizza/Cartoony-Gradio-Theme",
-        "Ifeanyi/Cyanister",
-        "Tshackelton/IBMPlex-DenseReadable",
-        "snehilsanyal/scikit-learn",
-        "Himhimhim/xkcd",
-        "nota-ai/theme",
-        "rawrsor1/Everforest",
-        "rottenlittlecreature/Moon_Goblin",
-        "abidlabs/test-yellow",
-        "abidlabs/test-yellow3",
-        "idspicQstitho/dracula_revamped",
-        "kfahn/AnimalPose",
-        "HaleyCH/HaleyCH_Theme",
-        "simulKitke/dracula_test",
-        "braintacles/CrimsonNight",
-        "wentaohe/whiteboardv2",
-        "reilnuud/polite",
-        "remilia/Ghostly",
-        "Franklisi/darkmode",
-        "coding-alt/soft",
-        "xiaobaiyuan/theme_land",
-        "step-3-profit/Midnight-Deep",
-        "xiaobaiyuan/theme_demo",
-        "Taithrah/Minimal",
-        "Insuz/SimpleIndigo",
-        "zkunn/Alipay_Gradio_theme",
-        "Insuz/Mocha",
-        "xiaobaiyuan/theme_brief",
-        "Ama434/434-base-Barlow",
-        "Ama434/def_barlow",
-        "Ama434/neutral-barlow",
-        "dawood/dracula_test",
-        "nuttea/Softblue",
-        "BlueDancer/Alien_Diffusion",
-        "naughtondale/monochrome",
-        "Dagfinn1962/standard",
-        "default"
-    ],
-    "mdx_models": {
-        "Main_340": "UVR-MDX-NET_Main_340.onnx",
-        "Main_390": "UVR-MDX-NET_Main_390.onnx",
-        "Main_406": "UVR-MDX-NET_Main_406.onnx",
-        "Main_427": "UVR-MDX-NET_Main_427.onnx",
-        "Main_438": "UVR-MDX-NET_Main_438.onnx",
-        "Inst_full_292": "UVR-MDX-NET-Inst_full_292.onnx",
-        "Inst_HQ_1": "UVR-MDX-NET-Inst_HQ_1.onnx",
-        "Inst_HQ_2": "UVR-MDX-NET-Inst_HQ_2.onnx",
-        "Inst_HQ_3": "UVR-MDX-NET-Inst_HQ_3.onnx",
-        "Inst_HQ_4": "UVR-MDX-NET-Inst_HQ_4.onnx",
-        "Inst_HQ_5": "UVR-MDX-NET-Inst_HQ_5.onnx",
-        "Kim_Vocal_1": "Kim_Vocal_1.onnx",
-        "Kim_Vocal_2": "Kim_Vocal_2.onnx",
-        "Kim_Inst": "Kim_Inst.onnx",
-        "Inst_187_beta": "UVR-MDX-NET_Inst_187_beta.onnx",
-        "Inst_82_beta": "UVR-MDX-NET_Inst_82_beta.onnx",
-        "Inst_90_beta": "UVR-MDX-NET_Inst_90_beta.onnx",
-        "Voc_FT": "UVR-MDX-NET-Voc_FT.onnx",
-        "Crowd_HQ": "UVR-MDX-NET_Crowd_HQ_1.onnx",
-        "MDXNET_9482": "UVR_MDXNET_9482.onnx",
-        "Inst_1": "UVR-MDX-NET-Inst_1.onnx",
-        "Inst_2": "UVR-MDX-NET-Inst_2.onnx",
-        "Inst_3": "UVR-MDX-NET-Inst_3.onnx",
-        "MDXNET_1_9703": "UVR_MDXNET_1_9703.onnx",
-        "MDXNET_2_9682": "UVR_MDXNET_2_9682.onnx",
-        "MDXNET_3_9662": "UVR_MDXNET_3_9662.onnx",
-        "Inst_Main": "UVR-MDX-NET-Inst_Main.onnx",
-        "MDXNET_Main": "UVR_MDXNET_Main.onnx"
-    },
-    "demucs_models": {
-        "HT-Tuned": "htdemucs_ft.yaml",
-        "HT-Normal": "htdemucs.yaml",
-        "HD_MMI": "hdemucs_mmi.yaml",
-        "HT_6S": "htdemucs_6s.yaml"
-    },
-    "vr_models": {
-        "HP-1": "1_HP-UVR.pth",
-        "HP-2": "2_HP-UVR.pth",
-        "HP-Vocal-1": "3_HP-Vocal-UVR.pth",
-        "HP-Vocal-2": "4_HP-Vocal-UVR.pth",
-        "HP2-1": "7_HP2-UVR.pth",
-        "HP2-2": "8_HP2-UVR.pth",
-        "HP2-3": "9_HP2-UVR.pth",
-        "SP-2B-1": "10_SP-UVR-2B-32000-1.pth",
-        "SP-2B-2": "11_SP-UVR-2B-32000-2.pth",
-        "SP-3B-1": "12_SP-UVR-3B-44100.pth",
-        "SP-4B-1": "13_SP-UVR-4B-44100-1.pth",
-        "SP-4B-2": "14_SP-UVR-4B-44100-2.pth",
-        "SP-MID-1": "15_SP-UVR-MID-44100-1.pth",
-        "SP-MID-2": "16_SP-UVR-MID-44100-2.pth"
-    },
-    "karaoke_models": {
-        "MDX-Version-1": "UVR_MDXNET_KARA.onnx",
-        "MDX-Version-2": "UVR_MDXNET_KARA_2.onnx",
-        "VR-Version-1": "5_HP-Karaoke-UVR.pth",
-        "VR-Version-2": "6_HP-Karaoke-UVR.pth"
-    },
-    "reverb_models": {
-        "MDX-Reverb": "Reverb_HQ_By_FoxJoy.onnx",
-        "VR-Reverb": "UVR-DeEcho-DeReverb.pth",
-        "Echo-Aggressive": "UVR-De-Echo-Aggressive.pth",
-        "Echo-Normal": "UVR-De-Echo-Normal.pth"
-    },
-    "denoise_models": {
-        "Lite": "UVR-DeNoise-Lite.pth",
-        "Normal": "UVR-DeNoise.pth"
-    },
-    "edge_tts": [
-        "af-ZA-AdriNeural",
-        "af-ZA-WillemNeural",
-        "sq-AL-AnilaNeural",
-        "sq-AL-IlirNeural",
-        "am-ET-AmehaNeural",
-        "am-ET-MekdesNeural",
-        "ar-DZ-AminaNeural",
-        "ar-DZ-IsmaelNeural",
-        "ar-BH-AliNeural",
-        "ar-BH-LailaNeural",
-        "ar-EG-SalmaNeural",
-        "ar-EG-ShakirNeural",
-        "ar-IQ-BasselNeural",
-        "ar-IQ-RanaNeural",
-        "ar-JO-SanaNeural",
-        "ar-JO-TaimNeural",
-        "ar-KW-FahedNeural",
-        "ar-KW-NouraNeural",
-        "ar-LB-LaylaNeural",
-        "ar-LB-RamiNeural",
-        "ar-LY-ImanNeural",
-        "ar-LY-OmarNeural",
-        "ar-MA-JamalNeural",
-        "ar-MA-MounaNeural",
-        "ar-OM-AbdullahNeural",
-        "ar-OM-AyshaNeural",
-        "ar-QA-AmalNeural",
-        "ar-QA-MoazNeural",
-        "ar-SA-HamedNeural",
-        "ar-SA-ZariyahNeural",
-        "ar-SY-AmanyNeural",
-        "ar-SY-LaithNeural",
-        "ar-TN-HediNeural",
-        "ar-TN-ReemNeural",
-        "ar-AE-FatimaNeural",
-        "ar-AE-HamdanNeural",
-        "ar-YE-MaryamNeural",
-        "ar-YE-SalehNeural",
-        "az-AZ-BabekNeural",
-        "az-AZ-BanuNeural",
-        "bn-BD-NabanitaNeural",
-        "bn-BD-PradeepNeural",
-        "bn-IN-BashkarNeural",
-        "bn-IN-TanishaaNeural",
-        "bs-BA-GoranNeural",
-        "bs-BA-VesnaNeural",
-        "bg-BG-BorislavNeural",
-        "bg-BG-KalinaNeural",
-        "my-MM-NilarNeural",
-        "my-MM-ThihaNeural",
-        "ca-ES-EnricNeural",
-        "ca-ES-JoanaNeural",
-        "zh-HK-HiuGaaiNeural",
-        "zh-HK-HiuMaanNeural",
-        "zh-HK-WanLungNeural",
-        "zh-CN-XiaoxiaoNeural",
-        "zh-CN-XiaoyiNeural",
-        "zh-CN-YunjianNeural",
-        "zh-CN-YunxiNeural",
-        "zh-CN-YunxiaNeural",
-        "zh-CN-YunyangNeural",
-        "zh-CN-liaoning-XiaobeiNeural",
-        "zh-TW-HsiaoChenNeural",
-        "zh-TW-YunJheNeural",
-        "zh-TW-HsiaoYuNeural",
-        "zh-CN-shaanxi-XiaoniNeural",
-        "hr-HR-GabrijelaNeural",
-        "hr-HR-SreckoNeural",
-        "cs-CZ-AntoninNeural",
-        "cs-CZ-VlastaNeural",
-        "da-DK-ChristelNeural",
-        "da-DK-JeppeNeural",
-        "nl-BE-ArnaudNeural",
-        "nl-BE-DenaNeural",
-        "nl-NL-ColetteNeural",
-        "nl-NL-FennaNeural",
-        "nl-NL-MaartenNeural",
-        "en-AU-NatashaNeural",
-        "en-AU-WilliamNeural",
-        "en-CA-ClaraNeural",
-        "en-CA-LiamNeural",
-        "en-HK-SamNeural",
-        "en-HK-YanNeural",
-        "en-IN-NeerjaExpressiveNeural",
-        "en-IN-NeerjaNeural",
-        "en-IN-PrabhatNeural",
-        "en-IE-ConnorNeural",
-        "en-IE-EmilyNeural",
-        "en-KE-AsiliaNeural",
-        "en-KE-ChilembaNeural",
-        "en-NZ-MitchellNeural",
-        "en-NZ-MollyNeural",
-        "en-NG-AbeoNeural",
-        "en-NG-EzinneNeural",
-        "en-PH-JamesNeural",
-        "en-PH-RosaNeural",
-        "en-SG-LunaNeural",
-        "en-SG-WayneNeural",
-        "en-ZA-LeahNeural",
-        "en-ZA-LukeNeural",
-        "en-TZ-ElimuNeural",
-        "en-TZ-ImaniNeural",
-        "en-GB-LibbyNeural",
-        "en-GB-MaisieNeural",
-        "en-GB-RyanNeural",
-        "en-GB-SoniaNeural",
-        "en-GB-ThomasNeural",
-        "en-US-AvaMultilingualNeural",
-        "en-US-AndrewMultilingualNeural",
-        "en-US-EmmaMultilingualNeural",
-        "en-US-BrianMultilingualNeural",
-        "en-US-AvaNeural",
-        "en-US-AndrewNeural",
-        "en-US-EmmaNeural",
-        "en-US-BrianNeural",
-        "en-US-AnaNeural",
-        "en-US-AriaNeural",
-        "en-US-ChristopherNeural",
-        "en-US-EricNeural",
-        "en-US-GuyNeural",
-        "en-US-JennyNeural",
-        "en-US-MichelleNeural",
-        "en-US-RogerNeural",
-        "en-US-SteffanNeural",
-        "et-EE-AnuNeural",
-        "et-EE-KertNeural",
-        "fil-PH-AngeloNeural",
-        "fil-PH-BlessicaNeural",
-        "fi-FI-HarriNeural",
-        "fi-FI-NooraNeural",
-        "fr-BE-CharlineNeural",
-        "fr-BE-GerardNeural",
-        "fr-CA-ThierryNeural",
-        "fr-CA-AntoineNeural",
-        "fr-CA-JeanNeural",
-        "fr-CA-SylvieNeural",
-        "fr-FR-VivienneMultilingualNeural",
-        "fr-FR-RemyMultilingualNeural",
-        "fr-FR-DeniseNeural",
-        "fr-FR-EloiseNeural",
-        "fr-FR-HenriNeural",
-        "fr-CH-ArianeNeural",
-        "fr-CH-FabriceNeural",
-        "gl-ES-RoiNeural",
-        "gl-ES-SabelaNeural",
-        "ka-GE-EkaNeural",
-        "ka-GE-GiorgiNeural",
-        "de-AT-IngridNeural",
-        "de-AT-JonasNeural",
-        "de-DE-SeraphinaMultilingualNeural",
-        "de-DE-FlorianMultilingualNeural",
-        "de-DE-AmalaNeural",
-        "de-DE-ConradNeural",
-        "de-DE-KatjaNeural",
-        "de-DE-KillianNeural",
-        "de-CH-JanNeural",
-        "de-CH-LeniNeural",
-        "el-GR-AthinaNeural",
-        "el-GR-NestorasNeural",
-        "gu-IN-DhwaniNeural",
-        "gu-IN-NiranjanNeural",
-        "he-IL-AvriNeural",
-        "he-IL-HilaNeural",
-        "hi-IN-MadhurNeural",
-        "hi-IN-SwaraNeural",
-        "hu-HU-NoemiNeural",
-        "hu-HU-TamasNeural",
-        "is-IS-GudrunNeural",
-        "is-IS-GunnarNeural",
-        "id-ID-ArdiNeural",
-        "id-ID-GadisNeural",
-        "ga-IE-ColmNeural",
-        "ga-IE-OrlaNeural",
-        "it-IT-GiuseppeNeural",
-        "it-IT-DiegoNeural",
-        "it-IT-ElsaNeural",
-        "it-IT-IsabellaNeural",
-        "ja-JP-KeitaNeural",
-        "ja-JP-NanamiNeural",
-        "jv-ID-DimasNeural",
-        "jv-ID-SitiNeural",
-        "kn-IN-GaganNeural",
-        "kn-IN-SapnaNeural",
-        "kk-KZ-AigulNeural",
-        "kk-KZ-DauletNeural",
-        "km-KH-PisethNeural",
-        "km-KH-SreymomNeural",
-        "ko-KR-HyunsuNeural",
-        "ko-KR-InJoonNeural",
-        "ko-KR-SunHiNeural",
-        "lo-LA-ChanthavongNeural",
-        "lo-LA-KeomanyNeural",
-        "lv-LV-EveritaNeural",
-        "lv-LV-NilsNeural",
-        "lt-LT-LeonasNeural",
-        "lt-LT-OnaNeural",
-        "mk-MK-AleksandarNeural",
-        "mk-MK-MarijaNeural",
-        "ms-MY-OsmanNeural",
-        "ms-MY-YasminNeural",
-        "ml-IN-MidhunNeural",
-        "ml-IN-SobhanaNeural",
-        "mt-MT-GraceNeural",
-        "mt-MT-JosephNeural",
-        "mr-IN-AarohiNeural",
-        "mr-IN-ManoharNeural",
-        "mn-MN-BataaNeural",
-        "mn-MN-YesuiNeural",
-        "ne-NP-HemkalaNeural",
-        "ne-NP-SagarNeural",
-        "nb-NO-FinnNeural",
-        "nb-NO-PernilleNeural",
-        "ps-AF-GulNawazNeural",
-        "ps-AF-LatifaNeural",
-        "fa-IR-DilaraNeural",
-        "fa-IR-FaridNeural",
-        "pl-PL-MarekNeural",
-        "pl-PL-ZofiaNeural",
-        "pt-BR-ThalitaNeural",
-        "pt-BR-AntonioNeural",
-        "pt-BR-FranciscaNeural",
-        "pt-PT-DuarteNeural",
-        "pt-PT-RaquelNeural",
-        "ro-RO-AlinaNeural",
-        "ro-RO-EmilNeural",
-        "ru-RU-DmitryNeural",
-        "ru-RU-SvetlanaNeural",
-        "sr-RS-NicholasNeural",
-        "sr-RS-SophieNeural",
-        "si-LK-SameeraNeural",
-        "si-LK-ThiliniNeural",
-        "sk-SK-LukasNeural",
-        "sk-SK-ViktoriaNeural",
-        "sl-SI-PetraNeural",
-        "sl-SI-RokNeural",
-        "so-SO-MuuseNeural",
-        "so-SO-UbaxNeural",
-        "es-AR-ElenaNeural",
-        "es-AR-TomasNeural",
-        "es-BO-MarceloNeural",
-        "es-BO-SofiaNeural",
-        "es-CL-CatalinaNeural",
-        "es-CL-LorenzoNeural",
-        "es-ES-XimenaNeural",
-        "es-CO-GonzaloNeural",
-        "es-CO-SalomeNeural",
-        "es-CR-JuanNeural",
-        "es-CR-MariaNeural",
-        "es-CU-BelkysNeural",
-        "es-CU-ManuelNeural",
-        "es-DO-EmilioNeural",
-        "es-DO-RamonaNeural",
-        "es-EC-AndreaNeural",
-        "es-EC-LuisNeural",
-        "es-SV-LorenaNeural",
-        "es-SV-RodrigoNeural",
-        "es-GQ-JavierNeural",
-        "es-GQ-TeresaNeural",
-        "es-GT-AndresNeural",
-        "es-GT-MartaNeural",
-        "es-HN-CarlosNeural",
-        "es-HN-KarlaNeural",
-        "es-MX-DaliaNeural",
-        "es-MX-JorgeNeural",
-        "es-NI-FedericoNeural",
-        "es-NI-YolandaNeural",
-        "es-PA-MargaritaNeural",
-        "es-PA-RobertoNeural",
-        "es-PY-MarioNeural",
-        "es-PY-TaniaNeural",
-        "es-PE-AlexNeural",
-        "es-PE-CamilaNeural",
-        "es-PR-KarinaNeural",
-        "es-PR-VictorNeural",
-        "es-ES-AlvaroNeural",
-        "es-ES-ElviraNeural",
-        "es-US-AlonsoNeural",
-        "es-US-PalomaNeural",
-        "es-UY-MateoNeural",
-        "es-UY-ValentinaNeural",
-        "es-VE-PaolaNeural",
-        "es-VE-SebastianNeural",
-        "su-ID-JajangNeural",
-        "su-ID-TutiNeural",
-        "sw-KE-RafikiNeural",
-        "sw-KE-ZuriNeural",
-        "sw-TZ-DaudiNeural",
-        "sw-TZ-RehemaNeural",
-        "sv-SE-MattiasNeural",
-        "sv-SE-SofieNeural",
-        "ta-IN-PallaviNeural",
-        "ta-IN-ValluvarNeural",
-        "ta-MY-KaniNeural",
-        "ta-MY-SuryaNeural",
-        "ta-SG-AnbuNeural",
-        "ta-SG-VenbaNeural",
-        "ta-LK-KumarNeural",
-        "ta-LK-SaranyaNeural",
-        "te-IN-MohanNeural",
-        "te-IN-ShrutiNeural",
-        "th-TH-NiwatNeural",
-        "th-TH-PremwadeeNeural",
-        "tr-TR-AhmetNeural",
-        "tr-TR-EmelNeural",
-        "uk-UA-OstapNeural",
-        "uk-UA-PolinaNeural",
-        "ur-IN-GulNeural",
-        "ur-IN-SalmanNeural",
-        "ur-PK-AsadNeural",
-        "ur-PK-UzmaNeural",
-        "uz-UZ-MadinaNeural",
-        "uz-UZ-SardorNeural",
-        "vi-VN-HoaiMyNeural",
-        "vi-VN-NamMinhNeural",
-        "cy-GB-AledNeural",
-        "cy-GB-NiaNeural",
-        "zu-ZA-ThandoNeural",
-        "zu-ZA-ThembaNeural"
-    ],
-    "google_tts_voice": [
-        "af",
-        "am",
-        "ar",
-        "bg",
-        "bn",
-        "bs",
-        "ca",
-        "cs",
-        "cy",
-        "da",
-        "de",
-        "el",
-        "en",
-        "es",
-        "et",
-        "eu",
-        "fi",
-        "fr",
-        "fr-CA",
-        "gl",
-        "gu",
-        "ha",
-        "hi",
-        "hr",
-        "hu",
-        "id",
-        "is",
-        "it",
-        "iw",
-        "ja",
-        "jw",
-        "km",
-        "kn",
-        "ko",
-        "la",
-        "lt",
-        "lv",
-        "ml",
-        "mr",
-        "ms",
-        "my",
-        "ne",
-        "nl",
-        "no",
-        "pa",
-        "pl",
-        "pt",
-        "pt-PT",
-        "ro",
-        "ru",
-        "si",
-        "sk",
-        "sq",
-        "sr",
-        "su",
-        "sv",
-        "sw",
-        "ta",
-        "te",
-        "th",
-        "tl",
-        "tr",
-        "uk",
-        "ur",
-        "vi",
-        "yue",
-        "zh-CN",
-        "zh-TW",
-        "zh"
-    ],
-    "fp16": false,
-    "editing_tab": true,
-    "inference_tab": true,
-    "create_and_training_tab": true,
-    "extra_tab": true,
-    "separator_tab": true,
-    "convert_tab": true,
-    "convert_with_whisper": true,
-    "tts_tab": true,
-    "effects_tab": true,
-    "quirk": true,
-    "create_dataset_tab": true,
-    "training_tab": true,
-    "fushion_tab": true,
-    "read_tab": true,
-    "onnx_tab": true,
-    "downloads_tab": true,
-    "f0_extractor_tab": true,
-    "settings_tab": true,
-    "create_srt_tab": true,
-    "realtime_tab": true,
-    "realtime_client_tab": true,
-    "create_reference_tab": true,
-    "font": "https://fonts.googleapis.com/css2?family=Roboto&display=swap",
-    "app_port": 7860,
-    "tensorboard_port": 6870,
-    "num_of_restart": 5,
-    "server_name": "0.0.0.0",
-    "app_show_error": true,
-    "delete_exists_file": false,
-    "audio_effects_path": "main/inference/audio_effects.py",
-    "convert_path": "main/inference/conversion/convert.py",
-    "separate_path": "main/inference/separate_music.py",
-    "create_dataset_path": "main/inference/create_dataset.py",
-    "preprocess_path": "main/inference/preprocess/preprocess.py",
-    "extract_path": "main/inference/extracting/extract.py",
-    "create_index_path": "main/inference/create_index.py",
-    "train_path": "main/inference/training/train.py",
-    "create_reference_path": "main/inference/create_reference.py",
-    "ico_path": "assets/ico.png",
-    "csv_path": "assets/spreadsheet.csv",
-    "weights_path": "assets/weights",
-    "logs_path": "assets/logs",
-    "binary_path": "assets/binary",
-    "f0_path": "assets/f0",
-    "language_path": "assets/languages",
-    "presets_path": "assets/presets",
-    "embedders_path": "assets/models/embedders",
-    "predictors_path": "assets/models/predictors",
-    "pretrained_custom_path": "assets/models/pretrained_custom",
-    "pretrained_v1_path": "assets/models/pretrained_v1",
-    "pretrained_v2_path": "assets/models/pretrained_v2",
-    "speaker_diarization_path": "assets/models/speaker_diarization",
-    "uvr5_path": "assets/models/uvr5",
-    "audios_path": "audios",
-    "reference_path": "assets/logs/reference",
-    "demucs_segments_enable": true,
-    "demucs_cpu_mode": false,
-    "limit_f0": 8,
-    "debug_mode": false,
-    "pretrain_verify_shape": true,
-    "pretrain_strict": true,
-    "cpu_mode": false,
-    "brain": false,
-    "discord_presence": true
-}

main/configs/config.py DELETED Viewed

@@ -1,131 +0,0 @@
-import os
-import sys
-import json
-import torch
-import onnxruntime
-sys.path.append(os.getcwd())
-from main.library.backends import directml, opencl, zluda
-# Relative paths of the per-version model configs: every version directory paired with every config file name.
-version_config_paths = [
-    os.path.join(version_dir, config_name)
-    for version_dir in ("v1", "v2")
-    for config_name in ("32000.json", "40000.json", "48000.json")
-]
-def singleton(cls):
-    """Class decorator: build the class once and hand back that same object on every later call.
-    Arguments passed after the first call are ignored.
-    """
-    created = {}
-    def get_instance(*args, **kwargs):
-        try:
-            return created[cls]
-        except KeyError:
-            # First call: construct the instance and remember it.
-            obj = cls(*args, **kwargs)
-            created[cls] = obj
-            return obj
-    return get_instance
-@singleton
-class Config:
-    """Runtime configuration loaded from main/configs/config.json.
-    The class is wrapped by ``singleton``, so every ``Config()`` call returns the same instance.
-    """
-    def __init__(self):
-        """Read config.json, load translations and model configs, then derive the device settings."""
-        self.configs_path = os.path.join("main", "configs", "config.json")
-        # Close the handle as soon as the file is parsed.
-        with open(self.configs_path, "r") as f:
-            self.configs = json.load(f)
-        self.cpu_mode = self.configs.get("cpu_mode", False)
-        self.brain = self.configs.get("brain", False)
-        self.debug_mode = self.configs.get("debug_mode", False)
-        # Translations come first: load_config_json reports parse errors through self.translations.
-        self.translations = self.multi_language()
-        self.json_config = self.load_config_json()
-        self.gpu_mem = None
-        self.per_preprocess = 3.7
-        self.device = self.get_default_device()
-        self.providers = self.get_providers()
-        self.is_half = self.is_fp16()
-        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
-    def multi_language(self):
-        """Return the translation table of the configured language.
-        Falls back to vi-VN.json when the file of the configured language does not exist.
-        Raises FileNotFoundError when the language directory holds no .json file and
-        ValueError when the language is not listed under "support_language".
-        Returns an empty dict, after printing a message, when the language file is not valid JSON.
-        """
-        lang = self.configs.get("language", "vi-VN")
-        if not any(l.endswith(".json") for l in os.listdir(self.configs["language_path"])): raise FileNotFoundError("Không tìm thấy bất cứ gói ngôn ngữ nào(No package languages found)")
-        if not lang: lang = "vi-VN"
-        if lang not in self.configs["support_language"]: raise ValueError("Ngôn ngữ không được hỗ trợ(Language not supported)")
-        lang_path = os.path.join(self.configs["language_path"], f"{lang}.json")
-        if not os.path.exists(lang_path): lang_path = os.path.join(self.configs["language_path"], "vi-VN.json")
-        try:
-            with open(lang_path, encoding="utf-8") as f:
-                return json.load(f)
-        except json.JSONDecodeError:
-            # The translation table itself failed to load, so this message cannot be translated.
-            print(f"Invalid JSON in language file: {lang_path}")
-            return {}
-    def is_fp16(self):
-        """Return the "fp16" setting, forcing it off on cpu/mps devices.
-        When it is forced off the change is written back to config.json.
-        Whenever fp16 is off, per_preprocess is lowered to 3.0.
-        """
-        fp16 = self.configs.get("fp16", False)
-        if self.device in ["cpu", "mps"] and fp16:
-            self.configs["fp16"] = False
-            fp16 = False
-            with open(self.configs_path, "w") as f:
-                json.dump(self.configs, f, indent=4)
-        if not fp16: self.per_preprocess = 3.0
-        return fp16
-    def load_config_json(self):
-        """Return a dict mapping each entry of version_config_paths to its parsed JSON content.
-        A file that is not valid JSON is reported with print() and left out of the result.
-        """
-        configs = {}
-        for config_file in version_config_paths:
-            try:
-                with open(os.path.join("main", "configs", config_file), "r") as f:
-                    configs[config_file] = json.load(f)
-            except json.JSONDecodeError:
-                # Use an untranslated message when the "empty_json" entry is not available.
-                template = getattr(self, "translations", {}).get("empty_json", "Invalid JSON in file: {file}")
-                print(template.format(file=config_file))
-        return configs
-    def device_config(self):
-        """Return the (x_pad, x_query, x_center, x_max) tuple for the current device.
-        With a known GPU memory of 4 or less, the smallest values are used and per_preprocess is set to 3.0;
-        otherwise the values depend on whether half precision is enabled.
-        """
-        if self.gpu_mem is not None and self.gpu_mem <= 4:
-            self.per_preprocess = 3.0
-            return 1, 5, 30, 32
-        return (3, 10, 60, 65) if self.is_half else (1, 6, 38, 41)
-    def get_default_device(self):
-        """Return the device string to use, probing CUDA, DirectML, OpenCL and MPS in that order.
-        In cpu_mode the availability probes are replaced with functions returning False and "cpu" is returned.
-        """
-        if not self.cpu_mode:
-            if torch.cuda.is_available():
-                device = "cuda:0"
-                # Total memory of the selected CUDA device, floor-divided by 1024**3 bytes.
-                self.gpu_mem = torch.cuda.get_device_properties(int(device.split(":")[-1])).total_memory // (1024**3)
-            elif directml.is_available():
-                device = "privateuseone:0"
-            elif opencl.is_available():
-                device = "ocl:0"
-            elif torch.backends.mps.is_available():
-                device = "mps"
-            else:
-                device = "cpu"
-        else:
-            # Patch the probes so any later availability check also reports no accelerator.
-            torch.cuda.is_available = lambda : False
-            directml.is_available = lambda : False
-            opencl.is_available = lambda : False
-            torch.backends.mps.is_available = lambda : False
-            device = "cpu"
-        return device
-    def get_providers(self):
-        """Return the ONNX Runtime provider list matching self.device, defaulting to the CPU provider."""
-        ort_providers = onnxruntime.get_available_providers()
-        if "CUDAExecutionProvider" in ort_providers and self.device.startswith("cuda"):
-            providers = ["CUDAExecutionProvider"]
-        elif "ROCMExecutionProvider" in ort_providers and self.device.startswith("cuda"):
-            providers = ["ROCMExecutionProvider"]
-        elif "DmlExecutionProvider" in ort_providers and self.device.startswith(("ocl", "privateuseone")):
-            providers = ["DmlExecutionProvider"]
-        elif "CoreMLExecutionProvider" in ort_providers and self.device.startswith("mps"):
-            providers = ["CoreMLExecutionProvider"]
-        else:
-            providers = ["CPUExecutionProvider"]
-        return providers

main/configs/rpc.py DELETED Viewed

@@ -1,78 +0,0 @@
-import os
-import sys
-import json
-import time
-import struct
-import codecs
-sys.path.append(os.getcwd())
-from main.app.variables import translations
-# Client id sent as "client_id" in the first payload written by send_discord_rpc.
-CLIENT_ID = "1392816674159202396"
-def create_payload(opcode, payload):
-    """Encode payload as UTF-8 JSON, prefixed by two little-endian uint32 values: opcode and body length."""
-    body = json.dumps(payload).encode("utf-8")
-    header = struct.pack("<II", opcode, len(body))
-    return header + body
-def connect_discord_ipc():
-    """Open the discord-ipc-0 named pipe for unbuffered binary read/write.
-    Returns the open file object, or None when the pipe cannot be opened.
-    """
-    pipe_path = r"\\?\pipe\discord-ipc-0"
-    try:
-        handle = open(pipe_path, "r+b", buffering=0)
-    except Exception:
-        handle = None
-    return handle
-def send_discord_rpc(pipe):
-    """Open a session on the Discord IPC pipe and publish the activity.
-    pipe: binary read/write object, such as the one returned by connect_discord_ipc().
-    Raises struct.error when the reply header is shorter than 8 bytes.
-    """
-    # Opening message: opcode 0 carrying the protocol version and the client id.
-    pipe.write(create_payload(0, {"v": 1, "client_id": CLIENT_ID}))
-    # The reply is expected to use the same framing as create_payload: an 8-byte header
-    # (opcode, body length) followed by the body. Take the length from that header;
-    # the bytes after it already belong to the JSON body.
-    _, reply_length = struct.unpack("<II", pipe.read(8))
-    pipe.read(reply_length)
-    activity = {
-        "buttons": [{
-            "label": "Github",
-            "url": codecs.decode("uggcf://tvguho.pbz/CunzUhlauNau16/Ivrganzrfr-EIP", "rot13")
-        }],
-        "details": translations["details"],
-        "timestamps": {"start": int(time.time())},
-        "state": translations["use"]
-    }
-    # Opcode 1 carries the SET_ACTIVITY command; its reply is not read.
-    pipe.write(
-        create_payload(
-            1, {
-                "cmd": "SET_ACTIVITY",
-                "args": {"pid": os.getpid(), "activity": activity},
-                "nonce": str(time.time())
-            }
-        )
-    )

main/configs/v1/32000.json DELETED Viewed

@@ -1,46 +0,0 @@
-{
-    "train": {
-        "log_interval": 200,
-        "seed": 1234,
-        "epochs": 20000,
-        "learning_rate": 0.0001,
-        "betas": [0.8, 0.99],
-        "eps": 1e-09,
-        "batch_size": 4,
-        "lr_decay": 0.999875,
-        "segment_size": 12800,
-        "init_lr_ratio": 1,
-        "warmup_epochs": 0,
-        "c_mel": 45,
-        "c_kl": 1.0
-    },
-    "data": {
-        "max_wav_value": 32768.0,
-        "sample_rate": 32000,
-        "filter_length": 1024,
-        "hop_length": 320,
-        "win_length": 1024,
-        "n_mel_channels": 80,
-        "mel_fmin": 0.0,
-        "mel_fmax": null
-    },
-    "model": {
-        "inter_channels": 192,
-        "hidden_channels": 192,
-        "filter_channels": 768,
-        "text_enc_hidden_dim": 256,
-        "n_heads": 2,
-        "n_layers": 6,
-        "kernel_size": 3,
-        "p_dropout": 0,
-        "resblock": "1",
-        "resblock_kernel_sizes": [3, 7, 11],
-        "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
-        "upsample_rates": [10, 4, 2, 2, 2],
-        "upsample_initial_channel": 512,
-        "upsample_kernel_sizes": [16, 16, 4, 4, 4],
-        "use_spectral_norm": false,
-        "gin_channels": 256,
-        "spk_embed_dim": 109
-    }
-}

main/configs/v1/40000.json DELETED Viewed

@@ -1,46 +0,0 @@
-{
-    "train": {
-        "log_interval": 200,
-        "seed": 1234,
-        "epochs": 20000,
-        "learning_rate": 0.0001,
-        "betas": [0.8, 0.99],
-        "eps": 1e-09,
-        "batch_size": 4,
-        "lr_decay": 0.999875,
-        "segment_size": 12800,
-        "init_lr_ratio": 1,
-        "warmup_epochs": 0,
-        "c_mel": 45,
-        "c_kl": 1.0
-    },
-    "data": {
-        "max_wav_value": 32768.0,
-        "sample_rate": 40000,
-        "filter_length": 2048,
-        "hop_length": 400,
-        "win_length": 2048,
-        "n_mel_channels": 125,
-        "mel_fmin": 0.0,
-        "mel_fmax": null
-    },
-    "model": {
-        "inter_channels": 192,
-        "hidden_channels": 192,
-        "filter_channels": 768,
-        "text_enc_hidden_dim": 256,
-        "n_heads": 2,
-        "n_layers": 6,
-        "kernel_size": 3,
-        "p_dropout": 0,
-        "resblock": "1",
-        "resblock_kernel_sizes": [3, 7, 11],
-        "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
-        "upsample_rates": [10, 10, 2, 2],
-        "upsample_initial_channel": 512,
-        "upsample_kernel_sizes": [16, 16, 4, 4],
-        "use_spectral_norm": false,
-        "gin_channels": 256,
-        "spk_embed_dim": 109
-    }
-}

main/configs/v1/48000.json DELETED Viewed

@@ -1,46 +0,0 @@
-{
-    "train": {
-        "log_interval": 200,
-        "seed": 1234,
-        "epochs": 20000,
-        "learning_rate": 0.0001,
-        "betas": [0.8, 0.99],
-        "eps": 1e-09,
-        "batch_size": 4,
-        "lr_decay": 0.999875,
-        "segment_size": 11520,
-        "init_lr_ratio": 1,
-        "warmup_epochs": 0,
-        "c_mel": 45,
-        "c_kl": 1.0
-    },
-    "data": {
-        "max_wav_value": 32768.0,
-        "sample_rate": 48000,
-        "filter_length": 2048,
-        "hop_length": 480,
-        "win_length": 2048,
-        "n_mel_channels": 128,
-        "mel_fmin": 0.0,
-        "mel_fmax": null
-    },
-    "model": {
-        "inter_channels": 192,
-        "hidden_channels": 192,
-        "filter_channels": 768,
-        "text_enc_hidden_dim": 256,
-        "n_heads": 2,
-        "n_layers": 6,
-        "kernel_size": 3,
-        "p_dropout": 0,
-        "resblock": "1",
-        "resblock_kernel_sizes": [3, 7, 11],
-        "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
-        "upsample_rates": [10, 6, 2, 2, 2],
-        "upsample_initial_channel": 512,
-        "upsample_kernel_sizes": [16, 16, 4, 4, 4],
-        "use_spectral_norm": false,
-        "gin_channels": 256,
-        "spk_embed_dim": 109
-    }
-}

main/configs/v2/32000.json DELETED Viewed

@@ -1,42 +0,0 @@
-{
-    "train": {
-        "log_interval": 200,
-        "seed": 1234,
-        "learning_rate": 0.0001,
-        "betas": [0.8, 0.99],
-        "eps": 1e-09,
-        "lr_decay": 0.999875,
-        "segment_size": 12800,
-        "c_mel": 45,
-        "c_kl": 1.0
-    },
-    "data": {
-        "max_wav_value": 32768.0,
-        "sample_rate": 32000,
-        "filter_length": 1024,
-        "hop_length": 320,
-        "win_length": 1024,
-        "n_mel_channels": 80,
-        "mel_fmin": 0.0,
-        "mel_fmax": null
-    },
-    "model": {
-        "inter_channels": 192,
-        "hidden_channels": 192,
-        "filter_channels": 768,
-        "text_enc_hidden_dim": 768,
-        "n_heads": 2,
-        "n_layers": 6,
-        "kernel_size": 3,
-        "p_dropout": 0,
-        "resblock": "1",
-        "resblock_kernel_sizes": [3, 7, 11],
-        "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
-        "upsample_rates": [10, 8, 2, 2],
-        "upsample_initial_channel": 512,
-        "upsample_kernel_sizes": [20, 16, 4, 4],
-        "use_spectral_norm": false,
-        "gin_channels": 256,
-        "spk_embed_dim": 109
-    }
-}

main/configs/v2/40000.json DELETED Viewed

@@ -1,42 +0,0 @@
-{
-    "train": {
-        "log_interval": 200,
-        "seed": 1234,
-        "learning_rate": 0.0001,
-        "betas": [0.8, 0.99],
-        "eps": 1e-09,
-        "lr_decay": 0.999875,
-        "segment_size": 12800,
-        "c_mel": 45,
-        "c_kl": 1.0
-    },
-    "data": {
-        "max_wav_value": 32768.0,
-        "sample_rate": 40000,
-        "filter_length": 2048,
-        "hop_length": 400,
-        "win_length": 2048,
-        "n_mel_channels": 125,
-        "mel_fmin": 0.0,
-        "mel_fmax": null
-    },
-    "model": {
-        "inter_channels": 192,
-        "hidden_channels": 192,
-        "filter_channels": 768,
-        "text_enc_hidden_dim": 768,
-        "n_heads": 2,
-        "n_layers": 6,
-        "kernel_size": 3,
-        "p_dropout": 0,
-        "resblock": "1",
-        "resblock_kernel_sizes": [3, 7, 11],
-        "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
-        "upsample_rates": [10, 10, 2, 2],
-        "upsample_initial_channel": 512,
-        "upsample_kernel_sizes": [16, 16, 4, 4],
-        "use_spectral_norm": false,
-        "gin_channels": 256,
-        "spk_embed_dim": 109
-    }
-}