Elesh Vaishnav committed on
Commit
5682687
·
verified ·
1 Parent(s): f3d5970

Upload 26 files

Browse files
tabs/download/download.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+ import shutil
5
+ import requests
6
+ import tempfile
7
+ import gradio as gr
8
+ import pandas as pd
9
+
10
+ from concurrent.futures import ThreadPoolExecutor
11
+ from tqdm import tqdm
12
+
13
+
14
+ now_dir = os.getcwd()
15
+ sys.path.append(now_dir)
16
+
17
+ from core import run_download_script
18
+ from rvc.lib.utils import format_title
19
+
20
+ from assets.i18n.i18n import I18nAuto
21
+
22
+ i18n = I18nAuto()
23
+
24
+ gradio_temp_dir = os.path.join(tempfile.gettempdir(), "gradio")
25
+
26
+ if os.path.exists(gradio_temp_dir):
27
+ shutil.rmtree(gradio_temp_dir)
28
+
29
+
30
def save_drop_model(dropbox):
    """Save a dropped .pth or .index model file into logs/<model_name>/.

    Args:
        dropbox: Filesystem path of the uploaded file (gradio File value).

    Raises:
        gr.Error: If the uploaded file is not a .pth or .index file.

    Returns:
        None, which clears the gradio File component after the move.
    """
    file_name = format_title(os.path.basename(dropbox))
    # Validate by file extension instead of a substring test on the whole
    # path, so e.g. any file inside a folder named "pth" is not accepted.
    if not file_name.endswith((".pth", ".index")):
        raise gr.Error(
            message="The file you dropped is not a valid model file. Please try again."
        )

    model_name = file_name
    if file_name.endswith(".pth"):
        model_name = model_name.split(".pth")[0]
    else:  # .index
        # Strip common index-file decorations to recover the model name.
        for rep in ("nprobe_1_", "_v1", "_v2", "added_"):
            model_name = model_name.replace(rep, "")
        model_name = model_name.split(".index")[0]

    model_path = os.path.join(now_dir, "logs", model_name)
    os.makedirs(model_path, exist_ok=True)
    # Replace any previous copy of the same file.
    if os.path.exists(os.path.join(model_path, file_name)):
        os.remove(os.path.join(model_path, file_name))
    shutil.move(dropbox, os.path.join(model_path, file_name))
    print(f"{file_name} saved in {model_path}")
    gr.Info(f"{file_name} saved in {model_path}")

    return None
57
+
58
+
59
+ json_url = "https://huggingface.co/IAHispano/Applio/raw/main/pretrains.json"
60
+
61
+
62
def fetch_pretrained_data():
    """Return the pretrained-model catalog as a dict.

    Resolution order:
      1. A locally cached copy of pretrains.json.
      2. A fresh download from Hugging Face (which is then cached).
      3. A minimal hard-coded fallback so the UI still renders offline.
    """
    pretraineds_custom_path = os.path.join("rvc", "models", "pretraineds", "custom")
    os.makedirs(pretraineds_custom_path, exist_ok=True)
    cache_file = os.path.join(pretraineds_custom_path, json_url.split("/")[-1])
    try:
        with open(cache_file, "r") as f:
            data = json.load(f)
    except (OSError, json.JSONDecodeError):
        # Cache missing or corrupt: refresh it from the network.
        try:
            response = requests.get(json_url, timeout=10)
            response.raise_for_status()
            data = response.json()
            with open(cache_file, "w", encoding="utf-8") as f:
                json.dump(
                    data,
                    f,
                    indent=2,
                    separators=(",", ": "),
                    ensure_ascii=False,
                )
        except Exception:
            # Offline or request failed: fall back to a minimal known entry
            # so the dropdowns still have something to show.
            data = {
                "Titan": {
                    "32k": {"D": "null", "G": "null"},
                },
            }
    return data
94
+
95
+
96
def get_pretrained_list():
    """Return the names of all pretrained models in the catalog."""
    return list(fetch_pretrained_data())
99
+
100
+
101
def get_pretrained_sample_rates(model):
    """Return the sample rates available for *model* in the catalog."""
    catalog = fetch_pretrained_data()
    return [rate for rate in catalog[model]]
104
+
105
+
106
def get_file_size(url):
    """Return the Content-Length of *url* in bytes (0 if not reported).

    Follows redirects (Hugging Face serves files via a CDN redirect, and
    requests.head does not follow them by default) and applies a timeout so
    a hung connection cannot block the UI indefinitely.
    """
    response = requests.head(url, allow_redirects=True, timeout=10)
    return int(response.headers.get("content-length", 0))
109
+
110
+
111
def download_file(url, destination_path, progress_bar):
    """Stream *url* to *destination_path*, updating *progress_bar* per chunk.

    Args:
        url: Direct download URL.
        destination_path: Target file path; parent dirs are created.
        progress_bar: A tqdm instance shared between concurrent downloads.

    Raises:
        requests.HTTPError: On a non-2xx response — instead of silently
            writing the error page to disk as the previous version did.
    """
    os.makedirs(os.path.dirname(destination_path), exist_ok=True)
    block_size = 1024
    # The context manager guarantees the connection is released on error.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(destination_path, "wb") as file:
            for data in response.iter_content(block_size):
                file.write(data)
                progress_bar.update(len(data))
119
+
120
+
121
def download_pretrained_model(model, sample_rate):
    """Download the D/G pretrained checkpoint pair for *model* at *sample_rate*.

    Looks up the Hugging Face repo paths in the catalog, then downloads both
    files concurrently into rvc/models/pretraineds/custom, sharing a single
    tqdm progress bar sized to the combined file size.
    """
    data = fetch_pretrained_data()
    paths = data[model][sample_rate]
    pretraineds_custom_path = os.path.join("rvc", "models", "pretraineds", "custom")
    os.makedirs(pretraineds_custom_path, exist_ok=True)

    # Catalog entries are repo-relative paths appended to the HF host.
    d_url = f"https://huggingface.co/{paths['D']}"
    g_url = f"https://huggingface.co/{paths['G']}"

    # HEAD both files up front so the progress bar total is accurate.
    total_size = get_file_size(d_url) + get_file_size(g_url)

    gr.Info("Downloading pretrained model...")

    with tqdm(
        total=total_size, unit="iB", unit_scale=True, desc="Downloading files"
    ) as progress_bar:
        # Two workers: one per file; both update the same shared bar.
        with ThreadPoolExecutor(max_workers=2) as executor:
            futures = [
                executor.submit(
                    download_file,
                    d_url,
                    os.path.join(pretraineds_custom_path, os.path.basename(paths["D"])),
                    progress_bar,
                ),
                executor.submit(
                    download_file,
                    g_url,
                    os.path.join(pretraineds_custom_path, os.path.basename(paths["G"])),
                    progress_bar,
                ),
            ]
            # Re-raise any download exception in the caller's thread.
            for future in futures:
                future.result()

    gr.Info("Pretrained model downloaded successfully!")
    print("Pretrained model downloaded successfully!")
157
+
158
+
159
def update_sample_rate_dropdown(model):
    """Return a gradio update dict refreshing the sample-rate choices for *model*.

    Fetches the rate list once — the previous version called
    get_pretrained_sample_rates twice, re-reading the catalog each time.
    """
    rates = get_pretrained_sample_rates(model)
    return {
        "choices": rates,
        "value": rates[0],
        "__type__": "update",
    }
165
+
166
+
167
def download_tab():
    """Build the "Download" tab UI: link download, drag-and-drop, pretraineds."""
    with gr.Column():
        gr.Markdown(value=i18n("## Download Model"))
        model_link = gr.Textbox(
            label=i18n("Model Link"),
            placeholder=i18n("Introduce the model link"),
            interactive=True,
        )
        model_download_output_info = gr.Textbox(
            label=i18n("Output Information"),
            info=i18n("The output information will be displayed here."),
            value="",
            max_lines=8,
            interactive=False,
        )
        model_download_button = gr.Button(i18n("Download Model"))
        # Download a model from a pasted URL via the core script.
        model_download_button.click(
            fn=run_download_script,
            inputs=[model_link],
            outputs=[model_download_output_info],
        )
        gr.Markdown(value=i18n("## Drop files"))
        dropbox = gr.File(
            label=i18n(
                "Drag your .pth file and .index file into this space. Drag one and then the other."
            ),
            type="filepath",
        )

        # save_drop_model returns None, which clears the drop area afterwards.
        dropbox.upload(
            fn=save_drop_model,
            inputs=[dropbox],
            outputs=[dropbox],
        )
        gr.Markdown(value=i18n("## Download Pretrained Models"))
        pretrained_model = gr.Dropdown(
            label=i18n("Pretrained"),
            info=i18n("Select the pretrained model you want to download."),
            choices=get_pretrained_list(),
            value="Titan",
            interactive=True,
        )
        pretrained_sample_rate = gr.Dropdown(
            label=i18n("Sampling Rate"),
            info=i18n("And select the sampling rate."),
            choices=get_pretrained_sample_rates(pretrained_model.value),
            value="40k",
            interactive=True,
            allow_custom_value=True,
        )
        # Changing the model refreshes the available sampling rates.
        pretrained_model.change(
            update_sample_rate_dropdown,
            inputs=[pretrained_model],
            outputs=[pretrained_sample_rate],
        )
        download_pretrained = gr.Button(i18n("Download"))
        download_pretrained.click(
            fn=download_pretrained_model,
            inputs=[pretrained_model, pretrained_sample_rate],
            outputs=[],
        )
tabs/extra/extra.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+
5
+ now_dir = os.getcwd()
6
+ sys.path.append(now_dir)
7
+
8
+ from tabs.extra.sections.processing import processing_tab
9
+ from tabs.extra.sections.analyzer import analyzer_tab
10
+ from tabs.extra.sections.f0_extractor import f0_extractor_tab
11
+
12
+ from assets.i18n.i18n import I18nAuto
13
+
14
+ i18n = I18nAuto()
15
+
16
+
17
def extra_tab():
    """Build the "Extra" tab: model info, F0 curve, and audio analyzer sub-tabs."""
    with gr.TabItem(i18n("Model information")):
        processing_tab()

    with gr.TabItem(i18n("F0 Curve")):
        f0_extractor_tab()

    with gr.TabItem(i18n("Audio Analyzer")):
        analyzer_tab()
tabs/extra/model_information.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from core import run_model_information_script
3
+
4
+ from assets.i18n.i18n import I18nAuto
5
+
6
+ i18n = I18nAuto()
7
+
8
+
9
def model_information_tab():
    """Build the model-information UI: a path textbox, an output box, a button."""
    with gr.Column():
        model_name = gr.Textbox(
            label=i18n("Path to Model"),
            info=i18n("Introduce the model pth path"),
            placeholder=i18n("Introduce the model pth path"),
            interactive=True,
        )
        model_information_output_info = gr.Textbox(
            label=i18n("Output Information"),
            info=i18n("The output information will be displayed here."),
            value="",
            max_lines=12,
            interactive=False,
        )
        model_information_button = gr.Button(i18n("See Model Information"))
        # Run the core inspection script on the given checkpoint path.
        model_information_button.click(
            fn=run_model_information_script,
            inputs=[model_name],
            outputs=[model_information_output_info],
        )
tabs/extra/sections/analyzer.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ import gradio as gr
3
+
4
+ now_dir = os.getcwd()
5
+ sys.path.append(now_dir)
6
+
7
+ from core import run_audio_analyzer_script
8
+ from assets.i18n.i18n import I18nAuto
9
+
10
+ i18n = I18nAuto()
11
+
12
+
13
def analyzer_tab():
    """Build the audio-analyzer UI: upload audio, show text info and a plot."""
    with gr.Column():
        audio_input = gr.Audio(type="filepath")
        output_info = gr.Textbox(
            label=i18n("Output Information"),
            info=i18n("The output information will be displayed here."),
            value="",
            max_lines=8,
            interactive=False,
        )
        get_info_button = gr.Button(value=i18n("Get information about the audio"))
        image_output = gr.Image(type="filepath", interactive=False)

        # The core script returns (text summary, plot image path).
        get_info_button.click(
            fn=run_audio_analyzer_script,
            inputs=[audio_input],
            outputs=[output_info, image_output],
        )
tabs/extra/sections/f0_extractor.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import librosa
3
+ import gradio as gr
4
+ from matplotlib import pyplot as plt
5
+
6
+ from rvc.lib.predictors.F0Extractor import F0Extractor
7
+
8
+ from assets.i18n.i18n import I18nAuto
9
+
10
+ i18n = I18nAuto()
11
+
12
+
13
def extract_f0_curve(audio_path: str, method: str):
    """Extract the F0 (pitch) curve from an audio file and save plot + text.

    Args:
        audio_path: Path to the input audio file.
        method: Pitch extraction algorithm name ("crepe", "fcpe" or "rmvpe").

    Returns:
        Tuple of (plot image path, text file path), both under logs/.
    """
    print("Extracting F0 Curve...")
    image_path = os.path.join("logs", "f0_plot.png")
    txt_path = os.path.join("logs", "f0_curve.txt")
    # Load only to obtain the native sample rate for the extractor.
    y, sr = librosa.load(audio_path, sr=None)
    hop_length = 160

    f0_extractor = F0Extractor(audio_path, sample_rate=sr, method=method)
    f0 = f0_extractor.extract_f0()

    plt.figure(figsize=(10, 4))
    plt.plot(f0)
    plt.title(method)
    plt.xlabel("Time (frames)")
    plt.ylabel("Frequency (Hz)")
    plt.savefig(image_path)
    plt.close()

    with open(txt_path, "w") as txtfile:
        for i, f0_value in enumerate(f0):
            # NOTE(review): this writes i * sr / hop_length as the first
            # column; the timestamp of frame i would normally be
            # i * hop_length / sr. Kept as-is to preserve the existing
            # output format — confirm intended meaning of this column.
            frequency = i * sr / hop_length
            txtfile.write(f"{frequency},{f0_value}\n")

    print("F0 Curve extracted successfully!")
    return image_path, txt_path
41
+
42
+
43
def f0_extractor_tab():
    """Build the F0-curve UI: audio upload, algorithm choice, plot + txt output."""
    audio = gr.Audio(label=i18n("Upload Audio"), type="filepath")
    f0_method = gr.Radio(
        label=i18n("Pitch extraction algorithm"),
        info=i18n(
            "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
        ),
        choices=["crepe", "fcpe", "rmvpe"],
        value="rmvpe",
    )
    button = gr.Button(i18n("Extract F0 Curve"))

    with gr.Row():
        txt_output = gr.File(label=i18n("F0 Curve"), type="filepath")
        image_output = gr.Image(type="filepath", interactive=False)

    # extract_f0_curve returns (image_path, txt_path) in that order.
    button.click(
        fn=extract_f0_curve,
        inputs=[
            audio,
            f0_method,
        ],
        outputs=[image_output, txt_output],
    )
tabs/extra/sections/processing.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+
5
+ now_dir = os.getcwd()
6
+ sys.path.append(now_dir)
7
+
8
+ from core import run_model_information_script
9
+ from assets.i18n.i18n import I18nAuto
10
+
11
+ i18n = I18nAuto()
12
+
13
+
14
+ def processing_tab():
15
+ model_view_model_path = gr.Textbox(
16
+ label=i18n("Path to Model"),
17
+ info=i18n("Introduce the model pth path"),
18
+ value="",
19
+ interactive=True,
20
+ placeholder=i18n("Enter path to model"),
21
+ )
22
+
23
+ model_view_output_info = gr.Textbox(
24
+ label=i18n("Output Information"),
25
+ info=i18n("The output information will be displayed here."),
26
+ value="",
27
+ max_lines=11,
28
+ )
29
+ model_view_button = gr.Button(i18n("View"))
30
+ model_view_button.click(
31
+ fn=run_model_information_script,
32
+ inputs=[model_view_model_path],
33
+ outputs=[model_view_output_info],
34
+ )
tabs/inference/inference.py ADDED
@@ -0,0 +1,2334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ import gradio as gr
3
+ import regex as re
4
+ import shutil
5
+ import datetime
6
+ import json
7
+ import torch
8
+
9
+ from core import (
10
+ run_infer_script,
11
+ run_batch_infer_script,
12
+ )
13
+
14
+ from assets.i18n.i18n import I18nAuto
15
+
16
+ from rvc.lib.utils import format_title
17
+ from tabs.settings.sections.restart import stop_infer
18
+ from tabs.settings.sections.filter import get_filter_trigger, load_config_filter
19
+
20
+ i18n = I18nAuto()
21
+
22
+ now_dir = os.getcwd()
23
+ sys.path.append(now_dir)
24
+
25
+ model_root = os.path.join(now_dir, "logs")
26
+ audio_root = os.path.join(now_dir, "assets", "audios")
27
+ custom_embedder_root = os.path.join(
28
+ now_dir, "rvc", "models", "embedders", "embedders_custom"
29
+ )
30
+
31
+ PRESETS_DIR = os.path.join(now_dir, "assets", "presets")
32
+ FORMANTSHIFT_DIR = os.path.join(now_dir, "assets", "formant_shift")
33
+
34
+ os.makedirs(custom_embedder_root, exist_ok=True)
35
+
36
+ custom_embedder_root_relative = os.path.relpath(custom_embedder_root, now_dir)
37
+ model_root_relative = os.path.relpath(model_root, now_dir)
38
+ audio_root_relative = os.path.relpath(audio_root, now_dir)
39
+
40
+ sup_audioext = {
41
+ "wav",
42
+ "mp3",
43
+ "flac",
44
+ "ogg",
45
+ "opus",
46
+ "m4a",
47
+ "mp4",
48
+ "aac",
49
+ "alac",
50
+ "wma",
51
+ "aiff",
52
+ "webm",
53
+ "ac3",
54
+ }
55
+
56
+
57
def normalize_path(p):
    """Normalize *p* for comparison: collapse separators, use '/', lowercase."""
    normalized = os.path.normpath(p)
    return normalized.replace("\\", "/").lower()
59
+
60
+
61
# BASE model/index folder names for many latin languages (legacy: zips = models)
# Matches e.g. "models"/"modelos"/"mdl"/"weights"/"zips" as model folders and
# "index"/"indices"/"idx" as index folders (whole folder name only).
MODEL_FOLDER = re.compile(r"^(?:model.{0,4}|mdl(?:s)?|weight.{0,4}|zip(?:s)?)$")
INDEX_FOLDER = re.compile(r"^(?:ind.{0,4}|idx(?:s)?)$")
64
+
65
+
66
def is_mdl_alias(name: str) -> bool:
    """True if *name* is a recognized model-folder alias (see MODEL_FOLDER)."""
    return MODEL_FOLDER.match(name) is not None
68
+
69
+
70
def is_idx_alias(name: str) -> bool:
    """True if *name* is a recognized index-folder alias (see INDEX_FOLDER)."""
    return INDEX_FOLDER.match(name) is not None
72
+
73
+
74
def alias_score(path: str, want_model: bool) -> int:
    """
    Handles duplicate files, compare file type to path and assign a score:
    2 = Path contains correct alias (e.g., model file in 'modelos/' folder)
    1 = Path contains opposite alias (e.g., model file in 'index/' folder)
    0 = Path contains no recognized aliases
    """
    segments = normalize_path(os.path.dirname(path)).split("/")
    found_model = any(is_mdl_alias(seg) for seg in segments)
    found_index = any(is_idx_alias(seg) for seg in segments)
    if want_model:
        correct, opposite = found_model, found_index
    else:
        correct, opposite = found_index, found_model
    if correct:
        return 2
    if opposite:
        return 1
    return 0
88
+
89
+
90
+ def get_files(type="model"):
91
+ assert type in ("model", "index"), "Invalid type for get_files (models or index)"
92
+ is_model = type == "model"
93
+ exts = (".pth", ".onnx") if is_model else (".index",)
94
+ exclude_prefixes = ("G_", "D_") if is_model else ()
95
+ exclude_substr = None if is_model else "trained"
96
+
97
+ best = {}
98
+ order = 0
99
+
100
+ for root, _, files in os.walk(model_root_relative, followlinks=True):
101
+ for file in files:
102
+ if not file.endswith(exts):
103
+ continue
104
+ if any(file.startswith(p) for p in exclude_prefixes):
105
+ continue
106
+ if exclude_substr and exclude_substr in file:
107
+ continue
108
+
109
+ full = os.path.join(root, file)
110
+ real = os.path.realpath(full)
111
+ score = alias_score(full, is_model)
112
+
113
+ prev = best.get(real)
114
+ if (
115
+ prev is None
116
+ ): # Prefer higher score; if equal score, use first encountered
117
+ best[real] = (score, order, full)
118
+ else:
119
+ prev_score, prev_order, _ = prev
120
+ if score > prev_score:
121
+ best[real] = (score, prev_order, full)
122
+ order += 1
123
+
124
+ return [t[2] for t in sorted(best.values(), key=lambda x: x[1])]
125
+
126
+
127
+ default_weight = next(iter(get_files("model")), None)
128
+
129
+ audio_paths = [
130
+ os.path.join(root, name)
131
+ for root, _, files in os.walk(audio_root_relative, topdown=False)
132
+ for name in files
133
+ if name.endswith(tuple(sup_audioext))
134
+ and root == audio_root_relative
135
+ and "_output" not in name
136
+ ]
137
+
138
+ custom_embedders = [
139
+ os.path.join(dirpath, dirname)
140
+ for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative)
141
+ for dirname in dirnames
142
+ ]
143
+
144
+
145
def update_sliders(preset):
    """Load the named inference preset and return its four slider values."""
    preset_file = os.path.join(PRESETS_DIR, f"{preset}.json")
    with open(preset_file, "r", encoding="utf-8") as json_file:
        values = json.load(json_file)
    return (
        values["pitch"],
        values["index_rate"],
        values["rms_mix_rate"],
        values["protect"],
    )
156
+
157
+
158
def update_sliders_formant(preset):
    """Load the named formant-shift preset and return (qfrency, timbre)."""
    preset_file = os.path.join(FORMANTSHIFT_DIR, f"{preset}.json")
    with open(preset_file, "r", encoding="utf-8") as json_file:
        values = json.load(json_file)
    return (
        values["formant_qfrency"],
        values["formant_timbre"],
    )
167
+
168
+
169
def export_presets(presets, file_path):
    """Serialize *presets* to *file_path* as pretty-printed UTF-8 JSON."""
    serialized = json.dumps(presets, ensure_ascii=False, indent=4)
    with open(file_path, "w", encoding="utf-8") as json_file:
        json_file.write(serialized)
172
+
173
+
174
def import_presets(file_path):
    """Load and return a presets dict from a UTF-8 JSON file."""
    with open(file_path, "r", encoding="utf-8") as json_file:
        return json.load(json_file)
178
+
179
+
180
def get_presets_data(pitch, index_rate, rms_mix_rate, protect):
    """Bundle the four inference slider values into a preset dict."""
    keys = ("pitch", "index_rate", "rms_mix_rate", "protect")
    return dict(zip(keys, (pitch, index_rate, rms_mix_rate, protect)))
187
+
188
+
189
def export_presets_button(preset_name, pitch, index_rate, rms_mix_rate, protect):
    """Save the current slider values as a named preset; return a status string."""
    if not preset_name:
        return "Export cancelled"
    file_path = os.path.join(PRESETS_DIR, f"{preset_name}.json")
    presets_data = get_presets_data(pitch, index_rate, rms_mix_rate, protect)
    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(presets_data, json_file, ensure_ascii=False, indent=4)
    return "Export successful"
197
+
198
+
199
def import_presets_button(file_path):
    """Load presets from an uploaded file; return (names, presets, status)."""
    if not file_path:
        return [], {}, "No file selected for import."
    imported_presets = import_presets(file_path.name)
    return (
        list(imported_presets.keys()),
        imported_presets,
        "Presets imported successfully!",
    )
208
+
209
+
210
def list_json_files(directory):
    """Return the stem (name without extension) of each .json in *directory*."""
    return [
        name.rsplit(".", 1)[0]
        for name in os.listdir(directory)
        if name.endswith(".json")
    ]
212
+
213
+
214
def refresh_presets():
    """Re-scan the presets directory and refresh the dropdown choices."""
    return gr.update(choices=list_json_files(PRESETS_DIR))
217
+
218
+
219
def output_path_fn(input_audio_path):
    """Derive the default output path: '<input stem>_output.wav' next to the input."""
    directory = os.path.dirname(input_audio_path)
    stem = os.path.basename(input_audio_path).rsplit(".", 1)[0]
    return os.path.join(directory, stem + "_output.wav")
226
+
227
+
228
def change_choices(model):
    """Refresh the model, index, audio, and two speaker-id dropdowns.

    Args:
        model: Currently selected model path; used to re-read speaker ids.

    Returns:
        Five gradio update dicts in UI wiring order:
        (models, indexes, audios, speaker ids, speaker ids).
    """
    if model:
        speakers = get_speakers_id(model)
    else:
        speakers = [0]

    models_list = get_files("model")
    indexes_list = sorted(get_files("index"))

    # Only audio files directly in the audio root, excluding generated outputs.
    audio_paths = [
        os.path.join(root, name)
        for root, _, files in os.walk(audio_root_relative, topdown=False)
        for name in files
        if name.endswith(tuple(sup_audioext))
        and root == audio_root_relative
        and "_output" not in name
    ]

    return (
        {"choices": sorted(models_list), "__type__": "update"},
        {"choices": sorted(indexes_list), "__type__": "update"},
        {"choices": sorted(audio_paths), "__type__": "update"},
        # Guard: fall back to [0] if speakers is not a list/tuple.
        {
            "choices": (
                sorted(speakers)
                if speakers is not None and isinstance(speakers, (list, tuple))
                else [0]
            ),
            "__type__": "update",
        },
        {
            "choices": (
                sorted(speakers)
                if speakers is not None and isinstance(speakers, (list, tuple))
                else [0]
            ),
            "__type__": "update",
        },
    )
267
+
268
+
269
def extract_model_and_epoch(path):
    """Parse '<model>_<epoch>e_...' from a checkpoint filename.

    Returns (model_name, epoch), or ("", 0) when the name doesn't match —
    which sorts unparsable entries first when used as a sort key.
    """
    match = re.match(r"(.+?)_(\d+)e_", os.path.basename(path))
    if not match:
        return "", 0
    return match.group(1), int(match.group(2))
276
+
277
+
278
def save_to_wav(record_button):
    """Move a just-recorded temp file into the audio folder under a timestamp name.

    Returns (saved path, default output path), or None when nothing was recorded.
    """
    if record_button is None:
        return None
    new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".wav"
    target_path = os.path.join(audio_root_relative, os.path.basename(new_name))
    shutil.move(record_button, target_path)
    return target_path, output_path_fn(target_path)
288
+
289
+
290
def save_to_wav2(upload_audio):
    """Copy an uploaded audio file into the audio folder under a sanitized name.

    Returns (saved path, default output path).
    """
    formated_name = format_title(os.path.basename(upload_audio))
    target_path = os.path.join(audio_root_relative, formated_name)
    # Overwrite any previous upload with the same sanitized name.
    if os.path.exists(target_path):
        os.remove(target_path)
    shutil.copy(upload_audio, target_path)
    return target_path, output_path_fn(target_path)
300
+
301
+
302
def delete_outputs():
    """Delete every generated '_output' audio file under the audio folder."""
    gr.Info("Outputs cleared!")
    for root, _, files in os.walk(audio_root_relative, topdown=False):
        for name in files:
            # Only touch files this app generated (marked with "_output").
            if name.endswith(tuple(sup_audioext)) and "_output" in name:
                os.remove(os.path.join(root, name))
308
+
309
+
310
def folders_same(
    a: str, b: str
) -> bool:  # Used to "pair" index and model folders based on path names
    """
    True if:
    1) The two normalized paths are totally identical..OR
    2) One lives under a MODEL_FOLDER and the other lives
    under an INDEX_FOLDER, at the same relative subpath
    i.e. logs/models/miku and logs/index/miku = "SAME FOLDER"
    """
    a = normalize_path(a)
    b = normalize_path(b)
    if a == b:
        return True

    def split_after_alias(p):
        # Return (alias folder name, remainder of path after it), or
        # (None, None) when the path contains no recognized alias folder.
        parts = p.split("/")
        for i, part in enumerate(parts):
            if is_mdl_alias(part) or is_idx_alias(part):
                base = part
                rel = "/".join(parts[i + 1 :])
                return base, rel
        return None, None

    base_a, rel_a = split_after_alias(a)
    base_b, rel_b = split_after_alias(b)

    if rel_a is None or rel_b is None:
        return False

    # Same subpath below the alias, and the two aliases are opposite kinds.
    if rel_a == rel_b and (
        (is_mdl_alias(base_a) and is_idx_alias(base_b))
        or (is_idx_alias(base_a) and is_mdl_alias(base_b))
    ):
        return True
    return False
346
+
347
+
348
def match_index(model_file_value):
    """Find the .index file that best matches the selected model file.

    Priority: exact name match in the same/paired folder; then the single
    index in that folder; then substring/prefix matches there; then exact,
    substring, and prefix matches in external folders. Returns "" when
    nothing plausible is found.
    """
    if not model_file_value:
        return ""

    # Derive the information about the model's name and path for index matching
    model_folder = normalize_path(os.path.dirname(model_file_value))
    model_name = os.path.basename(model_file_value)
    base_name = os.path.splitext(model_name)[0]
    # "common" strips training suffixes like _100e_..., _v2, etc.
    common = re.sub(r"[_\-\.\+](?:e|s|v|V)\d.*$", "", base_name)
    prefix_match = re.match(r"^(.*?)[_\-\.\+]", base_name)
    prefix = prefix_match.group(1) if prefix_match else None

    same_count = 0
    last_same = None
    same_substr = None
    same_prefixed = None
    external_exact = None
    external_substr = None
    external_pref = None

    for idx in get_files("index"):
        idx_folder = os.path.dirname(idx)
        idx_folder_n = normalize_path(idx_folder)
        idx_name = os.path.basename(idx)
        idx_base = os.path.splitext(idx_name)[0]

        in_same = folders_same(model_folder, idx_folder_n)
        if in_same:
            same_count += 1
            last_same = idx

            # 1) EXACT match to loaded model name and folders_same = True
            if idx_base == base_name:
                return idx

            # 2) Substring match to model name and folders_same
            if common in idx_base and same_substr is None:
                same_substr = idx

            # 3) Prefix match to model name and folders_same
            if prefix and idx_base.startswith(prefix) and same_prefixed is None:
                same_prefixed = idx

        # If it's NOT in a paired folder (folders_same = False) we look elseware:
        else:
            # 4) EXACT match to model name in external directory
            if idx_base == base_name and external_exact is None:
                external_exact = idx

            # 5) Substring match to model name in ED
            if common in idx_base and external_substr is None:
                external_substr = idx

            # 6) Prefix match to model name in ED
            if prefix and idx_base.startswith(prefix) and external_pref is None:
                external_pref = idx

    # Fallback: If there is exactly one index file in the same (or paired) folder,
    # we should assume that's the intended index file even if the name doesnt match
    if same_count == 1:
        return last_same

    # Then by remaining priority queue:
    if same_substr:
        return same_substr
    if same_prefixed:
        return same_prefixed
    if external_exact:
        return external_exact
    if external_substr:
        return external_substr
    if external_pref:
        return external_pref

    return ""
423
+
424
+
425
def create_folder_and_move_files(folder_name, bin_file, config_file):
    """Create a custom-embedder folder and copy the uploaded files into it.

    Args:
        folder_name: Target folder name (only the basename is kept).
        bin_file: Optional path to the embedder weights file.
        config_file: Optional path to the embedder config file.

    Returns:
        A human-readable status string.
    """
    if not folder_name:
        return "Folder name must not be empty."

    folder_name = os.path.basename(folder_name)
    target_folder = os.path.join(custom_embedder_root, folder_name)

    # Containment check via commonpath: a plain startswith() test can be
    # fooled by a sibling directory whose name shares the root as a prefix
    # (e.g. ".../custom_evil" startswith ".../custom").
    abs_target = os.path.abspath(target_folder)
    abs_root = os.path.abspath(custom_embedder_root)
    if os.path.commonpath([abs_target, abs_root]) != abs_root:
        return "Invalid folder name. Folder must be within the custom embedder root directory."

    os.makedirs(target_folder, exist_ok=True)

    if bin_file:
        shutil.copy(bin_file, os.path.join(target_folder, os.path.basename(bin_file)))
    if config_file:
        shutil.copy(
            config_file, os.path.join(target_folder, os.path.basename(config_file))
        )

    return f"Files moved to folder {target_folder}"
450
+
451
+
452
def refresh_formant():
    """Re-scan the formant-shift presets directory and refresh the dropdown."""
    return gr.update(choices=list_json_files(FORMANTSHIFT_DIR))
455
+
456
+
457
def refresh_embedders_folders():
    """Return every sub-folder under the custom embedders root."""
    found = []
    for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative):
        for dirname in dirnames:
            found.append(os.path.join(dirpath, dirname))
    return found
464
+
465
+
466
def get_speakers_id(model):
    """Return the list of speaker IDs stored in a model checkpoint.

    Args:
        model: Path to the model file relative to the project root. A falsy
            value (empty string / None) means no model is selected.

    Returns:
        list[int]: ``list(range(speakers_id))`` when the checkpoint declares
        a positive speaker count, otherwise the single-speaker fallback
        ``[0]``.
    """
    if not model:
        return [0]
    try:
        model_data = torch.load(
            os.path.join(now_dir, model), map_location="cpu", weights_only=True
        )
        speaker_count = model_data.get("speakers_id")
        # A missing or zero speaker count falls back to the default speaker.
        return list(range(speaker_count)) if speaker_count else [0]
    except Exception:
        # Unreadable or incompatible checkpoints degrade to a single speaker.
        return [0]
481
+
482
+
483
def filter_dropdowns(filter_text):
    """Filter the model and index dropdowns by a case-insensitive substring.

    Args:
        filter_text: Substring that each entry's path must contain.

    Returns:
        tuple: ``gr.update`` objects for the model and index dropdowns with
        their filtered choice lists.
    """
    needle = filter_text.lower()
    models = [
        name
        for name in sorted(get_files("model"), key=extract_model_and_epoch)
        if needle in name.lower()
    ]
    indexes = [name for name in sorted(get_files("index")) if needle in name.lower()]
    return (gr.update(choices=models), gr.update(choices=indexes))
490
+
491
+
492
def update_filter_visibility(_):
    """Show or hide the filter textbox based on the saved config flag.

    When filtering is disabled, the textbox is hidden and cleared and both
    dropdowns are reset to their unfiltered contents; when enabled, the
    textbox is shown and the dropdowns are left untouched.
    """
    if load_config_filter():
        return gr.update(visible=True), gr.skip(), gr.skip()
    models_update, indexes_update = filter_dropdowns("")
    return gr.update(visible=False, value=""), models_update, indexes_update
499
+
500
+
501
+ # Inference tab
502
+ def inference_tab():
503
+ trigger = get_filter_trigger()
504
+ with gr.Column():
505
+ with gr.Row():
506
+ model_file = gr.Dropdown(
507
+ label=i18n("Voice Model"),
508
+ info=i18n("Select the voice model to use for the conversion."),
509
+ choices=sorted(get_files("model"), key=extract_model_and_epoch),
510
+ value=default_weight,
511
+ interactive=True,
512
+ allow_custom_value=True,
513
+ )
514
+ filter_box_inf = gr.Textbox(
515
+ label=i18n("Filter"),
516
+ info=i18n("Path must contain:"),
517
+ placeholder=i18n("Type to filter..."),
518
+ interactive=True,
519
+ scale=0.1,
520
+ visible=load_config_filter(),
521
+ )
522
+ index_file = gr.Dropdown(
523
+ label=i18n("Index File"),
524
+ info=i18n("Select the index file to use for the conversion."),
525
+ choices=sorted(get_files("index")),
526
+ value=match_index(default_weight),
527
+ interactive=True,
528
+ allow_custom_value=True,
529
+ )
530
+ filter_box_inf.blur(
531
+ fn=filter_dropdowns,
532
+ inputs=[filter_box_inf],
533
+ outputs=[model_file, index_file],
534
+ )
535
+ trigger.change(
536
+ fn=update_filter_visibility,
537
+ inputs=[trigger],
538
+ outputs=[filter_box_inf, model_file, index_file],
539
+ show_progress=False,
540
+ )
541
+ with gr.Row():
542
+ unload_button = gr.Button(i18n("Unload Voice"))
543
+ refresh_button = gr.Button(i18n("Refresh"))
544
+
545
+ unload_button.click(
546
+ fn=lambda: (
547
+ {"value": "", "__type__": "update"},
548
+ {"value": "", "__type__": "update"},
549
+ ),
550
+ inputs=[],
551
+ outputs=[model_file, index_file],
552
+ )
553
+ model_file.select(
554
+ fn=lambda model_file_value: match_index(model_file_value),
555
+ inputs=[model_file],
556
+ outputs=[index_file],
557
+ )
558
+
559
+ # Single inference tab
560
+ with gr.Tab(i18n("Single")):
561
+ with gr.Column():
562
+ upload_audio = gr.Audio(
563
+ label=i18n("Upload Audio"), type="filepath", editable=False
564
+ )
565
+ with gr.Row():
566
+ audio = gr.Dropdown(
567
+ label=i18n("Select Audio"),
568
+ info=i18n("Select the audio to convert."),
569
+ choices=sorted(audio_paths),
570
+ value=audio_paths[0] if audio_paths else "",
571
+ interactive=True,
572
+ allow_custom_value=True,
573
+ )
574
+
575
+ with gr.Accordion(i18n("Advanced Settings"), open=False):
576
+ with gr.Column():
577
+ clear_outputs_infer = gr.Button(
578
+ i18n("Clear Outputs (Deletes all audios in assets/audios)")
579
+ )
580
+ output_path = gr.Textbox(
581
+ label=i18n("Output Path"),
582
+ placeholder=i18n("Enter output path"),
583
+ info=i18n(
584
+ "The path where the output audio will be saved, by default in assets/audios/output.wav"
585
+ ),
586
+ value=(
587
+ output_path_fn(audio_paths[0])
588
+ if audio_paths
589
+ else os.path.join(now_dir, "assets", "audios", "output.wav")
590
+ ),
591
+ interactive=True,
592
+ )
593
+ export_format = gr.Radio(
594
+ label=i18n("Export Format"),
595
+ info=i18n("Select the format to export the audio."),
596
+ choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
597
+ value="WAV",
598
+ interactive=True,
599
+ )
600
+ sid = gr.Dropdown(
601
+ label=i18n("Speaker ID"),
602
+ info=i18n("Select the speaker ID to use for the conversion."),
603
+ choices=get_speakers_id(model_file.value),
604
+ value=0,
605
+ interactive=True,
606
+ )
607
+ split_audio = gr.Checkbox(
608
+ label=i18n("Split Audio"),
609
+ info=i18n(
610
+ "Split the audio into chunks for inference to obtain better results in some cases."
611
+ ),
612
+ visible=True,
613
+ value=False,
614
+ interactive=True,
615
+ )
616
+ autotune = gr.Checkbox(
617
+ label=i18n("Autotune"),
618
+ info=i18n(
619
+ "Apply a soft autotune to your inferences, recommended for singing conversions."
620
+ ),
621
+ visible=True,
622
+ value=False,
623
+ interactive=True,
624
+ )
625
+ autotune_strength = gr.Slider(
626
+ minimum=0,
627
+ maximum=1,
628
+ label=i18n("Autotune Strength"),
629
+ info=i18n(
630
+ "Set the autotune strength - the more you increase it the more it will snap to the chromatic grid."
631
+ ),
632
+ visible=False,
633
+ value=1,
634
+ interactive=True,
635
+ )
636
+ proposed_pitch = gr.Checkbox(
637
+ label=i18n("Proposed Pitch"),
638
+ info=i18n(
639
+ "Adjust the input audio pitch to match the voice model range."
640
+ ),
641
+ visible=True,
642
+ value=False,
643
+ interactive=True,
644
+ )
645
+ proposed_pitch_threshold = gr.Slider(
646
+ minimum=50.0,
647
+ maximum=1200.0,
648
+ label=i18n("Proposed Pitch Threshold"),
649
+ info=i18n(
650
+ "Male voice models typically use 155.0 and female voice models typically use 255.0."
651
+ ),
652
+ visible=False,
653
+ value=155.0,
654
+ interactive=True,
655
+ )
656
+ clean_audio = gr.Checkbox(
657
+ label=i18n("Clean Audio"),
658
+ info=i18n(
659
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios."
660
+ ),
661
+ visible=True,
662
+ value=False,
663
+ interactive=True,
664
+ )
665
+ clean_strength = gr.Slider(
666
+ minimum=0,
667
+ maximum=1,
668
+ label=i18n("Clean Strength"),
669
+ info=i18n(
670
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed."
671
+ ),
672
+ visible=False,
673
+ value=0.5,
674
+ interactive=True,
675
+ )
676
+ formant_shifting = gr.Checkbox(
677
+ label=i18n("Formant Shifting"),
678
+ info=i18n(
679
+ "Enable formant shifting. Used for male to female and vice-versa convertions."
680
+ ),
681
+ value=False,
682
+ visible=True,
683
+ interactive=True,
684
+ )
685
+ post_process = gr.Checkbox(
686
+ label=i18n("Post-Process"),
687
+ info=i18n("Post-process the audio to apply effects to the output."),
688
+ value=False,
689
+ interactive=True,
690
+ )
691
+ with gr.Row(visible=False) as formant_row:
692
+ formant_preset = gr.Dropdown(
693
+ label=i18n("Browse presets for formanting"),
694
+ info=i18n(
695
+ "Presets are located in /assets/formant_shift folder"
696
+ ),
697
+ choices=list_json_files(FORMANTSHIFT_DIR),
698
+ visible=False,
699
+ interactive=True,
700
+ )
701
+ formant_refresh_button = gr.Button(
702
+ value="Refresh",
703
+ visible=False,
704
+ )
705
+ formant_qfrency = gr.Slider(
706
+ value=1.0,
707
+ info=i18n("Default value is 1.0"),
708
+ label=i18n("Quefrency for formant shifting"),
709
+ minimum=0.0,
710
+ maximum=16.0,
711
+ step=0.1,
712
+ visible=False,
713
+ interactive=True,
714
+ )
715
+ formant_timbre = gr.Slider(
716
+ value=1.0,
717
+ info=i18n("Default value is 1.0"),
718
+ label=i18n("Timbre for formant shifting"),
719
+ minimum=0.0,
720
+ maximum=16.0,
721
+ step=0.1,
722
+ visible=False,
723
+ interactive=True,
724
+ )
725
+ reverb = gr.Checkbox(
726
+ label=i18n("Reverb"),
727
+ info=i18n("Apply reverb to the audio."),
728
+ value=False,
729
+ interactive=True,
730
+ visible=False,
731
+ )
732
+ reverb_room_size = gr.Slider(
733
+ minimum=0,
734
+ maximum=1,
735
+ label=i18n("Reverb Room Size"),
736
+ info=i18n("Set the room size of the reverb."),
737
+ value=0.5,
738
+ interactive=True,
739
+ visible=False,
740
+ )
741
+ reverb_damping = gr.Slider(
742
+ minimum=0,
743
+ maximum=1,
744
+ label=i18n("Reverb Damping"),
745
+ info=i18n("Set the damping of the reverb."),
746
+ value=0.5,
747
+ interactive=True,
748
+ visible=False,
749
+ )
750
+ reverb_wet_gain = gr.Slider(
751
+ minimum=0,
752
+ maximum=1,
753
+ label=i18n("Reverb Wet Gain"),
754
+ info=i18n("Set the wet gain of the reverb."),
755
+ value=0.33,
756
+ interactive=True,
757
+ visible=False,
758
+ )
759
+ reverb_dry_gain = gr.Slider(
760
+ minimum=0,
761
+ maximum=1,
762
+ label=i18n("Reverb Dry Gain"),
763
+ info=i18n("Set the dry gain of the reverb."),
764
+ value=0.4,
765
+ interactive=True,
766
+ visible=False,
767
+ )
768
+ reverb_width = gr.Slider(
769
+ minimum=0,
770
+ maximum=1,
771
+ label=i18n("Reverb Width"),
772
+ info=i18n("Set the width of the reverb."),
773
+ value=1.0,
774
+ interactive=True,
775
+ visible=False,
776
+ )
777
+ reverb_freeze_mode = gr.Slider(
778
+ minimum=0,
779
+ maximum=1,
780
+ label=i18n("Reverb Freeze Mode"),
781
+ info=i18n("Set the freeze mode of the reverb."),
782
+ value=0.0,
783
+ interactive=True,
784
+ visible=False,
785
+ )
786
+ pitch_shift = gr.Checkbox(
787
+ label=i18n("Pitch Shift"),
788
+ info=i18n("Apply pitch shift to the audio."),
789
+ value=False,
790
+ interactive=True,
791
+ visible=False,
792
+ )
793
+ pitch_shift_semitones = gr.Slider(
794
+ minimum=-12,
795
+ maximum=12,
796
+ label=i18n("Pitch Shift Semitones"),
797
+ info=i18n("Set the pitch shift semitones."),
798
+ value=0,
799
+ interactive=True,
800
+ visible=False,
801
+ )
802
+ limiter = gr.Checkbox(
803
+ label=i18n("Limiter"),
804
+ info=i18n("Apply limiter to the audio."),
805
+ value=False,
806
+ interactive=True,
807
+ visible=False,
808
+ )
809
+ limiter_threshold = gr.Slider(
810
+ minimum=-60,
811
+ maximum=0,
812
+ label=i18n("Limiter Threshold dB"),
813
+ info=i18n("Set the limiter threshold dB."),
814
+ value=-6,
815
+ interactive=True,
816
+ visible=False,
817
+ )
818
+ limiter_release_time = gr.Slider(
819
+ minimum=0.01,
820
+ maximum=1,
821
+ label=i18n("Limiter Release Time"),
822
+ info=i18n("Set the limiter release time."),
823
+ value=0.05,
824
+ interactive=True,
825
+ visible=False,
826
+ )
827
+ gain = gr.Checkbox(
828
+ label=i18n("Gain"),
829
+ info=i18n("Apply gain to the audio."),
830
+ value=False,
831
+ interactive=True,
832
+ visible=False,
833
+ )
834
+ gain_db = gr.Slider(
835
+ minimum=-60,
836
+ maximum=60,
837
+ label=i18n("Gain dB"),
838
+ info=i18n("Set the gain dB."),
839
+ value=0,
840
+ interactive=True,
841
+ visible=False,
842
+ )
843
+ distortion = gr.Checkbox(
844
+ label=i18n("Distortion"),
845
+ info=i18n("Apply distortion to the audio."),
846
+ value=False,
847
+ interactive=True,
848
+ visible=False,
849
+ )
850
+ distortion_gain = gr.Slider(
851
+ minimum=-60,
852
+ maximum=60,
853
+ label=i18n("Distortion Gain"),
854
+ info=i18n("Set the distortion gain."),
855
+ value=25,
856
+ interactive=True,
857
+ visible=False,
858
+ )
859
+ chorus = gr.Checkbox(
860
+ label=i18n("Chorus"),
861
+ info=i18n("Apply chorus to the audio."),
862
+ value=False,
863
+ interactive=True,
864
+ visible=False,
865
+ )
866
+ chorus_rate = gr.Slider(
867
+ minimum=0,
868
+ maximum=100,
869
+ label=i18n("Chorus Rate Hz"),
870
+ info=i18n("Set the chorus rate Hz."),
871
+ value=1.0,
872
+ interactive=True,
873
+ visible=False,
874
+ )
875
+ chorus_depth = gr.Slider(
876
+ minimum=0,
877
+ maximum=1,
878
+ label=i18n("Chorus Depth"),
879
+ info=i18n("Set the chorus depth."),
880
+ value=0.25,
881
+ interactive=True,
882
+ visible=False,
883
+ )
884
+ chorus_center_delay = gr.Slider(
885
+ minimum=7,
886
+ maximum=8,
887
+ label=i18n("Chorus Center Delay ms"),
888
+ info=i18n("Set the chorus center delay ms."),
889
+ value=7,
890
+ interactive=True,
891
+ visible=False,
892
+ )
893
+ chorus_feedback = gr.Slider(
894
+ minimum=0,
895
+ maximum=1,
896
+ label=i18n("Chorus Feedback"),
897
+ info=i18n("Set the chorus feedback."),
898
+ value=0.0,
899
+ interactive=True,
900
+ visible=False,
901
+ )
902
+ chorus_mix = gr.Slider(
903
+ minimum=0,
904
+ maximum=1,
905
+ label=i18n("Chorus Mix"),
906
+ info=i18n("Set the chorus mix."),
907
+ value=0.5,
908
+ interactive=True,
909
+ visible=False,
910
+ )
911
+ bitcrush = gr.Checkbox(
912
+ label=i18n("Bitcrush"),
913
+ info=i18n("Apply bitcrush to the audio."),
914
+ value=False,
915
+ interactive=True,
916
+ visible=False,
917
+ )
918
+ bitcrush_bit_depth = gr.Slider(
919
+ minimum=1,
920
+ maximum=32,
921
+ label=i18n("Bitcrush Bit Depth"),
922
+ info=i18n("Set the bitcrush bit depth."),
923
+ value=8,
924
+ interactive=True,
925
+ visible=False,
926
+ )
927
+ clipping = gr.Checkbox(
928
+ label=i18n("Clipping"),
929
+ info=i18n("Apply clipping to the audio."),
930
+ value=False,
931
+ interactive=True,
932
+ visible=False,
933
+ )
934
+ clipping_threshold = gr.Slider(
935
+ minimum=-60,
936
+ maximum=0,
937
+ label=i18n("Clipping Threshold"),
938
+ info=i18n("Set the clipping threshold."),
939
+ value=-6,
940
+ interactive=True,
941
+ visible=False,
942
+ )
943
+ compressor = gr.Checkbox(
944
+ label=i18n("Compressor"),
945
+ info=i18n("Apply compressor to the audio."),
946
+ value=False,
947
+ interactive=True,
948
+ visible=False,
949
+ )
950
+ compressor_threshold = gr.Slider(
951
+ minimum=-60,
952
+ maximum=0,
953
+ label=i18n("Compressor Threshold dB"),
954
+ info=i18n("Set the compressor threshold dB."),
955
+ value=0,
956
+ interactive=True,
957
+ visible=False,
958
+ )
959
+ compressor_ratio = gr.Slider(
960
+ minimum=1,
961
+ maximum=20,
962
+ label=i18n("Compressor Ratio"),
963
+ info=i18n("Set the compressor ratio."),
964
+ value=1,
965
+ interactive=True,
966
+ visible=False,
967
+ )
968
+ compressor_attack = gr.Slider(
969
+ minimum=0.0,
970
+ maximum=100,
971
+ label=i18n("Compressor Attack ms"),
972
+ info=i18n("Set the compressor attack ms."),
973
+ value=1.0,
974
+ interactive=True,
975
+ visible=False,
976
+ )
977
+ compressor_release = gr.Slider(
978
+ minimum=0.01,
979
+ maximum=100,
980
+ label=i18n("Compressor Release ms"),
981
+ info=i18n("Set the compressor release ms."),
982
+ value=100,
983
+ interactive=True,
984
+ visible=False,
985
+ )
986
+ delay = gr.Checkbox(
987
+ label=i18n("Delay"),
988
+ info=i18n("Apply delay to the audio."),
989
+ value=False,
990
+ interactive=True,
991
+ visible=False,
992
+ )
993
+ delay_seconds = gr.Slider(
994
+ minimum=0.0,
995
+ maximum=5.0,
996
+ label=i18n("Delay Seconds"),
997
+ info=i18n("Set the delay seconds."),
998
+ value=0.5,
999
+ interactive=True,
1000
+ visible=False,
1001
+ )
1002
+ delay_feedback = gr.Slider(
1003
+ minimum=0.0,
1004
+ maximum=1.0,
1005
+ label=i18n("Delay Feedback"),
1006
+ info=i18n("Set the delay feedback."),
1007
+ value=0.0,
1008
+ interactive=True,
1009
+ visible=False,
1010
+ )
1011
+ delay_mix = gr.Slider(
1012
+ minimum=0.0,
1013
+ maximum=1.0,
1014
+ label=i18n("Delay Mix"),
1015
+ info=i18n("Set the delay mix."),
1016
+ value=0.5,
1017
+ interactive=True,
1018
+ visible=False,
1019
+ )
1020
+ with gr.Accordion(i18n("Preset Settings"), open=False):
1021
+ with gr.Row():
1022
+ preset_dropdown = gr.Dropdown(
1023
+ label=i18n("Select Custom Preset"),
1024
+ choices=list_json_files(PRESETS_DIR),
1025
+ interactive=True,
1026
+ )
1027
+ presets_refresh_button = gr.Button(i18n("Refresh Presets"))
1028
+ import_file = gr.File(
1029
+ label=i18n("Select file to import"),
1030
+ file_count="single",
1031
+ type="filepath",
1032
+ interactive=True,
1033
+ )
1034
+ import_file.change(
1035
+ import_presets_button,
1036
+ inputs=import_file,
1037
+ outputs=[preset_dropdown],
1038
+ )
1039
+ presets_refresh_button.click(
1040
+ refresh_presets, outputs=preset_dropdown
1041
+ )
1042
+ with gr.Row():
1043
+ preset_name_input = gr.Textbox(
1044
+ label=i18n("Preset Name"),
1045
+ placeholder=i18n("Enter preset name"),
1046
+ )
1047
+ export_button = gr.Button(i18n("Export Preset"))
1048
+ pitch = gr.Slider(
1049
+ minimum=-24,
1050
+ maximum=24,
1051
+ step=1,
1052
+ label=i18n("Pitch"),
1053
+ info=i18n(
1054
+ "Set the pitch of the audio, the higher the value, the higher the pitch."
1055
+ ),
1056
+ value=0,
1057
+ interactive=True,
1058
+ )
1059
+ index_rate = gr.Slider(
1060
+ minimum=0,
1061
+ maximum=1,
1062
+ label=i18n("Search Feature Ratio"),
1063
+ info=i18n(
1064
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio."
1065
+ ),
1066
+ value=0.75,
1067
+ interactive=True,
1068
+ )
1069
+ rms_mix_rate = gr.Slider(
1070
+ minimum=0,
1071
+ maximum=1,
1072
+ label=i18n("Volume Envelope"),
1073
+ info=i18n(
1074
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed."
1075
+ ),
1076
+ value=1,
1077
+ interactive=True,
1078
+ )
1079
+ protect = gr.Slider(
1080
+ minimum=0,
1081
+ maximum=0.5,
1082
+ label=i18n("Protect Voiceless Consonants"),
1083
+ info=i18n(
1084
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect."
1085
+ ),
1086
+ value=0.5,
1087
+ interactive=True,
1088
+ )
1089
+ preset_dropdown.change(
1090
+ update_sliders,
1091
+ inputs=preset_dropdown,
1092
+ outputs=[
1093
+ pitch,
1094
+ index_rate,
1095
+ rms_mix_rate,
1096
+ protect,
1097
+ ],
1098
+ )
1099
+ export_button.click(
1100
+ export_presets_button,
1101
+ inputs=[
1102
+ preset_name_input,
1103
+ pitch,
1104
+ index_rate,
1105
+ rms_mix_rate,
1106
+ protect,
1107
+ ],
1108
+ )
1109
+ f0_method = gr.Radio(
1110
+ label=i18n("Pitch extraction algorithm"),
1111
+ info=i18n(
1112
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
1113
+ ),
1114
+ choices=[
1115
+ "crepe",
1116
+ "crepe-tiny",
1117
+ "rmvpe",
1118
+ "fcpe",
1119
+ "swift",
1120
+ ],
1121
+ value="rmvpe",
1122
+ interactive=True,
1123
+ )
1124
+ embedder_model = gr.Radio(
1125
+ label=i18n("Embedder Model"),
1126
+ info=i18n("Model used for learning speaker embedding."),
1127
+ choices=[
1128
+ "contentvec",
1129
+ "spin",
1130
+ "spin-v2",
1131
+ "chinese-hubert-base",
1132
+ "japanese-hubert-base",
1133
+ "korean-hubert-base",
1134
+ "custom",
1135
+ ],
1136
+ value="contentvec",
1137
+ interactive=True,
1138
+ )
1139
+ with gr.Column(visible=False) as embedder_custom:
1140
+ with gr.Accordion(i18n("Custom Embedder"), open=True):
1141
+ with gr.Row():
1142
+ embedder_model_custom = gr.Dropdown(
1143
+ label=i18n("Select Custom Embedder"),
1144
+ choices=refresh_embedders_folders(),
1145
+ interactive=True,
1146
+ allow_custom_value=True,
1147
+ )
1148
+ refresh_embedders_button = gr.Button(
1149
+ i18n("Refresh embedders")
1150
+ )
1151
+ folder_name_input = gr.Textbox(
1152
+ label=i18n("Folder Name"), interactive=True
1153
+ )
1154
+ with gr.Row():
1155
+ bin_file_upload = gr.File(
1156
+ label=i18n("Upload .bin"),
1157
+ type="filepath",
1158
+ interactive=True,
1159
+ )
1160
+ config_file_upload = gr.File(
1161
+ label=i18n("Upload .json"),
1162
+ type="filepath",
1163
+ interactive=True,
1164
+ )
1165
+ move_files_button = gr.Button(
1166
+ i18n("Move files to custom embedder folder")
1167
+ )
1168
+
1169
+ def enforce_terms(terms_accepted, *args):
1170
+ if not terms_accepted:
1171
+ message = "You must agree to the Terms of Use to proceed."
1172
+ gr.Info(message)
1173
+ return message, None
1174
+ return run_infer_script(*args)
1175
+
1176
+ def enforce_terms_batch(terms_accepted, *args):
1177
+ if not terms_accepted:
1178
+ message = "You must agree to the Terms of Use to proceed."
1179
+ gr.Info(message)
1180
+ return message, None
1181
+ return run_batch_infer_script(*args)
1182
+
1183
+ terms_checkbox = gr.Checkbox(
1184
+ label=i18n("I agree to the terms of use"),
1185
+ info=i18n(
1186
+ "Please ensure compliance with the terms and conditions detailed in [this document](https://github.com/IAHispano/Applio/blob/main/TERMS_OF_USE.md) before proceeding with your inference."
1187
+ ),
1188
+ value=False,
1189
+ interactive=True,
1190
+ )
1191
+
1192
+ convert_button1 = gr.Button(i18n("Convert"))
1193
+
1194
+ with gr.Row():
1195
+ vc_output1 = gr.Textbox(
1196
+ label=i18n("Output Information"),
1197
+ info=i18n("The output information will be displayed here."),
1198
+ )
1199
+ vc_output2 = gr.Audio(label=i18n("Export Audio"))
1200
+
1201
+ # Batch inference tab
1202
+ with gr.Tab(i18n("Batch")):
1203
+ with gr.Row():
1204
+ with gr.Column():
1205
+ input_folder_batch = gr.Textbox(
1206
+ label=i18n("Input Folder"),
1207
+ info=i18n("Select the folder containing the audios to convert."),
1208
+ placeholder=i18n("Enter input path"),
1209
+ value=os.path.join(now_dir, "assets", "audios"),
1210
+ interactive=True,
1211
+ )
1212
+ output_folder_batch = gr.Textbox(
1213
+ label=i18n("Output Folder"),
1214
+ info=i18n(
1215
+ "Select the folder where the output audios will be saved."
1216
+ ),
1217
+ placeholder=i18n("Enter output path"),
1218
+ value=os.path.join(now_dir, "assets", "audios"),
1219
+ interactive=True,
1220
+ )
1221
+ with gr.Accordion(i18n("Advanced Settings"), open=False):
1222
+ with gr.Column():
1223
+ clear_outputs_batch = gr.Button(
1224
+ i18n("Clear Outputs (Deletes all audios in assets/audios)")
1225
+ )
1226
+ export_format_batch = gr.Radio(
1227
+ label=i18n("Export Format"),
1228
+ info=i18n("Select the format to export the audio."),
1229
+ choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
1230
+ value="WAV",
1231
+ interactive=True,
1232
+ )
1233
+ sid_batch = gr.Dropdown(
1234
+ label=i18n("Speaker ID"),
1235
+ info=i18n("Select the speaker ID to use for the conversion."),
1236
+ choices=get_speakers_id(model_file.value),
1237
+ value=0,
1238
+ interactive=True,
1239
+ )
1240
+ split_audio_batch = gr.Checkbox(
1241
+ label=i18n("Split Audio"),
1242
+ info=i18n(
1243
+ "Split the audio into chunks for inference to obtain better results in some cases."
1244
+ ),
1245
+ visible=True,
1246
+ value=False,
1247
+ interactive=True,
1248
+ )
1249
+ autotune_batch = gr.Checkbox(
1250
+ label=i18n("Autotune"),
1251
+ info=i18n(
1252
+ "Apply a soft autotune to your inferences, recommended for singing conversions."
1253
+ ),
1254
+ visible=True,
1255
+ value=False,
1256
+ interactive=True,
1257
+ )
1258
+ autotune_strength_batch = gr.Slider(
1259
+ minimum=0,
1260
+ maximum=1,
1261
+ label=i18n("Autotune Strength"),
1262
+ info=i18n(
1263
+ "Set the autotune strength - the more you increase it the more it will snap to the chromatic grid."
1264
+ ),
1265
+ visible=False,
1266
+ value=1,
1267
+ interactive=True,
1268
+ )
1269
+ proposed_pitch_batch = gr.Checkbox(
1270
+ label=i18n("Proposed Pitch"),
1271
+ info=i18n(
1272
+ "Adjust the input audio pitch to match the voice model range."
1273
+ ),
1274
+ visible=True,
1275
+ value=False,
1276
+ interactive=True,
1277
+ )
1278
+ proposed_pitch_threshold_batch = gr.Slider(
1279
+ minimum=50.0,
1280
+ maximum=1200.0,
1281
+ label=i18n("Proposed Pitch Threshold"),
1282
+ info=i18n(
1283
+ "Male voice models typically use 155.0 and female voice models typically use 255.0."
1284
+ ),
1285
+ visible=False,
1286
+ value=155.0,
1287
+ interactive=True,
1288
+ )
1289
+ clean_audio_batch = gr.Checkbox(
1290
+ label=i18n("Clean Audio"),
1291
+ info=i18n(
1292
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios."
1293
+ ),
1294
+ visible=True,
1295
+ value=False,
1296
+ interactive=True,
1297
+ )
1298
+ clean_strength_batch = gr.Slider(
1299
+ minimum=0,
1300
+ maximum=1,
1301
+ label=i18n("Clean Strength"),
1302
+ info=i18n(
1303
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed."
1304
+ ),
1305
+ visible=False,
1306
+ value=0.5,
1307
+ interactive=True,
1308
+ )
1309
+ formant_shifting_batch = gr.Checkbox(
1310
+ label=i18n("Formant Shifting"),
1311
+ info=i18n(
1312
+ "Enable formant shifting. Used for male to female and vice-versa convertions."
1313
+ ),
1314
+ value=False,
1315
+ visible=True,
1316
+ interactive=True,
1317
+ )
1318
+ post_process_batch = gr.Checkbox(
1319
+ label=i18n("Post-Process"),
1320
+ info=i18n("Post-process the audio to apply effects to the output."),
1321
+ value=False,
1322
+ interactive=True,
1323
+ )
1324
+ with gr.Row(visible=False) as formant_row_batch:
1325
+ formant_preset_batch = gr.Dropdown(
1326
+ label=i18n("Browse presets for formanting"),
1327
+ info=i18n(
1328
+ "Presets are located in /assets/formant_shift folder"
1329
+ ),
1330
+ choices=list_json_files(FORMANTSHIFT_DIR),
1331
+ visible=False,
1332
+ interactive=True,
1333
+ )
1334
+ formant_refresh_button_batch = gr.Button(
1335
+ value="Refresh",
1336
+ visible=False,
1337
+ )
1338
+ formant_qfrency_batch = gr.Slider(
1339
+ value=1.0,
1340
+ info=i18n("Default value is 1.0"),
1341
+ label=i18n("Quefrency for formant shifting"),
1342
+ minimum=0.0,
1343
+ maximum=16.0,
1344
+ step=0.1,
1345
+ visible=False,
1346
+ interactive=True,
1347
+ )
1348
+ formant_timbre_batch = gr.Slider(
1349
+ value=1.0,
1350
+ info=i18n("Default value is 1.0"),
1351
+ label=i18n("Timbre for formant shifting"),
1352
+ minimum=0.0,
1353
+ maximum=16.0,
1354
+ step=0.1,
1355
+ visible=False,
1356
+ interactive=True,
1357
+ )
1358
+ reverb_batch = gr.Checkbox(
1359
+ label=i18n("Reverb"),
1360
+ info=i18n("Apply reverb to the audio."),
1361
+ value=False,
1362
+ interactive=True,
1363
+ visible=False,
1364
+ )
1365
+ reverb_room_size_batch = gr.Slider(
1366
+ minimum=0,
1367
+ maximum=1,
1368
+ label=i18n("Reverb Room Size"),
1369
+ info=i18n("Set the room size of the reverb."),
1370
+ value=0.5,
1371
+ interactive=True,
1372
+ visible=False,
1373
+ )
1374
+ reverb_damping_batch = gr.Slider(
1375
+ minimum=0,
1376
+ maximum=1,
1377
+ label=i18n("Reverb Damping"),
1378
+ info=i18n("Set the damping of the reverb."),
1379
+ value=0.5,
1380
+ interactive=True,
1381
+ visible=False,
1382
+ )
1383
+ reverb_wet_gain_batch = gr.Slider(
1384
+ minimum=0,
1385
+ maximum=1,
1386
+ label=i18n("Reverb Wet Gain"),
1387
+ info=i18n("Set the wet gain of the reverb."),
1388
+ value=0.33,
1389
+ interactive=True,
1390
+ visible=False,
1391
+ )
1392
+ reverb_dry_gain_batch = gr.Slider(
1393
+ minimum=0,
1394
+ maximum=1,
1395
+ label=i18n("Reverb Dry Gain"),
1396
+ info=i18n("Set the dry gain of the reverb."),
1397
+ value=0.4,
1398
+ interactive=True,
1399
+ visible=False,
1400
+ )
1401
+ reverb_width_batch = gr.Slider(
1402
+ minimum=0,
1403
+ maximum=1,
1404
+ label=i18n("Reverb Width"),
1405
+ info=i18n("Set the width of the reverb."),
1406
+ value=1.0,
1407
+ interactive=True,
1408
+ visible=False,
1409
+ )
1410
+ reverb_freeze_mode_batch = gr.Slider(
1411
+ minimum=0,
1412
+ maximum=1,
1413
+ label=i18n("Reverb Freeze Mode"),
1414
+ info=i18n("Set the freeze mode of the reverb."),
1415
+ value=0.0,
1416
+ interactive=True,
1417
+ visible=False,
1418
+ )
1419
+ pitch_shift_batch = gr.Checkbox(
1420
+ label=i18n("Pitch Shift"),
1421
+ info=i18n("Apply pitch shift to the audio."),
1422
+ value=False,
1423
+ interactive=True,
1424
+ visible=False,
1425
+ )
1426
+ pitch_shift_semitones_batch = gr.Slider(
1427
+ minimum=-12,
1428
+ maximum=12,
1429
+ label=i18n("Pitch Shift Semitones"),
1430
+ info=i18n("Set the pitch shift semitones."),
1431
+ value=0,
1432
+ interactive=True,
1433
+ visible=False,
1434
+ )
1435
+ limiter_batch = gr.Checkbox(
1436
+ label=i18n("Limiter"),
1437
+ info=i18n("Apply limiter to the audio."),
1438
+ value=False,
1439
+ interactive=True,
1440
+ visible=False,
1441
+ )
1442
+ limiter_threshold_batch = gr.Slider(
1443
+ minimum=-60,
1444
+ maximum=0,
1445
+ label=i18n("Limiter Threshold dB"),
1446
+ info=i18n("Set the limiter threshold dB."),
1447
+ value=-6,
1448
+ interactive=True,
1449
+ visible=False,
1450
+ )
1451
+ limiter_release_time_batch = gr.Slider(
1452
+ minimum=0.01,
1453
+ maximum=1,
1454
+ label=i18n("Limiter Release Time"),
1455
+ info=i18n("Set the limiter release time."),
1456
+ value=0.05,
1457
+ interactive=True,
1458
+ visible=False,
1459
+ )
1460
+ gain_batch = gr.Checkbox(
1461
+ label=i18n("Gain"),
1462
+ info=i18n("Apply gain to the audio."),
1463
+ value=False,
1464
+ interactive=True,
1465
+ visible=False,
1466
+ )
1467
+ gain_db_batch = gr.Slider(
1468
+ minimum=-60,
1469
+ maximum=60,
1470
+ label=i18n("Gain dB"),
1471
+ info=i18n("Set the gain dB."),
1472
+ value=0,
1473
+ interactive=True,
1474
+ visible=False,
1475
+ )
1476
+ distortion_batch = gr.Checkbox(
1477
+ label=i18n("Distortion"),
1478
+ info=i18n("Apply distortion to the audio."),
1479
+ value=False,
1480
+ interactive=True,
1481
+ visible=False,
1482
+ )
1483
+ distortion_gain_batch = gr.Slider(
1484
+ minimum=-60,
1485
+ maximum=60,
1486
+ label=i18n("Distortion Gain"),
1487
+ info=i18n("Set the distortion gain."),
1488
+ value=25,
1489
+ interactive=True,
1490
+ visible=False,
1491
+ )
1492
+ chorus_batch = gr.Checkbox(
1493
+ label=i18n("Chorus"),
1494
+ info=i18n("Apply chorus to the audio."),
1495
+ value=False,
1496
+ interactive=True,
1497
+ visible=False,
1498
+ )
1499
+ chorus_rate_batch = gr.Slider(
1500
+ minimum=0,
1501
+ maximum=100,
1502
+ label=i18n("Chorus Rate Hz"),
1503
+ info=i18n("Set the chorus rate Hz."),
1504
+ value=1.0,
1505
+ interactive=True,
1506
+ visible=False,
1507
+ )
1508
+ chorus_depth_batch = gr.Slider(
1509
+ minimum=0,
1510
+ maximum=1,
1511
+ label=i18n("Chorus Depth"),
1512
+ info=i18n("Set the chorus depth."),
1513
+ value=0.25,
1514
+ interactive=True,
1515
+ visible=False,
1516
+ )
1517
+ chorus_center_delay_batch = gr.Slider(
1518
+ minimum=7,
1519
+ maximum=8,
1520
+ label=i18n("Chorus Center Delay ms"),
1521
+ info=i18n("Set the chorus center delay ms."),
1522
+ value=7,
1523
+ interactive=True,
1524
+ visible=False,
1525
+ )
1526
+ chorus_feedback_batch = gr.Slider(
1527
+ minimum=0,
1528
+ maximum=1,
1529
+ label=i18n("Chorus Feedback"),
1530
+ info=i18n("Set the chorus feedback."),
1531
+ value=0.0,
1532
+ interactive=True,
1533
+ visible=False,
1534
+ )
1535
+ chorus_mix_batch = gr.Slider(
1536
+ minimum=0,
1537
+ maximum=1,
1538
+ label=i18n("Chorus Mix"),
1539
+ info=i18n("Set the chorus mix."),
1540
+ value=0.5,
1541
+ interactive=True,
1542
+ visible=False,
1543
+ )
1544
+ bitcrush_batch = gr.Checkbox(
1545
+ label=i18n("Bitcrush"),
1546
+ info=i18n("Apply bitcrush to the audio."),
1547
+ value=False,
1548
+ interactive=True,
1549
+ visible=False,
1550
+ )
1551
+ bitcrush_bit_depth_batch = gr.Slider(
1552
+ minimum=1,
1553
+ maximum=32,
1554
+ label=i18n("Bitcrush Bit Depth"),
1555
+ info=i18n("Set the bitcrush bit depth."),
1556
+ value=8,
1557
+ interactive=True,
1558
+ visible=False,
1559
+ )
1560
+ clipping_batch = gr.Checkbox(
1561
+ label=i18n("Clipping"),
1562
+ info=i18n("Apply clipping to the audio."),
1563
+ value=False,
1564
+ interactive=True,
1565
+ visible=False,
1566
+ )
1567
+ clipping_threshold_batch = gr.Slider(
1568
+ minimum=-60,
1569
+ maximum=0,
1570
+ label=i18n("Clipping Threshold"),
1571
+ info=i18n("Set the clipping threshold."),
1572
+ value=-6,
1573
+ interactive=True,
1574
+ visible=False,
1575
+ )
1576
+ compressor_batch = gr.Checkbox(
1577
+ label=i18n("Compressor"),
1578
+ info=i18n("Apply compressor to the audio."),
1579
+ value=False,
1580
+ interactive=True,
1581
+ visible=False,
1582
+ )
1583
+ compressor_threshold_batch = gr.Slider(
1584
+ minimum=-60,
1585
+ maximum=0,
1586
+ label=i18n("Compressor Threshold dB"),
1587
+ info=i18n("Set the compressor threshold dB."),
1588
+ value=0,
1589
+ interactive=True,
1590
+ visible=False,
1591
+ )
1592
+ compressor_ratio_batch = gr.Slider(
1593
+ minimum=1,
1594
+ maximum=20,
1595
+ label=i18n("Compressor Ratio"),
1596
+ info=i18n("Set the compressor ratio."),
1597
+ value=1,
1598
+ interactive=True,
1599
+ visible=False,
1600
+ )
1601
+ compressor_attack_batch = gr.Slider(
1602
+ minimum=0.0,
1603
+ maximum=100,
1604
+ label=i18n("Compressor Attack ms"),
1605
+ info=i18n("Set the compressor attack ms."),
1606
+ value=1.0,
1607
+ interactive=True,
1608
+ visible=False,
1609
+ )
1610
+ compressor_release_batch = gr.Slider(
1611
+ minimum=0.01,
1612
+ maximum=100,
1613
+ label=i18n("Compressor Release ms"),
1614
+ info=i18n("Set the compressor release ms."),
1615
+ value=100,
1616
+ interactive=True,
1617
+ visible=False,
1618
+ )
1619
+ delay_batch = gr.Checkbox(
1620
+ label=i18n("Delay"),
1621
+ info=i18n("Apply delay to the audio."),
1622
+ value=False,
1623
+ interactive=True,
1624
+ visible=False,
1625
+ )
1626
+ delay_seconds_batch = gr.Slider(
1627
+ minimum=0.0,
1628
+ maximum=5.0,
1629
+ label=i18n("Delay Seconds"),
1630
+ info=i18n("Set the delay seconds."),
1631
+ value=0.5,
1632
+ interactive=True,
1633
+ visible=False,
1634
+ )
1635
+ delay_feedback_batch = gr.Slider(
1636
+ minimum=0.0,
1637
+ maximum=1.0,
1638
+ label=i18n("Delay Feedback"),
1639
+ info=i18n("Set the delay feedback."),
1640
+ value=0.0,
1641
+ interactive=True,
1642
+ visible=False,
1643
+ )
1644
+ delay_mix_batch = gr.Slider(
1645
+ minimum=0.0,
1646
+ maximum=1.0,
1647
+ label=i18n("Delay Mix"),
1648
+ info=i18n("Set the delay mix."),
1649
+ value=0.5,
1650
+ interactive=True,
1651
+ visible=False,
1652
+ )
1653
+ with gr.Accordion(i18n("Preset Settings"), open=False):
1654
+ with gr.Row():
1655
+ preset_dropdown = gr.Dropdown(
1656
+ label=i18n("Select Custom Preset"),
1657
+ interactive=True,
1658
+ )
1659
+ presets_batch_refresh_button = gr.Button(
1660
+ i18n("Refresh Presets")
1661
+ )
1662
+ import_file = gr.File(
1663
+ label=i18n("Select file to import"),
1664
+ file_count="single",
1665
+ type="filepath",
1666
+ interactive=True,
1667
+ )
1668
+ import_file.change(
1669
+ import_presets_button,
1670
+ inputs=import_file,
1671
+ outputs=[preset_dropdown],
1672
+ )
1673
+ presets_batch_refresh_button.click(
1674
+ refresh_presets, outputs=preset_dropdown
1675
+ )
1676
+ with gr.Row():
1677
+ preset_name_input = gr.Textbox(
1678
+ label=i18n("Preset Name"),
1679
+ placeholder=i18n("Enter preset name"),
1680
+ )
1681
+ export_button = gr.Button(i18n("Export Preset"))
1682
+ pitch_batch = gr.Slider(
1683
+ minimum=-24,
1684
+ maximum=24,
1685
+ step=1,
1686
+ label=i18n("Pitch"),
1687
+ info=i18n(
1688
+ "Set the pitch of the audio, the higher the value, the higher the pitch."
1689
+ ),
1690
+ value=0,
1691
+ interactive=True,
1692
+ )
1693
+ index_rate_batch = gr.Slider(
1694
+ minimum=0,
1695
+ maximum=1,
1696
+ label=i18n("Search Feature Ratio"),
1697
+ info=i18n(
1698
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio."
1699
+ ),
1700
+ value=0.75,
1701
+ interactive=True,
1702
+ )
1703
+ rms_mix_rate_batch = gr.Slider(
1704
+ minimum=0,
1705
+ maximum=1,
1706
+ label=i18n("Volume Envelope"),
1707
+ info=i18n(
1708
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed."
1709
+ ),
1710
+ value=1,
1711
+ interactive=True,
1712
+ )
1713
+ protect_batch = gr.Slider(
1714
+ minimum=0,
1715
+ maximum=0.5,
1716
+ label=i18n("Protect Voiceless Consonants"),
1717
+ info=i18n(
1718
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect."
1719
+ ),
1720
+ value=0.5,
1721
+ interactive=True,
1722
+ )
1723
+ preset_dropdown.change(
1724
+ update_sliders,
1725
+ inputs=preset_dropdown,
1726
+ outputs=[
1727
+ pitch_batch,
1728
+ index_rate_batch,
1729
+ rms_mix_rate_batch,
1730
+ protect_batch,
1731
+ ],
1732
+ )
1733
# NOTE(review): this export button lives in the batch tab, so it must read
# the batch sliders. The original wired in the single-file sliders
# (pitch / index_rate / protect) alongside rms_mix_rate_batch, so exported
# presets captured values from the wrong tab.
export_button.click(
    export_presets_button,
    inputs=[
        preset_name_input,
        pitch_batch,
        index_rate_batch,
        rms_mix_rate_batch,
        protect_batch,
    ],
    outputs=[],
)
1744
+ f0_method_batch = gr.Radio(
1745
+ label=i18n("Pitch extraction algorithm"),
1746
+ info=i18n(
1747
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
1748
+ ),
1749
+ choices=[
1750
+ "crepe",
1751
+ "crepe-tiny",
1752
+ "rmvpe",
1753
+ "fcpe",
1754
+ "swift",
1755
+ ],
1756
+ value="rmvpe",
1757
+ interactive=True,
1758
+ )
1759
+ embedder_model_batch = gr.Radio(
1760
+ label=i18n("Embedder Model"),
1761
+ info=i18n("Model used for learning speaker embedding."),
1762
+ choices=[
1763
+ "contentvec",
1764
+ "spin",
1765
+ "spin-v2",
1766
+ "chinese-hubert-base",
1767
+ "japanese-hubert-base",
1768
+ "korean-hubert-base",
1769
+ "custom",
1770
+ ],
1771
+ value="contentvec",
1772
+ interactive=True,
1773
+ )
1774
+ with gr.Column(visible=False) as embedder_custom_batch:
1775
+ with gr.Accordion(i18n("Custom Embedder"), open=True):
1776
+ with gr.Row():
1777
+ embedder_model_custom_batch = gr.Dropdown(
1778
+ label=i18n("Select Custom Embedder"),
1779
+ choices=refresh_embedders_folders(),
1780
+ interactive=True,
1781
+ allow_custom_value=True,
1782
+ )
1783
+ refresh_embedders_button_batch = gr.Button(
1784
+ i18n("Refresh embedders")
1785
+ )
1786
+ folder_name_input_batch = gr.Textbox(
1787
+ label=i18n("Folder Name"), interactive=True
1788
+ )
1789
+ with gr.Row():
1790
+ bin_file_upload_batch = gr.File(
1791
+ label=i18n("Upload .bin"),
1792
+ type="filepath",
1793
+ interactive=True,
1794
+ )
1795
+ config_file_upload_batch = gr.File(
1796
+ label=i18n("Upload .json"),
1797
+ type="filepath",
1798
+ interactive=True,
1799
+ )
1800
+ move_files_button_batch = gr.Button(
1801
+ i18n("Move files to custom embedder folder")
1802
+ )
1803
+
1804
+ terms_checkbox_batch = gr.Checkbox(
1805
+ label=i18n("I agree to the terms of use"),
1806
+ info=i18n(
1807
+ "Please ensure compliance with the terms and conditions detailed in [this document](https://github.com/IAHispano/Applio/blob/main/TERMS_OF_USE.md) before proceeding with your inference."
1808
+ ),
1809
+ value=False,
1810
+ interactive=True,
1811
+ )
1812
+ convert_button_batch = gr.Button(i18n("Convert"))
1813
+ stop_button = gr.Button(i18n("Stop convert"), visible=False)
1814
+ stop_button.click(fn=stop_infer, inputs=[], outputs=[])
1815
+
1816
+ with gr.Row():
1817
+ vc_output3 = gr.Textbox(
1818
+ label=i18n("Output Information"),
1819
+ info=i18n("The output information will be displayed here."),
1820
+ )
1821
+
1822
def toggle_visible(checkbox):
    """Return a Gradio update dict whose visibility mirrors *checkbox*."""
    return {"__type__": "update", "visible": checkbox}
1824
+
1825
def toggle_visible_embedder_custom(embedder_model):
    """Show the custom-embedder column only when 'custom' is selected."""
    return {"visible": embedder_model == "custom", "__type__": "update"}
1829
+
1830
def enable_stop_convert_button():
    """Hide the convert button and reveal the stop button (conversion started)."""
    hide = {"visible": False, "__type__": "update"}
    show = {"visible": True, "__type__": "update"}
    return hide, show
1835
+
1836
def disable_stop_convert_button():
    """Reveal the convert button and hide the stop button (conversion done)."""
    show = {"visible": True, "__type__": "update"}
    hide = {"visible": False, "__type__": "update"}
    return show, hide
1841
+
1842
def toggle_visible_formant_shifting(checkbox):
    """Toggle the five formant-shifting controls' visibility together.

    Returns one gr.update per control (row, preset dropdown, refresh
    button, quefrency slider, timbre slider), all sharing *checkbox*'s
    visibility.
    """
    return tuple(gr.update(visible=checkbox) for _ in range(5))
1859
+
1860
def update_visibility(checkbox, count):
    """Return *count* Gradio updates, each with visibility set to *checkbox*."""
    updates = []
    for _ in range(count):
        updates.append(gr.update(visible=checkbox))
    return updates
1862
+
1863
# Per-effect wrappers: each toggles exactly the number of controls that
# belong to one post-processing effect (counts match the outputs wired
# to the corresponding .change() handlers below).
def post_process_visible(checkbox):
    """Toggle the 10 top-level post-process effect checkboxes."""
    return update_visibility(checkbox, 10)

def reverb_visible(checkbox):
    """Toggle the 6 reverb parameter sliders."""
    return update_visibility(checkbox, 6)

def limiter_visible(checkbox):
    """Toggle the 2 limiter parameter sliders."""
    return update_visibility(checkbox, 2)

def chorus_visible(checkbox):
    """Toggle the 6 chorus parameter sliders."""
    return update_visibility(checkbox, 6)

def bitcrush_visible(checkbox):
    """Toggle the single bitcrush depth slider."""
    return update_visibility(checkbox, 1)

def compress_visible(checkbox):
    """Toggle the 4 compressor parameter sliders."""
    return update_visibility(checkbox, 4)

def delay_visible(checkbox):
    """Toggle the 3 delay parameter sliders."""
    return update_visibility(checkbox, 3)
1883
+
1884
+ autotune.change(
1885
+ fn=toggle_visible,
1886
+ inputs=[autotune],
1887
+ outputs=[autotune_strength],
1888
+ )
1889
+ proposed_pitch.change(
1890
+ fn=toggle_visible,
1891
+ inputs=[proposed_pitch],
1892
+ outputs=[proposed_pitch_threshold],
1893
+ )
1894
+ proposed_pitch_batch.change(
1895
+ fn=toggle_visible,
1896
+ inputs=[proposed_pitch_batch],
1897
+ outputs=[proposed_pitch_threshold_batch],
1898
+ )
1899
+ clean_audio.change(
1900
+ fn=toggle_visible,
1901
+ inputs=[clean_audio],
1902
+ outputs=[clean_strength],
1903
+ )
1904
+ formant_shifting.change(
1905
+ fn=toggle_visible_formant_shifting,
1906
+ inputs=[formant_shifting],
1907
+ outputs=[
1908
+ formant_row,
1909
+ formant_preset,
1910
+ formant_refresh_button,
1911
+ formant_qfrency,
1912
+ formant_timbre,
1913
+ ],
1914
+ )
1915
# NOTE(review): the batch handler must listen to the batch checkbox; the
# original passed inputs=[formant_shifting] (the single-file tab's
# checkbox), so toggling the batch checkbox never updated these controls.
formant_shifting_batch.change(
    fn=toggle_visible_formant_shifting,
    inputs=[formant_shifting_batch],
    outputs=[
        formant_row_batch,
        formant_preset_batch,
        formant_refresh_button_batch,
        formant_qfrency_batch,
        formant_timbre_batch,
    ],
)
1926
+ formant_refresh_button.click(
1927
+ fn=refresh_formant,
1928
+ inputs=[],
1929
+ outputs=[formant_preset],
1930
+ )
1931
+ formant_preset.change(
1932
+ fn=update_sliders_formant,
1933
+ inputs=[formant_preset],
1934
+ outputs=[
1935
+ formant_qfrency,
1936
+ formant_timbre,
1937
+ ],
1938
+ )
1939
# NOTE(review): selecting a preset in the batch tab must update the batch
# sliders; the original wrote to formant_qfrency / formant_timbre (the
# single-file tab's sliders), leaving the batch sliders untouched.
formant_preset_batch.change(
    fn=update_sliders_formant,
    inputs=[formant_preset_batch],
    outputs=[
        formant_qfrency_batch,
        formant_timbre_batch,
    ],
)
1947
+ post_process.change(
1948
+ fn=post_process_visible,
1949
+ inputs=[post_process],
1950
+ outputs=[
1951
+ reverb,
1952
+ pitch_shift,
1953
+ limiter,
1954
+ gain,
1955
+ distortion,
1956
+ chorus,
1957
+ bitcrush,
1958
+ clipping,
1959
+ compressor,
1960
+ delay,
1961
+ ],
1962
+ )
1963
+ reverb.change(
1964
+ fn=reverb_visible,
1965
+ inputs=[reverb],
1966
+ outputs=[
1967
+ reverb_room_size,
1968
+ reverb_damping,
1969
+ reverb_wet_gain,
1970
+ reverb_dry_gain,
1971
+ reverb_width,
1972
+ reverb_freeze_mode,
1973
+ ],
1974
+ )
1975
+ pitch_shift.change(
1976
+ fn=toggle_visible,
1977
+ inputs=[pitch_shift],
1978
+ outputs=[pitch_shift_semitones],
1979
+ )
1980
+ limiter.change(
1981
+ fn=limiter_visible,
1982
+ inputs=[limiter],
1983
+ outputs=[limiter_threshold, limiter_release_time],
1984
+ )
1985
+ gain.change(
1986
+ fn=toggle_visible,
1987
+ inputs=[gain],
1988
+ outputs=[gain_db],
1989
+ )
1990
+ distortion.change(
1991
+ fn=toggle_visible,
1992
+ inputs=[distortion],
1993
+ outputs=[distortion_gain],
1994
+ )
1995
+ chorus.change(
1996
+ fn=chorus_visible,
1997
+ inputs=[chorus],
1998
+ outputs=[
1999
+ chorus_rate,
2000
+ chorus_depth,
2001
+ chorus_center_delay,
2002
+ chorus_feedback,
2003
+ chorus_mix,
2004
+ ],
2005
+ )
2006
+ bitcrush.change(
2007
+ fn=bitcrush_visible,
2008
+ inputs=[bitcrush],
2009
+ outputs=[bitcrush_bit_depth],
2010
+ )
2011
+ clipping.change(
2012
+ fn=toggle_visible,
2013
+ inputs=[clipping],
2014
+ outputs=[clipping_threshold],
2015
+ )
2016
+ compressor.change(
2017
+ fn=compress_visible,
2018
+ inputs=[compressor],
2019
+ outputs=[
2020
+ compressor_threshold,
2021
+ compressor_ratio,
2022
+ compressor_attack,
2023
+ compressor_release,
2024
+ ],
2025
+ )
2026
+ delay.change(
2027
+ fn=delay_visible,
2028
+ inputs=[delay],
2029
+ outputs=[delay_seconds, delay_feedback, delay_mix],
2030
+ )
2031
+ post_process_batch.change(
2032
+ fn=post_process_visible,
2033
+ inputs=[post_process_batch],
2034
+ outputs=[
2035
+ reverb_batch,
2036
+ pitch_shift_batch,
2037
+ limiter_batch,
2038
+ gain_batch,
2039
+ distortion_batch,
2040
+ chorus_batch,
2041
+ bitcrush_batch,
2042
+ clipping_batch,
2043
+ compressor_batch,
2044
+ delay_batch,
2045
+ ],
2046
+ )
2047
+ reverb_batch.change(
2048
+ fn=reverb_visible,
2049
+ inputs=[reverb_batch],
2050
+ outputs=[
2051
+ reverb_room_size_batch,
2052
+ reverb_damping_batch,
2053
+ reverb_wet_gain_batch,
2054
+ reverb_dry_gain_batch,
2055
+ reverb_width_batch,
2056
+ reverb_freeze_mode_batch,
2057
+ ],
2058
+ )
2059
+ pitch_shift_batch.change(
2060
+ fn=toggle_visible,
2061
+ inputs=[pitch_shift_batch],
2062
+ outputs=[pitch_shift_semitones_batch],
2063
+ )
2064
+ limiter_batch.change(
2065
+ fn=limiter_visible,
2066
+ inputs=[limiter_batch],
2067
+ outputs=[limiter_threshold_batch, limiter_release_time_batch],
2068
+ )
2069
+ gain_batch.change(
2070
+ fn=toggle_visible,
2071
+ inputs=[gain_batch],
2072
+ outputs=[gain_db_batch],
2073
+ )
2074
+ distortion_batch.change(
2075
+ fn=toggle_visible,
2076
+ inputs=[distortion_batch],
2077
+ outputs=[distortion_gain_batch],
2078
+ )
2079
+ chorus_batch.change(
2080
+ fn=chorus_visible,
2081
+ inputs=[chorus_batch],
2082
+ outputs=[
2083
+ chorus_rate_batch,
2084
+ chorus_depth_batch,
2085
+ chorus_center_delay_batch,
2086
+ chorus_feedback_batch,
2087
+ chorus_mix_batch,
2088
+ ],
2089
+ )
2090
+ bitcrush_batch.change(
2091
+ fn=bitcrush_visible,
2092
+ inputs=[bitcrush_batch],
2093
+ outputs=[bitcrush_bit_depth_batch],
2094
+ )
2095
+ clipping_batch.change(
2096
+ fn=toggle_visible,
2097
+ inputs=[clipping_batch],
2098
+ outputs=[clipping_threshold_batch],
2099
+ )
2100
+ compressor_batch.change(
2101
+ fn=compress_visible,
2102
+ inputs=[compressor_batch],
2103
+ outputs=[
2104
+ compressor_threshold_batch,
2105
+ compressor_ratio_batch,
2106
+ compressor_attack_batch,
2107
+ compressor_release_batch,
2108
+ ],
2109
+ )
2110
+ delay_batch.change(
2111
+ fn=delay_visible,
2112
+ inputs=[delay_batch],
2113
+ outputs=[delay_seconds_batch, delay_feedback_batch, delay_mix_batch],
2114
+ )
2115
+ autotune_batch.change(
2116
+ fn=toggle_visible,
2117
+ inputs=[autotune_batch],
2118
+ outputs=[autotune_strength_batch],
2119
+ )
2120
+ clean_audio_batch.change(
2121
+ fn=toggle_visible,
2122
+ inputs=[clean_audio_batch],
2123
+ outputs=[clean_strength_batch],
2124
+ )
2125
+ refresh_button.click(
2126
+ fn=change_choices,
2127
+ inputs=[model_file],
2128
+ outputs=[model_file, index_file, audio, sid, sid_batch],
2129
+ ).then(
2130
+ fn=filter_dropdowns,
2131
+ inputs=[filter_box_inf],
2132
+ outputs=[model_file, index_file],
2133
+ )
2134
+ audio.change(
2135
+ fn=output_path_fn,
2136
+ inputs=[audio],
2137
+ outputs=[output_path],
2138
+ )
2139
+ upload_audio.upload(
2140
+ fn=save_to_wav2,
2141
+ inputs=[upload_audio],
2142
+ outputs=[audio, output_path],
2143
+ )
2144
+ upload_audio.stop_recording(
2145
+ fn=save_to_wav,
2146
+ inputs=[upload_audio],
2147
+ outputs=[audio, output_path],
2148
+ )
2149
+ clear_outputs_infer.click(
2150
+ fn=delete_outputs,
2151
+ inputs=[],
2152
+ outputs=[],
2153
+ )
2154
+ clear_outputs_batch.click(
2155
+ fn=delete_outputs,
2156
+ inputs=[],
2157
+ outputs=[],
2158
+ )
2159
+ embedder_model.change(
2160
+ fn=toggle_visible_embedder_custom,
2161
+ inputs=[embedder_model],
2162
+ outputs=[embedder_custom],
2163
+ )
2164
+ embedder_model_batch.change(
2165
+ fn=toggle_visible_embedder_custom,
2166
+ inputs=[embedder_model_batch],
2167
+ outputs=[embedder_custom_batch],
2168
+ )
2169
+ move_files_button.click(
2170
+ fn=create_folder_and_move_files,
2171
+ inputs=[folder_name_input, bin_file_upload, config_file_upload],
2172
+ outputs=[],
2173
+ )
2174
+ refresh_embedders_button.click(
2175
+ fn=lambda: gr.update(choices=refresh_embedders_folders()),
2176
+ inputs=[],
2177
+ outputs=[embedder_model_custom],
2178
+ )
2179
+ move_files_button_batch.click(
2180
+ fn=create_folder_and_move_files,
2181
+ inputs=[
2182
+ folder_name_input_batch,
2183
+ bin_file_upload_batch,
2184
+ config_file_upload_batch,
2185
+ ],
2186
+ outputs=[],
2187
+ )
2188
+ refresh_embedders_button_batch.click(
2189
+ fn=lambda: gr.update(choices=refresh_embedders_folders()),
2190
+ inputs=[],
2191
+ outputs=[embedder_model_custom_batch],
2192
+ )
2193
+ convert_button1.click(
2194
+ fn=enforce_terms,
2195
+ inputs=[
2196
+ terms_checkbox,
2197
+ pitch,
2198
+ index_rate,
2199
+ rms_mix_rate,
2200
+ protect,
2201
+ f0_method,
2202
+ audio,
2203
+ output_path,
2204
+ model_file,
2205
+ index_file,
2206
+ split_audio,
2207
+ autotune,
2208
+ autotune_strength,
2209
+ proposed_pitch,
2210
+ proposed_pitch_threshold,
2211
+ clean_audio,
2212
+ clean_strength,
2213
+ export_format,
2214
+ embedder_model,
2215
+ embedder_model_custom,
2216
+ formant_shifting,
2217
+ formant_qfrency,
2218
+ formant_timbre,
2219
+ post_process,
2220
+ reverb,
2221
+ pitch_shift,
2222
+ limiter,
2223
+ gain,
2224
+ distortion,
2225
+ chorus,
2226
+ bitcrush,
2227
+ clipping,
2228
+ compressor,
2229
+ delay,
2230
+ reverb_room_size,
2231
+ reverb_damping,
2232
+ reverb_wet_gain,
2233
+ reverb_dry_gain,
2234
+ reverb_width,
2235
+ reverb_freeze_mode,
2236
+ pitch_shift_semitones,
2237
+ limiter_threshold,
2238
+ limiter_release_time,
2239
+ gain_db,
2240
+ distortion_gain,
2241
+ chorus_rate,
2242
+ chorus_depth,
2243
+ chorus_center_delay,
2244
+ chorus_feedback,
2245
+ chorus_mix,
2246
+ bitcrush_bit_depth,
2247
+ clipping_threshold,
2248
+ compressor_threshold,
2249
+ compressor_ratio,
2250
+ compressor_attack,
2251
+ compressor_release,
2252
+ delay_seconds,
2253
+ delay_feedback,
2254
+ delay_mix,
2255
+ sid,
2256
+ ],
2257
+ outputs=[vc_output1, vc_output2],
2258
+ )
2259
+ convert_button_batch.click(
2260
+ fn=enforce_terms_batch,
2261
+ inputs=[
2262
+ terms_checkbox_batch,
2263
+ pitch_batch,
2264
+ index_rate_batch,
2265
+ rms_mix_rate_batch,
2266
+ protect_batch,
2267
+ f0_method_batch,
2268
+ input_folder_batch,
2269
+ output_folder_batch,
2270
+ model_file,
2271
+ index_file,
2272
+ split_audio_batch,
2273
+ autotune_batch,
2274
+ autotune_strength_batch,
2275
+ proposed_pitch_batch,
2276
+ proposed_pitch_threshold_batch,
2277
+ clean_audio_batch,
2278
+ clean_strength_batch,
2279
+ export_format_batch,
2280
+ embedder_model_batch,
2281
+ embedder_model_custom_batch,
2282
+ formant_shifting_batch,
2283
+ formant_qfrency_batch,
2284
+ formant_timbre_batch,
2285
+ post_process_batch,
2286
+ reverb_batch,
2287
+ pitch_shift_batch,
2288
+ limiter_batch,
2289
+ gain_batch,
2290
+ distortion_batch,
2291
+ chorus_batch,
2292
+ bitcrush_batch,
2293
+ clipping_batch,
2294
+ compressor_batch,
2295
+ delay_batch,
2296
+ reverb_room_size_batch,
2297
+ reverb_damping_batch,
2298
+ reverb_wet_gain_batch,
2299
+ reverb_dry_gain_batch,
2300
+ reverb_width_batch,
2301
+ reverb_freeze_mode_batch,
2302
+ pitch_shift_semitones_batch,
2303
+ limiter_threshold_batch,
2304
+ limiter_release_time_batch,
2305
+ gain_db_batch,
2306
+ distortion_gain_batch,
2307
+ chorus_rate_batch,
2308
+ chorus_depth_batch,
2309
+ chorus_center_delay_batch,
2310
+ chorus_feedback_batch,
2311
+ chorus_mix_batch,
2312
+ bitcrush_bit_depth_batch,
2313
+ clipping_threshold_batch,
2314
+ compressor_threshold_batch,
2315
+ compressor_ratio_batch,
2316
+ compressor_attack_batch,
2317
+ compressor_release_batch,
2318
+ delay_seconds_batch,
2319
+ delay_feedback_batch,
2320
+ delay_mix_batch,
2321
+ sid_batch,
2322
+ ],
2323
+ outputs=[vc_output3],
2324
+ )
2325
+ convert_button_batch.click(
2326
+ fn=enable_stop_convert_button,
2327
+ inputs=[],
2328
+ outputs=[convert_button_batch, stop_button],
2329
+ )
2330
+ stop_button.click(
2331
+ fn=disable_stop_convert_button,
2332
+ inputs=[],
2333
+ outputs=[convert_button_batch, stop_button],
2334
+ )
tabs/plugins/plugins.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ import gradio as gr
3
+ import importlib.util
4
+ import tabs.plugins.plugins_core as plugins_core
5
+
6
+ from assets.i18n.i18n import I18nAuto
7
+
8
+ i18n = I18nAuto()
9
+
10
+ now_dir = os.getcwd()
11
+ sys.path.append(now_dir)
12
+
13
+ plugins_core.check_new_folders()
14
+
15
+
16
def plugins_tab():
    """Build the Plugins tab: a zip-drop installer plus one sub-tab per
    installed plugin.

    Each plugin is imported as ``tabs.plugins.installed.<name>.plugin`` and
    must expose an ``applio_plugin()`` function that renders its UI.
    """
    with gr.TabItem(i18n("Plugin Installer")):
        dropbox = gr.File(
            label=i18n("Drag your plugin.zip to install it"),
            type="filepath",
        )

        dropbox.upload(
            fn=plugins_core.save_plugin_dropbox,
            inputs=[dropbox],
            outputs=[dropbox],
        )

    installed_dir = os.path.join(now_dir, "tabs", "plugins", "installed")
    for plugin in os.listdir(installed_dir):
        # Skip cache folders and stray files (e.g. __pycache__, leftover
        # archives) so importlib only sees real plugin packages; the
        # original imported every directory entry and crashed on them.
        if plugin == "__pycache__" or not os.path.isdir(
            os.path.join(installed_dir, plugin)
        ):
            continue
        plugin_import = importlib.import_module(
            f"tabs.plugins.installed.{plugin}.plugin"
        )
        with gr.TabItem(plugin):
            plugin_import.applio_plugin()
tabs/plugins/plugins_core.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys, shutil
2
+ import json
3
+ import gradio as gr
4
+ import zipfile
5
+ import subprocess
6
+
7
+ from assets.i18n.i18n import I18nAuto
8
+
9
+ i18n = I18nAuto()
10
+
11
+ now_dir = os.getcwd()
12
+ sys.path.append(now_dir)
13
+
14
+ from tabs.settings.sections.restart import restart_applio
15
+
16
+ plugins_path = os.path.join(now_dir, "tabs", "plugins", "installed")
17
+ if not os.path.exists(plugins_path):
18
+ os.makedirs(plugins_path)
19
+ json_file_path = os.path.join(now_dir, "assets", "config.json")
20
+ current_folders = os.listdir(plugins_path)
21
+
22
+
23
def get_existing_folders():
    """Return the list of plugin folders recorded in assets/config.json.

    Returns an empty list when the config file does not exist or has no
    "plugins" key, so a fresh install doesn't crash with KeyError (the
    original indexed config["plugins"] unconditionally).
    """
    if not os.path.exists(json_file_path):
        return []
    with open(json_file_path, "r") as file:
        config = json.load(file)
    return config.get("plugins", [])
30
+
31
+
32
def save_existing_folders(existing_folders):
    """Persist *existing_folders* under the "plugins" key of assets/config.json,
    preserving every other key in the file."""
    with open(json_file_path, "r") as cfg_in:
        cfg = json.load(cfg_in)
    cfg["plugins"] = existing_folders
    with open(json_file_path, "w") as cfg_out:
        json.dump(cfg, cfg_out, indent=2)
38
+
39
+
40
def save_plugin_dropbox(dropbox):
    """Install a plugin dropped as a zip file.

    Moves the archive into the plugins folder, extracts it, installs the
    plugin's requirements (if any), records the plugin in the config, and
    restarts Applio so the new tab is picked up.

    Parameters
    ----------
    dropbox : str
        Filesystem path of the uploaded file.

    Raises
    ------
    gr.Error
        If the uploaded file is not a .zip archive.
    """
    # Proper extension check: the original tested `"zip" not in dropbox`,
    # which any path merely *containing* "zip" would pass.
    if not dropbox.endswith(".zip"):
        raise gr.Error(
            message="The file you dropped is not a valid plugin.zip. Please try again."
        )

    file_name = os.path.basename(dropbox)
    folder_name = file_name.split(".zip")[0]
    folder_path = os.path.join(plugins_path, folder_name)
    zip_file_path = os.path.join(plugins_path, file_name)

    # Remove a previous install of the same plugin. It is a directory, so
    # shutil.rmtree is required; the original called os.remove on the bare
    # folder *name*, which could never delete the old install.
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)

    shutil.move(dropbox, zip_file_path)
    print("Proceeding with the extraction...")

    with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
        zip_ref.extractall(plugins_path)
    os.remove(zip_file_path)

    requirements = os.path.join(folder_path, "requirements.txt")
    if os.path.exists(requirements):
        # On Windows Applio ships its own interpreter under env/.
        python = os.path.join("env", "python.exe") if os.name == "nt" else "python"
        subprocess.run([python, "-m", "pip", "install", "-r", requirements])
    else:
        print("No requirements.txt file found in the plugin folder.")

    save_existing_folders(get_existing_folders() + [folder_name])

    message = f"{folder_name} plugin installed in {plugins_path}! Restarting applio to apply the changes."
    print(message)
    gr.Info(message)
    restart_applio()
    return None
97
+
98
+
99
def check_new_folders():
    """Detect plugin folders added on disk since the last run, install their
    requirements, record the current folder list, and restart Applio if
    anything new was found."""
    known = set(get_existing_folders())
    save_existing_folders(current_folders)
    new_folders = set(current_folders) - known
    if not new_folders:
        return
    for folder in new_folders:
        plugin_dir = os.path.join(plugins_path, folder)
        print(f"New plugin {folder} found, installing it...")

        requirements = os.path.join(plugin_dir, "requirements.txt")
        if os.path.exists(requirements):
            # Windows installs ship their own interpreter under env/.
            if os.name == "nt":
                python = os.path.join("env", "python.exe")
            else:
                python = "python"
            subprocess.run([python, "-m", "pip", "install", "-r", requirements])
        else:
            print("No requirements.txt file found in the plugin folder.")
    print("Plugins checked and installed! Restarting applio to apply the changes.")
    restart_applio()
tabs/realtime/realtime.py ADDED
@@ -0,0 +1,1129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sounddevice as sd
3
+ import os
4
+ import sys
5
+ import time
6
+ import json
7
+ import regex as re
8
+ import shutil
9
+ import torch
10
+
11
+ now_dir = os.getcwd()
12
+ sys.path.append(now_dir)
13
+
14
+ from rvc.realtime.callbacks import AudioCallbacks
15
+ from rvc.realtime.audio import list_audio_device
16
+ from rvc.realtime.core import AUDIO_SAMPLE_RATE
17
+
18
+ from assets.i18n.i18n import I18nAuto
19
+
20
+ i18n = I18nAuto()
21
+
22
+ model_root = os.path.join(now_dir, "logs")
23
+ custom_embedder_root = os.path.join(
24
+ now_dir, "rvc", "models", "embedders", "embedders_custom"
25
+ )
26
+
27
+ os.makedirs(custom_embedder_root, exist_ok=True)
28
+
29
+ custom_embedder_root_relative = os.path.relpath(custom_embedder_root, now_dir)
30
+ model_root_relative = os.path.relpath(model_root, now_dir)
31
+
32
def normalize_path(p):
    """Canonicalize *p* for comparison: normpath, forward slashes, lower-case."""
    normalized = os.path.normpath(p)
    return normalized.replace("\\", "/").lower()
34
+
35
# Folder-name aliases: users keep models and indexes under loosely named
# directories ("models", "mdl", "weights", "index", "idx", ...); these
# anchored patterns recognize the common variants.
MODEL_FOLDER = re.compile(r"^(?:model.{0,4}|mdl(?:s)?|weight.{0,4}|zip(?:s)?)$")
INDEX_FOLDER = re.compile(r"^(?:ind.{0,4}|idx(?:s)?)$")


def is_mdl_alias(name: str) -> bool:
    """True if *name* looks like a model-holding folder."""
    return MODEL_FOLDER.match(name) is not None


def is_idx_alias(name: str) -> bool:
    """True if *name* looks like an index-holding folder."""
    return INDEX_FOLDER.match(name) is not None
45
+
46
+
47
def alias_score(path: str, want_model: bool) -> int:
    """Rank how well *path*'s parent folders match the wanted file kind.

    2 = some ancestor folder matches the wanted alias,
    1 = only the opposite alias appears,
    0 = neither alias appears anywhere in the parent path.
    """
    folders = normalize_path(os.path.dirname(path)).split("/")
    under_model = any(map(is_mdl_alias, folders))
    under_index = any(map(is_idx_alias, folders))
    if want_model:
        if under_model:
            return 2
        return 1 if under_index else 0
    if under_index:
        return 2
    return 1 if under_model else 0
55
+
56
+
57
def get_files(type="model"):
    """Collect model (.pth/.onnx) or index (.index) files under the logs tree.

    Walks the logs directory (following symlinks), skipping checkpoint
    files (G_*/D_*) for models and "trained" files for indexes.
    Symlinked duplicates are collapsed by real path, preferring the copy
    that lives under the better-matching alias folder while preserving
    first-seen ordering.
    """
    assert type in ("model", "index"), "Invalid type for get_files (models or index)"
    want_model = type == "model"
    if want_model:
        valid_exts = (".pth", ".onnx")
        skip_prefixes = ("G_", "D_")
        skip_substr = None
    else:
        valid_exts = (".index",)
        skip_prefixes = ()
        skip_substr = "trained"

    chosen = {}  # real path -> (alias score, first-seen order, display path)
    seen = 0

    for root, _, files in os.walk(model_root_relative, followlinks=True):
        for name in files:
            if not name.endswith(valid_exts):
                continue
            if skip_prefixes and name.startswith(skip_prefixes):
                continue
            if skip_substr is not None and skip_substr in name:
                continue

            candidate = os.path.join(root, name)
            real = os.path.realpath(candidate)
            score = alias_score(candidate, want_model)

            entry = chosen.get(real)
            if entry is None:
                # First sighting of this real file: record it as-is.
                chosen[real] = (score, seen, candidate)
            elif score > entry[0]:
                # Better alias match: keep the new path but the original order.
                chosen[real] = (score, entry[1], candidate)
            seen += 1

    return [path for _, _, path in sorted(chosen.values(), key=lambda e: e[1])]
92
+
93
+
94
def folders_same(a: str, b: str) -> bool:
    """Decide whether two folders should be treated as the same location.

    True when the normalized paths are identical, or when one lives under a
    model-alias folder and the other under an index-alias folder at the
    same relative subpath (e.g. logs/models/miku vs logs/index/miku).
    Used to pair model and index directories by path shape.
    """
    a = normalize_path(a)
    b = normalize_path(b)
    if a == b:
        return True

    def tail_after_alias(path):
        # Return (alias folder, remainder after it), or (None, None) when
        # no segment of the path is a recognized alias.
        segments = path.split("/")
        for i, segment in enumerate(segments):
            if is_mdl_alias(segment) or is_idx_alias(segment):
                return segment, "/".join(segments[i + 1 :])
        return None, None

    alias_a, rest_a = tail_after_alias(a)
    alias_b, rest_b = tail_after_alias(b)

    if rest_a is None or rest_b is None:
        return False
    if rest_a != rest_b:
        return False

    # Same subpath: they pair only when the aliases are of opposite kinds.
    return (is_mdl_alias(alias_a) and is_idx_alias(alias_b)) or (
        is_idx_alias(alias_a) and is_mdl_alias(alias_b)
    )
130
+
131
+
132
def match_index(model_file_value):
    """Pick the best-matching .index file for a given model file path.

    Candidates from get_files("index") are ranked by a fixed priority:
      1) exact base-name match in the same/paired folder (returned immediately),
      2) substring match in the same/paired folder,
      3) prefix match in the same/paired folder,
      4) exact match in an external folder,
      5) substring match in an external folder,
      6) prefix match in an external folder.
    Special fallback: if exactly one index lives in the same/paired folder it
    wins regardless of name. Returns "" when nothing matches.
    """
    if not model_file_value:
        return ""

    # Derive the information about the model's name and path for index matching
    model_folder = normalize_path(os.path.dirname(model_file_value))
    model_name = os.path.basename(model_file_value)
    base_name = os.path.splitext(model_name)[0]
    # Strip trailing epoch/step/version suffixes like "_100e", "-v2", ".s300".
    common = re.sub(r"[_\-\.\+](?:e|s|v|V)\d.*$", "", base_name)
    # First token of the name before any separator (used for prefix matching).
    prefix_match = re.match(r"^(.*?)[_\-\.\+]", base_name)
    prefix = prefix_match.group(1) if prefix_match else None

    # First-encountered candidate for each priority tier (see docstring).
    same_count = 0
    last_same = None
    same_substr = None
    same_prefixed = None
    external_exact = None
    external_substr = None
    external_pref = None

    for idx in get_files("index"):
        idx_folder = os.path.dirname(idx)
        idx_folder_n = normalize_path(idx_folder)
        idx_name = os.path.basename(idx)
        idx_base = os.path.splitext(idx_name)[0]

        in_same = folders_same(model_folder, idx_folder_n)
        if in_same:
            same_count += 1
            last_same = idx

            # 1) EXACT match to loaded model name and folders_same = True
            if idx_base == base_name:
                return idx

            # 2) Substring match to model name and folders_same
            if common in idx_base and same_substr is None:
                same_substr = idx

            # 3) Prefix match to model name and folders_same
            if prefix and idx_base.startswith(prefix) and same_prefixed is None:
                same_prefixed = idx

        # If it's NOT in a paired folder (folders_same = False) we look elseware:
        else:
            # 4) EXACT match to model name in external directory
            if idx_base == base_name and external_exact is None:
                external_exact = idx

            # 5) Substring match to model name in ED
            if common in idx_base and external_substr is None:
                external_substr = idx

            # 6) Prefix match to model name in ED
            if prefix and idx_base.startswith(prefix) and external_pref is None:
                external_pref = idx

    # Fallback: If there is exactly one index file in the same (or paired) folder,
    # we should assume that's the intended index file even if the name doesnt match
    if same_count == 1:
        return last_same

    # Then by remaining priority queue:
    if same_substr:
        return same_substr
    if same_prefixed:
        return same_prefixed
    if external_exact:
        return external_exact
    if external_substr:
        return external_substr
    if external_pref:
        return external_pref

    return ""
207
+
208
def extract_model_and_epoch(path):
    """Parse "<model>_<epochs>e_..." out of a checkpoint filename.

    Returns (model_name, epoch_count); ("", 0) when the filename does not
    follow that naming scheme. Used as a sort key for model dropdowns.
    """
    m = re.match(r"(.+?)_(\d+)e_", os.path.basename(path))
    if m is None:
        return "", 0
    name, epochs = m.group(1), m.group(2)
    return name, int(epochs)
215
+
216
+
217
def get_speakers_id(model):
    """Return the list of selectable speaker ids for a model checkpoint.

    Loads the checkpoint (CPU, weights only) relative to now_dir and reads its
    "speakers_id" count. Falls back to [0] (single speaker) when no model is
    given, the file cannot be loaded, or the field is absent/zero.
    """
    if not model:
        return [0]
    try:
        checkpoint = torch.load(
            os.path.join(now_dir, model), map_location="cpu", weights_only=True
        )
        speaker_count = checkpoint.get("speakers_id")
    except Exception:
        # Unreadable/incompatible checkpoint: behave like a one-speaker model.
        return [0]
    return list(range(speaker_count)) if speaker_count else [0]
232
+
233
+
234
def create_folder_and_move_files(folder_name, bin_file, config_file):
    """Copy uploaded custom-embedder files into a named subfolder.

    Creates custom_embedder_root/<folder_name> and copies the uploaded .bin
    and .json files (either may be None) into it.

    Returns a human-readable status string (this is shown in the UI).
    """
    if not folder_name:
        return "Folder name must not be empty."

    # basename() strips any path components the user typed ("../x" -> "x"),
    # so the target can only be a direct child of the embedder root.
    folder_name = os.path.basename(folder_name)
    target_folder = os.path.join(custom_embedder_root, folder_name)

    normalized_target_folder = os.path.abspath(target_folder)
    normalized_custom_embedder_root = os.path.abspath(custom_embedder_root)

    # Fix: a plain startswith() prefix check also accepts sibling directories
    # that merely share the root's string prefix (e.g. ".../embedders_evil").
    # commonpath() compares real path components.
    if (
        os.path.commonpath(
            [normalized_target_folder, normalized_custom_embedder_root]
        )
        != normalized_custom_embedder_root
    ):
        return "Invalid folder name. Folder must be within the custom embedder root directory."

    os.makedirs(target_folder, exist_ok=True)

    if bin_file:
        shutil.copy(bin_file, os.path.join(target_folder, os.path.basename(bin_file)))
    if config_file:
        shutil.copy(
            config_file, os.path.join(target_folder, os.path.basename(config_file))
        )

    return f"Files moved to folder {target_folder}"
257
+
258
+
259
def refresh_embedders_folders():
    """Return every subdirectory path found under the custom embedder root.

    Walks custom_embedder_root_relative recursively; each entry is the joined
    (parent, child) path, suitable for the custom-embedder dropdown choices.
    """
    found = []
    for parent, subdirs, _files in os.walk(custom_embedder_root_relative):
        for child in subdirs:
            found.append(os.path.join(parent, child))
    return found
266
+
267
# Model checkpoints discovered on disk; the first one is the default selection.
names = get_files("model")
default_weight = names[0] if names else None

# When True, the realtime pipeline passes audio through without conversion.
PASS_THROUGH = False
# Reusable Gradio updates for toggling the Start/Stop buttons' interactivity.
interactive_true = gr.update(interactive=True)
interactive_false = gr.update(interactive=False)
# Shared realtime-pipeline state, mutated by start_realtime()/stop_realtime().
running, callbacks, audio_manager = False, None, None

# Persisted UI settings live in assets/config.json under the "realtime" key.
CONFIG_PATH = os.path.join(now_dir, "assets", "config.json")
276
+
277
def save_realtime_settings(
    input_device, output_device, monitor_device, model_file, index_file
):
    """Save realtime settings to config.json.

    Each argument that is not None overwrites the corresponding key in the
    config's "realtime" section (None means "leave the saved value alone");
    falsy non-None values are stored as "". Errors are logged, never raised.
    """
    try:
        config = {}
        if os.path.exists(CONFIG_PATH):
            with open(CONFIG_PATH, "r", encoding="utf-8") as f:
                config = json.load(f)

        realtime = config.setdefault("realtime", {})

        # None means "not changed in this call" — preserve the stored value.
        updates = {
            "input_device": input_device,
            "output_device": output_device,
            "monitor_device": monitor_device,
            "model_file": model_file,
            "index_file": index_file,
        }
        for key, value in updates.items():
            if value is not None:
                realtime[key] = value or ""

        with open(CONFIG_PATH, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2, ensure_ascii=False)
    except Exception as e:
        print(f"Error saving realtime settings: {e}")
307
+
308
+
309
def load_realtime_settings():
    """Load realtime settings from config.json.

    Returns a dict with the five realtime keys (device/model/index
    selections), each defaulting to "" when the file or key is missing, or
    when reading fails.
    """
    defaults = {
        "input_device": "",
        "output_device": "",
        "monitor_device": "",
        "model_file": "",
        "index_file": "",
    }
    try:
        if os.path.exists(CONFIG_PATH):
            with open(CONFIG_PATH, "r", encoding="utf-8") as f:
                saved = json.load(f).get("realtime", {})
            return {key: saved.get(key, "") for key in defaults}
    except Exception as e:
        print(f"Error loading realtime settings: {e}")

    return defaults
333
+
334
+
335
def get_safe_dropdown_value(saved_value, choices, fallback_value=None):
    """Pick a dropdown value that is guaranteed to be one of *choices*.

    Preference order: the saved value, then the fallback, then the first
    choice; None when there are no choices at all.
    """
    for candidate in (saved_value, fallback_value):
        if candidate and candidate in choices:
            return candidate
    return choices[0] if choices else None
345
+
346
+
347
def get_safe_index_value(saved_value, choices, fallback_value=None):
    """Pick an index-file value guaranteed to be one of *choices*.

    Unlike get_safe_dropdown_value, a saved path may also match a choice by
    filename alone (the saved path may be stale while the file still exists
    under a different directory). Preference order: exact match, basename
    match, fallback, first choice, None.
    """

    def default_choice():
        if fallback_value and fallback_value in choices:
            return fallback_value
        return choices[0] if choices else None

    # Treat None / "" / whitespace-only as "nothing saved".
    if not saved_value or (isinstance(saved_value, str) and not saved_value.strip()):
        return default_choice()

    if saved_value in choices:
        return saved_value

    # Match on filename only, in case the directory part is stale.
    wanted = os.path.basename(saved_value)
    for candidate in choices:
        if os.path.basename(candidate) == wanted:
            return candidate

    return default_choice()
375
+
376
+
377
def start_realtime(
    input_audio_device: str,
    input_audio_gain: int,
    input_asio_channels: int,
    output_audio_device: str,
    output_audio_gain: int,
    output_asio_channels: int,
    monitor_output_device: str,
    monitor_audio_gain: int,
    monitor_asio_channels: int,
    use_monitor_device: bool,
    exclusive_mode: bool,
    vad_enabled: bool,
    chunk_size: float,
    cross_fade_overlap_size: float,
    extra_convert_size: float,
    silent_threshold: int,
    pitch: int,
    index_rate: float,
    volume_envelope: float,
    protect: float,
    f0_method: str,
    pth_path: str,
    index_path: str,
    sid: int,
    f0_autotune: bool,
    f0_autotune_strength: float,
    proposed_pitch: bool,
    proposed_pitch_threshold: float,
    embedder_model: str,
    embedder_model_custom: str = None,
):
    """Start the realtime voice-conversion pipeline.

    Generator used as a Gradio event handler: each yield is a
    (status_message, start_button_update, stop_button_update) tuple. After the
    pipeline is up it keeps yielding latency updates every 100 ms until
    stop_realtime() flips the module-level `running` flag / clears the
    shared state.
    """
    global running, callbacks, audio_manager
    running = True

    # Validate the UI selections before touching the audio stack.
    if not input_audio_device or not output_audio_device:
        yield (
            "Please select valid input/output devices!",
            interactive_true,
            interactive_false,
        )
        return
    if use_monitor_device and not monitor_output_device:
        yield (
            "Please select a valid monitor device!",
            interactive_true,
            interactive_false,
        )
        return
    if not pth_path:
        yield (
            "Model path not provided. Aborting conversion.",
            interactive_true,
            interactive_false,
        )
        return

    yield "Starting Realtime...", interactive_false, interactive_true

    # Chunk size comes from the UI in milliseconds; the audio backend works in
    # 128-sample frames at AUDIO_SAMPLE_RATE.
    read_chunk_size = int(chunk_size * AUDIO_SAMPLE_RATE / 1000 / 128)

    sid = int(sid) if sid is not None else 0

    # Gain sliders are percentages; the pipeline expects linear multipliers.
    input_audio_gain /= 100.0
    output_audio_gain /= 100.0
    monitor_audio_gain /= 100.0

    try:
        # Map the human-readable dropdown labels back to backend device ids.
        input_devices, output_devices = get_audio_devices_formatted()
        input_device_id = input_devices[input_audio_device]
        output_device_id = output_devices[output_audio_device]
        output_monitor_id = (
            output_devices[monitor_output_device] if use_monitor_device else None
        )
    # Fix: the lookups above are dict indexing and raise KeyError for a stale
    # or malformed label; previously only (ValueError, IndexError) were caught,
    # so the generator crashed instead of reporting the error to the UI.
    except (KeyError, ValueError, IndexError):
        yield "Incorrectly formatted audio device. Stopping.", interactive_true, interactive_false
        return

    callbacks = AudioCallbacks(
        pass_through=PASS_THROUGH,
        read_chunk_size=read_chunk_size,
        cross_fade_overlap_size=cross_fade_overlap_size,
        extra_convert_size=extra_convert_size,
        model_path=pth_path,
        index_path=str(index_path),
        f0_method=f0_method,
        embedder_model=embedder_model,
        embedder_model_custom=embedder_model_custom,
        silent_threshold=silent_threshold,
        f0_up_key=pitch,
        index_rate=index_rate,
        protect=protect,
        volume_envelope=volume_envelope,
        f0_autotune=f0_autotune,
        f0_autotune_strength=f0_autotune_strength,
        proposed_pitch=proposed_pitch,
        proposed_pitch_threshold=proposed_pitch_threshold,
        input_audio_gain=input_audio_gain,
        output_audio_gain=output_audio_gain,
        monitor_audio_gain=monitor_audio_gain,
        monitor=use_monitor_device,
        vad_enabled=vad_enabled,
        vad_sensitivity=3,
        vad_frame_ms=30,
        sid=sid,
    )

    audio_manager = callbacks.audio
    audio_manager.start(
        input_device_id=input_device_id,
        output_device_id=output_device_id,
        output_monitor_id=output_monitor_id,
        exclusive_mode=exclusive_mode,
        asio_input_channel=input_asio_channels,
        asio_output_channel=output_asio_channels,
        asio_output_monitor_channel=monitor_asio_channels,
        read_chunk_size=read_chunk_size,
    )

    yield "Realtime is ready!", interactive_false, interactive_true

    # Poll until stop_realtime() clears the shared state; surface the measured
    # round-trip latency while running.
    while running and callbacks is not None and audio_manager is not None:
        time.sleep(0.1)
        if hasattr(audio_manager, "latency"):
            yield f"Latency: {audio_manager.latency:.2f} ms", interactive_false, interactive_true

    return gr.update(), gr.update(), gr.update()
504
+
505
+
506
def stop_realtime():
    """Stop the realtime pipeline and release the shared state.

    Returns a (status, start_button_update, stop_button_update) triple for the
    Gradio outputs; reports an error string when no pipeline is running.
    """
    global running, callbacks, audio_manager

    # Guard clause: nothing to stop.
    if not running or audio_manager is None or callbacks is None:
        return "Realtime pipeline not found!", interactive_true, interactive_false

    audio_manager.stop()
    running = False
    # Drop the latency attribute so start_realtime()'s polling loop stops
    # reporting stale values.
    if hasattr(audio_manager, "latency"):
        del audio_manager.latency
    audio_manager = callbacks = None

    return gr.update(value="Stopping..."), gr.update(), gr.update()
518
+
519
+
520
def get_audio_devices_formatted():
    """Enumerate audio devices as ({label: device_index}, {label: device_index}).

    Labels look like "1: Device Name (Host API)". Virtual devices (e.g.
    VB-Cable) are sorted first for outputs and last for inputs, since routing
    the converted voice into a virtual cable is the common setup.

    Returns empty dicts when device enumeration fails.
    """
    try:
        input_devices, output_devices = list_audio_device()

        def priority(name: str) -> int:
            # Lower rank sorts first: explicit "virtual" devices, then
            # VB-Audio devices, then real hardware.
            n = name.lower()
            if "virtual" in n:
                return 0
            if "vb" in n:
                return 1
            return 2

        output_sorted = sorted(output_devices, key=lambda d: priority(d.name))
        input_sorted = sorted(
            input_devices, key=lambda d: priority(d.name), reverse=True
        )

        # Fix: use enumerate instead of list.index() inside the comprehension —
        # index() is O(n) per item and returns the FIRST position for devices
        # that compare equal, which produced duplicate labels/numbers.
        input_device_list = {
            f"{pos + 1}: {d.name} ({d.host_api})": d.index
            for pos, d in enumerate(input_sorted)
        }
        output_device_list = {
            f"{pos + 1}: {d.name} ({d.host_api})": d.index
            for pos, d in enumerate(output_sorted)
        }

        return input_device_list, output_device_list
    except Exception:
        # Fix: callers immediately call .keys() on both results, so the
        # failure path must return dicts — ([], []) raised AttributeError.
        return {}, {}
549
+
550
+
551
def realtime_tab():
    """Build the Realtime tab's Gradio UI and wire up its event handlers.

    Layout: Start/Stop buttons + status label + terms checkbox, then three
    sub-tabs (Audio Settings, Model Settings, Performance Settings). Device
    and model selections are persisted via save_realtime_settings() and
    restored on build via load_realtime_settings().
    """
    input_devices, output_devices = get_audio_devices_formatted()
    # Only the human-readable labels are used as dropdown choices; the id
    # mapping is re-resolved inside start_realtime().
    input_devices, output_devices = list(input_devices.keys()), list(
        output_devices.keys()
    )

    # Load saved settings
    saved_settings = load_realtime_settings()

    with gr.Blocks() as ui:
        with gr.Row():
            start_button = gr.Button(i18n("Start"), variant="primary")
            stop_button = gr.Button(i18n("Stop"), interactive=False)
        latency_info = gr.Label(label=i18n("Status"), value="Realtime not started.")
        terms_checkbox = gr.Checkbox(
            label=i18n("I agree to the terms of use"),
            info=i18n(
                "Please ensure compliance with the terms and conditions detailed in [this document](https://github.com/IAHispano/Applio/blob/main/TERMS_OF_USE.md) before proceeding with your realtime."
            ),
            value=False,
            interactive=True,
        )

        with gr.Tabs():
            # --- Audio device selection -------------------------------------
            with gr.TabItem(i18n("Audio Settings")):
                with gr.Row():
                    refresh_devices_button = gr.Button(i18n("Refresh Audio Devices"))
                with gr.Row():
                    with gr.Accordion(i18n("Input Device"), open=True):
                        with gr.Column():
                            input_audio_device = gr.Dropdown(
                                label=i18n("Input Device"),
                                info=i18n(
                                    "Select the microphone or audio interface you will be speaking into."
                                ),
                                choices=input_devices,
                                value=get_safe_dropdown_value(
                                    saved_settings["input_device"], input_devices
                                ),
                                interactive=True,
                            )
                            input_audio_gain = gr.Slider(
                                minimum=0,
                                maximum=200,
                                value=100,
                                label=i18n("Input Gain (%)"),
                                info=i18n(
                                    "Adjusts the input volume before processing. Prevents clipping or boosts a quiet mic."
                                ),
                                interactive=True,
                            )
                            input_asio_channels = gr.Slider(
                                minimum=-1,
                                maximum=16,
                                value=-1,
                                step=1,
                                label=i18n("Input ASIO Channel"),
                                info=i18n(
                                    "For ASIO drivers, selects a specific input channel. Leave at -1 for default."
                                ),
                                interactive=True,
                            )
                    with gr.Accordion("Output Device", open=True):
                        with gr.Column():
                            output_audio_device = gr.Dropdown(
                                label=i18n("Output Device"),
                                info=i18n(
                                    "Select the device where the final converted voice will be sent (e.g., a virtual cable)."
                                ),
                                choices=output_devices,
                                value=get_safe_dropdown_value(
                                    saved_settings["output_device"], output_devices
                                ),
                                interactive=True,
                            )
                            output_audio_gain = gr.Slider(
                                minimum=0,
                                maximum=200,
                                value=100,
                                label=i18n("Output Gain (%)"),
                                info=i18n(
                                    "Adjusts the final volume of the converted voice after processing."
                                ),
                                interactive=True,
                            )
                            output_asio_channels = gr.Slider(
                                minimum=-1,
                                maximum=16,
                                value=-1,
                                step=1,
                                label=i18n("Output ASIO Channel"),
                                info=i18n(
                                    "For ASIO drivers, selects a specific output channel. Leave at -1 for default."
                                ),
                                interactive=True,
                            )
                    with gr.Accordion("Monitor Device (Optional)", open=False):
                        with gr.Column():
                            use_monitor_device = gr.Checkbox(
                                label=i18n("Use Monitor Device"),
                                value=False,
                                interactive=True,
                            )
                            monitor_output_device = gr.Dropdown(
                                label=i18n("Monitor Device"),
                                info=i18n(
                                    "Select the device for monitoring your voice (e.g., your headphones)."
                                ),
                                choices=output_devices,
                                value=get_safe_dropdown_value(
                                    saved_settings["monitor_device"], output_devices
                                ),
                                interactive=True,
                            )
                            monitor_audio_gain = gr.Slider(
                                minimum=0,
                                maximum=200,
                                value=100,
                                label=i18n("Monitor Gain (%)"),
                                info=i18n(
                                    "Adjusts the volume of the monitor feed, independent of the main output."
                                ),
                                interactive=True,
                            )
                            monitor_asio_channels = gr.Slider(
                                minimum=-1,
                                maximum=16,
                                value=-1,
                                step=1,
                                label=i18n("Monitor ASIO Channel"),
                                info=i18n(
                                    "For ASIO drivers, selects a specific monitor output channel. Leave at -1 for default."
                                ),
                                interactive=True,
                            )
                with gr.Row():
                    exclusive_mode = gr.Checkbox(
                        label=i18n("Exclusive Mode (WASAPI)"),
                        info=i18n(
                            "For WASAPI (Windows), gives the app exclusive control for potentially lower latency."
                        ),
                        value=True,
                        interactive=True,
                    )
                    vad_enabled = gr.Checkbox(
                        label=i18n("Enable VAD"),
                        info=i18n(
                            "Enables Voice Activity Detection to only process audio when you are speaking, saving CPU."
                        ),
                        value=True,
                        interactive=True,
                    )

            # --- Voice model / index selection and conversion parameters ----
            with gr.TabItem(i18n("Model Settings")):
                with gr.Row():
                    model_choices = (
                        sorted(names, key=extract_model_and_epoch) if names else []
                    )
                    model_file = gr.Dropdown(
                        label=i18n("Voice Model"),
                        choices=model_choices,
                        interactive=True,
                        value=get_safe_dropdown_value(
                            saved_settings["model_file"], model_choices, default_weight
                        ),
                        allow_custom_value=True,
                    )
                    index_choices = get_files("index")
                    index_file = gr.Dropdown(
                        label=i18n("Index File"),
                        choices=index_choices,
                        value=get_safe_index_value(
                            saved_settings["index_file"],
                            index_choices,
                            match_index(default_weight) if default_weight else None,
                        ),
                        interactive=True,
                        allow_custom_value=True,
                    )

                with gr.Row():
                    unload_button = gr.Button(i18n("Unload Voice"))
                    refresh_button = gr.Button(i18n("Refresh"))
                with gr.Column():
                    autotune = gr.Checkbox(
                        label=i18n("Autotune"),
                        info=i18n(
                            "Apply a soft autotune to your inferences, recommended for singing conversions."
                        ),
                        visible=True,
                        value=False,
                        interactive=True,
                    )
                    # Hidden until the matching checkbox above is ticked
                    # (see the .change() wiring below).
                    autotune_strength = gr.Slider(
                        minimum=0,
                        maximum=1,
                        label=i18n("Autotune Strength"),
                        info=i18n(
                            "Set the autotune strength - the more you increase it the more it will snap to the chromatic grid."
                        ),
                        visible=False,
                        value=1,
                        interactive=True,
                    )
                    proposed_pitch = gr.Checkbox(
                        label=i18n("Proposed Pitch"),
                        info=i18n(
                            "Adjust the input audio pitch to match the voice model range."
                        ),
                        visible=True,
                        value=False,
                        interactive=True,
                    )
                    proposed_pitch_threshold = gr.Slider(
                        minimum=50.0,
                        maximum=1200.0,
                        label=i18n("Proposed Pitch Threshold"),
                        info=i18n(
                            "Male voice models typically use 155.0 and female voice models typically use 255.0."
                        ),
                        visible=False,
                        value=155.0,
                        interactive=True,
                    )
                    sid = gr.Dropdown(
                        label=i18n("Speaker ID"),
                        choices=(
                            get_speakers_id(default_weight) if default_weight else [0]
                        ),
                        value=0,
                        interactive=True,
                    )
                    pitch = gr.Slider(
                        minimum=-24,
                        maximum=24,
                        step=1,
                        label=i18n("Pitch"),
                        info=i18n(
                            "Set the pitch of the audio, the higher the value, the higher the pitch."
                        ),
                        value=0,
                        interactive=True,
                    )
                    index_rate = gr.Slider(
                        minimum=0,
                        maximum=1,
                        label=i18n("Search Feature Ratio"),
                        info=i18n(
                            "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio."
                        ),
                        value=0.75,
                        interactive=True,
                    )
                    volume_envelope = gr.Slider(
                        minimum=0,
                        maximum=1,
                        value=1,
                        label=i18n("Volume Envelope"),
                        info=i18n(
                            "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed."
                        ),
                        interactive=True,
                    )
                    protect = gr.Slider(
                        minimum=0,
                        maximum=0.5,
                        value=0.5,
                        label=i18n("Protect Voiceless Consonants"),
                        info=i18n(
                            "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect."
                        ),
                        interactive=True,
                    )
                    f0_method = gr.Radio(
                        choices=["rmvpe", "fcpe", "swift"],
                        value="swift",
                        label=i18n("Pitch extraction algorithm"),
                        info=i18n(
                            "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
                        ),
                        interactive=True,
                    )
                    embedder_model = gr.Radio(
                        choices=[
                            "contentvec",
                            "spin",
                            "chinese-hubert-base",
                            "japanese-hubert-base",
                            "korean-hubert-base",
                            "custom",
                        ],
                        value="contentvec",
                        label=i18n("Embedder Model"),
                        info=i18n("Model used for learning speaker embedding."),
                        interactive=True,
                    )
                    # Shown only when embedder_model == "custom".
                    with gr.Column(visible=False) as embedder_custom:
                        with gr.Accordion(i18n("Custom Embedder"), open=True):
                            with gr.Row():
                                embedder_model_custom = gr.Dropdown(
                                    label=i18n("Select Custom Embedder"),
                                    choices=refresh_embedders_folders(),
                                    interactive=True,
                                    allow_custom_value=True,
                                )
                                refresh_embedders_button = gr.Button(
                                    i18n("Refresh embedders")
                                )
                            folder_name_input = gr.Textbox(
                                label=i18n("Folder Name"), interactive=True
                            )
                            with gr.Row():
                                bin_file_upload = gr.File(
                                    label=i18n("Upload .bin"),
                                    type="filepath",
                                    interactive=True,
                                )
                                config_file_upload = gr.File(
                                    label=i18n("Upload .json"),
                                    type="filepath",
                                    interactive=True,
                                )
                            move_files_button = gr.Button(
                                i18n("Move files to custom embedder folder")
                            )

            # --- Buffer sizes / latency trade-offs ---------------------------
            with gr.TabItem(i18n("Performance Settings")):
                chunk_size = gr.Slider(
                    minimum=2.7,
                    maximum=2730.7,
                    value=512,
                    step=1,
                    label=i18n("Chunk Size (ms)"),
                    info=i18n(
                        "Audio buffer size in milliseconds. Lower values may reduce latency but increase CPU load."
                    ),
                    interactive=True,
                )
                cross_fade_overlap_size = gr.Slider(
                    minimum=0.05,
                    maximum=0.2,
                    value=0.05,
                    step=0.01,
                    label=i18n("Crossfade Overlap Size (s)"),
                    info=i18n(
                        "Duration of the fade between audio chunks to prevent clicks. Higher values create smoother transitions but may increase latency."
                    ),
                    interactive=True,
                )
                extra_convert_size = gr.Slider(
                    minimum=0.1,
                    maximum=5,
                    value=0.5,
                    step=0.1,
                    label=i18n("Extra Conversion Size (s)"),
                    info=i18n(
                        "Amount of extra audio processed to provide context to the model. Improves conversion quality at the cost of higher CPU usage."
                    ),
                    interactive=True,
                )
                silent_threshold = gr.Slider(
                    minimum=-90,
                    maximum=-60,
                    value=-90,
                    step=1,
                    label=i18n("Silence Threshold (dB)"),
                    info=i18n(
                        "Volume level below which audio is treated as silence and not processed. Helps to save CPU resources and reduce background noise."
                    ),
                    interactive=True,
                )

        # --- Event handlers ---------------------------------------------------

        def enforce_terms(terms_accepted, *args):
            # Gatekeeper around start_realtime(): refuses to start until the
            # terms checkbox is ticked.
            if not terms_accepted:
                message = "You must agree to the Terms of Use to proceed."
                gr.Info(message)
                yield message, interactive_true, interactive_false
                return
            yield from start_realtime(*args)

        def update_on_model_change(model_path):
            # When a new model is picked, re-match its index file and refresh
            # the speaker-id choices.
            new_index = match_index(model_path)
            new_sids = get_speakers_id(model_path)

            # Get updated index choices
            new_index_choices = get_files("index")
            # Use the matched index as fallback, but handle empty strings
            fallback_index = new_index if new_index and new_index.strip() else None
            safe_index_value = get_safe_index_value(
                "", new_index_choices, fallback_index
            )

            return gr.update(
                choices=new_index_choices, value=safe_index_value
            ), gr.update(choices=new_sids, value=0 if new_sids else None)

        def refresh_devices():
            # NOTE(review): relies on sounddevice's private _terminate /
            # _initialize to force a device re-scan — confirm this is stable
            # across sounddevice versions.
            sd._terminate()
            sd._initialize()

            input_choices, output_choices = get_audio_devices_formatted()
            input_choices, output_choices = list(input_choices.keys()), list(
                output_choices.keys()
            )
            return (
                gr.update(choices=input_choices),
                gr.update(choices=output_choices),
                gr.update(choices=output_choices),
            )

        def toggle_visible(checkbox):
            # Generic show/hide update driven by a checkbox value.
            return {"visible": checkbox, "__type__": "update"}

        def toggle_visible_embedder_custom(embedder_model):
            # Show the custom-embedder column only for the "custom" choice.
            if embedder_model == "custom":
                return {"visible": True, "__type__": "update"}
            return {"visible": False, "__type__": "update"}

        refresh_devices_button.click(
            fn=refresh_devices,
            outputs=[input_audio_device, output_audio_device, monitor_output_device],
        )

        autotune.change(
            fn=toggle_visible,
            inputs=[autotune],
            outputs=[autotune_strength],
        )

        proposed_pitch.change(
            fn=toggle_visible,
            inputs=[proposed_pitch],
            outputs=[proposed_pitch_threshold],
        )

        embedder_model.change(
            fn=toggle_visible_embedder_custom,
            inputs=[embedder_model],
            outputs=[embedder_custom],
        )

        move_files_button.click(
            fn=create_folder_and_move_files,
            inputs=[folder_name_input, bin_file_upload, config_file_upload],
            outputs=[],
        )
        refresh_embedders_button.click(
            fn=lambda: gr.update(choices=refresh_embedders_folders()),
            inputs=[],
            outputs=[embedder_model_custom],
        )

        # Input order below must match start_realtime()'s parameter order
        # (after the leading terms_checkbox consumed by enforce_terms).
        start_button.click(
            fn=enforce_terms,
            inputs=[
                terms_checkbox,
                input_audio_device,
                input_audio_gain,
                input_asio_channels,
                output_audio_device,
                output_audio_gain,
                output_asio_channels,
                monitor_output_device,
                monitor_audio_gain,
                monitor_asio_channels,
                use_monitor_device,
                exclusive_mode,
                vad_enabled,
                chunk_size,
                cross_fade_overlap_size,
                extra_convert_size,
                silent_threshold,
                pitch,
                index_rate,
                volume_envelope,
                protect,
                f0_method,
                model_file,
                index_file,
                sid,
                autotune,
                autotune_strength,
                proposed_pitch,
                proposed_pitch_threshold,
                embedder_model,
                embedder_model_custom,
            ],
            outputs=[latency_info, start_button, stop_button],
        )

        # NOTE(review): the .then() lambda uses a parenthesized `yield`
        # expression, making it a generator that yields a single 3-tuple —
        # unusual but syntactically valid; confirm it behaves as intended.
        stop_button.click(
            fn=stop_realtime, outputs=[latency_info, start_button, stop_button]
        ).then(
            fn=lambda: (
                yield gr.update(value="Stopped"),
                interactive_true,
                interactive_false,
            ),
            inputs=None,
            outputs=[latency_info, start_button, stop_button],
        )
        unload_button.click(
            fn=lambda: (
                {"value": "", "__type__": "update"},
                {"value": "", "__type__": "update"},
            ),
            inputs=[],
            outputs=[model_file, index_file],
        )
        model_file.select(
            fn=update_on_model_change, inputs=[model_file], outputs=[index_file, sid]
        )

        # Save settings when devices or model change
        def save_input_device(input_device):
            if input_device:
                save_realtime_settings(input_device, None, None, None, None)

        def save_output_device(output_device):
            if output_device:
                save_realtime_settings(None, output_device, None, None, None)

        def save_monitor_device(monitor_device):
            if monitor_device:
                save_realtime_settings(None, None, monitor_device, None, None)

        def save_model_file(model_file):
            if model_file:
                save_realtime_settings(None, None, None, model_file, None)

        def save_index_file(index_file):
            # Only save if index_file is not None and not empty
            if index_file:
                save_realtime_settings(None, None, None, None, index_file)

        # Add event handlers to save settings
        input_audio_device.change(
            fn=save_input_device, inputs=[input_audio_device], outputs=[]
        )

        output_audio_device.change(
            fn=save_output_device, inputs=[output_audio_device], outputs=[]
        )

        monitor_output_device.change(
            fn=save_monitor_device, inputs=[monitor_output_device], outputs=[]
        )

        def refresh_all():
            # Rescan model/index files and audio devices for all dropdowns.
            new_names = get_files("model")
            new_indexes = get_files("index")
            input_choices, output_choices = get_audio_devices_formatted()
            input_choices, output_choices = list(input_choices.keys()), list(
                output_choices.keys()
            )
            return (
                gr.update(choices=sorted(new_names, key=extract_model_and_epoch)),
                gr.update(choices=new_indexes),
                gr.update(choices=input_choices),
                gr.update(choices=output_choices),
                gr.update(choices=output_choices),
            )

        model_file.change(fn=save_model_file, inputs=[model_file], outputs=[])

        index_file.change(fn=save_index_file, inputs=[index_file], outputs=[])

        refresh_button.click(
            fn=refresh_all,
            outputs=[
                model_file,
                index_file,
                input_audio_device,
                output_audio_device,
                monitor_output_device,
            ],
        )
tabs/report/main.js ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// main.js
// Screen-recording glue for the report tab. Uses ScreenCastRecorder and the
// __awaiter helper defined in recorder.js (which must be loaded first), and
// exposes startRecording/stopRecording on `window` so record_button.js can
// drive them from Gradio.
if (!ScreenCastRecorder.isSupportedBrowser()) {
    console.error("Screen Recording not supported in this browser");
}
let recorder;     // active ScreenCastRecorder instance, if any
let outputBlob;   // last completed recording as a Blob

// Stop the active recording and immediately download it as recording.webm.
const stopRecording = () => __awaiter(void 0, void 0, void 0, function* () {
    // NOTE(review): currentState is hard-coded, so the "OFF" branch below can
    // never trigger — this looks like a stripped-down port of a React
    // component's state machine (see the commented setState calls); confirm.
    let currentState = "RECORDING";
    // We should do nothing if the user try to stop recording when it is not started
    if (currentState === "OFF" || recorder == null) {
        return;
    }
    // if (currentState === "COUNTDOWN") {
    //     this.setState({
    //         currentState: "OFF",
    //     })
    // }
    if (currentState === "RECORDING") {
        if (recorder.getState() === "inactive") {
            // this.setState({
            //     currentState: "OFF",
            // })
            console.log("Inactive");
        }
        else {
            // Stop yields the recorded media as a Blob.
            outputBlob = yield recorder.stop();
            console.log("Done recording");
            // this.setState({
            //     outputBlob,
            //     currentState: "PREVIEW_FILE",
            // })
            window.currentState = "PREVIEW_FILE";
            // Expose the blob URL globally so record_button.js can fetch and
            // base64-encode it for the Python side.
            const videoSource = URL.createObjectURL(outputBlob);
            window.videoSource = videoSource;
            // Trigger a browser download of the recording via a temp <a>.
            const fileName = "recording";
            const link = document.createElement("a");
            link.setAttribute("href", videoSource);
            link.setAttribute("download", `${fileName}.webm`);
            link.click();
        }
    }
});
// Create a fresh recorder (no audio) and start capturing the screen.
const startRecording = () => __awaiter(void 0, void 0, void 0, function* () {
    const recordAudio = false;
    recorder = new ScreenCastRecorder({
        recordAudio,
        onErrorOrStop: () => stopRecording(),
    });
    try {
        yield recorder.initialize();
    }
    catch (e) {
        console.warn(`ScreenCastRecorder.initialize error: ${e}`);
        // this.setState({ currentState: "UNSUPPORTED" })
        window.currentState = "UNSUPPORTED";
        return;
    }
    // this.setState({ currentState: "COUNTDOWN" })
    const hasStarted = recorder.start();
    if (hasStarted) {
        // this.setState({
        //     currentState: "RECORDING",
        // })
        console.log("Started recording");
        window.currentState = "RECORDING";
    }
    else {
        // start() refused (e.g. permission denied) — tear down cleanly.
        stopRecording().catch(err => console.warn(`withScreencast.stopRecording threw an error: ${err}`));
    }
});

// Set global functions to window.
window.startRecording = startRecording;
window.stopRecording = stopRecording;
tabs/report/record_button.js ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Setup if needed and start recording.
// Click handler attached to the "Record Screen" Gradio button. Toggles between
// starting a capture and stopping it; when stopping, returns the finished
// recording to Python as a base64 string.
async () => {
    // Set up recording functions if not already initialized
    if (!window.startRecording) {
        // NOTE: these two declarations are placeholders — report.py textually
        // replaces them with the full contents of recorder.js and main.js
        // before handing this snippet to Gradio. Do not rename or reformat.
        let recorder_js = null;
        let main_js = null;
    }

    // Function to fetch and convert video blob to base64 using async/await without explicit Promise
    async function getVideoBlobAsBase64(objectURL) {
        const response = await fetch(objectURL);
        if (!response.ok) {
            throw new Error('Failed to fetch video blob.');
        }

        const blob = await response.blob();

        const reader = new FileReader();
        reader.readAsDataURL(blob);

        return new Promise((resolve, reject) => {
            reader.onloadend = () => {
                if (reader.result) {
                    resolve(reader.result.split(',')[1]); // Return the base64 string (without data URI prefix)
                } else {
                    reject('Failed to convert blob to base64.');
                }
            };
        });
    }

    if (window.currentState === "RECORDING") {
        // Second click: stop the capture and hand the video back to Python.
        await window.stopRecording();
        const base64String = await getVideoBlobAsBase64(window.videoSource);
        return base64String;
    } else {
        // First click: begin capture. The literal "Record" return value tells
        // the Python toggle_button_label() that recording has started.
        window.startRecording();
        return "Record";
    }
}
tabs/report/recorder.js ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// recorder.js
// Standard TypeScript-emitted __awaiter helper: drives a generator whose
// yielded values are awaited, emulating async/await for older JS targets.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
11
+ const BLOB_TYPE = "video/webm";
12
// Thin wrapper around getDisplayMedia + MediaRecorder that captures the
// screen (optionally with microphone audio) into a single webm Blob.
class ScreenCastRecorder {
    /** True if the current browser likely supports screencasts. */
    static isSupportedBrowser() {
        return (navigator.mediaDevices != null &&
            navigator.mediaDevices.getUserMedia != null &&
            navigator.mediaDevices.getDisplayMedia != null &&
            MediaRecorder.isTypeSupported(BLOB_TYPE));
    }
    // onErrorOrStop is invoked when recording errors out or stops on its own.
    constructor({ recordAudio, onErrorOrStop }) {
        this.recordAudio = recordAudio;
        this.onErrorOrStopCallback = onErrorOrStop;
        this.inputStream = null;
        this.recordedChunks = [];
        this.mediaRecorder = null;
    }
    /**
     * This asynchronous method will initialize the screen recording object asking
     * for permissions to the user which are needed to start recording.
     */
    initialize() {
        return __awaiter(this, void 0, void 0, function* () {
            const desktopStream = yield navigator.mediaDevices.getDisplayMedia({
                video: true,
            });
            let tracks = desktopStream.getTracks();
            if (this.recordAudio) {
                // Merge mic audio tracks into the captured screen stream.
                const voiceStream = yield navigator.mediaDevices.getUserMedia({
                    video: false,
                    audio: true,
                });
                tracks = tracks.concat(voiceStream.getAudioTracks());
            }
            this.recordedChunks = [];
            this.inputStream = new MediaStream(tracks);
            this.mediaRecorder = new MediaRecorder(this.inputStream, {
                mimeType: BLOB_TYPE,
            });
            // Chunks accumulate here until stop() assembles them into a Blob.
            this.mediaRecorder.ondataavailable = e => this.recordedChunks.push(e.data);
        });
    }
    // MediaRecorder state ("inactive"/"recording"/"paused"); "inactive" when
    // initialize() has not run yet.
    getState() {
        if (this.mediaRecorder) {
            return this.mediaRecorder.state;
        }
        return "inactive";
    }
    /**
     * This method will start the screen recording if the user has granted permissions
     * and the mediaRecorder has been initialized
     *
     * @returns {boolean}
     */
    start() {
        if (!this.mediaRecorder) {
            console.warn(`ScreenCastRecorder.start: mediaRecorder is null`);
            return false;
        }
        const logRecorderError = (e) => {
            console.warn(`mediaRecorder.start threw an error: ${e}`);
        };
        this.mediaRecorder.onerror = (e) => {
            logRecorderError(e);
            this.onErrorOrStopCallback();
        };
        this.mediaRecorder.onstop = () => this.onErrorOrStopCallback();
        try {
            this.mediaRecorder.start();
        }
        catch (e) {
            logRecorderError(e);
            return false;
        }
        return true;
    }
    /**
     * This method will stop recording and then return the generated Blob
     *
     * @returns {(Promise|undefined)}
     *     A Promise which will return the generated Blob
     *     Undefined if the MediaRecorder could not initialize
     */
    stop() {
        if (!this.mediaRecorder) {
            return undefined;
        }
        let resolver;
        const promise = new Promise(r => {
            resolver = r;
        });
        // Replaces the onstop set in start() so the callback fires exactly once,
        // resolving the promise instead of re-entering onErrorOrStopCallback.
        this.mediaRecorder.onstop = () => resolver();
        this.mediaRecorder.stop();
        if (this.inputStream) {
            // Release the capture tracks so the browser's "sharing" UI goes away.
            this.inputStream.getTracks().forEach(s => s.stop());
            this.inputStream = null;
        }
        return promise.then(() => this.buildOutputBlob());
    }
    buildOutputBlob() {
        return new Blob(this.recordedChunks, { type: BLOB_TYPE });
    }
}
tabs/report/report.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import base64
4
+ import pathlib
5
+ import tempfile
6
+ import gradio as gr
7
+
8
+ from assets.i18n.i18n import I18nAuto
9
+
10
now_dir = os.getcwd()
sys.path.append(now_dir)

i18n = I18nAuto()

# Client-side JS sources used to drive screen recording from the browser.
recorder_js_path = os.path.join(now_dir, "tabs", "report", "recorder.js")
main_js_path = os.path.join(now_dir, "tabs", "report", "main.js")
record_button_js_path = os.path.join(now_dir, "tabs", "report", "record_button.js")

recorder_js = pathlib.Path(recorder_js_path).read_text()
main_js = pathlib.Path(main_js_path).read_text()
# Inline recorder.js and main.js into the click handler by substituting the
# placeholder declarations inside record_button.js (which must match exactly).
record_button_js = (
    pathlib.Path(record_button_js_path)
    .read_text()
    .replace("let recorder_js = null;", recorder_js)
    .replace("let main_js = null;", main_js)
)
27
+
28
+
29
def save_base64_video(base64_string):
    """Decode a base64-encoded video and persist it to a temporary .mp4 file.

    The file is created with ``delete=False`` so it survives the function
    returning; the caller owns (and must eventually remove) it.

    Returns the path of the temporary file.
    """
    decoded_bytes = base64.b64decode(base64_string)
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as video_file:
        video_path = video_file.name
        video_file.write(decoded_bytes)
    print(f"Temporary MP4 file saved as: {video_path}")
    return video_path
37
+
38
+
39
def report_tab():
    """Build the "report an issue" Gradio tab: instructions plus a screen recorder."""
    instructions = [
        i18n("# How to Report an Issue on GitHub"),
        i18n(
            "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing."
        ),
        i18n(
            "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not)."
        ),
        i18n(
            "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button."
        ),
        i18n(
            "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step."
        ),
    ]
    components = [gr.Markdown(value=instruction) for instruction in instructions]

    start_button = gr.Button(i18n("Record Screen"))
    video_component = gr.Video(interactive=False)

    def toggle_button_label(returned_string):
        # The JS click handler returns the literal "Record" right after it
        # starts capturing; any other string is the base64-encoded recording.
        if returned_string.startswith("Record"):
            return gr.Button(value="Stop Recording"), None
        else:
            try:
                temp_filename = save_base64_video(returned_string)
            except Exception as error:
                print(f"An error occurred converting video to mp4: {error}")
                return gr.Button(value="Record Screen"), gr.Warning(
                    f"Failed to convert video to mp4:\n{error}"
                )
            return gr.Button(value="Record Screen"), gr.Video(
                value=temp_filename, interactive=False
            )

    # js=record_button_js runs the browser-side recorder; its return value is
    # fed to toggle_button_label via the button's own value.
    start_button.click(
        fn=toggle_button_label,
        inputs=[start_button],
        outputs=[start_button, video_component],
        js=record_button_js,
    )
tabs/settings/sections/filter.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+ import gradio as gr
5
+ from assets.i18n.i18n import I18nAuto
6
+
7
+ now_dir = os.getcwd()
8
+ sys.path.append(now_dir)
9
+
10
+ i18n = I18nAuto()
11
+ config_file = os.path.join(now_dir, "assets", "config.json")
12
+
13
+ filter_trigger = None
14
+
15
+
16
def get_filter_trigger():
    """Return the shared hidden Textbox that broadcasts filter-setting changes.

    Lazily created once per process; other tabs attach listeners to this
    component so they can react when the model/index filter is toggled.
    """
    global filter_trigger
    if filter_trigger is None:
        filter_trigger = gr.Textbox(visible=False)
    return filter_trigger
21
+
22
+
23
def load_config_filter():
    """Report whether the model/index keyword filter is enabled in config.json."""
    with open(config_file, "r", encoding="utf8") as handle:
        settings = json.load(handle)
    # Missing key defaults to disabled.
    return bool(settings.get("model_index_filter", False))
27
+
28
+
29
def save_config_filter(val: bool):
    """Persist the model/index filter flag into config.json."""
    with open(config_file, "r", encoding="utf8") as handle:
        settings = json.load(handle)
    settings["model_index_filter"] = bool(val)
    with open(config_file, "w", encoding="utf8") as handle:
        json.dump(settings, handle, indent=2)
35
+
36
+
37
def filter_tab():
    """Render the checkbox toggling the model/index keyword-filter setting."""
    checkbox = gr.Checkbox(
        label=i18n("Enable model/index list filter"),
        info=i18n(
            "Adds a keyword filter for the model/index selection lists in the Inference and TTS tabs."
        ),
        value=load_config_filter(),
        interactive=True,
    )
    # Persist immediately on every toggle.
    checkbox.change(fn=save_config_filter, inputs=[checkbox], outputs=[])
    return checkbox
tabs/settings/sections/lang.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ import json
3
+ import gradio as gr
4
+ from assets.i18n.i18n import I18nAuto
5
+
6
+ now_dir = os.getcwd()
7
+ sys.path.append(now_dir)
8
+
9
+ i18n = I18nAuto()
10
+
11
+ config_file = os.path.join(now_dir, "assets", "config.json")
12
+
13
+
14
def get_language_settings():
    """Return the configured UI language name, or the auto-detect sentinel.

    Reads assets/config.json; when the override flag is falsy the UI follows
    the system language.
    """
    with open(config_file, "r", encoding="utf8") as file:
        config = json.load(file)

    # Truthiness test instead of `== False`: also treats None/0 as "no override".
    if not config["lang"]["override"]:
        return "Language automatically detected in the system"
    return config["lang"]["selected_lang"]
22
+
23
+
24
def save_lang_settings(selected_language):
    """Persist the chosen UI language to config.json and prompt for a restart.

    Selecting the auto-detect sentinel clears the override flag; any other
    value sets the override and records the selection.
    """
    with open(config_file, "r", encoding="utf8") as file:
        config = json.load(file)

    if selected_language == "Language automatically detected in the system":
        config["lang"]["override"] = False
    else:
        config["lang"]["override"] = True
        config["lang"]["selected_lang"] = selected_language

    with open(config_file, "w", encoding="utf8") as file:
        json.dump(config, file, indent=2)

    # Notify only after the config has actually been written (previously the
    # toast fired before the save, and the message had a grammar error).
    gr.Info("Language has been saved. Restart Applio to apply the changes.")
38
+
39
+
40
def lang_tab():
    """Render the language dropdown; changes are saved and require a restart."""
    with gr.Column():
        selected_language = gr.Dropdown(
            label=i18n("Language"),
            info=i18n(
                "Select the language you want to use. (Requires restarting Applio)"
            ),
            value=get_language_settings(),
            # First choice is the auto-detect sentinel; the rest come from
            # the i18n catalog.
            choices=["Language automatically detected in the system"]
            + i18n._get_available_languages(),
            interactive=True,
        )

        selected_language.change(
            fn=save_lang_settings,
            inputs=[selected_language],
            outputs=[],
        )
tabs/settings/sections/model_author.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+
5
+ now_dir = os.getcwd()
6
+ sys.path.append(now_dir)
7
+
8
+ import gradio as gr
9
+ from assets.i18n.i18n import I18nAuto
10
+
11
+ i18n = I18nAuto()
12
+
13
+
14
def set_model_author(model_author: str):
    """Store the model author's name in assets/config.json.

    Returns a human-readable confirmation string (also printed to stdout).
    """
    config_path = os.path.join(now_dir, "assets", "config.json")
    with open(config_path, "r") as handle:
        config = json.load(handle)
    config["model_author"] = model_author
    with open(config_path, "w") as handle:
        json.dump(config, handle, indent=4)
    message = f"Model author set to {model_author}."
    print(message)
    return message
25
+
26
+
27
def get_model_author():
    """Return the configured model author name, or None when unset."""
    with open(os.path.join(now_dir, "assets", "config.json"), "r") as f:
        config = json.load(f)
    # dict.get covers the missing-key case without a separate membership test.
    return config.get("model_author")
32
+
33
+
34
def model_author_tab():
    """Render the UI for setting the author name embedded in model metadata."""
    model_author_name = gr.Textbox(
        label=i18n("Model Author Name"),
        info=i18n("The name that will appear in the model information."),
        value=get_model_author(),
        placeholder=i18n("Enter your nickname"),
        interactive=True,
    )
    model_author_output_info = gr.Textbox(
        label=i18n("Output Information"),
        info=i18n("The output information will be displayed here."),
        value="",
        max_lines=1,
    )
    button = gr.Button(i18n("Set name"))

    button.click(
        fn=set_model_author,
        inputs=[model_author_name],
        outputs=[model_author_output_info],
    )
tabs/settings/sections/precision.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+
5
+ now_dir = os.getcwd()
6
+ sys.path.append(now_dir)
7
+
8
+ import gradio as gr
9
+ from assets.i18n.i18n import I18nAuto
10
+
11
+ i18n = I18nAuto()
12
+
13
+
14
def set_precision(precision: str):
    """Write the chosen training/inference precision to assets/config.json.

    Returns a human-readable confirmation string (also printed to stdout).
    """
    config_path = os.path.join(now_dir, "assets", "config.json")
    with open(config_path, "r") as handle:
        config = json.load(handle)
    config["precision"] = precision
    with open(config_path, "w") as handle:
        json.dump(config, handle, indent=4)
    message = f"Precision set to {precision}."
    print(message)
    return message
25
+
26
+
27
def get_precision():
    """Return the configured precision ("fp32"/"fp16"/"bf16"), or None if unset."""
    with open(os.path.join(now_dir, "assets", "config.json"), "r") as f:
        config = json.load(f)
    # dict.get covers the missing-key case without a separate membership test.
    return config.get("precision")
32
+
33
+
34
def precision_tab():
    """Render the precision selector (fp32/fp16/bf16) and its status textbox."""
    precision = gr.Radio(
        label=i18n("Precision"),
        info=i18n("Select the precision you want to use for training and inference."),
        value=get_precision(),
        choices=["fp32", "fp16", "bf16"],
        interactive=True,
    )
    precision_info = gr.Textbox(
        label=i18n("Output Information"),
        info=i18n("The output information will be displayed here."),
        value="",
        max_lines=1,
    )
    button = gr.Button(i18n("Update precision"))

    button.click(
        fn=set_precision,
        inputs=[precision],
        outputs=[precision_info],
    )
tabs/settings/sections/presence.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+ import json
5
+ from assets.i18n.i18n import I18nAuto
6
+ from assets.discord_presence import RPCManager
7
+
8
+ now_dir = os.getcwd()
9
+ sys.path.append(now_dir)
10
+
11
+ i18n = I18nAuto()
12
+ config_file = os.path.join(now_dir, "assets", "config.json")
13
+
14
+
15
def load_config_presence():
    """Return the saved Discord-presence flag from config.json."""
    with open(config_file, "r", encoding="utf8") as handle:
        return json.load(handle)["discord_presence"]
19
+
20
+
21
def save_config(value):
    """Persist the Discord-presence flag to config.json."""
    with open(config_file, "r", encoding="utf8") as handle:
        settings = json.load(handle)
    settings["discord_presence"] = value
    with open(config_file, "w", encoding="utf8") as handle:
        json.dump(settings, handle, indent=2)
27
+
28
+
29
def presence_tab():
    """Render the Discord-presence checkbox and wire it to toggle()."""
    with gr.Row():
        with gr.Column():
            presence = gr.Checkbox(
                label=i18n("Enable Applio integration with Discord presence"),
                info=i18n(
                    "It will activate the possibility of displaying the current Applio activity in Discord."
                ),
                interactive=True,
                value=load_config_presence(),
            )
            # toggle() persists the flag and starts/stops the RPC client.
            presence.change(
                fn=toggle,
                inputs=[presence],
                outputs=[],
            )
45
+
46
+
47
def toggle(checkbox):
    """Persist the Discord-presence setting and start/stop the RPC client."""
    save_config(bool(checkbox))
    # Truthiness test instead of `== True`.
    if load_config_presence():
        try:
            RPCManager.start_presence()
        except KeyboardInterrupt:
            RPCManager.stop_presence()
    else:
        RPCManager.stop_presence()
tabs/settings/sections/restart.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import sys
4
+ import json
5
+
6
+ now_dir = os.getcwd()
7
+
8
+
9
def stop_train(model_name: str):
    """Kill the training processes recorded in logs/<model_name>/config.json.

    Removes the "process_pids" entry from the config before killing, so a
    crash mid-way cannot leave stale PIDs behind. Best-effort: a missing or
    unreadable config simply means there is nothing to stop.
    """
    pid_file_path = os.path.join(now_dir, "logs", model_name, "config.json")
    try:
        with open(pid_file_path, "r") as pid_file:
            pid_data = json.load(pid_file)
        pids = pid_data.get("process_pids", [])
        pid_data.pop("process_pids", None)
        with open(pid_file_path, "w") as pid_file:
            json.dump(pid_data, pid_file, indent=4)
        for pid in pids:
            try:
                os.kill(pid, 9)  # SIGKILL; the process may already be gone
            except OSError:
                # One dead PID must not prevent killing the remaining ones.
                pass
    except FileNotFoundError:
        pass  # no training in progress for this model
    except (OSError, json.JSONDecodeError) as error:
        # Narrowed from a bare `except: pass` that hid all failures.
        print(f"Could not stop training for {model_name}: {error}")
22
+
23
+
24
def stop_infer():
    """Kill running inference processes listed in assets/infer_pid.txt.

    Best-effort: a missing PID file means no inference is running; individual
    dead PIDs are skipped so the rest still get killed.
    """
    pid_file_path = os.path.join(now_dir, "assets", "infer_pid.txt")
    try:
        with open(pid_file_path, "r") as pid_file:
            pids = [int(pid) for pid in pid_file.readlines()]
        for pid in pids:
            try:
                os.kill(pid, 9)  # SIGKILL; the process may already be gone
            except OSError:
                pass
        os.remove(pid_file_path)
    except FileNotFoundError:
        pass  # nothing to stop
    except (OSError, ValueError) as error:
        # Narrowed from a bare `except: pass`; ValueError covers corrupt PIDs.
        print(f"Could not stop inference processes: {error}")
34
+
35
+
36
def restart_applio():
    """Clear the terminal and re-exec the current Python process in place."""
    # Clear the console: "clear" on POSIX, "cls" on Windows.
    if os.name != "nt":
        os.system("clear")
    else:
        os.system("cls")
    python = sys.executable
    # os.execl replaces this process with a fresh interpreter running the
    # same command line — this call does not return.
    os.execl(python, python, *sys.argv)
43
+
44
+
45
+ from assets.i18n.i18n import I18nAuto
46
+
47
+ i18n = I18nAuto()
48
+
49
+
50
def restart_tab():
    """Render the "Restart Applio" button wired to restart_applio()."""
    with gr.Row():
        with gr.Column():
            restart_button = gr.Button(i18n("Restart Applio"))
            restart_button.click(
                fn=restart_applio,
                inputs=[],
                outputs=[],
            )
tabs/settings/sections/themes.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+
5
+ from assets.i18n.i18n import I18nAuto
6
+ import assets.themes.loadThemes as loadThemes
7
+
8
+ now_dir = os.getcwd()
9
+ sys.path.append(now_dir)
10
+
11
+ i18n = I18nAuto()
12
+
13
+
14
def theme_tab():
    """Render the theme dropdown (applying a theme requires a restart)."""
    with gr.Row():
        with gr.Column():
            themes_select = gr.Dropdown(
                loadThemes.get_theme_list(),
                value=loadThemes.load_theme(),
                label=i18n("Theme"),
                info=i18n(
                    "Select the theme you want to use. (Requires restarting Applio)"
                ),
                visible=True,
            )
            themes_select.change(
                fn=loadThemes.select_theme,
                inputs=themes_select,
                outputs=[],
            )
tabs/settings/sections/version.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from assets.version_checker import compare_version
4
+ from assets.i18n.i18n import I18nAuto
5
+
6
+ i18n = I18nAuto()
7
+
8
+
9
def version_tab():
    """Render the version-checker textbox and its "Check for updates" button."""
    with gr.Row():
        with gr.Column():
            version_check = gr.Textbox(
                label=i18n("Version Checker"),
                info=i18n(
                    "Check which version of Applio is the latest to see if you need to update."
                ),
                interactive=False,
            )
            version_button = gr.Button(i18n("Check for updates"))
            version_button.click(
                fn=compare_version,
                inputs=[],
                outputs=[version_check],
            )
tabs/settings/settings.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+
5
+ now_dir = os.getcwd()
6
+ sys.path.append(now_dir)
7
+
8
+ from assets.i18n.i18n import I18nAuto
9
+
10
+ i18n = I18nAuto()
11
+
12
+ from tabs.settings.sections.presence import presence_tab
13
+ from tabs.settings.sections.themes import theme_tab
14
+ from tabs.settings.sections.version import version_tab
15
+ from tabs.settings.sections.lang import lang_tab
16
+ from tabs.settings.sections.restart import restart_tab
17
+ from tabs.settings.sections.model_author import model_author_tab
18
+ from tabs.settings.sections.precision import precision_tab
19
+ from tabs.settings.sections.filter import filter_tab, get_filter_trigger
20
+
21
+
22
def settings_tab(filter_state_trigger=None):
    """Assemble the Settings tab: general options plus training options.

    filter_state_trigger: optional hidden component that mirrors the filter
    checkbox state so other tabs can react; falls back to the shared singleton.
    """
    if filter_state_trigger is None:
        filter_state_trigger = get_filter_trigger()

    with gr.TabItem(label=i18n("General")):
        filter_component = filter_tab()

        # Mirror the checkbox into the hidden trigger as a string ("True"/"False").
        filter_component.change(
            fn=lambda checked: gr.update(value=str(checked)),
            inputs=[filter_component],
            outputs=[filter_state_trigger],
            show_progress=False,
        )
        presence_tab()
        theme_tab()
        version_tab()
        lang_tab()
        restart_tab()
    with gr.TabItem(label=i18n("Training")):
        model_author_tab()
        precision_tab()
tabs/train/train.py ADDED
@@ -0,0 +1,1033 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import sys
4
+ from multiprocessing import cpu_count
5
+
6
+ import gradio as gr
7
+
8
+ from assets.i18n.i18n import I18nAuto
9
+ from core import (
10
+ run_extract_script,
11
+ run_index_script,
12
+ run_preprocess_script,
13
+ run_prerequisites_script,
14
+ run_train_script,
15
+ )
16
+ from rvc.configs.config import get_gpu_info, get_number_of_gpus, max_vram_gpu
17
+ from rvc.lib.utils import format_title
18
+ from tabs.settings.sections.restart import stop_train
19
+
20
+ i18n = I18nAuto()
21
+ now_dir = os.getcwd()
22
+ sys.path.append(now_dir)
23
+
24
+
25
+ sup_audioext = {
26
+ "wav",
27
+ "mp3",
28
+ "flac",
29
+ "ogg",
30
+ "opus",
31
+ "m4a",
32
+ "mp4",
33
+ "aac",
34
+ "alac",
35
+ "wma",
36
+ "aiff",
37
+ "webm",
38
+ "ac3",
39
+ }
40
+
41
+ # Custom Pretraineds
42
+ pretraineds_custom_path = os.path.join(
43
+ now_dir, "rvc", "models", "pretraineds", "custom"
44
+ )
45
+
46
+ pretraineds_custom_path_relative = os.path.relpath(pretraineds_custom_path, now_dir)
47
+
48
+ custom_embedder_root = os.path.join(
49
+ now_dir, "rvc", "models", "embedders", "embedders_custom"
50
+ )
51
+ custom_embedder_root_relative = os.path.relpath(custom_embedder_root, now_dir)
52
+
53
+ os.makedirs(custom_embedder_root, exist_ok=True)
54
+ os.makedirs(pretraineds_custom_path_relative, exist_ok=True)
55
+
56
+
57
def get_pretrained_list(suffix):
    """Collect custom pretrained .pth files whose filename contains *suffix*."""
    matches = []
    for dirpath, _, filenames in os.walk(pretraineds_custom_path_relative):
        for filename in filenames:
            if filename.endswith(".pth") and suffix in filename:
                matches.append(os.path.join(dirpath, filename))
    return matches
64
+
65
+
66
+ pretraineds_list_d = get_pretrained_list("D")
67
+ pretraineds_list_g = get_pretrained_list("G")
68
+
69
+
70
def refresh_custom_pretraineds():
    """Return Gradio update payloads for the G and D pretrained dropdowns."""
    g_update = {"choices": sorted(get_pretrained_list("G")), "__type__": "update"}
    d_update = {"choices": sorted(get_pretrained_list("D")), "__type__": "update"}
    return g_update, d_update
75
+
76
+
77
+ # Dataset Creator
78
+ datasets_path = os.path.join(now_dir, "assets", "datasets")
79
+
80
+ if not os.path.exists(datasets_path):
81
+ os.makedirs(datasets_path)
82
+
83
+ datasets_path_relative = os.path.relpath(datasets_path, now_dir)
84
+
85
+
86
def get_datasets_list():
    """List dataset directories containing at least one supported audio file."""
    extensions = tuple(sup_audioext)
    found = []
    for dirpath, _, filenames in os.walk(datasets_path_relative):
        if any(name.endswith(extensions) for name in filenames):
            found.append(dirpath)
    return found
92
+
93
+
94
def refresh_datasets():
    """Return a Gradio update payload with the current dataset choices."""
    return {"__type__": "update", "choices": sorted(get_datasets_list())}
96
+
97
+
98
+ # Model Names
99
+ models_path = os.path.join(now_dir, "logs")
100
+
101
+
102
def get_models_list():
    """List model folders under logs/, excluding internal directories."""
    excluded_terms = ("zips", "mute", "reference")
    entries = []
    for entry in os.listdir(models_path):
        if not os.path.isdir(os.path.join(models_path, entry)):
            continue
        if any(term in entry for term in excluded_terms):
            continue
        entries.append(os.path.basename(entry))
    return entries
109
+
110
+
111
def refresh_models():
    """Return a Gradio update payload with the current model-name choices."""
    return {"__type__": "update", "choices": sorted(get_models_list())}
113
+
114
+
115
+ # Refresh Models and Datasets
116
def refresh_models_and_datasets():
    """Refresh both the model-name and dataset dropdowns in a single call."""
    models_update = {"choices": sorted(get_models_list()), "__type__": "update"}
    datasets_update = {"choices": sorted(get_datasets_list()), "__type__": "update"}
    return models_update, datasets_update
121
+
122
+
123
+ # Refresh Custom Embedders
124
def get_embedder_custom_list():
    """List every custom-embedder folder (as a path relative to the project)."""
    folders = []
    for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative):
        folders.extend(os.path.join(dirpath, name) for name in dirnames)
    return folders
130
+
131
+
132
def refresh_custom_embedder_list():
    """Return a Gradio update payload with the custom embedder choices."""
    return {"__type__": "update", "choices": sorted(get_embedder_custom_list())}
134
+
135
+
136
+ # Drop Model
137
def save_drop_model(dropbox):
    """Copy a dropped pretrained .pth into the custom pretraineds folder.

    Always returns None so the Gradio file component is cleared.
    """
    # Guard clause: reject anything that is not a .pth file.
    if ".pth" not in dropbox:
        gr.Info(
            i18n(
                "The file you dropped is not a valid pretrained file. Please try again."
            )
        )
        return None
    file_name = os.path.basename(dropbox)
    pretrained_path = os.path.join(pretraineds_custom_path_relative, file_name)
    # Replace any existing file of the same name.
    if os.path.exists(pretrained_path):
        os.remove(pretrained_path)
    shutil.copy(dropbox, pretrained_path)
    gr.Info(
        i18n(
            "Click the refresh button to see the pretrained file in the dropdown menu."
        )
    )
    return None
+
157
+
158
+ # Drop Dataset
159
+ def save_drop_dataset_audio(dropbox, dataset_name):
160
+ if not dataset_name:
161
+ gr.Info("Please enter a valid dataset name. Please try again.")
162
+ return None, None
163
+ else:
164
+ file_extension = os.path.splitext(dropbox)[1][1:].lower()
165
+ if file_extension not in sup_audioext:
166
+ gr.Info("The file you dropped is not a valid audio file. Please try again.")
167
+ else:
168
+ dataset_name = format_title(dataset_name)
169
+ audio_file = format_title(os.path.basename(dropbox))
170
+ dataset_path = os.path.join(now_dir, "assets", "datasets", dataset_name)
171
+ if not os.path.exists(dataset_path):
172
+ os.makedirs(dataset_path)
173
+ destination_path = os.path.join(dataset_path, audio_file)
174
+ if os.path.exists(destination_path):
175
+ os.remove(destination_path)
176
+ shutil.copy(dropbox, destination_path)
177
+ gr.Info(
178
+ i18n(
179
+ "The audio file has been successfully added to the dataset. Please click the preprocess button."
180
+ )
181
+ )
182
+ dataset_path = os.path.dirname(destination_path)
183
+ relative_dataset_path = os.path.relpath(dataset_path, now_dir)
184
+
185
+ return None, relative_dataset_path
186
+
187
+
188
+ # Drop Custom Embedder
189
+ def create_folder_and_move_files(folder_name, bin_file, config_file):
190
+ if not folder_name:
191
+ return "Folder name must not be empty."
192
+
193
+ folder_name = os.path.basename(folder_name)
194
+ target_folder = os.path.join(custom_embedder_root, folder_name)
195
+
196
+ normalized_target_folder = os.path.abspath(target_folder)
197
+ normalized_custom_embedder_root = os.path.abspath(custom_embedder_root)
198
+
199
+ if not normalized_target_folder.startswith(normalized_custom_embedder_root):
200
+ return "Invalid folder name. Folder must be within the custom embedder root directory."
201
+
202
+ os.makedirs(target_folder, exist_ok=True)
203
+
204
+ if bin_file:
205
+ shutil.copy(bin_file, os.path.join(target_folder, os.path.basename(bin_file)))
206
+ if config_file:
207
+ shutil.copy(
208
+ config_file, os.path.join(target_folder, os.path.basename(config_file))
209
+ )
210
+
211
+ return f"Files moved to folder {target_folder}"
212
+
213
+
214
def refresh_embedders_folders():
    """Return the current list of custom embedder folders."""
    found = []
    for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative):
        for dirname in dirnames:
            found.append(os.path.join(dirpath, dirname))
    return found
221
+
222
+
223
+ # Export
224
+ def get_pth_list():
225
+ return [
226
+ os.path.relpath(os.path.join(dirpath, filename), now_dir)
227
+ for dirpath, _, filenames in os.walk(models_path)
228
+ for filename in filenames
229
+ if filename.endswith(".pth")
230
+ ]
231
+
232
+
233
def get_index_list():
    """List exportable .index files under logs/ (skipping "trained" indexes)."""
    paths = []
    for dirpath, _, filenames in os.walk(models_path):
        for filename in filenames:
            if filename.endswith(".index") and "trained" not in filename:
                paths.append(os.path.relpath(os.path.join(dirpath, filename), now_dir))
    return paths
240
+
241
+
242
def refresh_pth_and_index_list():
    """Gradio update payloads for the exportable .pth and .index dropdowns."""
    pth_update = {"choices": sorted(get_pth_list()), "__type__": "update"}
    index_update = {"choices": sorted(get_index_list()), "__type__": "update"}
    return pth_update, index_update
247
+
248
+
249
+ # Export Pth and Index Files
250
+ def export_pth(pth_path):
251
+ allowed_paths = get_pth_list()
252
+ normalized_allowed_paths = [
253
+ os.path.abspath(os.path.join(now_dir, p)) for p in allowed_paths
254
+ ]
255
+ normalized_pth_path = os.path.abspath(os.path.join(now_dir, pth_path))
256
+
257
+ if normalized_pth_path in normalized_allowed_paths:
258
+ return pth_path
259
+ else:
260
+ print(f"Attempted to export invalid pth path: {pth_path}")
261
+ return None
262
+
263
+
264
def export_index(index_path):
    """Return index_path only if it matches a known exportable .index file.

    Guards the file-download endpoint against arbitrary path requests.
    """
    valid_paths = {
        os.path.abspath(os.path.join(now_dir, p)) for p in get_index_list()
    }
    candidate = os.path.abspath(os.path.join(now_dir, index_path))
    if candidate not in valid_paths:
        print(f"Attempted to export invalid index path: {index_path}")
        return None
    return index_path
276
+
277
+
278
+ # Upload to Google Drive
279
+ def upload_to_google_drive(pth_path, index_path):
280
+ def upload_file(file_path):
281
+ if file_path:
282
+ try:
283
+ gr.Info(f"Uploading {pth_path} to Google Drive...")
284
+ google_drive_folder = "/content/drive/MyDrive/ApplioExported"
285
+ if not os.path.exists(google_drive_folder):
286
+ os.makedirs(google_drive_folder)
287
+ google_drive_file_path = os.path.join(
288
+ google_drive_folder, os.path.basename(file_path)
289
+ )
290
+ if os.path.exists(google_drive_file_path):
291
+ os.remove(google_drive_file_path)
292
+ shutil.copy2(file_path, google_drive_file_path)
293
+ gr.Info("File uploaded successfully.")
294
+ except Exception as error:
295
+ print(f"An error occurred uploading to Google Drive: {error}")
296
+ gr.Info("Error uploading to Google Drive")
297
+
298
+ upload_file(pth_path)
299
+ upload_file(index_path)
300
+
301
+
302
def auto_enable_checkpointing():
    """Decide the default for the "Checkpointing" training checkbox.

    Returns ``True`` when GPU 0 has less than 6 GB of VRAM (gradient
    checkpointing trades speed for memory on small cards), ``False``
    otherwise or when the VRAM query fails (e.g. no GPU present).
    """
    try:
        return max_vram_gpu(0) < 6
    except Exception:
        # Bug fix: was a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt. No GPU / query failure simply
        # means "don't enable checkpointing by default".
        return False
307
+
308
+
309
# Train Tab
def train_tab():
    """Build the "Train" tab UI and wire all of its event handlers.

    Lays out five sections — model settings, preprocessing, feature
    extraction, training and model export — and connects the widgets to
    the corresponding ``run_*_script`` entry points. All widgets are
    created inside the caller's active ``gr.Blocks`` context; the
    function returns nothing.

    Fixes applied in this revision:
    - ``include_mutes`` passed ``value`` twice (positional ``2`` and
      keyword ``value=True``); the boolean keyword was removed.
    - the ``embedder_model.change`` handler was registered twice; one
      registration suffices.
    """
    # Model settings section
    with gr.Accordion(i18n("Model Settings")):
        with gr.Row():
            with gr.Column():
                model_name = gr.Dropdown(
                    label=i18n("Model Name"),
                    info=i18n("Name of the new model."),
                    choices=get_models_list(),
                    value="my-project",
                    interactive=True,
                    allow_custom_value=True,
                )
                architecture = gr.Radio(
                    label=i18n("Architecture"),
                    info=i18n(
                        "Choose the model architecture:\n- **RVC (V2)**: Default option, compatible with all clients.\n- **Applio**: Advanced quality with improved vocoders and higher sample rates, Applio-only."
                    ),
                    choices=["RVC", "Applio"],
                    value="RVC",
                    interactive=True,
                    visible=False,  # to be visible once pretraineds are ready
                )
            with gr.Column():
                sampling_rate = gr.Radio(
                    label=i18n("Sampling Rate"),
                    info=i18n("The sampling rate of the audio files."),
                    choices=["32000", "40000", "48000"],
                    value="40000",
                    interactive=True,
                )
                vocoder = gr.Radio(
                    label=i18n("Vocoder"),
                    info=i18n(
                        "Choose the vocoder for audio synthesis:\n- **HiFi-GAN**: Default option, compatible with all clients.\n- **MRF HiFi-GAN**: Higher fidelity, Applio-only.\n- **RefineGAN**: Superior audio quality, Applio-only."
                    ),
                    choices=["HiFi-GAN", "MRF HiFi-GAN", "RefineGAN"],
                    value="HiFi-GAN",
                    interactive=False,
                    visible=False,  # to be visible once pretraineds are ready
                )
        with gr.Accordion(
            i18n("Advanced Settings"),
            open=False,
        ):
            with gr.Row():
                with gr.Column():
                    cpu_cores = gr.Slider(
                        1,
                        min(cpu_count(), 32),  # max 32 parallel processes
                        min(cpu_count(), 32),
                        step=1,
                        label=i18n("CPU Cores"),
                        info=i18n(
                            "The number of CPU cores to use in the extraction process. The default setting are your cpu cores, which is recommended for most cases."
                        ),
                        interactive=True,
                    )

                with gr.Column():
                    gpu = gr.Textbox(
                        label=i18n("GPU Number"),
                        info=i18n(
                            "Specify the number of GPUs you wish to utilize for extracting by entering them separated by hyphens (-)."
                        ),
                        placeholder=i18n("0 to ∞ separated by -"),
                        value=str(get_number_of_gpus()),
                        interactive=True,
                    )
                    # Read-only diagnostic display; intentionally unbound.
                    gr.Textbox(
                        label=i18n("GPU Information"),
                        info=i18n("The GPU information will be displayed here."),
                        value=get_gpu_info(),
                        interactive=False,
                    )
    # Preprocess section
    with gr.Accordion(i18n("Preprocess")):
        dataset_path = gr.Dropdown(
            label=i18n("Dataset Path"),
            info=i18n("Path to the dataset folder."),
            # placeholder=i18n("Enter dataset path"),
            choices=get_datasets_list(),
            allow_custom_value=True,
            interactive=True,
        )
        dataset_creator = gr.Checkbox(
            label=i18n("Dataset Creator"),
            value=False,
            interactive=True,
            visible=True,
        )
        with gr.Column(visible=False) as dataset_creator_settings:
            with gr.Accordion(i18n("Dataset Creator")):
                dataset_name = gr.Textbox(
                    label=i18n("Dataset Name"),
                    info=i18n("Name of the new dataset."),
                    placeholder=i18n("Enter dataset name"),
                    interactive=True,
                )
                upload_audio_dataset = gr.File(
                    label=i18n("Upload Audio Dataset"),
                    type="filepath",
                    interactive=True,
                )
        refresh = gr.Button(i18n("Refresh"))

        with gr.Accordion(i18n("Advanced Settings"), open=False):
            cut_preprocess = gr.Radio(
                label=i18n("Audio cutting"),
                info=i18n(
                    "Audio file slicing method: Select 'Skip' if the files are already pre-sliced, 'Simple' if excessive silence has already been removed from the files, or 'Automatic' for automatic silence detection and slicing around it."
                ),
                choices=["Skip", "Simple", "Automatic"],
                value="Automatic",
                interactive=True,
            )
            with gr.Row():
                chunk_len = gr.Slider(
                    0.5,
                    5.0,
                    3.0,
                    step=0.1,
                    label=i18n("Chunk length (sec)"),
                    info=i18n("Length of the audio slice for 'Simple' method."),
                    interactive=True,
                )
                overlap_len = gr.Slider(
                    0.0,
                    0.4,
                    0.3,
                    step=0.1,
                    label=i18n("Overlap length (sec)"),
                    info=i18n(
                        "Length of the overlap between slices for 'Simple' method."
                    ),
                    interactive=True,
                )

            with gr.Row():
                process_effects = gr.Checkbox(
                    label=i18n("Noise filter"),
                    info=i18n(
                        "It's recommended to deactivate this option if your dataset has already been processed."
                    ),
                    value=True,
                    interactive=True,
                    visible=True,
                )

                normalization_mode = gr.Radio(
                    label=i18n("Normalization mode"),
                    info=i18n(
                        "Audio normalization: Select 'none' if the files are already normalized, 'pre' to normalize the entire input file at once, or 'post' to normalize each slice individually."
                    ),
                    choices=["none", "pre", "post"],
                    value="none",
                    interactive=True,
                    visible=True,
                )

                noise_reduction = gr.Checkbox(
                    label=i18n("Noise Reduction"),
                    info=i18n(
                        "It's recommended keep deactivate this option if your dataset has already been processed."
                    ),
                    value=False,
                    interactive=True,
                    visible=True,
                )
                clean_strength = gr.Slider(
                    minimum=0,
                    maximum=1,
                    label=i18n("Noise Reduction Strength"),
                    info=i18n(
                        "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed."
                    ),
                    visible=False,
                    value=0.5,
                    interactive=True,
                )
        preprocess_output_info = gr.Textbox(
            label=i18n("Output Information"),
            info=i18n("The output information will be displayed here."),
            value="",
            max_lines=8,
            interactive=False,
        )

        with gr.Row():
            preprocess_button = gr.Button(i18n("Preprocess Dataset"))
            preprocess_button.click(
                fn=run_preprocess_script,
                inputs=[
                    model_name,
                    dataset_path,
                    sampling_rate,
                    cpu_cores,
                    cut_preprocess,
                    process_effects,
                    noise_reduction,
                    clean_strength,
                    chunk_len,
                    overlap_len,
                    normalization_mode,
                ],
                outputs=[preprocess_output_info],
            )

    # Extract section
    with gr.Accordion(i18n("Extract")):
        with gr.Row():
            f0_method = gr.Radio(
                label=i18n("Pitch extraction algorithm"),
                info=i18n(
                    "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
                ),
                choices=["crepe", "crepe-tiny", "rmvpe", "fcpe"],
                value="rmvpe",
                interactive=True,
            )

            embedder_model = gr.Radio(
                label=i18n("Embedder Model"),
                info=i18n("Model used for learning speaker embedding."),
                choices=[
                    "contentvec",
                    "spin",
                    "spin-v2",
                    "chinese-hubert-base",
                    "japanese-hubert-base",
                    "korean-hubert-base",
                    "custom",
                ],
                value="contentvec",
                interactive=True,
            )
        # Bug fix: the slider previously received `value` twice — the
        # positional default 2 below plus a stray `value=True` keyword.
        include_mutes = gr.Slider(
            0,
            10,
            2,
            step=1,
            label=i18n("Silent training files"),
            info=i18n(
                "Adding several silent files to the training set enables the model to handle pure silence in inferred audio files. Select 0 if your dataset is clean and already contains segments of pure silence."
            ),
            interactive=True,
        )
        with gr.Row(visible=False) as embedder_custom:
            with gr.Accordion(i18n("Custom Embedder"), open=True):
                with gr.Row():
                    embedder_model_custom = gr.Dropdown(
                        label=i18n("Select Custom Embedder"),
                        choices=refresh_embedders_folders(),
                        interactive=True,
                        allow_custom_value=True,
                    )
                    refresh_embedders_button = gr.Button(i18n("Refresh embedders"))
                folder_name_input = gr.Textbox(
                    label=i18n("Folder Name"), interactive=True
                )
                with gr.Row():
                    bin_file_upload = gr.File(
                        label=i18n("Upload .bin"), type="filepath", interactive=True
                    )
                    config_file_upload = gr.File(
                        label=i18n("Upload .json"), type="filepath", interactive=True
                    )
                move_files_button = gr.Button(
                    i18n("Move files to custom embedder folder")
                )

        extract_output_info = gr.Textbox(
            label=i18n("Output Information"),
            info=i18n("The output information will be displayed here."),
            value="",
            max_lines=8,
            interactive=False,
        )
        extract_button = gr.Button(i18n("Extract Features"))
        extract_button.click(
            fn=run_extract_script,
            inputs=[
                model_name,
                f0_method,
                cpu_cores,
                gpu,
                sampling_rate,
                embedder_model,
                embedder_model_custom,
                include_mutes,
            ],
            outputs=[extract_output_info],
        )

    # Training section
    with gr.Accordion(i18n("Training")):
        with gr.Row():
            batch_size = gr.Slider(
                1,
                64,
                4,
                step=1,
                label=i18n("Batch Size"),
                info=i18n(
                    "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results."
                ),
                interactive=True,
            )
            save_every_epoch = gr.Slider(
                1,
                100,
                10,
                step=1,
                label=i18n("Save Every Epoch"),
                info=i18n("Determine at how many epochs the model will saved at."),
                interactive=True,
            )
            total_epoch = gr.Slider(
                1,
                10000,
                500,
                step=1,
                label=i18n("Total Epoch"),
                info=i18n(
                    "Specifies the overall quantity of epochs for the model training process."
                ),
                interactive=True,
            )
        with gr.Accordion(i18n("Advanced Settings"), open=False):
            with gr.Row():
                with gr.Column():
                    save_only_latest = gr.Checkbox(
                        label=i18n("Save Only Latest"),
                        info=i18n(
                            "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space."
                        ),
                        value=True,
                        interactive=True,
                    )
                    save_every_weights = gr.Checkbox(
                        label=i18n("Save Every Weights"),
                        info=i18n(
                            "This setting enables you to save the weights of the model at the conclusion of each epoch."
                        ),
                        value=True,
                        interactive=True,
                    )
                    pretrained = gr.Checkbox(
                        label=i18n("Pretrained"),
                        info=i18n(
                            "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality."
                        ),
                        value=True,
                        interactive=True,
                    )
                with gr.Column():
                    cleanup = gr.Checkbox(
                        label=i18n("Fresh Training"),
                        info=i18n(
                            "Enable this setting only if you are training a new model from scratch or restarting the training. Deletes all previously generated weights and tensorboard logs."
                        ),
                        value=False,
                        interactive=True,
                    )
                    cache_dataset_in_gpu = gr.Checkbox(
                        label=i18n("Cache Dataset in GPU"),
                        info=i18n(
                            "Cache the dataset in GPU memory to speed up the training process."
                        ),
                        value=False,
                        interactive=True,
                    )
                    checkpointing = gr.Checkbox(
                        label=i18n("Checkpointing"),
                        info=i18n(
                            "Enables memory-efficient training. This reduces VRAM usage at the cost of slower training speed. It is useful for GPUs with limited memory (e.g., <6GB VRAM) or when training with a batch size larger than what your GPU can normally accommodate."
                        ),
                        value=auto_enable_checkpointing(),
                        interactive=True,
                    )
            with gr.Row():
                custom_pretrained = gr.Checkbox(
                    label=i18n("Custom Pretrained"),
                    info=i18n(
                        "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance."
                    ),
                    value=False,
                    interactive=True,
                )
                overtraining_detector = gr.Checkbox(
                    label=i18n("Overtraining Detector"),
                    info=i18n(
                        "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data."
                    ),
                    value=False,
                    interactive=True,
                )
            with gr.Row():
                with gr.Column(visible=False) as pretrained_custom_settings:
                    with gr.Accordion(i18n("Pretrained Custom Settings")):
                        upload_pretrained = gr.File(
                            label=i18n("Upload Pretrained Model"),
                            type="filepath",
                            interactive=True,
                        )
                        refresh_custom_pretaineds_button = gr.Button(
                            i18n("Refresh Custom Pretraineds")
                        )
                        g_pretrained_path = gr.Dropdown(
                            label=i18n("Custom Pretrained G"),
                            info=i18n(
                                "Select the custom pretrained model for the generator."
                            ),
                            choices=sorted(pretraineds_list_g),
                            interactive=True,
                            allow_custom_value=True,
                        )
                        d_pretrained_path = gr.Dropdown(
                            label=i18n("Custom Pretrained D"),
                            info=i18n(
                                "Select the custom pretrained model for the discriminator."
                            ),
                            choices=sorted(pretraineds_list_d),
                            interactive=True,
                            allow_custom_value=True,
                        )

                with gr.Column(visible=False) as overtraining_settings:
                    with gr.Accordion(i18n("Overtraining Detector Settings")):
                        overtraining_threshold = gr.Slider(
                            1,
                            100,
                            50,
                            step=1,
                            label=i18n("Overtraining Threshold"),
                            info=i18n(
                                "Set the maximum number of epochs you want your model to stop training if no improvement is detected."
                            ),
                            interactive=True,
                        )
            index_algorithm = gr.Radio(
                label=i18n("Index Algorithm"),
                info=i18n(
                    "KMeans is a clustering algorithm that divides the dataset into K clusters. This setting is particularly useful for large datasets."
                ),
                choices=["Auto", "Faiss", "KMeans"],
                value="Auto",
                interactive=True,
            )

        def enforce_terms(terms_accepted, *args):
            # Gate training behind explicit terms acceptance; the message is
            # routed to the output textbox so the user sees why nothing ran.
            if not terms_accepted:
                message = "You must agree to the Terms of Use to proceed."
                gr.Info(message)
                return message
            return run_train_script(*args)

        terms_checkbox = gr.Checkbox(
            label=i18n("I agree to the terms of use"),
            info=i18n(
                "Please ensure compliance with the terms and conditions detailed in [this document](https://github.com/IAHispano/Applio/blob/main/TERMS_OF_USE.md) before proceeding with your training."
            ),
            value=False,
            interactive=True,
        )
        train_output_info = gr.Textbox(
            label=i18n("Output Information"),
            info=i18n("The output information will be displayed here."),
            value="",
            max_lines=8,
            interactive=False,
        )

        with gr.Row():
            train_button = gr.Button(i18n("Start Training"))
            train_button.click(
                fn=enforce_terms,
                inputs=[
                    terms_checkbox,
                    model_name,
                    save_every_epoch,
                    save_only_latest,
                    save_every_weights,
                    total_epoch,
                    sampling_rate,
                    batch_size,
                    gpu,
                    overtraining_detector,
                    overtraining_threshold,
                    pretrained,
                    cleanup,
                    index_algorithm,
                    cache_dataset_in_gpu,
                    custom_pretrained,
                    g_pretrained_path,
                    d_pretrained_path,
                    vocoder,
                    checkpointing,
                ],
                outputs=[train_output_info],
            )

            stop_train_button = gr.Button(i18n("Stop Training"), visible=False)
            stop_train_button.click(
                fn=stop_train,
                inputs=[model_name],
                outputs=[],
            )

            index_button = gr.Button(i18n("Generate Index"))
            index_button.click(
                fn=run_index_script,
                inputs=[model_name, index_algorithm],
                outputs=[train_output_info],
            )

    # Export Model section
    with gr.Accordion(i18n("Export Model"), open=False):
        if os.name != "nt":
            gr.Markdown(
                i18n(
                    "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive."
                )
            )
        with gr.Row():
            with gr.Column():
                pth_file_export = gr.File(
                    label=i18n("Exported Pth file"),
                    type="filepath",
                    value=None,
                    interactive=False,
                )
                pth_dropdown_export = gr.Dropdown(
                    label=i18n("Pth file"),
                    info=i18n("Select the pth file to be exported"),
                    choices=get_pth_list(),
                    value=None,
                    interactive=True,
                    allow_custom_value=True,
                )
            with gr.Column():
                index_file_export = gr.File(
                    label=i18n("Exported Index File"),
                    type="filepath",
                    value=None,
                    interactive=False,
                )
                index_dropdown_export = gr.Dropdown(
                    label=i18n("Index File"),
                    info=i18n("Select the index file to be exported"),
                    choices=get_index_list(),
                    value=None,
                    interactive=True,
                    allow_custom_value=True,
                )
        with gr.Row():
            with gr.Column():
                refresh_export = gr.Button(i18n("Refresh"))
                if os.name != "nt":
                    # The Drive upload only makes sense on Colab (non-Windows).
                    upload_exported = gr.Button(i18n("Upload"))
                    upload_exported.click(
                        fn=upload_to_google_drive,
                        inputs=[pth_dropdown_export, index_dropdown_export],
                        outputs=[],
                    )

    # --- Event-handler helpers -------------------------------------------

    def toggle_visible(checkbox):
        # Generic show/hide payload driven by a checkbox value.
        return {"visible": checkbox, "__type__": "update"}

    def toggle_pretrained(pretrained, custom_pretrained):
        # Show the custom-pretrained checkbox only while "Pretrained" is on;
        # the settings column additionally requires custom_pretrained.
        if not custom_pretrained:
            return {"visible": pretrained, "__type__": "update"}, {
                "visible": False,
                "__type__": "update",
            }
        else:
            return {"visible": pretrained, "__type__": "update"}, {
                "visible": pretrained,
                "__type__": "update",
            }

    def enable_stop_train_button():
        # Hide "Start Training", show "Stop Training".
        return {"visible": False, "__type__": "update"}, {
            "visible": True,
            "__type__": "update",
        }

    def disable_stop_train_button():
        # Restore "Start Training" once output arrives (training finished).
        return {"visible": True, "__type__": "update"}, {
            "visible": False,
            "__type__": "update",
        }

    def download_prerequisites():
        # NOTE(review): defined but not wired to any widget in this file —
        # presumably invoked elsewhere or kept for future use; verify.
        gr.Info(
            "Checking for prerequisites with pitch guidance... Missing files will be downloaded. If you already have them, this step will be skipped."
        )
        run_prerequisites_script(
            pretraineds_hifigan=True,
            models=False,
            exe=False,
        )
        gr.Info(
            "Prerequisites check complete. Missing files were downloaded, and you may now start preprocessing."
        )

    def toggle_visible_embedder_custom(embedder_model):
        # The custom-embedder panel only applies to the "custom" choice.
        if embedder_model == "custom":
            return {"visible": True, "__type__": "update"}
        return {"visible": False, "__type__": "update"}

    def toggle_architecture(architecture):
        # "Applio" unlocks vocoder choice; "RVC" pins 40k/HiFi-GAN defaults.
        if architecture == "Applio":
            return {
                "choices": ["32000", "40000", "48000"],
                "__type__": "update",
            }, {
                "interactive": True,
                "__type__": "update",
            }
        else:
            return {
                "choices": ["32000", "40000", "48000"],
                "__type__": "update",
                "value": "40000",
            }, {"interactive": False, "__type__": "update", "value": "HiFi-GAN"}

    def update_slider_visibility(noise_reduction):
        return gr.update(visible=noise_reduction)

    # --- Wiring -----------------------------------------------------------

    noise_reduction.change(
        fn=update_slider_visibility,
        inputs=noise_reduction,
        outputs=clean_strength,
    )
    architecture.change(
        fn=toggle_architecture,
        inputs=[architecture],
        outputs=[sampling_rate, vocoder],
    )
    refresh.click(
        fn=refresh_models_and_datasets,
        inputs=[],
        outputs=[model_name, dataset_path],
    )
    dataset_creator.change(
        fn=toggle_visible,
        inputs=[dataset_creator],
        outputs=[dataset_creator_settings],
    )
    upload_audio_dataset.upload(
        fn=save_drop_dataset_audio,
        inputs=[upload_audio_dataset, dataset_name],
        outputs=[upload_audio_dataset, dataset_path],
    )
    # Bug fix: this handler was registered twice verbatim; once is enough.
    embedder_model.change(
        fn=toggle_visible_embedder_custom,
        inputs=[embedder_model],
        outputs=[embedder_custom],
    )
    move_files_button.click(
        fn=create_folder_and_move_files,
        inputs=[folder_name_input, bin_file_upload, config_file_upload],
        outputs=[],
    )
    refresh_embedders_button.click(
        fn=refresh_embedders_folders, inputs=[], outputs=[embedder_model_custom]
    )
    pretrained.change(
        fn=toggle_pretrained,
        inputs=[pretrained, custom_pretrained],
        outputs=[custom_pretrained, pretrained_custom_settings],
    )
    custom_pretrained.change(
        fn=toggle_visible,
        inputs=[custom_pretrained],
        outputs=[pretrained_custom_settings],
    )
    refresh_custom_pretaineds_button.click(
        fn=refresh_custom_pretraineds,
        inputs=[],
        outputs=[g_pretrained_path, d_pretrained_path],
    )
    upload_pretrained.upload(
        fn=save_drop_model,
        inputs=[upload_pretrained],
        outputs=[upload_pretrained],
    )
    overtraining_detector.change(
        fn=toggle_visible,
        inputs=[overtraining_detector],
        outputs=[overtraining_settings],
    )
    train_button.click(
        fn=enable_stop_train_button,
        inputs=[],
        outputs=[train_button, stop_train_button],
    )
    train_output_info.change(
        fn=disable_stop_train_button,
        inputs=[],
        outputs=[train_button, stop_train_button],
    )
    pth_dropdown_export.change(
        fn=export_pth,
        inputs=[pth_dropdown_export],
        outputs=[pth_file_export],
    )
    index_dropdown_export.change(
        fn=export_index,
        inputs=[index_dropdown_export],
        outputs=[index_file_export],
    )
    refresh_export.click(
        fn=refresh_pth_and_index_list,
        inputs=[],
        outputs=[pth_dropdown_export, index_dropdown_export],
    )
tabs/tts/tts.py ADDED
@@ -0,0 +1,462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import random
4
+ import sys
5
+
6
+ import gradio as gr
7
+
8
+ now_dir = os.getcwd()
9
+ sys.path.append(now_dir)
10
+
11
+ from assets.i18n.i18n import I18nAuto
12
+ from core import run_tts_script
13
+ from tabs.settings.sections.filter import get_filter_trigger, load_config_filter
14
+ from tabs.inference.inference import (
15
+ change_choices,
16
+ create_folder_and_move_files,
17
+ get_files,
18
+ get_speakers_id,
19
+ match_index,
20
+ refresh_embedders_folders,
21
+ extract_model_and_epoch,
22
+ default_weight,
23
+ filter_dropdowns,
24
+ update_filter_visibility,
25
+ )
26
+
27
+ i18n = I18nAuto()
28
+
29
+
30
# Load the catalogue of EdgeTTS voices shipped with the project; each entry
# is a JSON object describing one voice.
with open(
    os.path.join("rvc", "lib", "tools", "tts_voices.json"), "r", encoding="utf-8"
) as file:
    tts_voices_data = json.load(file)

# Voice identifiers offered in the TTS dropdown. NOTE(review): entries
# missing a "ShortName" key become "" — presumably that never happens in
# tts_voices.json; verify.
short_names = [voice.get("ShortName", "") for voice in tts_voices_data]
36
+
37
+
38
def process_input(file_path):
    """Validate an uploaded text file and echo its path back to the UI.

    Opens the file and reads it once purely to confirm it decodes as
    UTF-8. Returns ``(file_path, file_path)`` on success (feeding both the
    file widget and the path textbox) or ``(None, None)`` when the file is
    not valid UTF-8, with a toast explaining the problem either way.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            file.read()
        # Fix: the messages were f-strings with no placeholders (F541).
        gr.Info("The file has been loaded!")
        return file_path, file_path
    except UnicodeDecodeError:
        gr.Info("The file has to be in UTF-8 encoding.")
        return None, None
47
+
48
+
49
+ # TTS tab
50
+ def tts_tab():
51
+ trigger = get_filter_trigger()
52
+ with gr.Column():
53
+ with gr.Row():
54
+ model_file = gr.Dropdown(
55
+ label=i18n("Voice Model"),
56
+ info=i18n("Select the voice model to use for the conversion."),
57
+ choices=sorted(get_files("model"), key=extract_model_and_epoch),
58
+ interactive=True,
59
+ value=default_weight,
60
+ allow_custom_value=True,
61
+ )
62
+ filter_box_tts = gr.Textbox(
63
+ label=i18n("Filter"),
64
+ info=i18n("Path must contain:"),
65
+ placeholder=i18n("Type to filter..."),
66
+ interactive=True,
67
+ scale=0.1,
68
+ visible=load_config_filter(),
69
+ elem_id="filter_box_tts",
70
+ )
71
+ index_file = gr.Dropdown(
72
+ label=i18n("Index File"),
73
+ info=i18n("Select the index file to use for the conversion."),
74
+ choices=sorted(get_files("index")),
75
+ value=match_index(default_weight),
76
+ interactive=True,
77
+ allow_custom_value=True,
78
+ )
79
+ filter_box_tts.blur(
80
+ fn=filter_dropdowns,
81
+ inputs=[filter_box_tts],
82
+ outputs=[model_file, index_file],
83
+ )
84
+ trigger.change(
85
+ fn=update_filter_visibility,
86
+ inputs=[trigger],
87
+ outputs=[filter_box_tts, model_file, index_file],
88
+ show_progress=False,
89
+ )
90
+ with gr.Row():
91
+ unload_button = gr.Button(i18n("Unload Voice"))
92
+ refresh_button = gr.Button(i18n("Refresh"))
93
+
94
+ unload_button.click(
95
+ fn=lambda: (
96
+ {"value": "", "__type__": "update"},
97
+ {"value": "", "__type__": "update"},
98
+ ),
99
+ inputs=[],
100
+ outputs=[model_file, index_file],
101
+ )
102
+
103
+ model_file.select(
104
+ fn=lambda model_file_value: match_index(model_file_value),
105
+ inputs=[model_file],
106
+ outputs=[index_file],
107
+ )
108
+
109
+ gr.Markdown(
110
+ i18n(
111
+ f"Applio is a Speech-to-Speech conversion software, utilizing EdgeTTS as middleware for running the Text-to-Speech (TTS) component. Read more about it [here!](https://docs.applio.org/applio/getting-started/tts)"
112
+ )
113
+ )
114
+ tts_voice = gr.Dropdown(
115
+ label=i18n("TTS Voices"),
116
+ info=i18n("Select the TTS voice to use for the conversion."),
117
+ choices=short_names,
118
+ interactive=True,
119
+ value=random.choice(short_names),
120
+ )
121
+
122
+ tts_rate = gr.Slider(
123
+ minimum=-100,
124
+ maximum=100,
125
+ step=1,
126
+ label=i18n("TTS Speed"),
127
+ info=i18n("Increase or decrease TTS speed."),
128
+ value=0,
129
+ interactive=True,
130
+ )
131
+
132
+ with gr.Tabs():
133
+ with gr.Tab(label=i18n("Text to Speech")):
134
+ tts_text = gr.Textbox(
135
+ label=i18n("Text to Synthesize"),
136
+ info=i18n("Enter the text to synthesize."),
137
+ placeholder=i18n("Enter text to synthesize"),
138
+ lines=3,
139
+ )
140
+ with gr.Tab(label=i18n("File to Speech")):
141
+ txt_file = gr.File(
142
+ label=i18n("Upload a .txt file"),
143
+ type="filepath",
144
+ )
145
+ input_tts_path = gr.Textbox(
146
+ label=i18n("Input path for text file"),
147
+ placeholder=i18n(
148
+ "The path to the text file that contains content for text to speech."
149
+ ),
150
+ value="",
151
+ interactive=True,
152
+ )
153
+
154
+ with gr.Accordion(i18n("Advanced Settings"), open=False):
155
+ with gr.Column():
156
+ output_tts_path = gr.Textbox(
157
+ label=i18n("Output Path for TTS Audio"),
158
+ placeholder=i18n("Enter output path"),
159
+ value=os.path.join(now_dir, "assets", "audios", "tts_output.wav"),
160
+ interactive=True,
161
+ )
162
+ output_rvc_path = gr.Textbox(
163
+ label=i18n("Output Path for RVC Audio"),
164
+ placeholder=i18n("Enter output path"),
165
+ value=os.path.join(now_dir, "assets", "audios", "tts_rvc_output.wav"),
166
+ interactive=True,
167
+ )
168
+ export_format = gr.Radio(
169
+ label=i18n("Export Format"),
170
+ info=i18n("Select the format to export the audio."),
171
+ choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
172
+ value="WAV",
173
+ interactive=True,
174
+ )
175
+ sid = gr.Dropdown(
176
+ label=i18n("Speaker ID"),
177
+ info=i18n("Select the speaker ID to use for the conversion."),
178
+ choices=get_speakers_id(model_file.value),
179
+ value=0,
180
+ interactive=True,
181
+ )
182
+ split_audio = gr.Checkbox(
183
+ label=i18n("Split Audio"),
184
+ info=i18n(
185
+ "Split the audio into chunks for inference to obtain better results in some cases."
186
+ ),
187
+ visible=True,
188
+ value=False,
189
+ interactive=True,
190
+ )
191
+ autotune = gr.Checkbox(
192
+ label=i18n("Autotune"),
193
+ info=i18n(
194
+ "Apply a soft autotune to your inferences, recommended for singing conversions."
195
+ ),
196
+ visible=True,
197
+ value=False,
198
+ interactive=True,
199
+ )
200
+ autotune_strength = gr.Slider(
201
+ minimum=0,
202
+ maximum=1,
203
+ label=i18n("Autotune Strength"),
204
+ info=i18n(
205
+ "Set the autotune strength - the more you increase it the more it will snap to the chromatic grid."
206
+ ),
207
+ visible=False,
208
+ value=1,
209
+ interactive=True,
210
+ )
211
+ proposed_pitch = gr.Checkbox(
212
+ label=i18n("Proposed Pitch"),
213
+ info=i18n(
214
+ "Adjust the input audio pitch to match the voice model range."
215
+ ),
216
+ visible=True,
217
+ value=False,
218
+ interactive=True,
219
+ )
220
+ proposed_pitch_threshold = gr.Slider(
221
+ minimum=50.0,
222
+ maximum=1200.0,
223
+ label=i18n("Proposed Pitch Threshold"),
224
+ info=i18n(
225
+ "Male voice models typically use 155.0 and female voice models typically use 255.0."
226
+ ),
227
+ visible=False,
228
+ value=155.0,
229
+ interactive=True,
230
+ )
231
+ clean_audio = gr.Checkbox(
232
+ label=i18n("Clean Audio"),
233
+ info=i18n(
234
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios."
235
+ ),
236
+ visible=True,
237
+ value=False,
238
+ interactive=True,
239
+ )
240
+ clean_strength = gr.Slider(
241
+ minimum=0,
242
+ maximum=1,
243
+ label=i18n("Clean Strength"),
244
+ info=i18n(
245
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed."
246
+ ),
247
+ visible=True,
248
+ value=0.5,
249
+ interactive=True,
250
+ )
251
+ pitch = gr.Slider(
252
+ minimum=-24,
253
+ maximum=24,
254
+ step=1,
255
+ label=i18n("Pitch"),
256
+ info=i18n(
257
+ "Set the pitch of the audio, the higher the value, the higher the pitch."
258
+ ),
259
+ value=0,
260
+ interactive=True,
261
+ )
262
+ index_rate = gr.Slider(
263
+ minimum=0,
264
+ maximum=1,
265
+ label=i18n("Search Feature Ratio"),
266
+ info=i18n(
267
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio."
268
+ ),
269
+ value=0.75,
270
+ interactive=True,
271
+ )
272
+ rms_mix_rate = gr.Slider(
273
+ minimum=0,
274
+ maximum=1,
275
+ label=i18n("Volume Envelope"),
276
+ info=i18n(
277
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed."
278
+ ),
279
+ value=1,
280
+ interactive=True,
281
+ )
282
+ protect = gr.Slider(
283
+ minimum=0,
284
+ maximum=0.5,
285
+ label=i18n("Protect Voiceless Consonants"),
286
+ info=i18n(
287
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect."
288
+ ),
289
+ value=0.5,
290
+ interactive=True,
291
+ )
292
+ f0_method = gr.Radio(
293
+ label=i18n("Pitch extraction algorithm"),
294
+ info=i18n(
295
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
296
+ ),
297
+ choices=[
298
+ "crepe",
299
+ "crepe-tiny",
300
+ "rmvpe",
301
+ "fcpe",
302
+ "swift",
303
+ ],
304
+ value="rmvpe",
305
+ interactive=True,
306
+ )
307
+ embedder_model = gr.Radio(
308
+ label=i18n("Embedder Model"),
309
+ info=i18n("Model used for learning speaker embedding."),
310
+ choices=[
311
+ "contentvec",
312
+ "spin",
313
+ "spin-v2",
314
+ "chinese-hubert-base",
315
+ "japanese-hubert-base",
316
+ "korean-hubert-base",
317
+ "custom",
318
+ ],
319
+ value="contentvec",
320
+ interactive=True,
321
+ )
322
+ with gr.Column(visible=False) as embedder_custom:
323
+ with gr.Accordion(i18n("Custom Embedder"), open=True):
324
+ with gr.Row():
325
+ embedder_model_custom = gr.Dropdown(
326
+ label=i18n("Select Custom Embedder"),
327
+ choices=refresh_embedders_folders(),
328
+ interactive=True,
329
+ allow_custom_value=True,
330
+ )
331
+ refresh_embedders_button = gr.Button(i18n("Refresh embedders"))
332
+ folder_name_input = gr.Textbox(
333
+ label=i18n("Folder Name"), interactive=True
334
+ )
335
+ with gr.Row():
336
+ bin_file_upload = gr.File(
337
+ label=i18n("Upload .bin"),
338
+ type="filepath",
339
+ interactive=True,
340
+ )
341
+ config_file_upload = gr.File(
342
+ label=i18n("Upload .json"),
343
+ type="filepath",
344
+ interactive=True,
345
+ )
346
+ move_files_button = gr.Button(
347
+ i18n("Move files to custom embedder folder")
348
+ )
349
+ f0_file = gr.File(
350
+ label=i18n(
351
+ "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls."
352
+ ),
353
+ visible=True,
354
+ )
355
+
356
def enforce_terms(terms_accepted, *args):
    """Gate the TTS conversion behind the terms-of-use checkbox.

    When the box is ticked, forwards every remaining positional argument
    straight to ``run_tts_script``; otherwise surfaces a notice via
    ``gr.Info`` and returns the message plus a ``None`` audio slot so the
    two output components still receive values.
    """
    if terms_accepted:
        # Happy path: hand the full argument list to the conversion script.
        return run_tts_script(*args)
    warning = "You must agree to the Terms of Use to proceed."
    gr.Info(warning)
    return warning, None
362
+
363
+ terms_checkbox = gr.Checkbox(
364
+ label=i18n("I agree to the terms of use"),
365
+ info=i18n(
366
+ "Please ensure compliance with the terms and conditions detailed in [this document](https://github.com/IAHispano/Applio/blob/main/TERMS_OF_USE.md) before proceeding with your inference."
367
+ ),
368
+ value=False,
369
+ interactive=True,
370
+ )
371
+ convert_button = gr.Button(i18n("Convert"))
372
+
373
+ with gr.Row():
374
+ vc_output1 = gr.Textbox(
375
+ label=i18n("Output Information"),
376
+ info=i18n("The output information will be displayed here."),
377
+ )
378
+ vc_output2 = gr.Audio(label=i18n("Export Audio"))
379
+
380
def toggle_visible(checkbox):
    """Return a Gradio update payload whose visibility mirrors *checkbox*."""
    # ``__type__: update`` is the dict-style gr.update() protocol.
    return dict(visible=checkbox, __type__="update")
382
+
383
def toggle_visible_embedder_custom(embedder_model):
    """Show the custom-embedder column only when "custom" is selected."""
    # Comparison yields the exact True/False the explicit branches produced.
    return {"visible": embedder_model == "custom", "__type__": "update"}
387
+
388
+ autotune.change(
389
+ fn=toggle_visible,
390
+ inputs=[autotune],
391
+ outputs=[autotune_strength],
392
+ )
393
+ proposed_pitch.change(
394
+ fn=toggle_visible,
395
+ inputs=[proposed_pitch],
396
+ outputs=[proposed_pitch_threshold],
397
+ )
398
+ clean_audio.change(
399
+ fn=toggle_visible,
400
+ inputs=[clean_audio],
401
+ outputs=[clean_strength],
402
+ )
403
+ refresh_button.click(
404
+ fn=change_choices,
405
+ inputs=[model_file],
406
+ outputs=[model_file, index_file, sid, sid],
407
+ ).then(
408
+ fn=filter_dropdowns,
409
+ inputs=[filter_box_tts],
410
+ outputs=[model_file, index_file],
411
+ )
412
+ txt_file.upload(
413
+ fn=process_input,
414
+ inputs=[txt_file],
415
+ outputs=[input_tts_path, txt_file],
416
+ )
417
+ embedder_model.change(
418
+ fn=toggle_visible_embedder_custom,
419
+ inputs=[embedder_model],
420
+ outputs=[embedder_custom],
421
+ )
422
+ move_files_button.click(
423
+ fn=create_folder_and_move_files,
424
+ inputs=[folder_name_input, bin_file_upload, config_file_upload],
425
+ outputs=[],
426
+ )
427
+ refresh_embedders_button.click(
428
+ fn=lambda: gr.update(choices=refresh_embedders_folders()),
429
+ inputs=[],
430
+ outputs=[embedder_model_custom],
431
+ )
432
+ convert_button.click(
433
+ fn=enforce_terms,
434
+ inputs=[
435
+ terms_checkbox,
436
+ input_tts_path,
437
+ tts_text,
438
+ tts_voice,
439
+ tts_rate,
440
+ pitch,
441
+ index_rate,
442
+ rms_mix_rate,
443
+ protect,
444
+ f0_method,
445
+ output_tts_path,
446
+ output_rvc_path,
447
+ model_file,
448
+ index_file,
449
+ split_audio,
450
+ autotune,
451
+ autotune_strength,
452
+ proposed_pitch,
453
+ proposed_pitch_threshold,
454
+ clean_audio,
455
+ clean_strength,
456
+ export_format,
457
+ embedder_model,
458
+ embedder_model_custom,
459
+ sid,
460
+ ],
461
+ outputs=[vc_output1, vc_output2],
462
+ )
tabs/voice_blender/voice_blender.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ import gradio as gr
3
+
4
+ now_dir = os.getcwd()
5
+ sys.path.append(now_dir)
6
+
7
+ from assets.i18n.i18n import I18nAuto
8
+ from core import run_model_blender_script
9
+
10
# Module-level translator; i18n("...") wraps every user-facing UI string below.
i18n = I18nAuto()
11
+
12
+
13
def update_model_fusion(dropbox):
    """Mirror an uploaded model path into the path textbox.

    Returns a pair: the uploaded file path (textbox value) and ``None``
    (clears the drop zone so a new file can be dropped).
    """
    uploaded_path = dropbox
    return uploaded_path, None
15
+
16
+
17
def voice_blender_tab():
    """Build the "Voice Blender" tab UI and wire up its event handlers.

    Lets the user pick two .pth voice models (via drag-and-drop or a path
    textbox), choose a blend ratio, and fuse them into a new model through
    ``run_model_blender_script``. Component creation order defines the
    rendered layout, so statements must stay in this sequence.
    """
    gr.Markdown(i18n("## Voice Blender"))
    gr.Markdown(
        i18n(
            "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice."
        )
    )
    with gr.Column():
        # Name used for the fused output model.
        model_fusion_name = gr.Textbox(
            label=i18n("Model Name"),
            info=i18n("Name of the new model."),
            value="",
            max_lines=1,
            interactive=True,
            placeholder=i18n("Enter model name"),
        )
        with gr.Row():
            # Model A: drop zone plus an editable path textbox kept in sync
            # by the .upload() handler wired below.
            with gr.Column():
                model_fusion_a_dropbox = gr.File(
                    label=i18n("Drag and drop your model here"), type="filepath"
                )
                model_fusion_a = gr.Textbox(
                    label=i18n("Path to Model"),
                    value="",
                    interactive=True,
                    placeholder=i18n("Enter path to model"),
                    info=i18n("You can also use a custom path."),
                )
            # Model B: same pairing as model A.
            with gr.Column():
                model_fusion_b_dropbox = gr.File(
                    label=i18n("Drag and drop your model here"), type="filepath"
                )
                model_fusion_b = gr.Textbox(
                    label=i18n("Path to Model"),
                    value="",
                    interactive=True,
                    placeholder=i18n("Enter path to model"),
                    info=i18n("You can also use a custom path."),
                )
        # Blend ratio in [0, 1]; presumably weights model A vs model B —
        # exact direction is defined by run_model_blender_script (verify there).
        alpha_a = gr.Slider(
            minimum=0,
            maximum=1,
            label=i18n("Blend Ratio"),
            value=0.5,
            interactive=True,
            info=i18n(
                "Adjusting the position more towards one side or the other will make the model more similar to the first or second."
            ),
        )
        model_fusion_button = gr.Button(i18n("Fusion"))
    with gr.Row():
        model_fusion_output_info = gr.Textbox(
            label=i18n("Output Information"),
            info=i18n("The output information will be displayed here."),
            value="",
        )
        # Download slot for the fused .pth produced by the blender script.
        model_fusion_pth_output = gr.File(
            label=i18n("Download Model"), type="filepath", interactive=False
        )

    # Fuse the two models; note the script reads the path textboxes, not
    # the drop zones (drop zones only pre-fill the textboxes).
    model_fusion_button.click(
        fn=run_model_blender_script,
        inputs=[
            model_fusion_name,
            model_fusion_a,
            model_fusion_b,
            alpha_a,
        ],
        outputs=[model_fusion_output_info, model_fusion_pth_output],
    )

    # Dropping a file copies its path into the textbox and clears the drop zone.
    model_fusion_a_dropbox.upload(
        fn=update_model_fusion,
        inputs=model_fusion_a_dropbox,
        outputs=[model_fusion_a, model_fusion_a_dropbox],
    )

    model_fusion_b_dropbox.upload(
        fn=update_model_fusion,
        inputs=model_fusion_b_dropbox,
        outputs=[model_fusion_b, model_fusion_b_dropbox],
    )