Hev832 committed on
Commit
007a247
·
verified ·
1 Parent(s): 97526ee

not updated anymore

Browse files
Files changed (1) hide show
  1. lib/tts_infer.py +0 -243
lib/tts_infer.py DELETED
@@ -1,243 +0,0 @@
1
- import os
2
- import shutil
3
- import gc
4
- import torch
5
- from multiprocessing import cpu_count
6
- from lib.modules import VC
7
- from lib.language_tts import language_dict
8
- from lib.split_audio import split_silence_nonsilent, adjust_audio_lengths, combine_silence_nonsilent
9
- import edge_tts
10
- import tempfile
11
- import anyio
12
-
13
-
14
-
15
-
16
-
17
-
18
-
19
class Configs:
    """Runtime device configuration for voice-conversion inference.

    Resolves the torch device to use (CUDA -> MPS -> CPU fallback) and
    derives the padding/query/window sizes the pipeline slices audio with.
    """

    def __init__(self, device, is_half):
        # device: requested torch device string, e.g. "cuda:0".
        # is_half: run inference in fp16 when True.
        self.device = device
        self.is_half = is_half
        self.n_cpu = 0        # resolved to cpu_count() in device_config()
        self.gpu_name = None  # set only when CUDA is available
        self.gpu_mem = None   # GiB; never assigned here (low-mem branch is inert)
        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()

    def device_config(self) -> tuple:
        """Pick the device and return the (x_pad, x_query, x_center, x_max) sizes.

        Side effects: may rewrite self.device to "mps"/"cpu" when no CUDA
        card is found, and fills in self.n_cpu / self.gpu_name.
        """
        if torch.cuda.is_available():
            i_device = int(self.device.split(":")[-1])
            self.gpu_name = torch.cuda.get_device_name(i_device)
            # NOTE: a large block of dead commented-out code (16-series fp16
            # workarounds referencing an undefined BASE_DIR) was removed here.
        elif torch.backends.mps.is_available():
            print("No supported N-card found, use MPS for inference")
            self.device = "mps"
        else:
            print("No supported N-card found, use CPU for inference")
            self.device = "cpu"

        if self.n_cpu == 0:
            self.n_cpu = cpu_count()

        if self.is_half:
            # 6G memory config
            x_pad, x_query, x_center, x_max = 3, 10, 60, 65
        else:
            # 5G memory config
            x_pad, x_query, x_center, x_max = 1, 6, 38, 41

        # Tighter windows for <=4 GiB GPUs. gpu_mem is never set above, so
        # this branch currently never fires -- kept for compatibility.
        if self.gpu_mem is not None and self.gpu_mem <= 4:
            x_pad, x_query, x_center, x_max = 1, 5, 30, 32

        return x_pad, x_query, x_center, x_max
94
-
95
def get_model(voice_model):
    """Locate the .pth weights and .index file for a named voice model.

    Scans ./models/<voice_model> in the current working directory.

    Returns:
        (pth_path, index_path): index_path is '' when no .index file exists.
        (None, None) when no .pth file is found.
    """
    model_dir = os.path.join(os.getcwd(), "models", voice_model)
    model_filename, index_filename = None, None
    for file in os.listdir(model_dir):
        ext = os.path.splitext(file)[1]
        if ext == '.pth':
            model_filename = file
        if ext == '.index':
            index_filename = file

    if model_filename is None:
        # Bug fix: original referenced undefined `models_dir` here (NameError).
        print(f'No model file exists in {model_dir}.')
        return None, None

    return os.path.join(model_dir, model_filename), os.path.join(model_dir, index_filename) if index_filename else ''
110
-
111
def infer_audio(
    model_name,
    text,
    language_code,
    f0_change=0,
    f0_method="rmvpe",
    min_pitch="50",
    max_pitch="1100",
    crepe_hop_length=128,
    index_rate=0.75,
    filter_radius=3,
    rms_mix_rate=0.25,
    protect=0.33,
    split_infer=False,
    min_silence=500,
    silence_threshold=-50,
    seek_step=1,
    keep_silence=100,
    do_formant=False,
    quefrency=0,
    timbre=1,
    f0_autotune=False,
    audio_format="wav",
    resample_sr=0,
    hubert_model_path="hubert_base.pt",
    rmvpe_model_path="rmvpe.pt",
    fcpe_model_path="fcpe.pt"
):
    """Synthesize `text` with edge-tts, then run RVC voice conversion on it.

    model_name selects a folder under ./models (see get_model); language_code
    picks the edge-tts voice via language_dict. With split_infer=True the TTS
    audio is split into silence/non-silence segments, each segment is
    converted separately, and the pieces are recombined; otherwise the whole
    file is converted in one pass.

    Returns:
        Path of the converted audio on success. On failure: None in split
        mode, or the error string from vc_single in single-pass mode.
    """
    import asyncio  # local: only needed to drive edge_tts's async save()

    os.environ["rmvpe_model_path"] = rmvpe_model_path
    os.environ["fcpe_model_path"] = fcpe_model_path
    configs = Configs('cuda:0', True)
    vc = VC(configs)
    pth_path, index_path = get_model(model_name)
    # get_vc loads the model; original bound the result to an unused variable.
    vc.get_vc(pth_path, protect, 0.5)

    # Generate the base TTS audio into a temporary wav file.
    voice = language_dict.get(language_code, "default_voice")
    communicate = edge_tts.Communicate(text, voice)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
    # Bug fix: original used `await` inside a non-async def (SyntaxError).
    asyncio.run(communicate.save(tmp_path))

    if split_infer:
        inferred_files = []
        temp_dir = os.path.join(os.getcwd(), "seperate", "temp")
        os.makedirs(temp_dir, exist_ok=True)
        print("Splitting audio to silence and nonsilent segments.")
        # Bug fix: original referenced undefined `audio_path`; the TTS output
        # lives at tmp_path.
        silence_files, nonsilent_files = split_silence_nonsilent(tmp_path, min_silence, silence_threshold, seek_step, keep_silence)
        print(f"Total silence segments: {len(silence_files)}.\nTotal nonsilent segments: {len(nonsilent_files)}.")
        for i, nonsilent_file in enumerate(nonsilent_files):
            print(f"Inferring nonsilent audio {i+1}")
            inference_info, audio_data, output_path = vc.vc_single(
                0,
                nonsilent_file,
                f0_change,
                f0_method,
                index_path,
                index_path,
                index_rate,
                filter_radius,
                resample_sr,
                rms_mix_rate,
                protect,
                audio_format,
                crepe_hop_length,
                do_formant,
                quefrency,
                timbre,
                min_pitch,
                max_pitch,
                f0_autotune,
                hubert_model_path
            )
            if inference_info[0] == "Success.":
                print("Inference ran successfully.")
                print(inference_info[1])
                print("Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % (*inference_info[2],))
            else:
                print(f"An error occurred while processing.\n{inference_info[0]}")
                return None
            inferred_files.append(output_path)
        print("Adjusting inferred audio lengths.")
        adjusted_inferred_files = adjust_audio_lengths(nonsilent_files, inferred_files)
        print("Combining silence and inferred audios.")
        # Pick the first free numbered output filename.
        output_count = 1
        while True:
            output_path = os.path.join(os.getcwd(), "output", f"{os.path.splitext(os.path.basename(tmp_path))[0]}{model_name}{f0_method.capitalize()}_{output_count}.{audio_format}")
            if not os.path.exists(output_path):
                break
            output_count += 1
        output_path = combine_silence_nonsilent(silence_files, adjusted_inferred_files, keep_silence, output_path)
        for inferred_file in inferred_files:
            shutil.move(inferred_file, temp_dir)
        shutil.rmtree(temp_dir)
    else:
        inference_info, audio_data, output_path = vc.vc_single(
            0,
            # Bug fix: original wrote `audio_path=tmp_path,` before positional
            # arguments (SyntaxError); pass it positionally.
            tmp_path,
            f0_change,
            f0_method,
            index_path,
            index_path,
            index_rate,
            filter_radius,
            resample_sr,
            rms_mix_rate,
            protect,
            audio_format,
            crepe_hop_length,
            do_formant,
            quefrency,
            timbre,
            min_pitch,
            max_pitch,
            f0_autotune,
            hubert_model_path
        )
        if inference_info[0] == "Success.":
            print("Inference ran successfully.")
            print(inference_info[1])
            print("Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % (*inference_info[2],))
        else:
            print(f"An error occurred while processing.\n{inference_info[0]}")
            del configs, vc
            gc.collect()
            return inference_info[0]

    # Remove the intermediate TTS wav (original leaked it: delete=False,
    # never unlinked).
    try:
        os.remove(tmp_path)
    except OSError:
        pass

    del configs, vc
    gc.collect()
    return output_path