Mirror lj1995/VoiceConversionWebUI @ b2c8cae96e3b — trainset_preprocess_pipeline.py
Browse files
trainset_preprocess_pipeline.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np,ffmpeg,os,traceback
|
| 2 |
+
from slicer import Slicer
|
| 3 |
+
# Module-level slicer instance shared by the preprocessing steps in this file.
# NOTE(review): the meaning and units of these settings are defined by
# slicer.Slicer, which is not visible here — confirm there before tuning.
slicer = Slicer(
    sr=40000, db_threshold=-32, min_length=800,
    win_l=400, win_s=20, max_silence_kept=150,
)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def p0_load_audio(file, sr):
    """Decode an audio file to mono float32 samples by piping it through ffmpeg.

    Args:
        file: Path of the audio file passed to ``ffmpeg.input``.
        sr: Sample rate requested from ffmpeg (``ar``) for the decoded output.

    Returns:
        A 1-D ``np.float32`` array: the int16 PCM samples produced by ffmpeg
        divided by 32768.0.

    Raises:
        RuntimeError: If ffmpeg fails; the message carries ffmpeg's stderr and
            the original ``ffmpeg.Error`` is chained as the cause.
    """
    try:
        # Request raw signed 16-bit PCM on stdout ("-"), downmixed to a single
        # channel and resampled to ``sr``. "-nostdin" keeps ffmpeg from reading
        # the parent's standard input.
        stream = ffmpeg.input(file, threads=0)
        stream = stream.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
        out, _ = stream.run(
            cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
        )
    except ffmpeg.Error as e:
        # Decode leniently: a stray non-UTF-8 byte in ffmpeg's output must not
        # raise UnicodeDecodeError here and hide the actual ffmpeg failure.
        detail = e.stderr.decode(errors="replace")
        raise RuntimeError(f"Failed to load audio: {detail}") from e
    # frombuffer already yields a 1-D array and astype makes the (writable)
    # copy, so no extra flatten() is needed.
    return np.frombuffer(out, np.int16).astype(np.float32) / 32768.0
|
| 25 |
+
|
| 26 |
+
def p1_trim_audio(slicer, audio):
    """Run ``audio`` through ``slicer.slice`` and return whatever it produces.

    Args:
        slicer: Object exposing a ``slice(audio)`` method.
        audio: Audio data forwarded unchanged to ``slicer.slice``.

    Returns:
        The value returned by ``slicer.slice(audio)``.
    """
    segments = slicer.slice(audio)
    return segments
|
| 27 |
+
|
| 28 |
+
def p2_avg_cut(audio, sr, per=3.7, overlap=0.3, tail=4):
    """Cut ``audio`` into overlapping fixed-length pieces plus one final remainder.

    Args:
        audio: Sliceable sequence of samples.
        sr: Sample rate, used to convert the second-based settings to samples.
        per: Length of each regular piece, in seconds.
        overlap: Seconds shared between consecutive pieces; successive pieces
            start ``per - overlap`` seconds apart.
        tail: Once no more than ``tail`` seconds remain from the current start,
            the whole remainder is emitted as the last piece.

    Returns:
        List of slices of ``audio``; always contains at least one element
        (the final remainder, which may be empty).
    """
    stride = sr * (per - overlap)   # samples between consecutive piece starts
    window = int(per * sr)          # samples in a regular piece
    tail_samples = tail * sr        # threshold below which we stop cutting
    pieces = []
    index = 0
    while True:
        begin = int(stride * index)
        index += 1
        remainder = audio[begin:]
        if len(remainder) <= tail_samples:
            # Not enough left for another regular piece: keep the rest whole.
            pieces.append(remainder)
            return pieces
        pieces.append(audio[begin:begin + window])
|
| 40 |
+
|
| 41 |
+
def p2b_get_vol(audio):
    """Return the mean of the squared samples of ``audio``.

    Args:
        audio: Array-like of samples accepted by ``np.square``.

    Returns:
        The mean of the element-wise squares.
    """
    squared = np.square(audio)
    return squared.mean()
|
| 42 |
+
|
| 43 |
+
def p3_norm(audio, alpha=0.8, maxx=0.95):
    """Blend a peak-normalised copy of ``audio`` with the original signal.

    The result is ``audio / peak * (maxx * alpha) + (1 - alpha) * audio``,
    where ``peak`` is the largest absolute sample value.

    Args:
        audio: NumPy array of samples.
        alpha: Weight of the peak-normalised component; the untouched signal
            is weighted by ``1 - alpha``.
        maxx: Peak level the normalised component is scaled to (before the
            ``alpha`` weighting).

    Returns:
        Array with the same shape as ``audio``. For an all-zero or empty input
        the normalised component is undefined, so only the
        ``(1 - alpha) * audio`` term is returned instead of NaNs or an error.
    """
    magnitude = np.abs(audio)
    # max() raises on an empty array, and a zero peak would make the division
    # below produce NaN for every sample; treat both as "nothing to normalise".
    peak = magnitude.max() if magnitude.size else 0
    if peak == 0:
        return (1 - alpha) * audio
    return audio / peak * (maxx * alpha) + (1 - alpha) * audio
|
| 44 |
+
|
| 45 |
+
def pipeline(inp_root,sr1=40000,sr2=16000,if_trim=True,if_avg_cut=True,if_norm=True,save_root1=None,save_root2=None):
    """Load every file in ``inp_root`` and run the trim / average-cut steps.

    Args:
        inp_root: Directory whose entries are loaded with ``p0_load_audio``.
        sr1: Sample rate used for loading and for ``p2_avg_cut``.
            NOTE(review): the module-level ``slicer`` is built with sr=40000,
            so a different ``sr1`` presumably needs a matching slicer — confirm.
        sr2: Not used by the code visible in this function yet.
        if_trim: When truthy, split each file with ``p1_trim_audio``.
        if_avg_cut: When truthy, cut each segment with ``p2_avg_cut``.
        if_norm: Not used by the code visible in this function yet.
        save_root1: Output location; not used beyond the guard below yet.
        save_root2: Output location; not used beyond the guard below yet.

    Returns:
        The string ``"No save root."`` when both save roots are ``None``;
        otherwise ``None``.

    NOTE(review): this function looks unfinished — ``res2``, ``infos``,
    ``name2vol`` and ``names`` are built but never saved or returned.
    """
    if save_root1 is None and save_root2 is None:
        return "No save root."
    name2vol = {}
    infos = []
    names = []
    for name in os.listdir(inp_root):
        try:
            inp_path = os.path.join(inp_root, name)
            # p0_load_audio requires the target sample rate as well.
            audio = p0_load_audio(inp_path, sr1)
        except Exception:
            # Record the failure for this file and move on to the next one;
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            infos.append("%s\t%s" % (name, traceback.format_exc()))
            continue
        if if_trim:
            # p1_trim_audio takes the slicer explicitly; use the module-level one.
            res1s = p1_trim_audio(slicer, audio)
        else:
            res1s = [audio]
        # enumerate supplies the segment index; iterating the segments directly
        # would try to unpack each audio chunk into (i0, res1).
        for i0, res1 in enumerate(res1s):
            if if_avg_cut:
                res2 = p2_avg_cut(res1, sr1)
            else:
                res2 = [res1]
|
| 62 |
+
|
| 63 |
+
|