bodhisativa committed on
Commit
902503d
·
verified ·
1 Parent(s): d6ed3bd

Mirror lj1995/VoiceConversionWebUI @ b2c8cae96e3b — trainset_preprocess_pipeline.py

Browse files
Files changed (1) hide show
  1. trainset_preprocess_pipeline.py +63 -0
trainset_preprocess_pipeline.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np,ffmpeg,os,traceback
2
+ from slicer import Slicer
3
# Module-level slicer instance, built once at import time.
# The sample rate matches the default ``sr1`` of ``pipeline`` (40000).
# NOTE(review): the meaning/units of the remaining settings are defined by
# the project-local ``Slicer`` class - confirm there before tuning them.
slicer = Slicer(
    sr=40000,
    db_threshold=-32,
    min_length=800,
    win_l=400,
    win_s=20,
    max_silence_kept=150,
)
11
+
12
+
13
+
14
+
15
def p0_load_audio(file, sr):
    """Decode ``file`` with ffmpeg and return its samples as a float array.

    The ffmpeg command is asked for mono (``ac=1``) signed 16-bit
    little-endian PCM at sample rate ``sr``, written to stdout. The raw
    bytes are read as ``int16`` and divided by 32768.0 after conversion to
    ``float32``.

    Raises:
        RuntimeError: if ffmpeg reports an error; the message carries the
            decoded stderr output and the original error is chained.
    """
    try:
        stream = ffmpeg.input(file, threads=0)
        stream = stream.output(
            "-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr
        )
        # "-nostdin" keeps ffmpeg from reading the parent's standard input.
        raw_pcm, _ = stream.run(
            cmd=["ffmpeg", "-nostdin"],
            capture_stdout=True,
            capture_stderr=True,
        )
    except ffmpeg.Error as e:
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
    pcm = np.frombuffer(raw_pcm, np.int16).flatten()
    return pcm.astype(np.float32) / 32768.0
25
+
26
def p1_trim_audio(slicer, audio):
    """Return whatever ``slicer.slice(audio)`` produces for ``audio``."""
    segments = slicer.slice(audio)
    return segments
27
+
28
def p2_avg_cut(audio, sr, per=3.7, overlap=0.3, tail=4):
    """Cut ``audio`` into overlapping chunks of roughly ``per`` seconds.

    Chunk ``i`` starts at sample ``int(sr * (per - overlap) * i)``. While
    more than ``tail * sr`` samples remain from that start, a chunk of
    ``int(per * sr)`` samples is taken; otherwise everything that remains is
    emitted as the final chunk and cutting stops.

    Args:
        audio: sliceable sequence of samples (anything supporting ``len``
            and slicing).
        sr: sample rate in samples per second; must be positive.
        per: chunk length in seconds.
        overlap: seconds shared between consecutive chunks; must be smaller
            than ``per``.
        tail: remaining duration in seconds at or below which the rest of
            the audio is emitted as one final chunk.

    Returns:
        A list of non-empty chunks, in order. Empty input yields ``[]``.

    Raises:
        ValueError: if ``sr`` is not positive or ``per <= overlap``; either
            would keep the start position from advancing and loop forever.
    """
    if sr <= 0:
        raise ValueError("sr must be positive")
    stride = per - overlap
    if stride <= 0:
        raise ValueError("per must be greater than overlap")

    chunk_len = int(per * sr)
    audios = []
    i = 0
    while True:
        start = int(sr * stride * i)
        i += 1
        remainder = audio[start:]
        if len(remainder) > tail * sr:
            audios.append(audio[start:start + chunk_len])
            continue
        # When ``tail`` is shorter than the stride the start position can
        # step past the end of the audio; never emit an empty chunk.
        if len(remainder) > 0:
            audios.append(remainder)
        break
    return audios
40
+
41
def p2b_get_vol(audio):
    """Return the mean of the element-wise squares of ``audio``."""
    squared = np.square(audio)
    return squared.mean()
42
+
43
def p3_norm(audio, alpha=0.8, maxx=0.95):
    """Blend a peak-normalised copy of ``audio`` with the original signal.

    The result is ``audio / peak * (maxx * alpha) + (1 - alpha) * audio``,
    where ``peak`` is the largest absolute sample value.

    Args:
        audio: array-like of samples.
        alpha: weight of the peak-normalised component; the original signal
            is weighted by ``1 - alpha``.
        maxx: target peak of the normalised component before weighting.

    Returns:
        A new array with the blended signal. Empty or all-zero input has no
        usable peak, so only the ``(1 - alpha) * audio`` term is returned
        instead of dividing by zero (which previously produced NaNs, or
        raised on empty input).
    """
    samples = np.asarray(audio)
    peak = np.abs(samples).max() if samples.size else 0
    if peak == 0:
        # Nothing to scale against; skip the division entirely.
        return (1 - alpha) * samples
    return samples / peak * (maxx * alpha) + (1 - alpha) * samples
44
+
45
def pipeline(inp_root, sr1=40000, sr2=16000, if_trim=True, if_avg_cut=True, if_norm=True, save_root1=None, save_root2=None):
    """Load, optionally slice, and optionally chunk every file in ``inp_root``.

    Args:
        inp_root: directory whose entries are loaded one by one.
        sr1: sample rate used for loading and for chunking.
        sr2: not used by the code visible here.
        if_trim: when truthy, each loaded file is passed through the
            module-level ``slicer``; otherwise it is kept whole.
        if_avg_cut: when truthy, each segment is cut with ``p2_avg_cut``.
        if_norm: not used by the code visible here.
        save_root1: output location; at least one of ``save_root1`` and
            ``save_root2`` must be given.
        save_root2: see ``save_root1``.

    Returns:
        The string ``"No save root."`` when both save roots are ``None``;
        otherwise ``None``.

    NOTE(review): the function looks unfinished - the resulting chunks are
    not normalised, saved, or returned, and the collected ``infos`` are not
    reported. Only the call-site errors are fixed here.
    """
    if save_root1 is None and save_root2 is None:
        return "No save root."
    name2vol = {}  # not yet consumed by the visible code
    infos = []
    names = []  # not yet consumed by the visible code
    for name in os.listdir(inp_root):
        try:
            inp_path = os.path.join(inp_root, name)
            # p0_load_audio requires a sample rate. sr1 matches the
            # module-level slicer's sr of 40000 - TODO confirm intent.
            audio = p0_load_audio(inp_path, sr1)
        except Exception:
            # Best effort: record the failure and move on to the next file.
            infos.append("%s\t%s" % (name, traceback.format_exc()))
            continue
        if if_trim:
            res1s = p1_trim_audio(slicer, audio)
        else:
            res1s = [audio]
        for i0, res1 in enumerate(res1s):
            if if_avg_cut:
                res2 = p2_avg_cut(res1, sr1)
            else:
                res2 = [res1]
62
+
63
+