hynt committed on
Commit
26c5857
·
1 Parent(s): 7a702fa

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +68 -0
utils.py CHANGED
@@ -4,6 +4,59 @@ import hashlib
4
  import matplotlib.pylab as plt
5
  import librosa
6
  from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  def initialize_asr_pipeline(device="cuda", dtype=None):
9
  if dtype is None:
@@ -51,6 +104,21 @@ def save_spectrogram(audio, path):
51
  plt.savefig(path)
52
  plt.close()
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  def preprocess_ref_audio_text(ref_audio_orig, ref_text, clip_short=True, show_info=print, device="cuda"):
55
 
56
  show_info("Converting audio...")
 
4
  import matplotlib.pylab as plt
5
  import librosa
6
  from transformers import pipeline
7
+ import re
8
+
9
def chunk_text(text, max_chars=135):
    """Split text into chunks based on sentence and clause boundaries.

    The text is first split into sentences on ". ". Sentences with fewer
    than four words are merged (joined with ", ") into a neighbouring
    sentence. Each sentence is then split on ", " and its clauses are
    grouped so that a chunk is emitted as soon as the accumulated word
    count exceeds twenty. Finally, a trailing chunk with fewer than four
    words is appended to the chunk before it.

    Args:
        text: The input string to split.
        max_chars: Not used by the word-count based logic below; kept so
            existing callers that pass it keep working.

    Returns:
        A list of chunk strings. The list is empty when ``text`` contains
        no sentence content (empty or whitespace-only input).
    """
    min_words = 4   # sentences/chunks shorter than this get merged
    max_words = 20  # a chunk is flushed once it holds more words than this

    # Step 1: split into sentences on ". ", dropping empty pieces.
    sentences = [s.strip() for s in text.split('. ') if s.strip()]
    if not sentences:
        # Nothing to chunk; also avoids indexing an empty list below.
        return []

    # Merge sentences shorter than `min_words` words into a neighbour.
    i = 0
    while i < len(sentences):
        if len(sentences[i].split()) >= min_words:
            i += 1
        elif i > 0:
            # Merge into the previous sentence, then re-check that one.
            sentences[i - 1] = sentences[i - 1] + ', ' + sentences[i]
            del sentences[i]
            i -= 1
        elif len(sentences) > 1:
            # First sentence is short: merge it into the following one.
            sentences[1] = sentences[0] + ', ' + sentences[1]
            del sentences[0]
        else:
            # A single short sentence has no neighbour to merge with;
            # stop here instead of looping forever.
            break

    # Step 2: split overly long sentences at ", " boundaries.
    final_sentences = []
    for sentence in sentences:
        buffer = []
        word_count = 0  # running word total of the clauses in `buffer`
        for part in sentence.split(', '):
            part = part.strip()
            buffer.append(part)
            word_count += len(part.split())
            if word_count > max_words:
                final_sentences.append(', '.join(buffer))
                buffer = []
                word_count = 0
        if buffer:
            final_sentences.append(', '.join(buffer))

    # Avoid ending on a very short chunk: fold it into the previous one.
    if len(final_sentences) >= 2 and len(final_sentences[-1].split()) < min_words:
        final_sentences[-2] = final_sentences[-2] + ", " + final_sentences[-1]
        final_sentences = final_sentences[:-1]

    return final_sentences
60
 
61
  def initialize_asr_pipeline(device="cuda", dtype=None):
62
  if dtype is None:
 
104
  plt.savefig(path)
105
  plt.close()
106
 
107
def remove_silence_edges(audio, silence_threshold=-42):
    """Trim leading and trailing silence from an audio segment.

    Args:
        audio: Audio object that supports millisecond slicing, reversed
            iteration over chunks exposing ``dBFS``, and a
            ``duration_seconds`` attribute (presumably a pydub
            ``AudioSegment`` - confirm against callers).
        silence_threshold: Level passed to
            ``silence.detect_leading_silence``; trailing chunks whose
            ``dBFS`` is not above it are also dropped.

    Returns:
        The audio with the silent edges sliced off.
    """
    # Drop the silent lead-in reported by the silence helper.
    lead_in = silence.detect_leading_silence(audio, silence_threshold=silence_threshold)
    body = audio[lead_in:]

    # Walk backwards from the end, shaving 0.001 s off the kept duration
    # for every chunk that is not louder than the threshold.
    kept_seconds = body.duration_seconds
    for chunk in reversed(body):
        if chunk.dBFS > silence_threshold:
            break
        kept_seconds -= 0.001

    # NOTE(review): repeated float subtraction followed by int() truncation
    # may land a millisecond short of the exact cut point - confirm whether
    # that matters for callers.
    end_ms = int(kept_seconds * 1000)
    return body[:end_ms]
121
+
122
  def preprocess_ref_audio_text(ref_audio_orig, ref_text, clip_short=True, show_info=print, device="cuda"):
123
 
124
  show_info("Converting audio...")