NOI_3_ZIP

Running

App Files Files Community

hynt committed on Jul 18

Commit

26c5857

1 Parent(s): 7a702fa

Update utils.py

Browse files

Files changed (1) hide show

utils.py +68 -0

utils.py CHANGED Viewed

@@ -4,6 +4,59 @@ import hashlib
 import matplotlib.pylab as plt
 import librosa
 from transformers import pipeline
 def initialize_asr_pipeline(device="cuda", dtype=None):
     if dtype is None:
@@ -51,6 +104,21 @@ def save_spectrogram(audio, path):
     plt.savefig(path)
     plt.close()
 def preprocess_ref_audio_text(ref_audio_orig, ref_text, clip_short=True, show_info=print, device="cuda"):
     show_info("Converting audio...")

 import matplotlib.pylab as plt
 import librosa
 from transformers import pipeline
+import re
def chunk_text(text, max_chars=135):
    """Split *text* into short chunks along sentence and comma boundaries.

    The text is processed in three passes:

    1. Cut on ``". "`` (the separator itself is dropped) and discard blank
       pieces. Any sentence with fewer than four words is glued onto a
       neighbour with ``", "``: the first sentence onto the one after it,
       every other sentence onto the one before it.
    2. Cut each sentence on ``", "`` and accumulate the parts; as soon as the
       accumulated word count exceeds 20 the parts are joined back with
       ``", "`` and emitted as one chunk.
    3. If the last chunk has fewer than four words and there is a chunk
       before it, the two are joined with ``", "``.

    Args:
        text: The text to split.
        max_chars: Accepted for interface compatibility. The splitting below
            is driven by word counts and does not read this value.

    Returns:
        A list of chunk strings. Empty when *text* contains no non-blank
        sentence.
    """
    min_words = 4   # sentences/chunks shorter than this get merged
    max_words = 20  # a chunk is flushed once it grows past this many words

    # Step 1: split into sentences on ". ".
    sentences = [s.strip() for s in text.split('. ') if s.strip()]

    # Merge sentences shorter than ``min_words`` into an adjacent sentence.
    i = 0
    while i < len(sentences):
        if len(sentences[i].split()) >= min_words:
            i += 1
        elif i == 0 and i + 1 < len(sentences):
            # Merge with the following sentence; stay at index 0 so the
            # merged result is re-checked.
            sentences[i + 1] = sentences[i] + ', ' + sentences[i + 1]
            del sentences[i]
        elif i > 0:
            # Merge with the preceding sentence and step back onto it.
            sentences[i - 1] = sentences[i - 1] + ', ' + sentences[i]
            del sentences[i]
            i -= 1
        else:
            # A lone short sentence has no neighbour to merge with. Move on,
            # otherwise the loop would never terminate.
            i += 1

    # Step 2: break over-long sentences apart on ", ".
    chunks = []
    for sentence in sentences:
        pending = []
        pending_words = 0
        for part in (p.strip() for p in sentence.split(', ')):
            pending.append(part)
            pending_words += len(part.split())
            if pending_words > max_words:
                chunks.append(', '.join(pending))
                pending = []
                pending_words = 0
        if pending:
            chunks.append(', '.join(pending))

    # Step 3: fold a too-short trailing chunk into the previous one. The
    # length check comes first so an empty result is never indexed.
    if len(chunks) >= 2 and len(chunks[-1].split()) < min_words:
        chunks[-2] = chunks[-2] + ", " + chunks[-1]
        chunks.pop()

    return chunks
 def initialize_asr_pipeline(device="cuda", dtype=None):
     if dtype is None:
     plt.savefig(path)
     plt.close()
def remove_silence_edges(audio, silence_threshold=-42):
    """Trim silence from the start and the end of *audio*.

    Args:
        audio: The segment to trim. It must support millisecond slicing,
            ``reversed()`` iteration yielding pieces with a ``dBFS``
            attribute, and ``duration_seconds`` (presumably a pydub
            ``AudioSegment`` -- confirm against the caller).
        silence_threshold: Level compared against each piece's ``dBFS``;
            pieces at or below it are treated as silent.

    Returns:
        The slice of *audio* with leading and trailing silence removed.
    """
    # Remove silence from the start.
    lead_ms = silence.detect_leading_silence(audio, silence_threshold=silence_threshold)
    audio = audio[lead_ms:]

    # Remove silence from the end. The end position is tracked in whole
    # milliseconds: repeatedly subtracting 0.001 from a float accumulates
    # rounding error, and truncating that with int() can cut 1 ms too much.
    end_ms = int(audio.duration_seconds * 1000)
    for ms in reversed(audio):
        if ms.dBFS > silence_threshold:
            break
        end_ms -= 1

    # Clamp so an all-silent segment yields an empty slice rather than a
    # negative (end-relative) index.
    return audio[: max(end_ms, 0)]
 def preprocess_ref_audio_text(ref_audio_orig, ref_text, clip_short=True, show_info=print, device="cuda"):
     show_info("Converting audio...")