oicui committed on
Commit
5329297
·
verified ·
1 Parent(s): ac0aeca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -66
app.py CHANGED
@@ -28,6 +28,11 @@ def get_or_load_model():
28
  raise
29
  return MODEL
30
 
 
 
 
 
 
31
 
32
  # ---------------------------------------
33
  # UTILITIES
@@ -42,46 +47,15 @@ def set_seed(seed: int):
42
  np.random.seed(seed)
43
 
44
 
45
- # --- SMART CHUNKING (IMPROVED FOR NATURAL PAUSING) ---
46
  def smart_chunk_text(text: str, chunk_size: int):
47
- """
48
- Improved chunker:
49
- - Supports newline as a break
50
- - Splits by natural sentence boundaries
51
- - Falls back to word-level splitting for long sentences
52
- """
53
-
54
- # Tách theo dấu câu hoặc xuống dòng
55
- sentences = re.split(r"(?<=[\.\!\?…;])\s+|\n+", text)
56
 
57
  chunks = []
58
  current = ""
59
 
60
  for sentence in sentences:
61
- sentence = sentence.strip()
62
- if not sentence:
63
- continue
64
-
65
- # Nếu câu này quá dài → fallback tách theo từ
66
- if len(sentence) > chunk_size:
67
- if current:
68
- chunks.append(current.strip())
69
- current = ""
70
-
71
- words = sentence.split(" ")
72
- tmp = ""
73
- for w in words:
74
- if len(tmp) + len(w) + 1 > chunk_size:
75
- chunks.append(tmp.strip())
76
- tmp = w + " "
77
- else:
78
- tmp += w + " "
79
- if tmp:
80
- chunks.append(tmp.strip())
81
- continue
82
-
83
- # Logic gộp câu vào chunk
84
- if len(current) + len(sentence) + 1 > chunk_size:
85
  if current:
86
  chunks.append(current.strip())
87
  current = sentence + " "
@@ -94,24 +68,10 @@ def smart_chunk_text(text: str, chunk_size: int):
94
  return chunks
95
 
96
 
97
- # --- CONCAT WITH NATURAL SILENCE ---
98
- def concat_audio(chunks, sample_rate: int, silence_ms: int = 150):
99
- """Ghép các đoạn audio và chèn im lặng 150ms để ngắt nghỉ tự nhiên."""
100
  if not chunks:
101
  return None
102
-
103
- if silence_ms <= 0:
104
- return np.concatenate(chunks, axis=-1)
105
-
106
- silence = np.zeros(int(sample_rate * silence_ms / 1000), dtype=chunks[0].dtype)
107
-
108
- segs = []
109
- for i, c in enumerate(chunks):
110
- if i > 0:
111
- segs.append(silence)
112
- segs.append(c)
113
-
114
- return np.concatenate(segs, axis=-1)
115
 
116
 
117
  # ---------------------------------------
@@ -136,12 +96,13 @@ def generate_tts_audio(
136
  raise RuntimeError("TTS model is not loaded.")
137
 
138
  # -------------------------
139
- # SEED
140
  # -------------------------
141
  if seed_num_input == 0:
142
  used_seed = random.randint(1, 2**31 - 1)
143
  else:
144
  used_seed = int(seed_num_input)
 
145
  print(f"Using seed: {used_seed}")
146
  set_seed(used_seed)
147
 
@@ -157,7 +118,7 @@ def generate_tts_audio(
157
  generate_kwargs["audio_prompt_path"] = audio_prompt_path_input
158
 
159
  # -------------------------
160
- # SMART CHUNKING
161
  # -------------------------
162
  if enable_chunking:
163
  print(f"Smart chunking enabled — chunk size = {chunk_size_value}")
@@ -166,30 +127,20 @@ def generate_tts_audio(
166
  text_chunks = [text_input]
167
 
168
  audio_segments = []
169
-
170
  for i, chunk in enumerate(text_chunks):
171
  print(f"Rendering chunk {i+1}/{len(text_chunks)}...")
172
-
173
- # USE NO_GRAD FOR SPEED & LESS MEMORY
174
- with torch.no_grad():
175
- wav = current_model.generate(chunk, **generate_kwargs)
176
-
177
  audio_segments.append(wav.squeeze(0).numpy())
178
 
179
- # GHÉP CÓ CHÈN IM LẶNG (150ms)
180
- final_audio = concat_audio(
181
- audio_segments,
182
- sample_rate=current_model.sr,
183
- silence_ms=150
184
- )
185
-
186
  print("Audio generation complete.")
187
 
 
188
  return (current_model.sr, final_audio), used_seed
189
 
190
 
191
  # ---------------------------------------
192
- # UI (UNCHANGED EXCEPT LOGIC ABOVE)
193
  # ---------------------------------------
194
 
195
  with gr.Blocks() as demo:
@@ -250,6 +201,7 @@ with gr.Blocks() as demo:
250
  with gr.Column():
251
  audio_output = gr.Audio(label="Output Audio")
252
 
 
253
  run_btn.click(
254
  fn=generate_tts_audio,
255
  inputs=[
 
28
  raise
29
  return MODEL
30
 
31
+ try:
32
+ get_or_load_model()
33
+ except Exception as e:
34
+ print(f"CRITICAL startup load failed: {e}")
35
+
36
 
37
  # ---------------------------------------
38
  # UTILITIES
 
47
  np.random.seed(seed)
48
 
49
 
50
+ # --- SMART CHUNKING ---
51
  def smart_chunk_text(text: str, chunk_size: int):
52
+ sentences = re.split(r"(?<=[\.\!\?…;])\s+", text)
 
 
 
 
 
 
 
 
53
 
54
  chunks = []
55
  current = ""
56
 
57
  for sentence in sentences:
58
+ if len(current) + len(sentence) > chunk_size:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  if current:
60
  chunks.append(current.strip())
61
  current = sentence + " "
 
68
  return chunks
69
 
70
 
71
+ def concat_audio(chunks):
 
 
72
  if not chunks:
73
  return None
74
+ return np.concatenate(chunks, axis=-1)
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
 
77
  # ---------------------------------------
 
96
  raise RuntimeError("TTS model is not loaded.")
97
 
98
  # -------------------------
99
+ # SEED HANDLING
100
  # -------------------------
101
  if seed_num_input == 0:
102
  used_seed = random.randint(1, 2**31 - 1)
103
  else:
104
  used_seed = int(seed_num_input)
105
+
106
  print(f"Using seed: {used_seed}")
107
  set_seed(used_seed)
108
 
 
118
  generate_kwargs["audio_prompt_path"] = audio_prompt_path_input
119
 
120
  # -------------------------
121
+ # SMART CHUNK PROCESSING
122
  # -------------------------
123
  if enable_chunking:
124
  print(f"Smart chunking enabled — chunk size = {chunk_size_value}")
 
127
  text_chunks = [text_input]
128
 
129
  audio_segments = []
 
130
  for i, chunk in enumerate(text_chunks):
131
  print(f"Rendering chunk {i+1}/{len(text_chunks)}...")
132
+ wav = current_model.generate(chunk, **generate_kwargs)
 
 
 
 
133
  audio_segments.append(wav.squeeze(0).numpy())
134
 
135
+ final_audio = concat_audio(audio_segments)
 
 
 
 
 
 
136
  print("Audio generation complete.")
137
 
138
+ # FIXED OUTPUT FORMAT (Gradio-compatible)
139
  return (current_model.sr, final_audio), used_seed
140
 
141
 
142
  # ---------------------------------------
143
+ # UI
144
  # ---------------------------------------
145
 
146
  with gr.Blocks() as demo:
 
201
  with gr.Column():
202
  audio_output = gr.Audio(label="Output Audio")
203
 
204
+ # CONNECT BUTTON
205
  run_btn.click(
206
  fn=generate_tts_audio,
207
  inputs=[