tsuching committed on
Commit
29bc350
·
verified ·
1 Parent(s): dbb9618

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -13
app.py CHANGED
@@ -302,34 +302,107 @@ def get_all_phonetics_schemes(text):
302
  # 4) Return both audio forms + a status message
303
  # return (sr, audio), tmpfile.name, "Tibetan audio generated successfully!"
304
 
 
 
305
  def run_task_tts(text: str):
306
- # Ensure input is a string
307
  if not isinstance(text, str):
308
  text = str(text)
309
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  # Add extra space to prevent cut endings
311
- text = text.strip() #+ " །";
312
 
313
  # 1) Generate speech via MMS-TTS
314
- speech = tts_tibetan(text) # pipeline expects plain string
315
 
316
  # 2) Clip, cast, flatten for Gradio (browser playback expects float32 in [-1, 1])
317
- audio = speech["audio"]
318
- sr = int(speech["sampling_rate"])
319
- audio = np.clip(audio.astype(np.float32), -1.0, 1.0).flatten()
320
 
321
  # 🔥 Add 1 second of silence padding
322
- silence_duration = 1.0 # seconds
323
- silence_samples = int(sr * silence_duration)
324
- silence = np.zeros(silence_samples, dtype=np.float32)
325
- padded_audio = np.concatenate([audio, silence])
326
 
327
  # 3) Write a WAV file for download/Flutter using PCM_16
328
- tmpfile = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
329
- sf.write(tmpfile.name, audio, sr, subtype="PCM_16")
330
 
331
  # 4) Return both audio forms + a status message
332
- return (sr, audio), tmpfile.name, "Tibetan audio generated successfully!"
333
 
334
 
335
  # Translate/Tokenize function
 
302
  # 4) Return both audio forms + a status message
303
  # return (sr, audio), tmpfile.name, "Tibetan audio generated successfully!"
304
 
305
+
306
+ ########
307
  def run_task_tts(text: str):
308
+ # Ensure input is a string and strip whitespace
309
  if not isinstance(text, str):
310
  text = str(text)
311
 
312
+ text = text.strip()
313
+
314
+ # 1. Segment Text and Filter Empty Chunks
315
+ # Use the primary phrase marker (།) to split the long text into manageable segments.
316
+ # The regex re.split(r'[།\n]', text) splits on both the shad (།) and newlines
317
+
318
+ # Use the primary phrase marker (།) and newlines (\n) to split the text.
319
+ # The 're' module must be imported at the top of your script (which it is).
320
+ segments = [s.strip() for s in re.split(r'[།\n]', text) if s.strip()]
321
+
322
+ if not segments:
323
+ return (None, ""), "", "⚠️ Error: No valid Tibetan text found after cleaning/segmentation."
324
+
325
+ # List to hold all generated audio segments (numpy arrays)
326
+ audio_segments = []
327
+
328
+ # Sampling rate; overwritten from each TTS result (expected to be the same for all segments)
329
+ sr = 0
330
+
331
+ try:
332
+ # 2. Process each segment
333
+ for segment in segments:
334
+ # Re-add the closing shad (།) removed by the split, for better phrasing,
335
+ # preceded by a space to prevent cut endings. A segment never ends in །
336
+ # at this point, because the split above already consumed every shad.
337
+ segment_with_tsheg = segment + " །"
338
+
339
+ # Generate speech for the short segment
340
+ speech = tts_tibetan(segment_with_tsheg)
341
+
342
+ # Clip and flatten the audio for the segment
343
+ audio_data = speech["audio"]
344
+ sr = int(speech["sampling_rate"]) # Capture the sampling rate
345
+
346
+ # Convert to float32, clip to [-1, 1] (no rescaling) and flatten
347
+ segment_audio = np.clip(audio_data.astype(np.float32), -1.0, 1.0).flatten()
348
+
349
+ audio_segments.append(segment_audio)
350
+
351
+ # Add a small silence gap between segments for clarity (e.g., 0.25s)
352
+ silence_duration = 0.25 # seconds
353
+ silence_samples = int(sr * silence_duration)
354
+ silence = np.zeros(silence_samples, dtype=np.float32)
355
+ audio_segments.append(silence)
356
+
357
+ # 3. Concatenate all audio segments into the final array
358
+ final_audio = np.concatenate(audio_segments)
359
+
360
+ # 4. Write a WAV file for download/Flutter using PCM_16
361
+ tmpfile = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
362
+
363
+ # We must have a valid sampling rate 'sr' here
364
+ if sr == 0:
365
+ raise ValueError("Sampling rate was not determined during TTS generation.")
366
+
367
+ sf.write(tmpfile.name, final_audio, sr, subtype="PCM_16")
368
+
369
+ # 5. Return both audio forms + a status message
370
+ return (sr, final_audio), tmpfile.name, "Tibetan audio generated successfully via segmentation!"
371
+
372
+ except Exception as e:
373
+ # Catch any failure during TTS or concatenation
374
+ error_message = f"TTS processing failed for a long text segment: {e}. The segmenting process may have failed or the model encountered an unpronounceable character. Try shorter text."
375
+ print(f"TTS Error during segmentation: {e}")
376
+ return (None, ""), "", error_message # Return empty data on failure
377
+ ########
378
+ # def run_task_tts(text: str):
379
+ # Ensure input is a string
380
+ # if not isinstance(text, str):
381
+ # text = str(text)
382
+
383
  # Add extra space to prevent cut endings
384
+ # text = text.strip() #+ " །";
385
 
386
  # 1) Generate speech via MMS-TTS
387
+ # speech = tts_tibetan(text) # pipeline expects plain string
388
 
389
  # 2) Clip, cast, flatten for Gradio (browser playback expects float32 in [-1, 1])
390
+ # audio = speech["audio"]
391
+ # sr = int(speech["sampling_rate"])
392
+ # audio = np.clip(audio.astype(np.float32), -1.0, 1.0).flatten()
393
 
394
  # 🔥 Add 1 second of silence padding
395
+ # silence_duration = 1.0 # seconds
396
+ # silence_samples = int(sr * silence_duration)
397
+ # silence = np.zeros(silence_samples, dtype=np.float32)
398
+ # padded_audio = np.concatenate([audio, silence])
399
 
400
  # 3) Write a WAV file for download/Flutter using PCM_16
401
+ # tmpfile = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
402
+ # sf.write(tmpfile.name, audio, sr, subtype="PCM_16")
403
 
404
  # 4) Return both audio forms + a status message
405
+ # return (sr, audio), tmpfile.name, "Tibetan audio generated successfully!"
406
 
407
 
408
  # Translate/Tokenize function