Spaces:

Somalitts
/

5aad

Runtime error

App Files Community

Somalitts committed on Jun 14, 2025

Commit

cd7d46e

verified ·

1 Parent(s): 50f6f95

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -36

app.py CHANGED Viewed

@@ -6,8 +6,6 @@ import os
 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
 from speechbrain.pretrained import EncoderClassifier
-import torchaudio.sox_effects as sox
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Load models
@@ -79,48 +77,21 @@ def normalize_text(text):
     text = re.sub(r'[^\w\s]', '', text)
     return text
-# Adjust speed using sox effects (preserves pitch and reduces noise)
-def adjust_speed(waveform, sample_rate, text):
-    length = len(text)
-    if length <= 100:
-        speed_factor = 0.85
-    elif length <= 150:
-        speed_factor = 0.95
-    elif length <= 500:
-        speed_factor = 1.0
-    elif length <= 2000:
-        speed_factor = 1.1
-    else:
-        speed_factor = 1.2
-    effects = [["speed", str(speed_factor)], ["rate", str(sample_rate)]]
-    adjusted, _ = torchaudio.sox_effects.apply_effects_tensor(waveform, sample_rate, effects)
-    return adjusted
-# TTS function with chunking for long text
 def text_to_speech(text):
     text = normalize_text(text)
-    max_chars_per_chunk = 300
-    chunks = [text[i:i+max_chars_per_chunk] for i in range(0, len(text), max_chars_per_chunk)]
-    full_waveform = torch.tensor([], device=device)
-    for chunk in chunks:
-        inputs = processor(text=chunk, return_tensors="pt").to(device)
-        with torch.no_grad():
-            speech = model.generate_speech(inputs["input_ids"], speaker_embedding.unsqueeze(0), vocoder=vocoder)
-        adjusted = adjust_speed(speech.unsqueeze(0), 16000, chunk)
-        full_waveform = torch.cat((full_waveform, adjusted.squeeze(0)), dim=-1)
-    return (16000, full_waveform.cpu().numpy())
 # Gradio Interface
 iface = gr.Interface(
     fn=text_to_speech,
     inputs=gr.Textbox(label="Geli qoraalka af-soomaali"),
     outputs=gr.Audio(label="Codka la abuuray", type="numpy"),
-    title="Somali TTS - Degdeg ah",
-    description="Cod abuurista Af-Soomaaliga iyadoo lagu daray xawaaraha saxda ah."
 )
 iface.launch()

 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
 from speechbrain.pretrained import EncoderClassifier
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Load models
     text = re.sub(r'[^\w\s]', '', text)
     return text
+# TTS function
 def text_to_speech(text):
     text = normalize_text(text)
+    inputs = processor(text=text, return_tensors="pt").to(device)
+    with torch.no_grad():
+        speech = model.generate_speech(inputs["input_ids"], speaker_embedding.unsqueeze(0), vocoder=vocoder)
+    return (16000, speech.cpu().numpy())
 # Gradio Interface
 iface = gr.Interface(
     fn=text_to_speech,
     inputs=gr.Textbox(label="Geli qoraalka af-soomaali"),
     outputs=gr.Audio(label="Codka la abuuray", type="numpy"),
+    title="Somali TTS",
+    description="TTS Soomaaliyeed oo la adeegsaday cod gaar ah (11.wav)"
 )
 iface.launch()