Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,14 @@ from pydantic import BaseModel
|
|
| 10 |
import logging
|
| 11 |
import tempfile
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
| 14 |
from speechbrain.inference.speaker import EncoderClassifier
|
| 15 |
|
|
@@ -24,11 +32,6 @@ logging.info(f"Using device: {device}")
|
|
| 24 |
# Faylasha codadka tixraaca
|
| 25 |
VOICE_SAMPLE_FILES = ["1.wav"]
|
| 26 |
|
| 27 |
-
# --- ISBEDDELKA UGU MUHIIMSAN ---
|
| 28 |
-
# Deji hal meel oo la oggol yahay oo wax lagu kaydin karo (writable cache directory)
|
| 29 |
-
CACHE_DIR = "/tmp/huggingface_cache"
|
| 30 |
-
os.makedirs(CACHE_DIR, exist_ok=True)
|
| 31 |
-
|
| 32 |
EMBEDDING_DIR = "/tmp/speaker_embeddings"
|
| 33 |
os.makedirs(EMBEDDING_DIR, exist_ok=True)
|
| 34 |
|
|
@@ -45,20 +48,14 @@ async def startup_event():
|
|
| 45 |
Shaqadan waxay shaqaynaysaa hal mar marka uu barnaamijku bilaabmo.
|
| 46 |
"""
|
| 47 |
global processor, model, vocoder, speaker_model
|
| 48 |
-
logging.info("
|
| 49 |
try:
|
| 50 |
-
#
|
| 51 |
-
processor = SpeechT5Processor.from_pretrained(
|
| 52 |
-
|
| 53 |
-
)
|
| 54 |
-
model = SpeechT5ForTextToSpeech.from_pretrained(
|
| 55 |
-
"Somalitts/8aad", cache_dir=CACHE_DIR
|
| 56 |
-
).to(device)
|
| 57 |
-
vocoder = SpeechT5HifiGan.from_pretrained(
|
| 58 |
-
"microsoft/speecht5_hifigan", cache_dir=CACHE_DIR
|
| 59 |
-
).to(device)
|
| 60 |
|
| 61 |
-
#
|
| 62 |
speaker_model = EncoderClassifier.from_hparams(
|
| 63 |
source="speechbrain/spkrec-xvect-voxceleb",
|
| 64 |
run_opts={"device": device},
|
|
@@ -67,6 +64,9 @@ async def startup_event():
|
|
| 67 |
logging.info("Models loaded successfully.")
|
| 68 |
except Exception as e:
|
| 69 |
logging.error(f"Error loading models: {e}")
|
|
|
|
|
|
|
|
|
|
| 70 |
raise RuntimeError(f"Could not load models: {e}")
|
| 71 |
|
| 72 |
logging.info("Pre-caching speaker embeddings...")
|
|
@@ -107,31 +107,7 @@ def get_speaker_embedding(wav_file_path):
|
|
| 107 |
logging.error(f"Could not process audio file {wav_file_path}. Error: {e}")
|
| 108 |
raise HTTPException(status_code=500, detail=f"Failed to process reference audio: {wav_file_path}")
|
| 109 |
|
| 110 |
-
#
|
| 111 |
-
|
| 112 |
-
# --- Text normalization helpers ---

# Somali words for the base numbers; every other value is composed from
# these using the "X iyo Y" (X and Y) pattern.
number_words = {
    0: "eber", 1: "kow", 2: "labo", 3: "saddex", 4: "afar", 5: "shan",
    6: "lix", 7: "toddobo", 8: "siddeed", 9: "sagaal", 10: "toban",
    20: "labaatan", 30: "soddon", 40: "afartan", 50: "konton",
    60: "lixdan", 70: "toddobaatan", 80: "sideetan", 90: "sagaashan",
    100: "boqol", 1000: "kun"
}

def number_to_words_recursive(n):
    """Spell out a non-negative integer in Somali words.

    Values of one million or more are returned unchanged as a digit
    string (no base word for "million" is defined in `number_words`).
    """
    if n in number_words:
        return number_words[n]
    if n < 20:
        # 11-19: unit word + "iyo toban" (e.g. 11 -> "kow iyo toban").
        # FIX: these previously fell through and stayed as raw digits.
        return number_words[n % 10] + " iyo toban"
    if n < 100:
        tens = number_words[n // 10 * 10]
        return tens + (" iyo " + number_words[n % 10] if n % 10 else "")
    if n < 1000:
        hundreds = number_to_words_recursive(n // 100) + " boqol" if n // 100 > 1 else "boqol"
        return hundreds + (" iyo " + number_to_words_recursive(n % 100) if n % 100 else "")
    if n < 1000000:
        thousands = number_to_words_recursive(n // 1000) + " kun"
        return thousands + (" iyo " + number_to_words_recursive(n % 1000) if n % 1000 else "")
    return str(n)

def replace_numbers_with_words(text):
    """Replace each standalone digit run in *text* with its Somali spelling."""
    return re.sub(r'\b\d+\b', lambda m: number_to_words_recursive(int(m.group())), text)

def normalize_text(text):
    """Prepare *text* for TTS: lowercase, spell out numbers, strip
    punctuation (keeping apostrophes), and collapse whitespace."""
    text = text.lower()
    text = replace_numbers_with_words(text)
    # Drop everything except word characters, whitespace and apostrophes.
    text = re.sub(r'[^\w\s\']', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text
|
| 135 |
|
| 136 |
class TTSRequest(BaseModel):
|
| 137 |
text: str
|
|
@@ -141,6 +117,11 @@ class TTSRequest(BaseModel):
|
|
| 141 |
async def get_available_voices():
|
| 142 |
return {"available_voices": VOICE_SAMPLE_FILES}
|
| 143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
@app.post("/speak")
|
| 145 |
async def text_to_speech_endpoint(payload: TTSRequest, background_tasks: BackgroundTasks):
|
| 146 |
if not payload.text or not payload.text.strip():
|
|
|
|
| 10 |
import logging
|
| 11 |
import tempfile
|
| 12 |
|
| 13 |
+
# --- THE MOST IMPORTANT CHANGE ---
# Point every Hugging Face cache at a writable location under /tmp.
# These environment variables must be set BEFORE any `transformers`
# import, because the library reads them at import time.
CACHE_DIR = "/tmp/huggingface_cache"
os.environ['HF_HOME'] = CACHE_DIR
os.environ['TRANSFORMERS_CACHE'] = CACHE_DIR
os.environ['HF_DATASETS_CACHE'] = CACHE_DIR

from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
from speechbrain.inference.speaker import EncoderClassifier
|
| 23 |
|
|
|
|
| 32 |
# Faylasha codadka tixraaca
|
| 33 |
VOICE_SAMPLE_FILES = ["1.wav"]
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
EMBEDDING_DIR = "/tmp/speaker_embeddings"
|
| 36 |
os.makedirs(EMBEDDING_DIR, exist_ok=True)
|
| 37 |
|
|
|
|
| 48 |
Shaqadan waxay shaqaynaysaa hal mar marka uu barnaamijku bilaabmo.
|
| 49 |
"""
|
| 50 |
global processor, model, vocoder, speaker_model
|
| 51 |
+
logging.info(f"Models will be cached in: {os.environ.get('HF_HOME')}")
|
| 52 |
try:
|
| 53 |
+
# Hadda looma baahna in la gudbiyo 'cache_dir' mar kasta, laakiin way fiican tahay in la daayo
|
| 54 |
+
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
|
| 55 |
+
model = SpeechT5ForTextToSpeech.from_pretrained("Somalitts/8aad").to(device)
|
| 56 |
+
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
+
# savedir wuxuu weli muhiim u yahay speechbrain
|
| 59 |
speaker_model = EncoderClassifier.from_hparams(
|
| 60 |
source="speechbrain/spkrec-xvect-voxceleb",
|
| 61 |
run_opts={"device": device},
|
|
|
|
| 64 |
logging.info("Models loaded successfully.")
|
| 65 |
except Exception as e:
|
| 66 |
logging.error(f"Error loading models: {e}")
|
| 67 |
+
# Ku dar faahfaahin dheeri ah oo ku saabsan qaladka si loo fahmo
|
| 68 |
+
import traceback
|
| 69 |
+
logging.error(traceback.format_exc())
|
| 70 |
raise RuntimeError(f"Could not load models: {e}")
|
| 71 |
|
| 72 |
logging.info("Pre-caching speaker embeddings...")
|
|
|
|
| 107 |
logging.error(f"Could not process audio file {wav_file_path}. Error: {e}")
|
| 108 |
raise HTTPException(status_code=500, detail=f"Failed to process reference audio: {wav_file_path}")
|
| 109 |
|
| 110 |
+
# --- Inta kale ee koodhka isma beddelin ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
class TTSRequest(BaseModel):
|
| 113 |
text: str
|
|
|
|
| 117 |
async def get_available_voices():
|
| 118 |
return {"available_voices": VOICE_SAMPLE_FILES}
|
| 119 |
|
| 120 |
+
# ... (the rest of the code is unchanged)
def normalize_text(text: str) -> str:
    """Placeholder: returns *text* unchanged.

    NOTE(review): this revision replaced the real normalization
    (lowercasing, number-to-word conversion, punctuation stripping)
    with a pass-through stub — restore the original implementation or
    the TTS model will receive raw, unnormalized input.
    """
    # Insert your normalize functions here
    return text
|
| 124 |
+
|
| 125 |
@app.post("/speak")
|
| 126 |
async def text_to_speech_endpoint(payload: TTSRequest, background_tasks: BackgroundTasks):
|
| 127 |
if not payload.text or not payload.text.strip():
|