Somalitts committed on
Commit
fc0b1ae
·
verified ·
1 Parent(s): 9ea02c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -42
app.py CHANGED
@@ -10,6 +10,14 @@ from pydantic import BaseModel
10
  import logging
11
  import tempfile
12
 
 
 
 
 
 
 
 
 
13
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
14
  from speechbrain.inference.speaker import EncoderClassifier
15
 
@@ -24,11 +32,6 @@ logging.info(f"Using device: {device}")
24
  # Faylasha codadka tixraaca
25
  VOICE_SAMPLE_FILES = ["1.wav"]
26
 
27
- # --- ISBEDDELKA UGU MUHIIMSAN ---
28
- # Deji hal meel oo la oggol yahay oo wax lagu kaydin karo (writable cache directory)
29
- CACHE_DIR = "/tmp/huggingface_cache"
30
- os.makedirs(CACHE_DIR, exist_ok=True)
31
-
32
  EMBEDDING_DIR = "/tmp/speaker_embeddings"
33
  os.makedirs(EMBEDDING_DIR, exist_ok=True)
34
 
@@ -45,20 +48,14 @@ async def startup_event():
45
  Shaqadan waxay shaqaynaysaa hal mar marka uu barnaamijku bilaabmo.
46
  """
47
  global processor, model, vocoder, speaker_model
48
- logging.info("Loading models...")
49
  try:
50
- # U sheeg dhammaan model-yada inay isticmaalaan CACHE_DIR
51
- processor = SpeechT5Processor.from_pretrained(
52
- "microsoft/speecht5_tts", cache_dir=CACHE_DIR
53
- )
54
- model = SpeechT5ForTextToSpeech.from_pretrained(
55
- "Somalitts/8aad", cache_dir=CACHE_DIR
56
- ).to(device)
57
- vocoder = SpeechT5HifiGan.from_pretrained(
58
- "microsoft/speecht5_hifigan", cache_dir=CACHE_DIR
59
- ).to(device)
60
 
61
- # Sidoo kale u sheeg speaker model-ka meesha uu wax ku kaydinayo
62
  speaker_model = EncoderClassifier.from_hparams(
63
  source="speechbrain/spkrec-xvect-voxceleb",
64
  run_opts={"device": device},
@@ -67,6 +64,9 @@ async def startup_event():
67
  logging.info("Models loaded successfully.")
68
  except Exception as e:
69
  logging.error(f"Error loading models: {e}")
 
 
 
70
  raise RuntimeError(f"Could not load models: {e}")
71
 
72
  logging.info("Pre-caching speaker embeddings...")
@@ -107,31 +107,7 @@ def get_speaker_embedding(wav_file_path):
107
  logging.error(f"Could not process audio file {wav_file_path}. Error: {e}")
108
  raise HTTPException(status_code=500, detail=f"Failed to process reference audio: {wav_file_path}")
109
 
110
- # (Inta kale ee koodhka isma beddelin)
111
-
112
- # --- Shaqooyinka Hagaajinta Qoraalka ---
113
- number_words = {
114
- 0: "eber", 1: "kow", 2: "labo", 3: "saddex", 4: "afar", 5: "shan",
115
- 6: "lix", 7: "toddobo", 8: "siddeed", 9: "sagaal", 10: "toban",
116
- 20: "labaatan", 30: "soddon", 40: "afartan", 50: "konton",
117
- 60: "lixdan", 70: "toddobaatan", 80: "sideetan", 90: "sagaashan",
118
- 100: "boqol", 1000: "kun"
119
- }
120
- def number_to_words_recursive(n):
121
- if n in number_words: return number_words[n]
122
- if n < 20: return str(n)
123
- if n < 100: return number_words[n//10 * 10] + (" iyo " + number_words[n%10] if n%10 else "")
124
- if n < 1000: return (number_to_words_recursive(n//100) + " boqol" if n//100 > 1 else "boqol") + (" iyo " + number_to_words_recursive(n%100) if n%100 else "")
125
- if n < 1000000: return (number_to_words_recursive(n//1000) + " kun") + (" iyo " + number_to_words_recursive(n%1000) if n%1000 else "")
126
- return str(n)
127
- def replace_numbers_with_words(text):
128
- return re.sub(r'\b\d+\b', lambda m: number_to_words_recursive(int(m.group())), text)
129
- def normalize_text(text):
130
- text = text.lower()
131
- text = replace_numbers_with_words(text)
132
- text = re.sub(r'[^\w\s\']', '', text)
133
- text = re.sub(r'\s+', ' ', text).strip()
134
- return text
135
 
136
  class TTSRequest(BaseModel):
137
  text: str
@@ -141,6 +117,11 @@ class TTSRequest(BaseModel):
141
  async def get_available_voices():
142
  return {"available_voices": VOICE_SAMPLE_FILES}
143
 
 
 
 
 
 
144
  @app.post("/speak")
145
  async def text_to_speech_endpoint(payload: TTSRequest, background_tasks: BackgroundTasks):
146
  if not payload.text or not payload.text.strip():
 
10
  import logging
11
  import tempfile
12
 
13
+ # --- ISBEDDELKA UGU MUHIIMSAN ---
14
+ # Deji 'environment variable' si aad ugu qasabto Hugging Face inuu isticmaalo /tmp
15
+ # Tani waa inay ka horraysaa dhammaan 'import'-yada transformers
16
+ CACHE_DIR = "/tmp/huggingface_cache"
17
+ os.environ['HF_HOME'] = CACHE_DIR
18
+ os.environ['TRANSFORMERS_CACHE'] = CACHE_DIR
19
+ os.environ['HF_DATASETS_CACHE'] = CACHE_DIR
20
+
21
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
22
  from speechbrain.inference.speaker import EncoderClassifier
23
 
 
32
  # Faylasha codadka tixraaca
33
  VOICE_SAMPLE_FILES = ["1.wav"]
34
 
 
 
 
 
 
35
  EMBEDDING_DIR = "/tmp/speaker_embeddings"
36
  os.makedirs(EMBEDDING_DIR, exist_ok=True)
37
 
 
48
  Shaqadan waxay shaqaynaysaa hal mar marka uu barnaamijku bilaabmo.
49
  """
50
  global processor, model, vocoder, speaker_model
51
+ logging.info(f"Models will be cached in: {os.environ.get('HF_HOME')}")
52
  try:
53
+ # Hadda looma baahna in la gudbiyo 'cache_dir' mar kasta, laakiin way fiican tahay in la daayo
54
+ processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
55
+ model = SpeechT5ForTextToSpeech.from_pretrained("Somalitts/8aad").to(device)
56
+ vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
 
 
 
 
 
 
57
 
58
+ # savedir wuxuu weli muhiim u yahay speechbrain
59
  speaker_model = EncoderClassifier.from_hparams(
60
  source="speechbrain/spkrec-xvect-voxceleb",
61
  run_opts={"device": device},
 
64
  logging.info("Models loaded successfully.")
65
  except Exception as e:
66
  logging.error(f"Error loading models: {e}")
67
+ # Ku dar faahfaahin dheeri ah oo ku saabsan qaladka si loo fahmo
68
+ import traceback
69
+ logging.error(traceback.format_exc())
70
  raise RuntimeError(f"Could not load models: {e}")
71
 
72
  logging.info("Pre-caching speaker embeddings...")
 
107
  logging.error(f"Could not process audio file {wav_file_path}. Error: {e}")
108
  raise HTTPException(status_code=500, detail=f"Failed to process reference audio: {wav_file_path}")
109
 
110
+ # --- Inta kale ee koodhka isma beddelin ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  class TTSRequest(BaseModel):
113
  text: str
 
117
  async def get_available_voices():
118
  return {"available_voices": VOICE_SAMPLE_FILES}
119
 
120
+ # ... (Inta kale ee koodhka waa sidii hore)
121
+ def normalize_text(text):
122
+ # Shaqooyinkaaga normalize halkan geli
123
+ return text
124
+
125
  @app.post("/speak")
126
  async def text_to_speech_endpoint(payload: TTSRequest, background_tasks: BackgroundTasks):
127
  if not payload.text or not payload.text.strip():