nexusbert committed on
Commit
5911a81
·
1 Parent(s): 27c6dfe
Files changed (3) hide show
  1. Dockerfile +1 -0
  2. app.py +90 -19
  3. requirements.txt +0 -1
Dockerfile CHANGED
@@ -36,6 +36,7 @@ RUN mkdir -p /models/huggingface && chmod -R 777 /models/huggingface
36
  RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='facebook/mms-tts-hau')" \
37
  && python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='facebook/mms-tts-eng')" \
38
  && python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='facebook/mms-tts-yor')" \
 
39
  && find /models/huggingface -name '*.lock' -delete
40
 
41
  RUN python -c "from transformers import pipeline; pipeline('text-to-speech', model='facebook/mms-tts-hau')" \
 
36
  RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='facebook/mms-tts-hau')" \
37
  && python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='facebook/mms-tts-eng')" \
38
  && python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='facebook/mms-tts-yor')" \
39
+ && python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='NCAIR1/N-ATLaS')" \
40
  && find /models/huggingface -name '*.lock' -delete
41
 
42
  RUN python -c "from transformers import pipeline; pipeline('text-to-speech', model='facebook/mms-tts-hau')" \
app.py CHANGED
@@ -9,8 +9,7 @@ import soundfile as sf
9
  from fastapi import FastAPI, File, UploadFile, HTTPException, Form
10
  from fastapi.responses import FileResponse
11
  from fastapi.middleware.cors import CORSMiddleware
12
- from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
13
- from langdetect import detect
14
  import imageio_ffmpeg
15
  import logging
16
  from contextlib import asynccontextmanager
@@ -40,6 +39,7 @@ app.add_middleware(
40
 
41
  ASK_URL = "https://remostart-milestone-one-farmlingua-ai.hf.space/ask"
42
  tts_ha, tts_en, tts_yo, tts_ig = None, None, None, None
 
43
 
44
  asr_models = {
45
  "ha": {"repo": "NCAIR1/Hausa-ASR", "model": None, "proc": None},
@@ -49,7 +49,7 @@ asr_models = {
49
  }
50
 
51
  def load_models():
52
- global tts_ha, tts_en, tts_yo, tts_ig
53
  device = 0 if torch.cuda.is_available() else -1
54
  hf_token = os.getenv("HF_TOKEN")
55
  if hf_token:
@@ -82,6 +82,7 @@ def load_models():
82
  tts_ig = None
83
  logger.info("Igbo TTS model disabled - will return text responses for Igbo language")
84
 
 
85
 
86
  logger.info("Deferred ASR model loads: will lazy-load per language on first use")
87
 
@@ -195,23 +196,93 @@ IGBO_WORDS = [
195
  "ugbo","akụkọ","mmiri","ala","ọrụ","ncheta","ọhụrụ","ugwu","nri","ahụhụ"
196
  ]
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
def detect_language(text: str) -> str:
    """Classify *text* as "ha" (Hausa), "yo" (Yoruba), "ig" (Igbo) or "en".

    First checks language-specific keyword lists; if none match, falls back
    to langdetect's statistical detector, mapping anything unrecognised to
    English.
    """
    text_lower = text.lower()
    if any(word in text_lower for word in HAUSA_WORDS):
        return "ha"
    if any(word in text_lower for word in YORUBA_WORDS):
        return "yo"
    if any(word in text_lower for word in IGBO_WORDS):
        return "ig"
    try:
        # langdetect raises LangDetectException ("No features in text") on
        # empty or purely numeric/symbolic input — treat that as English
        # rather than letting the request handler crash.
        lang = detect(text)
    except Exception:
        return "en"
    if lang.startswith("ha"):
        return "ha"
    if lang.startswith("yo"):
        return "yo"
    if lang.startswith("ig"):
        return "ig"
    return "en"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
  def text_to_speech_file(text: str) -> str:
217
  lang = detect_language(text)
 
9
  from fastapi import FastAPI, File, UploadFile, HTTPException, Form
10
  from fastapi.responses import FileResponse
11
  from fastapi.middleware.cors import CORSMiddleware
12
+ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, AutoTokenizer, AutoModelForCausalLM
 
13
  import imageio_ffmpeg
14
  import logging
15
  from contextlib import asynccontextmanager
 
39
 
40
  ASK_URL = "https://remostart-milestone-one-farmlingua-ai.hf.space/ask"
41
  tts_ha, tts_en, tts_yo, tts_ig = None, None, None, None
42
+ natlas_tokenizer, natlas_model = None, None
43
 
44
  asr_models = {
45
  "ha": {"repo": "NCAIR1/Hausa-ASR", "model": None, "proc": None},
 
49
  }
50
 
51
  def load_models():
52
+ global tts_ha, tts_en, tts_yo, tts_ig, natlas_tokenizer, natlas_model
53
  device = 0 if torch.cuda.is_available() else -1
54
  hf_token = os.getenv("HF_TOKEN")
55
  if hf_token:
 
82
  tts_ig = None
83
  logger.info("Igbo TTS model disabled - will return text responses for Igbo language")
84
 
85
+ logger.info("N-ATLaS language identification model will be lazy-loaded on first use")
86
 
87
  logger.info("Deferred ASR model loads: will lazy-load per language on first use")
88
 
 
196
  "ugbo","akụkọ","mmiri","ala","ọrụ","ncheta","ọhụrụ","ugwu","nri","ahụhụ"
197
  ]
198
 
199
def _load_natlas():
    """Lazily load the N-ATLaS tokenizer/model pair into module globals.

    Returns True when both objects are available (cached from an earlier
    call or freshly loaded), False when loading failed. On failure both
    globals are reset to None so a later call can retry.
    """
    global natlas_tokenizer, natlas_model

    # Fast path: both objects were loaded by a previous call.
    if natlas_model is not None and natlas_tokenizer is not None:
        return True

    token = os.getenv("HF_TOKEN")
    token = token.strip() if token else token

    on_gpu = torch.cuda.is_available()
    try:
        logger.info("Lazy-loading N-ATLaS language identification model...")
        natlas_tokenizer = AutoTokenizer.from_pretrained("NCAIR1/N-ATLaS", token=token)
        natlas_model = AutoModelForCausalLM.from_pretrained(
            "NCAIR1/N-ATLaS",
            torch_dtype=torch.float16 if on_gpu else torch.float32,
            device_map="auto" if on_gpu else None,
            token=token,
        )
    except Exception:
        logger.exception("Failed to load N-ATLaS model")
        natlas_tokenizer, natlas_model = None, None
        return False
    logger.info("Loaded N-ATLaS language identification model")
    return True
223
+
224
def _keyword_detect(text: str) -> str:
    """Keyword-list fallback for language detection.

    Checks the lowercased text against HAUSA_WORDS, YORUBA_WORDS and
    IGBO_WORDS (in that order) and defaults to English when nothing matches.
    """
    text_lower = text.lower()
    if any(word in text_lower for word in HAUSA_WORDS):
        return "ha"
    if any(word in text_lower for word in YORUBA_WORDS):
        return "yo"
    if any(word in text_lower for word in IGBO_WORDS):
        return "ig"
    return "en"


def detect_language(text: str) -> str:
    """Identify the language of *text* as one of "en", "ha", "yo" or "ig".

    Primary path prompts the lazily-loaded N-ATLaS model; falls back to
    keyword matching (_keyword_detect) when the model is unavailable or
    inference raises.
    """
    import re

    if not _load_natlas():
        logger.warning("N-ATLaS model not available, falling back to keyword detection")
        return _keyword_detect(text)

    try:
        messages = [
            {'role': 'system', 'content': 'You are a language identification assistant. Identify the language of the given text and respond with only the language code: "en" for English, "ha" for Hausa, "yo" for Yoruba, or "ig" for Igbo.'},
            {'role': 'user', 'content': f'What language is this text written in? "{text}"'}
        ]

        formatted_text = natlas_tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False
        )

        input_tokens = natlas_tokenizer(formatted_text, return_tensors='pt', add_special_tokens=False)
        if torch.cuda.is_available():
            input_tokens = input_tokens.to('cuda')

        # Greedy decoding; temperature is meaningless (and warned about by
        # transformers) when do_sample=False, so it is not passed.
        with torch.no_grad():
            outputs = natlas_model.generate(
                **input_tokens,
                max_new_tokens=10,
                use_cache=True,
                repetition_penalty=1.1,
                do_sample=False
            )

        # Decode only the newly generated tokens: slicing past the prompt
        # length is more robust than splitting the full decode on the user
        # message, which breaks if the model echoes or paraphrases it.
        prompt_len = input_tokens['input_ids'].shape[1]
        response_text = natlas_tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        ).strip().lower()

        # Match the code (or full language name) as a whole word so that
        # incidental substrings in a verbose answer (e.g. "ha" inside
        # "that") cannot misfire. Check order matches the original: ha,
        # yo, ig, then default to English.
        patterns = {
            "ha": r'\b(ha|hausa)\b',
            "yo": r'\b(yo|yoruba)\b',
            "ig": r'\b(ig|igbo)\b',
        }
        for code, pattern in patterns.items():
            if re.search(pattern, response_text):
                return code
        return "en"

    except Exception:
        logger.exception("Language detection failed")
        return _keyword_detect(text)
286
 
287
  def text_to_speech_file(text: str) -> str:
288
  lang = detect_language(text)
requirements.txt CHANGED
@@ -15,7 +15,6 @@ aiofiles
15
  accelerate
16
  sentencepiece
17
  protobuf
18
- langdetect
19
  nest-asyncio
20
 
21
 
 
15
  accelerate
16
  sentencepiece
17
  protobuf
 
18
  nest-asyncio
19
 
20