Spaces:
Sleeping
Sleeping
all
Browse files- Dockerfile +1 -0
- app.py +90 -19
- requirements.txt +0 -1
Dockerfile
CHANGED
|
@@ -36,6 +36,7 @@ RUN mkdir -p /models/huggingface && chmod -R 777 /models/huggingface
|
|
| 36 |
RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='facebook/mms-tts-hau')" \
|
| 37 |
&& python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='facebook/mms-tts-eng')" \
|
| 38 |
&& python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='facebook/mms-tts-yor')" \
|
|
|
|
| 39 |
&& find /models/huggingface -name '*.lock' -delete
|
| 40 |
|
| 41 |
RUN python -c "from transformers import pipeline; pipeline('text-to-speech', model='facebook/mms-tts-hau')" \
|
|
|
|
| 36 |
RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='facebook/mms-tts-hau')" \
|
| 37 |
&& python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='facebook/mms-tts-eng')" \
|
| 38 |
&& python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='facebook/mms-tts-yor')" \
|
| 39 |
+
&& python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='NCAIR1/N-ATLaS')" \
|
| 40 |
&& find /models/huggingface -name '*.lock' -delete
|
| 41 |
|
| 42 |
RUN python -c "from transformers import pipeline; pipeline('text-to-speech', model='facebook/mms-tts-hau')" \
|
app.py
CHANGED
|
@@ -9,8 +9,7 @@ import soundfile as sf
|
|
| 9 |
from fastapi import FastAPI, File, UploadFile, HTTPException, Form
|
| 10 |
from fastapi.responses import FileResponse
|
| 11 |
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
-
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
|
| 13 |
-
from langdetect import detect
|
| 14 |
import imageio_ffmpeg
|
| 15 |
import logging
|
| 16 |
from contextlib import asynccontextmanager
|
|
@@ -40,6 +39,7 @@ app.add_middleware(
|
|
| 40 |
|
| 41 |
ASK_URL = "https://remostart-milestone-one-farmlingua-ai.hf.space/ask"
|
| 42 |
tts_ha, tts_en, tts_yo, tts_ig = None, None, None, None
|
|
|
|
| 43 |
|
| 44 |
asr_models = {
|
| 45 |
"ha": {"repo": "NCAIR1/Hausa-ASR", "model": None, "proc": None},
|
|
@@ -49,7 +49,7 @@ asr_models = {
|
|
| 49 |
}
|
| 50 |
|
| 51 |
def load_models():
|
| 52 |
-
global tts_ha, tts_en, tts_yo, tts_ig
|
| 53 |
device = 0 if torch.cuda.is_available() else -1
|
| 54 |
hf_token = os.getenv("HF_TOKEN")
|
| 55 |
if hf_token:
|
|
@@ -82,6 +82,7 @@ def load_models():
|
|
| 82 |
tts_ig = None
|
| 83 |
logger.info("Igbo TTS model disabled - will return text responses for Igbo language")
|
| 84 |
|
|
|
|
| 85 |
|
| 86 |
logger.info("Deferred ASR model loads: will lazy-load per language on first use")
|
| 87 |
|
|
@@ -195,23 +196,93 @@ IGBO_WORDS = [
|
|
| 195 |
"ugbo","akụkọ","mmiri","ala","ọrụ","ncheta","ọhụrụ","ugwu","nri","ahụhụ"
|
| 196 |
]
|
| 197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
def detect_language(text: str) -> str:
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
def text_to_speech_file(text: str) -> str:
|
| 217 |
lang = detect_language(text)
|
|
|
|
| 9 |
from fastapi import FastAPI, File, UploadFile, HTTPException, Form
|
| 10 |
from fastapi.responses import FileResponse
|
| 11 |
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
+
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, AutoTokenizer, AutoModelForCausalLM
|
|
|
|
| 13 |
import imageio_ffmpeg
|
| 14 |
import logging
|
| 15 |
from contextlib import asynccontextmanager
|
|
|
|
| 39 |
|
| 40 |
ASK_URL = "https://remostart-milestone-one-farmlingua-ai.hf.space/ask"
|
| 41 |
tts_ha, tts_en, tts_yo, tts_ig = None, None, None, None
|
| 42 |
+
natlas_tokenizer, natlas_model = None, None
|
| 43 |
|
| 44 |
asr_models = {
|
| 45 |
"ha": {"repo": "NCAIR1/Hausa-ASR", "model": None, "proc": None},
|
|
|
|
| 49 |
}
|
| 50 |
|
| 51 |
def load_models():
|
| 52 |
+
global tts_ha, tts_en, tts_yo, tts_ig, natlas_tokenizer, natlas_model
|
| 53 |
device = 0 if torch.cuda.is_available() else -1
|
| 54 |
hf_token = os.getenv("HF_TOKEN")
|
| 55 |
if hf_token:
|
|
|
|
| 82 |
tts_ig = None
|
| 83 |
logger.info("Igbo TTS model disabled - will return text responses for Igbo language")
|
| 84 |
|
| 85 |
+
logger.info("N-ATLaS language identification model will be lazy-loaded on first use")
|
| 86 |
|
| 87 |
logger.info("Deferred ASR model loads: will lazy-load per language on first use")
|
| 88 |
|
|
|
|
| 196 |
"ugbo","akụkọ","mmiri","ala","ọrụ","ncheta","ọhụrụ","ugwu","nri","ahụhụ"
|
| 197 |
]
|
| 198 |
|
def _load_natlas():
    """Lazily load the N-ATLaS tokenizer and model into module globals.

    Returns:
        bool: True when ``natlas_tokenizer`` and ``natlas_model`` are both
        populated (whether already loaded or loaded by this call); False when
        loading fails, in which case both globals are reset to None so a
        later call can retry a clean load.
    """
    global natlas_tokenizer, natlas_model
    # Fast path: both globals already populated by a previous call.
    if natlas_tokenizer is not None and natlas_model is not None:
        return True

    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        # Tokens pasted into environment secrets often carry stray whitespace.
        hf_token = hf_token.strip()

    try:
        logger.info("Lazy-loading N-ATLaS language identification model...")
        natlas_tokenizer = AutoTokenizer.from_pretrained("NCAIR1/N-ATLaS", token=hf_token)
        natlas_model = AutoModelForCausalLM.from_pretrained(
            "NCAIR1/N-ATLaS",
            # fp16 on GPU to halve memory; fp32 on CPU where fp16 is slow/unsupported.
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto" if torch.cuda.is_available() else None,
            token=hf_token
        )
        logger.info("Loaded N-ATLaS language identification model")
        return True
    except Exception:
        # logger.exception records the full traceback; the exception object
        # itself was unused, so it is no longer bound.
        logger.exception("Failed to load N-ATLaS model")
        # Reset both globals so a half-initialized pair is never observed.
        natlas_tokenizer, natlas_model = None, None
        return False
def _keyword_detect(text: str) -> str:
    """Heuristic fallback: classify by presence of language-specific keywords.

    Checks the Hausa, Yoruba, then Igbo word lists in that order and defaults
    to English when no keyword matches.
    """
    text_lower = text.lower()
    if any(word in text_lower for word in HAUSA_WORDS):
        return "ha"
    if any(word in text_lower for word in YORUBA_WORDS):
        return "yo"
    if any(word in text_lower for word in IGBO_WORDS):
        return "ig"
    return "en"


def detect_language(text: str) -> str:
    """Identify the language of *text* as one of "en", "ha", "yo" or "ig".

    Primary path: prompt the N-ATLaS causal LM (lazy-loaded via
    ``_load_natlas``) to name the language code. Fallback path — used when
    the model cannot be loaded or generation fails — is keyword matching via
    ``_keyword_detect`` (previously duplicated inline in both branches).
    """
    if not _load_natlas():
        logger.warning("N-ATLaS model not available, falling back to keyword detection")
        return _keyword_detect(text)

    try:
        messages = [
            {'role': 'system', 'content': 'You are a language identification assistant. Identify the language of the given text and respond with only the language code: "en" for English, "ha" for Hausa, "yo" for Yoruba, or "ig" for Igbo.'},
            {'role': 'user', 'content': f'What language is this text written in? "{text}"'}
        ]

        formatted_text = natlas_tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False
        )

        input_tokens = natlas_tokenizer(formatted_text, return_tensors='pt', add_special_tokens=False)
        if torch.cuda.is_available():
            input_tokens = input_tokens.to('cuda')

        with torch.no_grad():
            outputs = natlas_model.generate(
                **input_tokens,
                max_new_tokens=10,
                use_cache=True,
                repetition_penalty=1.1,
                # NOTE(review): temperature is ignored when do_sample=False;
                # kept for parity with the original call — confirm intent.
                temperature=0.1,
                do_sample=False
            )

        response = natlas_tokenizer.batch_decode(outputs)[0]
        # The decoded output echoes the prompt; everything after the user
        # message is the model's answer.
        response_text = response.split(messages[1]['content'])[-1].strip().lower()

        # NOTE(review): substring checks are fragile — e.g. an answer
        # mentioning "Nigeria" contains "ig". Kept as-is to preserve
        # behavior; consider word-boundary matching on the bare codes.
        if 'ha' in response_text:
            return "ha"
        elif 'yo' in response_text:
            return "yo"
        elif 'ig' in response_text:
            return "ig"
        else:
            return "en"

    except Exception as e:
        logger.exception(f"Language detection failed: {e}")
        return _keyword_detect(text)
| 287 |
def text_to_speech_file(text: str) -> str:
|
| 288 |
lang = detect_language(text)
|
requirements.txt
CHANGED
|
@@ -15,7 +15,6 @@ aiofiles
|
|
| 15 |
accelerate
|
| 16 |
sentencepiece
|
| 17 |
protobuf
|
| 18 |
-
langdetect
|
| 19 |
nest-asyncio
|
| 20 |
|
| 21 |
|
|
|
|
| 15 |
accelerate
|
| 16 |
sentencepiece
|
| 17 |
protobuf
|
|
|
|
| 18 |
nest-asyncio
|
| 19 |
|
| 20 |
|