Spaces:

nexusbert
/

milestone3

Sleeping

App Files Files Community

nexusbert committed on Oct 10, 2025

Commit

cd0d2d4

1 Parent(s): b8370c9

identifier

Browse files

Files changed (1) hide show

app.py +44 -4

app.py CHANGED Viewed

@@ -205,36 +205,52 @@ def _load_natlas():
     if hf_token:
         hf_token = hf_token.strip()
     try:
         logger.info("Lazy-loading N-ATLaS language identification model...")
         natlas_tokenizer = AutoTokenizer.from_pretrained("NCAIR1/N-ATLaS", token=hf_token)
         natlas_model = AutoModelForCausalLM.from_pretrained(
             "NCAIR1/N-ATLaS",
             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
             device_map="auto" if torch.cuda.is_available() else None,
-            token=hf_token
         )
-        logger.info("Loaded N-ATLaS language identification model")
         return True
     except Exception as e:
-        logger.exception("Failed to load N-ATLaS model")
         natlas_tokenizer, natlas_model = None, None
         return False
 def detect_language(text: str) -> str:
     if not _load_natlas():
         logger.warning("N-ATLaS model not available, falling back to keyword detection")
         text_lower = text.lower()
         if any(word in text_lower for word in HAUSA_WORDS):
             return "ha"
         elif any(word in text_lower for word in YORUBA_WORDS):
             return "yo"
         elif any(word in text_lower for word in IGBO_WORDS):
             return "ig"
         else:
             return "en"
     try:
         messages = [
             {'role': 'system', 'content': 'You are a language identification assistant. Identify the language of the given text and respond with only the language code: "en" for English, "ha" for Hausa, "yo" for Yoruba, or "ig" for Igbo.'},
             {'role': 'user', 'content': f'What language is this text written in? "{text}"'}
@@ -263,17 +279,24 @@ def detect_language(text: str) -> str:
         response = natlas_tokenizer.batch_decode(outputs)[0]
         response_text = response.split(messages[1]['content'])[-1].strip().lower()
         if 'ha' in response_text:
             return "ha"
         elif 'yo' in response_text:
             return "yo"
         elif 'ig' in response_text:
             return "ig"
         else:
             return "en"
     except Exception as e:
         logger.exception(f"Language detection failed: {e}")
         text_lower = text.lower()
         if any(word in text_lower for word in HAUSA_WORDS):
             return "ha"
@@ -339,7 +362,24 @@ async def root():
 @app.get("/health")
 async def health():
-    return {"message": "Farmlingua AI Speech Interface is running!"}
 @app.post("/chat")
 async def chat(text: str = Form(...), speak: bool = False, raw: bool = False):

     if hf_token:
         hf_token = hf_token.strip()
+    if not hf_token:
+        logger.error("HF_TOKEN not available for N-ATLaS model access")
+        return False
     try:
         logger.info("Lazy-loading N-ATLaS language identification model...")
+        logger.info("This may take a few minutes as the model loads its shards...")
         natlas_tokenizer = AutoTokenizer.from_pretrained("NCAIR1/N-ATLaS", token=hf_token)
         natlas_model = AutoModelForCausalLM.from_pretrained(
             "NCAIR1/N-ATLaS",
             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
             device_map="auto" if torch.cuda.is_available() else None,
+            token=hf_token,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True,
+            use_cache=True
         )
+        logger.info("Successfully loaded N-ATLaS language identification model")
         return True
     except Exception as e:
+        logger.exception(f"Failed to load N-ATLaS model: {e}")
         natlas_tokenizer, natlas_model = None, None
         return False
 def detect_language(text: str) -> str:
+    logger.info(f"Detecting language for text: '{text[:50]}...'")
     if not _load_natlas():
         logger.warning("N-ATLaS model not available, falling back to keyword detection")
         text_lower = text.lower()
         if any(word in text_lower for word in HAUSA_WORDS):
+            logger.info("Keyword detection: Hausa")
             return "ha"
         elif any(word in text_lower for word in YORUBA_WORDS):
+            logger.info("Keyword detection: Yoruba")
             return "yo"
         elif any(word in text_lower for word in IGBO_WORDS):
+            logger.info("Keyword detection: Igbo")
             return "ig"
         else:
+            logger.info("Keyword detection: English (default)")
             return "en"
     try:
+        logger.info("Using N-ATLaS for language detection")
         messages = [
             {'role': 'system', 'content': 'You are a language identification assistant. Identify the language of the given text and respond with only the language code: "en" for English, "ha" for Hausa, "yo" for Yoruba, or "ig" for Igbo.'},
             {'role': 'user', 'content': f'What language is this text written in? "{text}"'}
         response = natlas_tokenizer.batch_decode(outputs)[0]
         response_text = response.split(messages[1]['content'])[-1].strip().lower()
+        logger.info(f"N-ATLaS response: '{response_text}'")
         if 'ha' in response_text:
+            logger.info("N-ATLaS detection: Hausa")
             return "ha"
         elif 'yo' in response_text:
+            logger.info("N-ATLaS detection: Yoruba")
             return "yo"
         elif 'ig' in response_text:
+            logger.info("N-ATLaS detection: Igbo")
             return "ig"
         else:
+            logger.info("N-ATLaS detection: English (default)")
             return "en"
     except Exception as e:
         logger.exception(f"Language detection failed: {e}")
+        logger.warning("Falling back to keyword detection due to N-ATLaS error")
         text_lower = text.lower()
         if any(word in text_lower for word in HAUSA_WORDS):
             return "ha"
 @app.get("/health")
 async def health():
+    natlas_status = "loaded" if natlas_tokenizer is not None and natlas_model is not None else "not_loaded"
+    return {
+        "message": "Farmlingua AI Speech Interface is running!",
+        "natlas_status": natlas_status,
+        "tts_models": {
+            "hausa": tts_ha is not None,
+            "english": tts_en is not None,
+            "yoruba": tts_yo is not None,
+            "igbo": False
+        }
+    }
+@app.get("/status")
+async def status():
+    return {
+        "natlas_loaded": natlas_tokenizer is not None and natlas_model is not None,
+        "loading_message": "N-ATLaS model is loading shards, please wait..." if natlas_tokenizer is None else "N-ATLaS model is ready"
+    }
 @app.post("/chat")
 async def chat(text: str = Form(...), speak: bool = False, raw: bool = False):