Spaces:

BissakaAI
/

hamid

Sleeping

App Files Files Community

BissakaAI committed on Dec 12, 2025

Commit

f26c5be

verified ·

1 Parent(s): f9a96bb

Update model.py

Browse files

Files changed (1) hide show

model.py +156 -156

model.py CHANGED Viewed

@@ -1,156 +1,156 @@
-# your_model_file.py
-from transformers import (
-    AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,
-    AutoProcessor, SeamlessM4Tv2ForSpeechToText,
-    VitsModel
-)
-import torch
-import soundfile as sf
-import os
-# --------------------------
-# Device & config
-# --------------------------
-bnb_config = BitsAndBytesConfig(load_in_8bit=True)
-device = "cuda" if torch.cuda.is_available() else "cpu"
-# --------------------------
-# Load LLM
-# --------------------------
-HF_TOKEN = os.getenv("HF_TOKEN")  # Use environment variable for Spaces
-tokenizer = AutoTokenizer.from_pretrained(
-    "NCAIR1/N-ATLaS",
-    trust_remote_code=True,
-    token=HF_TOKEN
-)
-model = AutoModelForCausalLM.from_pretrained(
-    "NCAIR1/N-ATLaS",
-    quantization_config=bnb_config,
-    device_map="auto",
-    trust_remote_code=True,
-    token=HF_TOKEN
-)
-# --------------------------
-# Load ASR
-# --------------------------
-ASR_MODEL = "facebook/seamless-m4t-v2-large"
-processor = AutoProcessor.from_pretrained(ASR_MODEL, token=HF_TOKEN)
-asr_model = SeamlessM4Tv2ForSpeechToText.from_pretrained(ASR_MODEL, token=HF_TOKEN).to(device)
-asr_model.eval()
-# --------------------------
-# Load Nigerian TTS models
-# --------------------------
-tts_models = {}
-for lang, tts_name in {
-    "yoruba": "facebook/mms-tts-yor",
-    # "igbo": "facebook/mms-tts-ibo",
-    # "hausa": "facebook/mms-tts-hau",
-}.items():
-    print(f"Loading TTS model for {lang}...")
-    tts_proc = AutoProcessor.from_pretrained(tts_name, token=HF_TOKEN)
-    tts_mod = VitsModel.from_pretrained(tts_name, token=HF_TOKEN).to(device)
-    tts_mod.eval()
-    tts_models[lang] = {"processor": tts_proc, "model": tts_mod}
-print("✅ All models loaded successfully!")
-# --------------------------
-# TEXT FUNCTION
-# --------------------------
-def textonly(user_msg: str) -> str:
-    def format_prompt(messages):
-        return tokenizer.apply_chat_template(
-            messages,
-            add_generation_prompt=True,
-            tokenize=False
-        )
-    chat = [
-        {"role": "system", "content": "You are a helpful model trained by Awarri AI Technologies."},
-        {"role": "user", "content": user_msg}
-    ]
-    final_text = format_prompt(chat)
-    inputs = tokenizer(final_text, return_tensors="pt").to(model.device)
-    with torch.no_grad():
-        output_ids = model.generate(
-            **inputs,
-            max_new_tokens=200,
-            temperature=0.1,
-            repetition_penalty=1.12
-        )
-    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    return response
-# --------------------------
-# SPEECH FUNCTION
-# --------------------------
-def speechonly(speech, output_wav_path="response.wav"):
-    # --- ASR ---
-    inputs = processor(audios=speech, sampling_rate=16000, return_tensors="pt").to(device)
-    with torch.no_grad():
-        predicted_ids = asr_model.generate(inputs["input_features"], max_new_tokens=300)
-        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
-    # --- LLM Response ---
-    def format_prompt(messages):
-        return tokenizer.apply_chat_template(
-            messages,
-            add_generation_prompt=True,
-            tokenize=False
-        )
-    chat = [
-        {"role": "system", "content": "Respond ONLY in the detected Nigerian language (Yoruba, Igbo, Hausa, Pidgin, English)."},
-        {"role": "user", "content": transcription}
-    ]
-    final_text = format_prompt(chat)
-    inputs_llm = tokenizer(final_text, return_tensors="pt").to(model.device)
-    with torch.no_grad():
-        output_ids = model.generate(
-            **inputs_llm,
-            max_new_tokens=200,
-            temperature=0.1,
-            repetition_penalty=1.12
-        )
-    llm_response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    # --- Detect language ---
-    lang_prompt = [
-        {"role": "system", "content": "You are a Nigerian language expert."},
-        {"role": "user", "content": f"In which Nigerian language is this text: '{llm_response}'? Reply with only one of these: Yoruba, Igbo, Hausa, Pidgin, English."}
-    ]
-    lang_text = format_prompt(lang_prompt)
-    lang_inputs = tokenizer(lang_text, return_tensors="pt").to(model.device)
-    with torch.no_grad():
-        lang_output_ids = model.generate(**lang_inputs, max_new_tokens=10)
-    llm_language = tokenizer.decode(lang_output_ids[0], skip_special_tokens=True).strip().lower()
-    if llm_language not in tts_models:
-        llm_language = "yoruba"
-    # --- TTS ---
-    tts_processor = tts_models[llm_language]["processor"]
-    tts_model = tts_models[llm_language]["model"]
-    tts_inputs = tts_processor(text=llm_response, return_tensors="pt").to(device)
-    with torch.no_grad():
-        output = tts_model(**tts_inputs)
-    # Extract waveform and save
-    audio_array = output.waveform.squeeze().cpu().numpy()
-    sf.write(output_wav_path, audio_array, 16000)
-    return llm_response, output_wav_path

+# your_model_file.py
+from transformers import (
+    AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,
+    AutoProcessor, SeamlessM4Tv2ForSpeechToText,
+    VitsModel
+)
+import torch
+import soundfile as sf
+import os
+# --------------------------
+# Device & config
+# --------------------------
+# 8-bit quantization config applied to the LLM load below.
+bnb_config = BitsAndBytesConfig(load_in_8bit=True)
+# Device used for the ASR and TTS models; the LLM is placed by device_map="auto".
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# --------------------------
+# Load LLM
+# --------------------------
+HF_TOKEN = os.getenv("HF_TOKEN")  # Use environment variable for Spaces
+LLM_MODEL = "NCAIR1/N-ATLaS"
+tokenizer = AutoTokenizer.from_pretrained(
+    LLM_MODEL,
+    trust_remote_code=True,
+    token=HF_TOKEN,
+)
+model = AutoModelForCausalLM.from_pretrained(
+    LLM_MODEL,
+    quantization_config=bnb_config,
+    device_map="auto",
+    trust_remote_code=True,
+    token=HF_TOKEN,
+)
+# --------------------------
+# Load ASR
+# --------------------------
+ASR_MODEL = "facebook/seamless-m4t-v2-large"
+# use_fast chooses the tokenizer implementation, so it is passed to the
+# processor load (which builds the tokenizer) rather than to the model load,
+# where it is not a recognised loading argument.
+processor = AutoProcessor.from_pretrained(ASR_MODEL, token=HF_TOKEN, use_fast=False)
+asr_model = SeamlessM4Tv2ForSpeechToText.from_pretrained(ASR_MODEL, token=HF_TOKEN).to(device)
+asr_model.eval()
+# --------------------------
+# Load Nigerian TTS models
+# --------------------------
+# Maps a lowercase language name to {"processor": ..., "model": ...}.
+tts_models = {}
+for lang, tts_name in {
+    "yoruba": "facebook/mms-tts-yor",
+    # "igbo": "facebook/mms-tts-ibo",
+    # "hausa": "facebook/mms-tts-hau",
+}.items():
+    print(f"Loading TTS model for {lang}...")
+    tts_proc = AutoProcessor.from_pretrained(tts_name, token=HF_TOKEN)
+    tts_mod = VitsModel.from_pretrained(tts_name, token=HF_TOKEN).to(device)
+    tts_mod.eval()
+    tts_models[lang] = {"processor": tts_proc, "model": tts_mod}
+print("✅ All models loaded successfully!")
+# --------------------------
+# TEXT FUNCTION
+# --------------------------
+def textonly(user_msg: str) -> str:
+    """Generate the assistant's reply to a single text message.
+
+    Args:
+        user_msg: The user's message, used as the ``user`` turn of the chat.
+
+    Returns:
+        Only the newly generated reply (the prompt is not echoed back), with
+        surrounding whitespace stripped.
+    """
+    chat = [
+        {"role": "system", "content": "You are a helpful model trained by Awarri AI Technologies."},
+        {"role": "user", "content": user_msg},
+    ]
+    prompt_text = tokenizer.apply_chat_template(
+        chat,
+        add_generation_prompt=True,
+        tokenize=False,
+    )
+    inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)
+    with torch.no_grad():
+        output_ids = model.generate(
+            **inputs,
+            max_new_tokens=200,
+            temperature=0.1,
+            repetition_penalty=1.12,
+        )
+    # generate() on a causal LM returns prompt + continuation; skip the prompt
+    # tokens so the system/user text is not returned as part of the reply.
+    prompt_len = inputs["input_ids"].shape[-1]
+    response = tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True)
+    return response.strip()
+# --------------------------
+# SPEECH FUNCTION
+# --------------------------
+def speechonly(speech, output_wav_path="response.wav"):
+    """Transcribe speech, answer it with the LLM, and synthesize the answer.
+
+    Pipeline: ASR transcription -> LLM reply -> LLM-based language detection
+    -> TTS with the matching model from ``tts_models`` -> WAV file on disk.
+
+    Args:
+        speech: Audio handed to the ASR processor; it is declared to the
+            processor as 16 kHz audio.
+        output_wav_path: Path the synthesized reply is written to.
+
+    Returns:
+        A ``(llm_response, output_wav_path)`` tuple.
+    """
+    def run_llm(messages, **generate_kwargs):
+        # Run one chat exchange and return only the newly generated text.
+        prompt_text = tokenizer.apply_chat_template(
+            messages,
+            add_generation_prompt=True,
+            tokenize=False,
+        )
+        llm_inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)
+        with torch.no_grad():
+            generated = model.generate(**llm_inputs, **generate_kwargs)
+        # generate() returns prompt + continuation; drop the prompt tokens so
+        # the instructions are neither spoken nor fed to language detection.
+        prompt_len = llm_inputs["input_ids"].shape[-1]
+        return tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True).strip()
+    # --- ASR ---
+    inputs = processor(audios=speech, sampling_rate=16000, return_tensors="pt").to(device)
+    with torch.no_grad():
+        predicted_ids = asr_model.generate(inputs["input_features"], max_new_tokens=300)
+    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+    # --- LLM Response ---
+    chat = [
+        {"role": "system", "content": "Respond ONLY in the detected Nigerian language (Yoruba, Igbo, Hausa, Pidgin, English)."},
+        {"role": "user", "content": transcription},
+    ]
+    llm_response = run_llm(
+        chat,
+        max_new_tokens=200,
+        temperature=0.1,
+        repetition_penalty=1.12,
+    )
+    # --- Detect language ---
+    lang_prompt = [
+        {"role": "system", "content": "You are a Nigerian language expert."},
+        {"role": "user", "content": f"In which Nigerian language is this text: '{llm_response}'? Reply with only one of these: Yoruba, Igbo, Hausa, Pidgin, English."},
+    ]
+    lang_answer = run_llm(lang_prompt, max_new_tokens=10).lower()
+    # Substring match tolerates answers such as "Yoruba." or "It is Yoruba";
+    # languages without a loaded TTS model fall back to Yoruba.
+    llm_language = next((name for name in tts_models if name in lang_answer), "yoruba")
+    # --- TTS ---
+    tts_processor = tts_models[llm_language]["processor"]
+    tts_model = tts_models[llm_language]["model"]
+    tts_inputs = tts_processor(text=llm_response, return_tensors="pt").to(device)
+    with torch.no_grad():
+        output = tts_model(**tts_inputs)
+    # Extract waveform and save at the rate the TTS model actually produces.
+    audio_array = output.waveform.squeeze().cpu().numpy()
+    sf.write(output_wav_path, audio_array, tts_model.config.sampling_rate)
+    return llm_response, output_wav_path