Hugging Face Space (status: Sleeping) — source listing of app.py follows.
# app.py — Hugging Face Space entry point for the N-ATLaS translation demo.
import os

import gradio as gr
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
)

# Hugging Face model repo hosting the N-ATLaS translation model (gated).
MODEL_NAME = "NCAIR1/N-ATLaS"

# Read the Hugging Face access token from Space Secrets; either of the two
# conventional env-var names is accepted.
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
if not HF_TOKEN:
    # Fail fast at startup: the gated model download below cannot succeed
    # without a token. (Arrow characters were mojibake in the original.)
    raise RuntimeError("HF_TOKEN missing. Add it in Space -> Settings -> Secrets.")

# 4-bit NF4 quantization so the model fits in the Space's limited memory;
# double quantization shaves a little more off the footprint.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    token=HF_TOKEN,  # `use_auth_token` is deprecated (removed in transformers v5)
)

print("Loading quantized N-ATLaS model (this may take a few minutes)...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",  # places weights on GPU when available, else CPU
    token=HF_TOKEN,
)
model.eval()  # inference only: disables dropout / training-mode layers
# (The leading check-mark emoji was mojibake in the original listing.)
print("N-ATLaS loaded successfully with 4-bit quantization")
# Translation function
def translate(text, target_language):
    """Translate English ``text`` into ``target_language`` with N-ATLaS.

    Args:
        text: English source text. Empty / whitespace-only input short-circuits
            to ``""`` without invoking the model.
        target_language: Target language name spliced into the prompt
            (e.g. "Hausa", "Igbo", "Yoruba").

    Returns:
        The model's translation as a stripped string.
    """
    if not text.strip():
        return ""
    prompt = (
        f"You are a Nigerian language translation expert.\n"
        f"Translate the following English text to {target_language}.\n"
        f"Only output the translation.\n\n"
        f"Text:\n{text}\n\nTranslation:"
    )
    inputs = tokenizer(prompt, return_tensors="pt")
    # Move input tensors to wherever device_map="auto" placed the model.
    device = model.device
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,  # kept small so CPU inference stays responsive
            # NOTE: `temperature` removed — it is ignored (and emits a warning)
            # when do_sample=False; decoding here is deterministic/greedy.
            repetition_penalty=1.1,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
            # Silence the "pad_token_id not set" warning on models without one.
            pad_token_id=(
                tokenizer.pad_token_id
                if tokenizer.pad_token_id is not None
                else tokenizer.eos_token_id
            ),
        )
    # Decode only the newly generated tokens: the prompt itself contains the
    # literal "Translation:", so splitting the full decoded string on it (as
    # before) could mis-slice when the model echoes part of the prompt.
    prompt_len = inputs["input_ids"].shape[1]
    decoded = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return decoded.strip()
# Gradio interface
with gr.Blocks() as demo:
    # The heading's leading emoji was mojibake ("π") in the original listing;
    # replaced with plain text.
    gr.Markdown("## N-ATLaS Translation (CPU / 4-bit Quantized)")
    inp = gr.Textbox(label="English text", lines=4)
    lang = gr.Dropdown(
        choices=["Hausa", "Igbo", "Yoruba", "English"],
        value="Hausa",
        label="Target language",
    )
    out = gr.Textbox(label="Translated text", lines=4)
    btn = gr.Button("Translate")
    # Wire the button to the translate() function defined above.
    btn.click(translate, inputs=[inp, lang], outputs=out)

demo.launch()