# app.py
import os
import gradio as gr
import torch
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
BitsAndBytesConfig
)
# --- Model setup (runs once at import time) ---
MODEL_NAME = "NCAIR1/N-ATLaS"

# Read Hugging Face token from Space Secrets (either env var name works)
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN missing. Add it in Space -> Settings -> Secrets.")

# 4-bit NF4 quantization so the model fits in modest RAM/VRAM;
# double quantization saves a little extra memory at negligible cost.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

print("Loading tokenizer...")
# NOTE: `use_auth_token` is deprecated in recent transformers versions;
# `token` is the supported keyword for gated/private repos.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    token=HF_TOKEN,
)

print("Loading quantized N-ATLaS model (this may take a few minutes)...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",  # automatically uses CPU or GPU if available
    token=HF_TOKEN,
)
model.eval()  # inference only — disables dropout etc.
print("N-ATLaS loaded successfully with 4-bit quantization")
# Translation function
def translate(text, target_language):
    """Translate English `text` into `target_language` with N-ATLaS.

    Args:
        text: English source text. Blank/whitespace-only input returns "".
        target_language: Name of the target language inserted into the
            prompt (e.g. "Hausa", "Igbo", "Yoruba").

    Returns:
        The model's translation with the prompt prefix stripped, or ""
        for empty input.
    """
    if not text.strip():
        return ""

    prompt = (
        f"You are a Nigerian language translation expert.\n"
        f"Translate the following English text to {target_language}.\n"
        f"Only output the translation.\n\n"
        f"Text:\n{text}\n\nTranslation:"
    )

    inputs = tokenizer(prompt, return_tensors="pt")
    # Move every input tensor onto the model's device so generate()
    # does not fail on a CPU/GPU mismatch.
    device = model.device
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,  # kept small for CPU inference
            # `temperature` removed: it is ignored (and transformers
            # warns) when do_sample=False — greedy decoding is used.
            repetition_penalty=1.1,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
            # Explicit pad token silences the "pad_token_id not set"
            # warning on models without a dedicated pad token.
            pad_token_id=tokenizer.eos_token_id,
        )

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the prompt; keep only what follows the
    # final "Translation:" marker.
    if "Translation:" in decoded:
        decoded = decoded.split("Translation:")[-1]
    return decoded.strip()
# Gradio interface
# --- Gradio UI ---
with gr.Blocks() as demo:
    # Heading text repaired: the original contained a mojibake character
    # ("π") where an emoji had been mangled during copy/paste.
    gr.Markdown("## 🌍 N-ATLaS Translation (CPU / 4-bit Quantized)")
    inp = gr.Textbox(label="English text", lines=4)
    lang = gr.Dropdown(
        choices=["Hausa", "Igbo", "Yoruba", "English"],
        value="Hausa",
        label="Target language",
    )
    out = gr.Textbox(label="Translated text", lines=4)
    btn = gr.Button("Translate")
    # Wire the button to the translation function defined above.
    btn.click(translate, inputs=[inp, lang], outputs=out)

demo.launch()