File size: 2,588 Bytes
1690c4f
 
f4cc427
bc9e2ed
 
1690c4f
 
 
 
 
bc9e2ed
 
f4cc427
1690c4f
8240c60
 
 
f4cc427
1690c4f
 
 
 
 
 
 
 
 
f4cc427
bc9e2ed
1690c4f
bc9e2ed
 
1690c4f
8240c60
f4cc427
1690c4f
 
 
f4cc427
 
 
1690c4f
f4cc427
1690c4f
bc9e2ed
f4cc427
 
 
bc9e2ed
f4cc427
 
 
8240c60
bc9e2ed
f4cc427
 
 
1690c4f
 
 
 
f4cc427
 
 
1690c4f
f4cc427
 
8240c60
 
f4cc427
 
8240c60
f4cc427
1690c4f
8240c60
 
 
 
bc9e2ed
1690c4f
bc9e2ed
1690c4f
 
8240c60
f4cc427
 
 
 
 
 
bc9e2ed
1690c4f
8240c60
bc9e2ed
f4cc427
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# app.py

import os
import gradio as gr
import torch
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM, 
    BitsAndBytesConfig
)

MODEL_NAME = "NCAIR1/N-ATLaS"

# Read the Hugging Face access token from Space Secrets (the model repo is gated).
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN missing. Add it in Space → Settings → Secrets.")

# 4-bit NF4 quantization config so the model fits in limited RAM/VRAM.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    token=HF_TOKEN,  # `use_auth_token` is deprecated in transformers; use `token`
)

print("Loading quantized N-ATLaS model (this may take a few minutes)...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",  # automatically uses CPU or GPU if available
    token=HF_TOKEN,
)

model.eval()  # inference only: disable dropout / training-mode layers
print("✅ N-ATLaS loaded successfully with 4-bit quantization")

# Translation function
def translate(text, target_language):
    """Translate English *text* into *target_language* using N-ATLaS.

    Args:
        text: English source text; empty/whitespace-only input returns "".
        target_language: Target language name inserted into the prompt
            (e.g. "Hausa", "Igbo", "Yoruba").

    Returns:
        The model's translation with the prompt stripped and surrounding
        whitespace removed.
    """
    if not text.strip():
        return ""

    prompt = (
        f"You are a Nigerian language translation expert.\n"
        f"Translate the following English text to {target_language}.\n"
        f"Only output the translation.\n\n"
        f"Text:\n{text}\n\nTranslation:"
    )

    inputs = tokenizer(prompt, return_tensors="pt")

    # Make sure inputs live on the same device as the (device_map="auto") model.
    device = model.device
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,  # keep small for CPU latency
            repetition_penalty=1.1,
            # Greedy decoding: do NOT pass temperature here — with
            # do_sample=False it is ignored and transformers emits a warning.
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens so the prompt never leaks into
    # the output, even if the model echoes part of it.
    prompt_len = inputs["input_ids"].shape[1]
    decoded = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    # Defensive fallback: strip any echoed "Translation:" marker.
    if "Translation:" in decoded:
        decoded = decoded.split("Translation:")[-1]

    return decoded.strip()

# Gradio interface: single-page translator UI wired to translate().
with gr.Blocks() as demo:
    gr.Markdown("## 🌍 N-ATLaS Translation (CPU / 4-bit Quantized)")

    # Input widgets: source text plus a dropdown of supported targets.
    source_box = gr.Textbox(label="English text", lines=4)
    target_dropdown = gr.Dropdown(
        choices=["Hausa", "Igbo", "Yoruba", "English"],
        value="Hausa",
        label="Target language"
    )

    # Output widget and the button that triggers inference.
    result_box = gr.Textbox(label="Translated text", lines=4)
    translate_btn = gr.Button("Translate")

    translate_btn.click(
        translate,
        inputs=[source_box, target_dropdown],
        outputs=result_box,
    )

demo.launch()