# Translater / two.py
# LouisMonawe's picture
# //
# 76f85b9
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr
import torch
# Load the NLLB-200 tokenizer and seq2seq model once, at import time, so that
# every call to translate() reuses the same module-level instances.
# Both are resolved from the same Hugging Face checkpoint name.
# NOTE(review): the 3.3B checkpoint is presumably large to download and hold
# in memory — confirm the host has enough RAM/disk before deploying.
model_name = "facebook/nllb-200-3.3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Supported translation directions.
# Maps the label shown in the UI dropdown to a (source, target) tuple of
# NLLB language codes. Every English -> X entry has a matching X -> English
# entry. The labels use a real "→" arrow (the previous keys contained the
# mis-decoded byte sequence of that character).
LANGUAGE_PAIRS = {
    "English → Afrikaans": ("eng_Latn", "afr_Latn"),
    "English → Xhosa": ("eng_Latn", "xho_Latn"),
    "English → Zulu": ("eng_Latn", "zul_Latn"),
    "English → Sesotho": ("eng_Latn", "sot_Latn"),
    "English → Tswana": ("eng_Latn", "tsn_Latn"),
    "English → Northern Sotho": ("eng_Latn", "nso_Latn"),
    "English → Swati": ("eng_Latn", "ssw_Latn"),
    "English → Tsonga": ("eng_Latn", "tso_Latn"),
    "Afrikaans → English": ("afr_Latn", "eng_Latn"),
    "Xhosa → English": ("xho_Latn", "eng_Latn"),
    "Zulu → English": ("zul_Latn", "eng_Latn"),
    "Sesotho → English": ("sot_Latn", "eng_Latn"),
    "Tswana → English": ("tsn_Latn", "eng_Latn"),
    "Northern Sotho → English": ("nso_Latn", "eng_Latn"),
    "Swati → English": ("ssw_Latn", "eng_Latn"),
    "Tsonga → English": ("tso_Latn", "eng_Latn"),
}
def translate(input_text, language_pair):
    """Translate *input_text* in the direction named by *language_pair*.

    Args:
        input_text: Text typed by the user. ``None`` or whitespace-only text
            is treated as empty.
        language_pair: A key of ``LANGUAGE_PAIRS`` (the dropdown label).

    Returns:
        The translated text, or a string starting with ``"[ERROR]"`` when the
        text is empty or no valid language pair was selected.
    """
    text = (input_text or "").strip()
    if not text:
        return "[ERROR] Please enter text."

    # The dropdown has no default, so Gradio passes None until the user picks
    # a pair; report that instead of failing with a KeyError.
    if not language_pair or language_pair not in LANGUAGE_PAIRS:
        return "[ERROR] Please select a language pair."

    src_lang, tgt_lang = LANGUAGE_PAIRS[language_pair]

    # NLLB does not understand a ">>lang<<" text prefix (that is the
    # Marian/OPUS-MT convention). The source language is set on the tokenizer,
    # which adds the matching language token to the encoded input.
    tokenizer.src_lang = src_lang
    inputs = tokenizer(text, return_tensors="pt")

    # The target language is selected by forcing its language-code token as
    # the first generated token.
    target_token_id = tokenizer.convert_tokens_to_ids(tgt_lang)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            forced_bos_token_id=target_token_id,
            max_length=256,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Gradio Interface
# Build the individual widgets first, then wire them to translate().
_text_box = gr.Textbox(label="Input Text", placeholder="Type text here...")
_pair_dropdown = gr.Dropdown(
    choices=list(LANGUAGE_PAIRS),
    label="Select Language Pair",
)
_result_box = gr.Textbox(label="Translation")

translator = gr.Interface(
    fn=translate,
    inputs=[_text_box, _pair_dropdown],
    outputs=_result_box,
    title="Translademia (Local NLLB Edition)",
    description="Translate between English and South African languages using Meta's NLLB-200 locally.",
)

# Start the web app; share=True additionally requests a public share link.
translator.launch(share=True)
# import gradio as gr
# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# # Load tokenizer and model (this will download ~3.5GB)
# model_name = "facebook/nllb-200-distilled-600M"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# # Supported South African languages codes for NLLB
# LANGUAGES = {
# "English → Afrikaans": "afr_Latn",
# "English → Xhosa": "xho_Latn",
# "English → Zulu": "zul_Latn",
# "English → Sesotho": "sot_Latn",
# "English → Tswana": "tsn_Latn",
# "English → Northern Sotho": "nso_Latn",
# "English → Swati": "ssw_Latn",
# "English → Tsonga": "tso_Latn",
# "English → Venda": "ven_Latn",
# }
# def translate(text, lang_label):
# if not text.strip():
# return "Please enter some text to translate."
# target_lang = LANGUAGES[lang_label]
# # Format input for NLLB: prefix target language token
# input_text = f">>{target_lang}<< {text}"
# inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
# outputs = model.generate(**inputs, max_length=512)
# translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
# return translated_text
# iface = gr.Interface(
# fn=translate,
# inputs=[
# gr.Textbox(label="English Text"),
# gr.Dropdown(list(LANGUAGES.keys()), label="Target Language"),
# ],
# outputs="text",
# title="NLLB-200 English to South African Languages",
# description="Translate English text to South African languages using Meta's NLLB-200 model locally.",
# )
# iface.launch()
# from transformers import MarianMTModel, MarianTokenizer, pipeline
# import gradio as gr
# # Define supported models for South African languages
# language_models = {
# "Afrikaans": "Helsinki-NLP/opus-mt-en-af",
# "Zulu": "Helsinki-NLP/opus-mt-en-zu",
# "Xhosa": "Helsinki-NLP/opus-mt-en-xh",
# "Sesotho": "Helsinki-NLP/opus-mt-en-st",
# "Setswana": "Helsinki-NLP/opus-mt-en-tn",
# }
# # Translation function
# def translate(text, target_language):
# model_name = language_models[target_language]
# tokenizer = MarianTokenizer.from_pretrained(model_name)
# model = MarianMTModel.from_pretrained(model_name)
# # Setup pipeline
# translation_pipeline = pipeline("translation", model=model, tokenizer=tokenizer)
# # Translate
# result = translation_pipeline(text)
# return result[0]["translation_text"]
# # Build Gradio interface
# interface = gr.Interface(
# fn=translate,
# inputs=[
# gr.Textbox(label="Enter English Text"),
# gr.Dropdown(choices=list(language_models.keys()), label="Translate to"),
# ],
# outputs="text",
# title="African Language Translator",
# description="Translate English text into Afrikaans, Zulu, Xhosa, Sesotho or Setswana",
# )
# # Launch the app
# interface.launch()
# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# import gradio as gr
# # Load the tokenizer and model
# model_name = "facebook/nllb-200-distilled-600M"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# # Language code map
# lang_map = {
# "English": "eng_Latn",
# "Afrikaans": "afr_Latn",
# "Zulu": "zul_Latn",
# "Xhosa": "xho_Latn",
# "French": "fra_Latn",
# "Spanish": "spa_Latn",
# "Swahili": "swh_Latn",
# }
# # Translation function
# def translate(text, src_lang, tgt_lang):
# src_code = lang_map[src_lang]
# tgt_code = lang_map[tgt_lang]
# tokenizer.src_lang = src_code
# inputs = tokenizer(text, return_tensors="pt", padding=True)
# generated_tokens = model.generate(
# **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_code]
# )
# translated = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
# return translated
# # Gradio interface
# iface = gr.Interface(
# fn=translate,
# inputs=[
# gr.Textbox(label="Enter text"),
# gr.Dropdown(
# choices=list(lang_map.keys()), label="From Language", value="English"
# ),
# gr.Dropdown(
# choices=list(lang_map.keys()), label="To Language", value="Afrikaans"
# ),
# ],
# outputs="text",
# title="NLLB-200 Custom Language Translator",
# description="Translate text using Facebook's distilled NLLB-200 model with selectable languages.",
# )
# iface.launch()