"""Translademia: translate between English and South African languages.

Runs Meta's NLLB-200 model locally behind a Gradio interface.
"""

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load NLLB-200 model and tokenizer once at startup
# (first run downloads several GB of weights).
MODEL_NAME = "facebook/nllb-200-3.3B"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# Supported language pairs mapped to NLLB-200 FLORES-200 language codes:
# label -> (source_code, target_code).
LANGUAGE_PAIRS = {
    "English → Afrikaans": ("eng_Latn", "afr_Latn"),
    "English → Xhosa": ("eng_Latn", "xho_Latn"),
    "English → Zulu": ("eng_Latn", "zul_Latn"),
    "English → Sesotho": ("eng_Latn", "sot_Latn"),
    "English → Tswana": ("eng_Latn", "tsn_Latn"),
    "English → Northern Sotho": ("eng_Latn", "nso_Latn"),
    "English → Swati": ("eng_Latn", "ssw_Latn"),
    "English → Tsonga": ("eng_Latn", "tso_Latn"),
    "Afrikaans → English": ("afr_Latn", "eng_Latn"),
    "Xhosa → English": ("xho_Latn", "eng_Latn"),
    "Zulu → English": ("zul_Latn", "eng_Latn"),
    "Sesotho → English": ("sot_Latn", "eng_Latn"),
    "Tswana → English": ("tsn_Latn", "eng_Latn"),
    "Northern Sotho → English": ("nso_Latn", "eng_Latn"),
    "Swati → English": ("ssw_Latn", "eng_Latn"),
    "Tsonga → English": ("tso_Latn", "eng_Latn"),
}


def translate(input_text, language_pair):
    """Translate *input_text* for the selected *language_pair*.

    Parameters
    ----------
    input_text : str
        Text to translate; surrounding whitespace is stripped.
    language_pair : str
        A key of ``LANGUAGE_PAIRS``, e.g. ``"English → Zulu"``.

    Returns
    -------
    str
        The translated text, or an ``[ERROR] ...`` message for empty input.
    """
    if not input_text.strip():
        return "[ERROR] Please enter text."
    src_lang, tgt_lang = LANGUAGE_PAIRS[language_pair]
    # NLLB-200 does NOT use the Marian/OPUS-MT ">>lang<<" prefix convention.
    # Instead, the source language is set on the tokenizer (so the correct
    # language token is prepended during encoding) and the target language is
    # forced as the first generated token via `forced_bos_token_id`.
    tokenizer.src_lang = src_lang
    inputs = tokenizer(
        input_text.strip(),
        return_tensors="pt",
        truncation=True,  # keep inputs within the model's context window
        max_length=512,
    )
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
            max_length=256,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Gradio interface
translator = gr.Interface(
    fn=translate,
    inputs=[
        gr.Textbox(label="Input Text", placeholder="Type text here..."),
        gr.Dropdown(choices=list(LANGUAGE_PAIRS.keys()), label="Select Language Pair"),
    ],
    outputs=gr.Textbox(label="Translation"),
    title="Translademia (Local NLLB Edition)",
    description="Translate between English and South African languages using Meta's NLLB-200 locally.",
)

# NOTE(review): three earlier commented-out prototypes (a distilled-NLLB
# variant, a Helsinki-NLP/Marian variant, and a free-form NLLB variant) were
# removed as dead code; they remain available in version control history.

if __name__ == "__main__":
    # Guarded so importing this module does not start a (publicly shared)
    # Gradio server as a side effect.
    translator.launch(share=True)