Spaces:
Sleeping
Sleeping
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| import gradio as gr | |
| import torch | |
# Checkpoint identifier shared by the tokenizer and the model so the two
# always come from the same NLLB-200 release.
model_name = "facebook/nllb-200-3.3B"

# Both objects are created once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Supported translation directions.
#
# Maps the label shown in the UI dropdown to a ``(source_code, target_code)``
# tuple of NLLB language codes. Each direction is listed explicitly, so the
# label uses a one-way arrow. Insertion order is the order in which the
# choices are displayed.
LANGUAGE_PAIRS = {
    "English → Afrikaans": ("eng_Latn", "afr_Latn"),
    "English → Xhosa": ("eng_Latn", "xho_Latn"),
    "English → Zulu": ("eng_Latn", "zul_Latn"),
    "English → Sesotho": ("eng_Latn", "sot_Latn"),
    "English → Tswana": ("eng_Latn", "tsn_Latn"),
    "English → Northern Sotho": ("eng_Latn", "nso_Latn"),
    "English → Swati": ("eng_Latn", "ssw_Latn"),
    "English → Tsonga": ("eng_Latn", "tso_Latn"),
    "Afrikaans → English": ("afr_Latn", "eng_Latn"),
    "Xhosa → English": ("xho_Latn", "eng_Latn"),
    "Zulu → English": ("zul_Latn", "eng_Latn"),
    "Sesotho → English": ("sot_Latn", "eng_Latn"),
    "Tswana → English": ("tsn_Latn", "eng_Latn"),
    "Northern Sotho → English": ("nso_Latn", "eng_Latn"),
    "Swati → English": ("ssw_Latn", "eng_Latn"),
    "Tsonga → English": ("tso_Latn", "eng_Latn"),
}
def translate(input_text, language_pair):
    """Translate ``input_text`` in the direction named by ``language_pair``.

    Args:
        input_text: Text typed by the user. ``None`` or whitespace-only text
            is rejected with an error message.
        language_pair: A key of ``LANGUAGE_PAIRS`` selecting the
            ``(source, target)`` NLLB language codes. May be ``None`` when
            nothing has been selected in the dropdown.

    Returns:
        The translated text, or a string starting with ``"[ERROR]"`` when the
        input text is empty or the language pair is missing/unknown.
    """
    # Reject missing or blank text before doing any model work.
    if not input_text or not input_text.strip():
        return "[ERROR] Please enter text."

    # An unselected or unknown pair is reported in the same style as the
    # empty-text case rather than surfacing as a KeyError.
    pair = LANGUAGE_PAIRS.get(language_pair)
    if pair is None:
        return "[ERROR] Please select a language pair."
    src_lang, tgt_lang = pair

    # NLLB does not use a ">>lang<<" text prefix (that is the Marian/OPUS-MT
    # convention). The source language is set on the tokenizer, which adds the
    # matching language token to the encoded input.
    # NOTE(review): this mutates the shared tokenizer; confirm requests are
    # not served concurrently.
    tokenizer.src_lang = src_lang
    inputs = tokenizer(input_text.strip(), return_tensors="pt")

    # The target language is selected by forcing its language token as the
    # first generated token.
    target_token_id = tokenizer.convert_tokens_to_ids(tgt_lang)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            forced_bos_token_id=target_token_id,
            max_length=256,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Gradio interface: one free-text box and one language-pair selector feed
# translate(); its return value is shown in a single output text box.
_text_input = gr.Textbox(label="Input Text", placeholder="Type text here...")
_pair_selector = gr.Dropdown(
    choices=list(LANGUAGE_PAIRS),
    label="Select Language Pair",
)
_translation_output = gr.Textbox(label="Translation")

translator = gr.Interface(
    fn=translate,
    inputs=[_text_input, _pair_selector],
    outputs=_translation_output,
    title="Translademia (Local NLLB Edition)",
    description="Translate between English and South African languages using Meta's NLLB-200 locally.",
)

# Start the web server; share=True is passed through to Gradio unchanged.
translator.launch(share=True)
| # import gradio as gr | |
| # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| # # Load tokenizer and model (this will download ~3.5GB) | |
| # model_name = "facebook/nllb-200-distilled-600M" | |
| # tokenizer = AutoTokenizer.from_pretrained(model_name) | |
| # model = AutoModelForSeq2SeqLM.from_pretrained(model_name) | |
| # # Supported South African languages codes for NLLB | |
| # LANGUAGES = { | |
| # "English β Afrikaans": "afr_Latn", | |
| # "English β Xhosa": "xho_Latn", | |
| # "English β Zulu": "zul_Latn", | |
| # "English β Sesotho": "sot_Latn", | |
| # "English β Tswana": "tsn_Latn", | |
| # "English β Northern Sotho": "nso_Latn", | |
| # "English β Swati": "ssw_Latn", | |
| # "English β Tsonga": "tso_Latn", | |
| # "English β Venda": "ven_Latn", | |
| # } | |
| # def translate(text, lang_label): | |
| # if not text.strip(): | |
| # return "Please enter some text to translate." | |
| # target_lang = LANGUAGES[lang_label] | |
| # # Format input for NLLB: prefix target language token | |
| # input_text = f">>{target_lang}<< {text}" | |
| # inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True) | |
| # outputs = model.generate(**inputs, max_length=512) | |
| # translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True) | |
| # return translated_text | |
| # iface = gr.Interface( | |
| # fn=translate, | |
| # inputs=[ | |
| # gr.Textbox(label="English Text"), | |
| # gr.Dropdown(list(LANGUAGES.keys()), label="Target Language"), | |
| # ], | |
| # outputs="text", | |
| # title="NLLB-200 English to South African Languages", | |
| # description="Translate English text to South African languages using Meta's NLLB-200 model locally.", | |
| # ) | |
| # iface.launch() | |
| # from transformers import MarianMTModel, MarianTokenizer, pipeline | |
| # import gradio as gr | |
| # # Define supported models for South African languages | |
| # language_models = { | |
| # "Afrikaans": "Helsinki-NLP/opus-mt-en-af", | |
| # "Zulu": "Helsinki-NLP/opus-mt-en-zu", | |
| # "Xhosa": "Helsinki-NLP/opus-mt-en-xh", | |
| # "Sesotho": "Helsinki-NLP/opus-mt-en-st", | |
| # "Setswana": "Helsinki-NLP/opus-mt-en-tn", | |
| # } | |
| # # Translation function | |
| # def translate(text, target_language): | |
| # model_name = language_models[target_language] | |
| # tokenizer = MarianTokenizer.from_pretrained(model_name) | |
| # model = MarianMTModel.from_pretrained(model_name) | |
| # # Setup pipeline | |
| # translation_pipeline = pipeline("translation", model=model, tokenizer=tokenizer) | |
| # # Translate | |
| # result = translation_pipeline(text) | |
| # return result[0]["translation_text"] | |
| # # Build Gradio interface | |
| # interface = gr.Interface( | |
| # fn=translate, | |
| # inputs=[ | |
| # gr.Textbox(label="Enter English Text"), | |
| # gr.Dropdown(choices=list(language_models.keys()), label="Translate to"), | |
| # ], | |
| # outputs="text", | |
| # title="African Language Translator", | |
| # description="Translate English text into Afrikaans, Zulu, Xhosa, Sesotho or Setswana", | |
| # ) | |
| # # Launch the app | |
| # interface.launch() | |
| # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| # import gradio as gr | |
| # # Load the tokenizer and model | |
| # model_name = "facebook/nllb-200-distilled-600M" | |
| # tokenizer = AutoTokenizer.from_pretrained(model_name) | |
| # model = AutoModelForSeq2SeqLM.from_pretrained(model_name) | |
| # # Language code map | |
| # lang_map = { | |
| # "English": "eng_Latn", | |
| # "Afrikaans": "afr_Latn", | |
| # "Zulu": "zul_Latn", | |
| # "Xhosa": "xho_Latn", | |
| # "French": "fra_Latn", | |
| # "Spanish": "spa_Latn", | |
| # "Swahili": "swh_Latn", | |
| # } | |
| # # Translation function | |
| # def translate(text, src_lang, tgt_lang): | |
| # src_code = lang_map[src_lang] | |
| # tgt_code = lang_map[tgt_lang] | |
| # tokenizer.src_lang = src_code | |
| # inputs = tokenizer(text, return_tensors="pt", padding=True) | |
| # generated_tokens = model.generate( | |
| # **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_code] | |
| # ) | |
| # translated = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0] | |
| # return translated | |
| # # Gradio interface | |
| # iface = gr.Interface( | |
| # fn=translate, | |
| # inputs=[ | |
| # gr.Textbox(label="Enter text"), | |
| # gr.Dropdown( | |
| # choices=list(lang_map.keys()), label="From Language", value="English" | |
| # ), | |
| # gr.Dropdown( | |
| # choices=list(lang_map.keys()), label="To Language", value="Afrikaans" | |
| # ), | |
| # ], | |
| # outputs="text", | |
| # title="NLLB-200 Custom Language Translator", | |
| # description="Translate text using Facebook's distilled NLLB-200 model with selectable languages.", | |
| # ) | |
| # iface.launch() | |