Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM | |
# Load the multilingual M2M100 checkpoint once at import time. The tokenizer
# and model are built from the same checkpoint name and then wrapped in a
# translation pipeline that the request handler reuses on every call.
model_name = "facebook/m2m100_418M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
translator = pipeline(
    task="translation",
    model=model,
    tokenizer=tokenizer,
)
# Display name -> M2M100 language code, in alphabetical order by display name.
#
# Only codes that belong to M2M100's language set are listed. Assamese (as),
# Basque (eu), Kurdish (ku), Kyrgyz (ky), Maltese (mt), Tajik (tg),
# Telugu (te) and Turkmen (tk) are deliberately absent: the model does not
# cover them, so offering them in the UI would fail when the tokenizer looks
# up the language token.
#
# "Filipino" and "Tagalog" intentionally share the code "tl".
languages = {
    "Afrikaans": "af",
    "Albanian": "sq",
    "Amharic": "am",
    "Arabic": "ar",
    "Armenian": "hy",
    "Azerbaijani": "az",
    "Belarusian": "be",
    "Bengali": "bn",
    "Bosnian": "bs",
    "Bulgarian": "bg",
    "Burmese": "my",
    "Catalan": "ca",
    "Chinese": "zh",
    "Croatian": "hr",
    "Czech": "cs",
    "Danish": "da",
    "Dutch": "nl",
    "English": "en",
    "Estonian": "et",
    "Filipino": "tl",
    "Finnish": "fi",
    "French": "fr",
    "Galician": "gl",
    "Georgian": "ka",
    "German": "de",
    "Greek": "el",
    "Gujarati": "gu",
    "Hausa": "ha",
    "Hebrew": "he",
    "Hindi": "hi",
    "Hungarian": "hu",
    "Icelandic": "is",
    "Indonesian": "id",
    "Irish": "ga",
    "Italian": "it",
    "Japanese": "ja",
    "Javanese": "jv",
    "Kannada": "kn",
    "Kazakh": "kk",
    "Khmer": "km",
    "Korean": "ko",
    "Lao": "lo",
    "Latvian": "lv",
    "Lithuanian": "lt",
    "Macedonian": "mk",
    "Malay": "ms",
    "Malayalam": "ml",
    "Marathi": "mr",
    "Mongolian": "mn",
    "Nepali": "ne",
    "Norwegian": "no",
    "Odia": "or",
    "Pashto": "ps",
    "Persian": "fa",
    "Polish": "pl",
    "Portuguese": "pt",
    "Punjabi": "pa",
    "Romanian": "ro",
    "Russian": "ru",
    "Serbian": "sr",
    "Sinhala": "si",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Somali": "so",
    "Spanish": "es",
    "Sundanese": "su",
    "Swahili": "sw",
    "Swedish": "sv",
    "Tagalog": "tl",
    "Tamil": "ta",
    "Thai": "th",
    "Turkish": "tr",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Uzbek": "uz",
    "Vietnamese": "vi",
    "Welsh": "cy",
    "Xhosa": "xh",
    "Yiddish": "yi",
    "Yoruba": "yo",
    "Zulu": "zu",
}
def translate_text(text, src_lang, tgt_lang):
    """Translate *text* between two languages selected by display name.

    Args:
        text: The text to translate.
        src_lang: Display name of the source language; a key of ``languages``.
        tgt_lang: Display name of the target language; a key of ``languages``.

    Returns:
        The translated string, or ``""`` when *text* is empty, ``None`` or
        contains only whitespace.

    Raises:
        KeyError: If *src_lang* or *tgt_lang* is not a key of ``languages``.
    """
    # Nothing to translate: skip the model call entirely instead of letting
    # the model generate output for an empty prompt.
    if not text or not text.strip():
        return ""

    # Resolve both codes up front so an unknown name fails before the shared
    # tokenizer is touched.
    src_code = languages[src_lang]
    tgt_code = languages[tgt_lang]

    tokenizer.src_lang = src_code
    # max_length is forwarded to the pipeline and bounds the generated output.
    result = translator(
        text,
        src_lang=src_code,
        tgt_lang=tgt_code,
        max_length=200,
    )
    return result[0]["translation_text"]
# Build the Gradio interface: two language pickers side by side, an input
# box, an output box, and a button that runs translate_text.
# NOTE(review): the original indentation was lost; the two dropdowns are
# assumed to be the only children of the Row -- confirm against the app.
with gr.Blocks() as demo:
    gr.Markdown("## Language Translation Tool")

    with gr.Row():
        src = gr.Dropdown(
            choices=list(languages),
            label="Source Language",
            value="English",
        )
        tgt = gr.Dropdown(
            choices=list(languages),
            label="Target Language",
            value="French",
        )

    input_text = gr.Textbox(
        lines=5,
        placeholder="Enter text here...",
        label="Input Text",
    )
    output_text = gr.Textbox(lines=5, label="Translated Text")

    translate_btn = gr.Button("Translate")
    # Argument order in `inputs` must match translate_text's signature.
    translate_btn.click(
        fn=translate_text,
        inputs=[input_text, src, tgt],
        outputs=output_text,
    )

demo.launch()