import gradio as gr
from transformers import MarianMTModel, MarianTokenizer, AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load the AI language-detection model
lang_detect_tokenizer = AutoTokenizer.from_pretrained("papluca/xlm-roberta-base-language-detection")
lang_detect_model = AutoModelForSequenceClassification.from_pretrained("papluca/xlm-roberta-base-language-detection")
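
# Note: this checkpoint's id2label entries are expected to be ISO 639-1 codes
# (e.g. "en", "fr"), which is why detect_language_ai() below can return the
# predicted label directly as a source-language code. If a different detection
# model were swapped in, its label format would need to be checked first.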

# Mapping: ISO code → full language name
LANG_NAMES = {
    "fr": "French",
    "en": "English",
    "es": "Spanish",
    "de": "German",
    "it": "Italian",
    "pt": "Portuguese",
    "nl": "Dutch",
    "ru": "Russian",
    "ja": "Japanese",
    "zh": "Chinese"
}

# MarianMT model names for every language pair (in both directions)
LANG_MODELS = {}
for src in LANG_NAMES.keys():
    for tgt in LANG_NAMES.keys():
        if src != tgt:
            LANG_MODELS[(src, tgt)] = f"Helsinki-NLP/opus-mt-{src}-{tgt}"
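
# Note: not every (src, tgt) combination built here has a published opus-mt
# checkpoint on the Hugging Face Hub, so get_model() below treats a failed
# download as an unsupported pair rather than assuming all generated names exist.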

# Cache of already-loaded translation models
model_cache = {}


def detect_language_ai(text):
    """Detect the text's language with the AI model."""
    # truncation=True limits the input to the model's maximum length (512 tokens)
    inputs = lang_detect_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = lang_detect_model(**inputs).logits
    predicted_id = torch.argmax(logits, dim=1).item()
    label = lang_detect_model.config.id2label[predicted_id]
    return label

def get_model(src, tgt):
    """Load (or retrieve from cache) the MarianMT model for a language pair."""
    if (src, tgt) not in LANG_MODELS:
        return None, None
    model_name = LANG_MODELS[(src, tgt)]
    if model_name not in model_cache:
        try:
            tokenizer = MarianTokenizer.from_pretrained(model_name)
            model = MarianMTModel.from_pretrained(model_name)
            model_cache[model_name] = (tokenizer, model)
        except Exception:
            # The checkpoint for this pair does not exist or could not be downloaded
            return None, None
    return model_cache.get(model_name, (None, None))

def translate(text, target_lang_name):
    if not text or not text.strip():
        return "Please enter some text to translate."
    # Find the ISO code of the target language
    target_lang = [code for code, name in LANG_NAMES.items() if name == target_lang_name][0]
    # Detect the source language with the AI model
    source_lang = detect_language_ai(text)
    if source_lang not in LANG_NAMES:
        return f"Source language '{source_lang}' is not supported."
    if source_lang == target_lang:
        return "The source and target languages are identical."
    # Load the translation model
    tokenizer, model = get_model(source_lang, target_lang)
    if tokenizer is None or model is None:
        return f"Translation {LANG_NAMES[source_lang]} → {LANG_NAMES[target_lang]} is not supported."
    # Translate
    batch = tokenizer([text], return_tensors="pt", padding=True, truncation=True)
    gen = model.generate(**batch)
    translated = tokenizer.batch_decode(gen, skip_special_tokens=True)[0]
    return translated

# Gradio interface
iface = gr.Interface(
    fn=translate,
    inputs=[
        gr.Textbox(lines=3, placeholder="Type your text here..."),
        gr.Dropdown(list(LANG_NAMES.values()), label="Target language")
    ],
    outputs="text",
    title="MyTranslator 🌍",
    description="Multi-language translator with automatic AI language detection."
)

iface.launch()
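
# Rough sketch of a requirements.txt for this Space (an assumption, not a tested pin set):
#   gradio
#   transformers
#   torch
#   sentencepiece   # needed by MarianTokenizer
#   sacremoses      # recommended for Marian model tokenization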