# NOTE(review): the three lines here ("Spaces: Running Running") were
# Hugging Face Spaces page chrome captured during extraction, not code.
"""Flask translation service.

Detects the source language of incoming text and translates it to English,
using a Helsinki-NLP Marian model for Arabic and MBART-50 for everything else.
"""
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import (
    MBartForConditionalGeneration, MBart50TokenizerFast, pipeline,
    MarianMTModel, MarianTokenizer
)
import torch

app = Flask(__name__)
CORS(app)  # allow cross-origin requests from browser front-ends

# 🧠 Load language detection model (XLM-RoBERTa fine-tuned for language ID).
lang_detector = pipeline("text-classification", model="papluca/xlm-roberta-base-language-detection")
| # 🌐 Supported languages mapping for MBART | |
# 🌐 Supported languages: ISO 639-1 code (as emitted by the detector)
# -> MBART-50 language token expected by the tokenizer's `src_lang`.
LANG_CODE_MAP = {
    "ar": "ar_AR", "en": "en_XX", "es": "es_XX", "fr": "fr_XX",
    "de": "de_DE", "it": "it_IT", "ru": "ru_RU", "zh": "zh_CN",
    "ja": "ja_XX", "ko": "ko_KR", "tr": "tr_TR", "pt": "pt_XX",
    # add more if you like
}
# Load MBART model (general case: many source languages -> English).
mbart_model_name = "facebook/mbart-large-50-many-to-one-mmt"
mbart_tokenizer = MBart50TokenizerFast.from_pretrained(mbart_model_name)
mbart_model = MBartForConditionalGeneration.from_pretrained(mbart_model_name)

# Load Arabic-specific translation model (usually better ar->en quality
# than the general MBART checkpoint).
helsinki_model_name = "Helsinki-NLP/opus-mt-ar-en"
helsinki_tokenizer = MarianTokenizer.from_pretrained(helsinki_model_name)
helsinki_model = MarianMTModel.from_pretrained(helsinki_model_name)
def detect_language(text: str) -> str:
    """Return the ISO 639-1 code of *text*'s language, or "en" as a fallback.

    Only languages present in LANG_CODE_MAP are returned; anything else
    (including detector failures) falls back to English so downstream
    translation never receives an unsupported code.
    """
    try:
        result = lang_detector(text)[0]
        lang = result["label"]
        return lang if lang in LANG_CODE_MAP else "en"
    except Exception:
        # Best-effort: a detector crash must not take the request down.
        return "en"  # fallback
def translate_to_english(text: str) -> str:
    """Translate *text* to English.

    Arabic goes through the dedicated Helsinki-NLP Marian model; every other
    detected language goes through MBART-50 many-to-one. On failure, returns
    an error string rather than raising (callers embed the result in JSON).
    """
    src_lang = detect_language(text)
    print(f"Detected language: {src_lang}")
    try:
        # Inference only — no_grad avoids gradient bookkeeping, same output.
        with torch.no_grad():
            # ✨ Use Helsinki model for Arabic
            if src_lang == "ar":
                encoded = helsinki_tokenizer(text, return_tensors="pt", padding=True)
                translated = helsinki_model.generate(**encoded)
                return helsinki_tokenizer.decode(translated[0], skip_special_tokens=True)

            # ✨ Use MBART for all other languages
            mbart_tokenizer.src_lang = LANG_CODE_MAP.get(src_lang, "en_XX")
            encoded = mbart_tokenizer(text, return_tensors="pt")
            generated_tokens = mbart_model.generate(
                **encoded,
                # Force English as the target language token.
                forced_bos_token_id=mbart_tokenizer.lang_code_to_id["en_XX"]
            )
            return mbart_tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
    except Exception as e:
        return f"❌ Translation Error: {str(e)}"
@app.route("/")
def home():
    """Health/usage endpoint. The route decorator appears to have been lost
    in extraction — the usage message below documents POST /translate, so
    this was clearly meant to be served at the root URL."""
    return jsonify({"status": "Translation service is running!", "usage": "POST /translate with JSON {'text': 'your text here'}"})
@app.route("/translate", methods=["POST"])
def translate_text():
    """POST /translate — body: {"text": "..."}.

    Returns {"success": true, "translatedText": ...} or a 400 error when no
    text is supplied. The route decorator appears to have been lost in
    extraction; home()'s usage string names this exact endpoint.
    """
    data = request.get_json()
    if not data or 'text' not in data:
        return jsonify({"success": False, "error": "No text provided"}), 400
    translated = translate_to_english(data["text"])
    return jsonify({"success": True, "translatedText": translated})
if __name__ == "__main__":
    # 7860 is the conventional Hugging Face Spaces port; bind all interfaces.
    app.run(host="0.0.0.0", port=7860)