# NOTE: removed non-Python residue (Hugging Face Spaces page header, commit
# hashes, and a line-number gutter) accidentally pasted above the source —
# it would raise a SyntaxError on import.
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import (
MBartForConditionalGeneration, MBart50TokenizerFast, pipeline,
MarianMTModel, MarianTokenizer
)
import torch
app = Flask(__name__)
CORS(app)  # allow cross-origin requests so browser front-ends can call this API
# Language-identification pipeline: returns a two-letter label (e.g. "ar", "en")
# that drives which translation model is used below.
lang_detector = pipeline("text-classification", model="papluca/xlm-roberta-base-language-detection")
# Maps the detector's two-letter labels to the MBART-50 codes expected by the
# tokenizer's `src_lang`; labels missing from this map fall back to English.
LANG_CODE_MAP = {
    "ar": "ar_AR", "en": "en_XX", "es": "es_XX", "fr": "fr_XX",
    "de": "de_DE", "it": "it_IT", "ru": "ru_RU", "zh": "zh_CN",
    "ja": "ja_XX", "ko": "ko_KR", "tr": "tr_TR", "pt": "pt_XX",
    # Add more if you like
}
# General many-to-English MBART model, used for every non-Arabic source language.
mbart_model_name = "facebook/mbart-large-50-many-to-one-mmt"
mbart_tokenizer = MBart50TokenizerFast.from_pretrained(mbart_model_name)
mbart_model = MBartForConditionalGeneration.from_pretrained(mbart_model_name)
# Dedicated Arabic-to-English model, preferred over MBART for Arabic input.
helsinki_model_name = "Helsinki-NLP/opus-mt-ar-en"
helsinki_tokenizer = MarianTokenizer.from_pretrained(helsinki_model_name)
helsinki_model = MarianMTModel.from_pretrained(helsinki_model_name)
def detect_language(text: str):
    """Classify *text* and return its two-letter language label.

    Falls back to "en" when detection raises or when the detected
    language is not present in LANG_CODE_MAP.
    """
    try:
        label = lang_detector(text)[0]["label"]
    except Exception:
        return "en"  # fallback
    if label not in LANG_CODE_MAP:
        return "en"
    return label
def translate_to_english(text: str):
    """Translate *text* into English.

    Arabic input is routed through the dedicated Helsinki model; all
    other languages go through MBART-50. Any failure is reported as an
    error string (never raised), so the HTTP layer always has a value
    to return to the client.
    """
    source = detect_language(text)
    print(f"Detected language: {source}")
    try:
        if source == "ar":
            # ✨ Dedicated Arabic→English route.
            inputs = helsinki_tokenizer(text, return_tensors="pt", padding=True)
            output_ids = helsinki_model.generate(**inputs)
            return helsinki_tokenizer.decode(output_ids[0], skip_special_tokens=True)
        # ✨ MBART route: tell the tokenizer the source language, then force
        # English as the generation target.
        mbart_tokenizer.src_lang = LANG_CODE_MAP.get(source, "en_XX")
        inputs = mbart_tokenizer(text, return_tensors="pt")
        output_ids = mbart_model.generate(
            **inputs,
            forced_bos_token_id=mbart_tokenizer.lang_code_to_id["en_XX"],
        )
        return mbart_tokenizer.decode(output_ids[0], skip_special_tokens=True)
    except Exception as e:
        return f"❌ Translation Error: {str(e)}"
@app.route("/")
def home():
    """Health-check endpoint describing how to call the service."""
    payload = {
        "status": "Translation service is running!",
        "usage": "POST /translate with JSON {'text': 'your text here'}",
    }
    return jsonify(payload)
@app.route("/translate", methods=["POST"])
def translate_text():
    """Translate the posted text to English.

    Expects a JSON body of the form {"text": "..."}. Returns
    {"success": True, "translatedText": ...} on success, or a 400 with
    {"success": False, "error": ...} when the body is missing/malformed.
    """
    # silent=True: a malformed or non-JSON body yields None instead of
    # Flask raising/auto-responding, so the explicit 400 envelope below
    # is what clients actually receive for bad input.
    data = request.get_json(silent=True)
    if not data or 'text' not in data:
        return jsonify({"success": False, "error": "No text provided"}), 400
    translated = translate_to_english(data["text"])
    return jsonify({"success": True, "translatedText": translated})
if __name__ == "__main__":
    # Bind to all interfaces so the server is reachable from outside the
    # container. NOTE(review): port 7860 matches the Hugging Face Spaces
    # convention — confirm against the deployment target.
    app.run(host="0.0.0.0", port=7860)