Spaces:
Sleeping
Sleeping
techaryahs committed on
Commit ·
c8b2ed7
1
Parent(s): a5ff103
Add custom dictionary for accurate UI term translations
Browse files- app.py +50 -3
- translations_dict.json +153 -0
app.py
CHANGED
|
@@ -8,6 +8,7 @@ from flask_cors import CORS
|
|
| 8 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
| 9 |
from pathlib import Path
|
| 10 |
import os
|
|
|
|
| 11 |
|
| 12 |
# Initialize Flask app
|
| 13 |
app = Flask(__name__)
|
|
@@ -16,6 +17,25 @@ CORS(app) # Enable CORS for all routes
|
|
| 16 |
# Global variables for model
|
| 17 |
translator = None
|
| 18 |
MODEL_DIR = Path("models/en-mr-marianmt")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
def load_model():
|
| 21 |
"""Load the MarianMT translation model"""
|
|
@@ -39,6 +59,32 @@ def load_model():
|
|
| 39 |
print(f"✗ Failed to load model: {e}")
|
| 40 |
raise
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
@app.route('/')
|
| 43 |
def home():
|
| 44 |
"""Home endpoint with API information"""
|
|
@@ -122,9 +168,10 @@ def translate():
|
|
| 122 |
if source == target:
|
| 123 |
return jsonify({"error": "Source and target languages must be different"}), 400
|
| 124 |
|
| 125 |
-
# Load model if not loaded
|
| 126 |
if translator is None:
|
| 127 |
load_model()
|
|
|
|
| 128 |
|
| 129 |
# Handle batch translation
|
| 130 |
is_batch = isinstance(text, list)
|
|
@@ -137,8 +184,7 @@ def translate():
|
|
| 137 |
translations.append("")
|
| 138 |
continue
|
| 139 |
|
| 140 |
-
|
| 141 |
-
translated = result[0]['translation_text'] if result else t
|
| 142 |
translations.append(translated)
|
| 143 |
|
| 144 |
# Return response
|
|
@@ -170,6 +216,7 @@ if __name__ == '__main__':
|
|
| 170 |
print("=" * 60)
|
| 171 |
|
| 172 |
try:
|
|
|
|
| 173 |
load_model()
|
| 174 |
print("\nStarting server...")
|
| 175 |
|
|
|
|
| 8 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
| 9 |
from pathlib import Path
|
| 10 |
import os
|
| 11 |
+
import json
|
| 12 |
|
| 13 |
# Initialize Flask app
|
| 14 |
app = Flask(__name__)
|
|
|
|
| 17 |
# Global variables for model
|
| 18 |
translator = None
|
| 19 |
MODEL_DIR = Path("models/en-mr-marianmt")
|
| 20 |
+
# Cache for the custom dictionary; None means "not loaded yet".
translations_dict = None


def load_translations_dict():
    """Load and cache the custom translations dictionary.

    On the first call, reads ``translations_dict.json`` (resolved relative to
    the current working directory) and stores the result in the module-level
    ``translations_dict``. Later calls return the cached object without
    touching the disk.

    If the file is missing, is not valid JSON, or does not contain a JSON
    object, an empty dictionary is used so translation falls back to the
    model only.

    Returns:
        dict: Always contains the keys ``"en_to_mr"`` and ``"mr_to_en"``,
        each mapping to a dict (possibly empty).
    """
    global translations_dict

    if translations_dict is not None:
        return translations_dict

    dict_path = Path("translations_dict.json")
    loaded = None

    if dict_path.exists():
        try:
            with open(dict_path, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except json.JSONDecodeError as e:
            # A broken file degrades to model-only, like a missing file.
            print(f"⚠ Translations dictionary is not valid JSON ({e}), using model only")
        else:
            if isinstance(loaded, dict):
                print("✓ Translations dictionary loaded")
            else:
                print("⚠ Translations dictionary has unexpected format, using model only")
                loaded = None
    else:
        print("⚠ Translations dictionary not found, using model only")

    if loaded is None:
        loaded = {}

    # Guarantee both direction tables exist so lookups never raise KeyError.
    for section in ("en_to_mr", "mr_to_en"):
        if not isinstance(loaded.get(section), dict):
            loaded[section] = {}

    translations_dict = loaded
    return translations_dict
|
| 39 |
|
| 40 |
def load_model():
|
| 41 |
"""Load the MarianMT translation model"""
|
|
|
|
| 59 |
print(f"✗ Failed to load model: {e}")
|
| 60 |
raise
|
| 61 |
|
| 62 |
+
def translate_with_dict(text, source, target):
    """Translate ``text``, preferring the custom dictionary over the model.

    Args:
        text: The string to translate.
        source: Source language code; ``"en"`` and ``"mr"`` have dictionary
            tables.
        target: Target language code; ``"en"`` and ``"mr"`` have dictionary
            tables.

    Returns:
        tuple: ``(translation, used_dict)`` where ``used_dict`` is True when
        the translation came from the dictionary and False when it came from
        the model.
    """
    dict_data = load_translations_dict()

    # Normalize text for dictionary lookup. English keys are matched
    # case-insensitively; Marathi keys are matched as written. Both ignore
    # surrounding whitespace so padded UI strings still hit the dictionary.
    if source == "en" and target == "mr":
        table = dict_data.get("en_to_mr", {})
        key = text.lower().strip()
    elif source == "mr" and target == "en":
        table = dict_data.get("mr_to_en", {})
        key = text.strip()
    else:
        table = {}
        key = None

    # Check dictionary first
    if key in table:
        return table[key], True

    # Fallback to model
    if translator is None:
        load_model()

    # NOTE(review): the same `translator` pipeline is called for every
    # source/target pair - confirm the loaded model supports the requested
    # direction.
    result = translator(text, max_length=512)
    translated = result[0]['translation_text'] if result else text
    return translated, False
|
| 87 |
+
|
| 88 |
@app.route('/')
|
| 89 |
def home():
|
| 90 |
"""Home endpoint with API information"""
|
|
|
|
| 168 |
if source == target:
|
| 169 |
return jsonify({"error": "Source and target languages must be different"}), 400
|
| 170 |
|
| 171 |
+
# Load model and dictionary if not loaded
|
| 172 |
if translator is None:
|
| 173 |
load_model()
|
| 174 |
+
load_translations_dict()
|
| 175 |
|
| 176 |
# Handle batch translation
|
| 177 |
is_batch = isinstance(text, list)
|
|
|
|
| 184 |
translations.append("")
|
| 185 |
continue
|
| 186 |
|
| 187 |
+
translated, from_dict = translate_with_dict(t, source, target)
|
|
|
|
| 188 |
translations.append(translated)
|
| 189 |
|
| 190 |
# Return response
|
|
|
|
| 216 |
print("=" * 60)
|
| 217 |
|
| 218 |
try:
|
| 219 |
+
load_translations_dict()
|
| 220 |
load_model()
|
| 221 |
print("\nStarting server...")
|
| 222 |
|
translations_dict.json
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"en_to_mr": {
|
| 3 |
+
"home": "घर",
|
| 4 |
+
"profile": "प्रोफाइल",
|
| 5 |
+
"settings": "सेटिंग्स",
|
| 6 |
+
"events": "कार्यक्रम",
|
| 7 |
+
"driver": "चालक",
|
| 8 |
+
"host": "यजमान",
|
| 9 |
+
"login": "लॉगिन",
|
| 10 |
+
"logout": "लॉगआउट",
|
| 11 |
+
"welcome": "स्वागत",
|
| 12 |
+
"search": "शोधा",
|
| 13 |
+
"save": "जतन करा",
|
| 14 |
+
"cancel": "रद्द करा",
|
| 15 |
+
"delete": "हटवा",
|
| 16 |
+
"edit": "संपादित करा",
|
| 17 |
+
"notifications": "सूचना",
|
| 18 |
+
"password": "पासवर्ड",
|
| 19 |
+
"email": "ईमेल",
|
| 20 |
+
"username": "वापरकर्तानाव",
|
| 21 |
+
"dashboard": "डॅशबोर्ड",
|
| 22 |
+
"menu": "मेनू",
|
| 23 |
+
"help": "मदत",
|
| 24 |
+
"about": "बद्दल",
|
| 25 |
+
"contact": "संपर्क",
|
| 26 |
+
"submit": "सबमिट करा",
|
| 27 |
+
"back": "मागे",
|
| 28 |
+
"next": "पुढे",
|
| 29 |
+
"previous": "मागील",
|
| 30 |
+
"continue": "सुरू ठेवा",
|
| 31 |
+
"confirm": "पुष्टी करा",
|
| 32 |
+
"close": "बंद करा",
|
| 33 |
+
"open": "उघडा",
|
| 34 |
+
"new": "नवीन",
|
| 35 |
+
"create": "तयार करा",
|
| 36 |
+
"update": "अपडेट करा",
|
| 37 |
+
"upload": "अपलोड करा",
|
| 38 |
+
"download": "डाउनलोड करा",
|
| 39 |
+
"share": "शेअर करा",
|
| 40 |
+
"send": "पाठवा",
|
| 41 |
+
"receive": "प्राप्त करा",
|
| 42 |
+
"accept": "स्वीकारा",
|
| 43 |
+
"reject": "नाकारा",
|
| 44 |
+
"approve": "मंजूर करा",
|
| 45 |
+
"decline": "नकार द्या",
|
| 46 |
+
"yes": "होय",
|
| 47 |
+
"no": "नाही",
|
| 48 |
+
"ok": "ठीक आहे",
|
| 49 |
+
"error": "त्रुटी",
|
| 50 |
+
"success": "यशस्वी",
|
| 51 |
+
"warning": "चेतावणी",
|
| 52 |
+
"info": "माहिती",
|
| 53 |
+
"loading": "लोड होत आहे",
|
| 54 |
+
"please wait": "कृपया प्रतीक्षा करा",
|
| 55 |
+
"try again": "पुन्हा प्रयत्न करा",
|
| 56 |
+
"refresh": "रिफ्रेश करा",
|
| 57 |
+
"reload": "पुन्हा लोड करा",
|
| 58 |
+
"view": "पहा",
|
| 59 |
+
"details": "तपशील",
|
| 60 |
+
"description": "वर्णन",
|
| 61 |
+
"name": "नाव",
|
| 62 |
+
"date": "तारीख",
|
| 63 |
+
"time": "वेळ",
|
| 64 |
+
"location": "स्थान",
|
| 65 |
+
"address": "पत्ता",
|
| 66 |
+
"phone": "फोन",
|
| 67 |
+
"message": "संदेश",
|
| 68 |
+
"comment": "टिप्पणी",
|
| 69 |
+
"reply": "उत्तर द्या",
|
| 70 |
+
"like": "आवडले",
|
| 71 |
+
"dislike": "आवडले नाही",
|
| 72 |
+
"favorite": "आवडते",
|
| 73 |
+
"bookmark": "बुकमार्क",
|
| 74 |
+
"follow": "फॉलो करा",
|
| 75 |
+
"unfollow": "अनफॉलो करा",
|
| 76 |
+
"block": "ब्लॉक करा",
|
| 77 |
+
"report": "तक्रार करा",
|
| 78 |
+
"filter": "फिल्टर",
|
| 79 |
+
"sort": "क्रमवारी लावा",
|
| 80 |
+
"category": "श्रेणी",
|
| 81 |
+
"tag": "टॅग",
|
| 82 |
+
"status": "स्थिती",
|
| 83 |
+
"active": "सक्रिय",
|
| 84 |
+
"inactive": "निष्क्रिय",
|
| 85 |
+
"online": "ऑनलाइन",
|
| 86 |
+
"offline": "ऑफलाइन",
|
| 87 |
+
"available": "उपलब्ध",
|
| 88 |
+
"unavailable": "अनुपलब्ध",
|
| 89 |
+
"busy": "व्यस्त",
|
| 90 |
+
"away": "दूर",
|
| 91 |
+
"account": "खाते",
|
| 92 |
+
"user": "वापरकर्ता",
|
| 93 |
+
"admin": "प्रशासक",
|
| 94 |
+
"guest": "अतिथी",
|
| 95 |
+
"member": "सदस्य",
|
| 96 |
+
"premium": "प्रीमियम",
|
| 97 |
+
"free": "मोफत",
|
| 98 |
+
"paid": "सशुल्क",
|
| 99 |
+
"subscription": "सदस्यता",
|
| 100 |
+
"plan": "योजना",
|
| 101 |
+
"price": "किंमत",
|
| 102 |
+
"total": "एकूण",
|
| 103 |
+
"subtotal": "उपएकूण",
|
| 104 |
+
"discount": "सवलत",
|
| 105 |
+
"tax": "कर",
|
| 106 |
+
"payment": "पेमेंट",
|
| 107 |
+
"checkout": "चेकआउट",
|
| 108 |
+
"cart": "कार्ट",
|
| 109 |
+
"wishlist": "इच्छासूची",
|
| 110 |
+
"order": "ऑर्डर",
|
| 111 |
+
"shipping": "शिपिंग",
|
| 112 |
+
"delivery": "डिलिव्हरी",
|
| 113 |
+
"track": "ट्रॅक करा",
|
| 114 |
+
"invoice": "बीजक",
|
| 115 |
+
"receipt": "पावती",
|
| 116 |
+
"refund": "परतावा",
|
| 117 |
+
"return": "परत करा",
|
| 118 |
+
"exchange": "बदला",
|
| 119 |
+
"support": "समर्थन",
|
| 120 |
+
"feedback": "अभिप्राय",
|
| 121 |
+
"rating": "रेटिंग",
|
| 122 |
+
"review": "पुनरावलोकन",
|
| 123 |
+
"terms": "अटी",
|
| 124 |
+
"privacy": "गोपनीयता",
|
| 125 |
+
"policy": "धोरण",
|
| 126 |
+
"legal": "कायदेशीर",
|
| 127 |
+
"copyright": "कॉपीराइट",
|
| 128 |
+
"version": "आवृत्ती",
|
| 129 |
+
"language": "भाषा",
|
| 130 |
+
"theme": "थीम",
|
| 131 |
+
"dark mode": "डार्क मोड",
|
| 132 |
+
"light mode": "लाइट मोड"
|
| 133 |
+
},
|
| 134 |
+
"mr_to_en": {
|
| 135 |
+
"घर": "home",
|
| 136 |
+
"प्रोफाइल": "profile",
|
| 137 |
+
"सेटिंग्स": "settings",
|
| 138 |
+
"कार्यक्रम": "events",
|
| 139 |
+
"चालक": "driver",
|
| 140 |
+
"यजमान": "host",
|
| 141 |
+
"लॉगिन": "login",
|
| 142 |
+
"लॉगआउट": "logout",
|
| 143 |
+
"स्वागत": "welcome",
|
| 144 |
+
"शोधा": "search",
|
| 145 |
+
"जतन करा": "save",
|
| 146 |
+
"रद्द करा": "cancel",
|
| 147 |
+
"हटवा": "delete",
|
| 148 |
+
"संपादित करा": "edit",
|
| 149 |
+
"सूचना": "notifications",
|
| 150 |
+
"पासवर्ड": "password",
|
| 151 |
+
"ईमेल": "email"
|
| 152 |
+
}
|
| 153 |
+
}
|