Spaces:
Running
Running
Add translation functionality and enhance language detection; improve error handling and logging
Browse files
app.py
CHANGED
|
@@ -180,26 +180,49 @@ def get_context(message, conversation_id):
|
|
| 180 |
logger.error(f"Error getting context: {str(e)}")
|
| 181 |
return ""
|
| 182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
def post_process_response(user_message, bot_response):
|
| 184 |
-
"""Check if the response language matches the user's language and
|
| 185 |
try:
|
| 186 |
-
# Detect languages
|
| 187 |
user_lang = detect_language(user_message)
|
| 188 |
bot_lang = detect_language(bot_response)
|
| 189 |
|
| 190 |
-
logger.debug(f"User language: {user_lang}, Bot response language: {bot_lang}")
|
| 191 |
-
|
| 192 |
-
# If languages don't match and response is long enough to detect
|
| 193 |
if user_lang != bot_lang and len(bot_response.strip()) > 20:
|
| 194 |
logger.warning(f"Language mismatch detected! User: {user_lang}, Bot: {bot_lang}")
|
| 195 |
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
return bot_response
|
|
|
|
| 201 |
except Exception as e:
|
| 202 |
-
logger.error(f"
|
| 203 |
return bot_response
|
| 204 |
|
| 205 |
def load_vector_store():
|
|
@@ -228,40 +251,14 @@ def load_vector_store():
|
|
| 228 |
logger.error(traceback.format_exc())
|
| 229 |
return None
|
| 230 |
|
| 231 |
-
def detect_language(text):
|
| 232 |
-
"""Detect language with fallback
|
| 233 |
try:
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
# Minimum text length for reliable detection - reduced to 5 characters
|
| 238 |
-
if len(cleaned_text) < 5:
|
| 239 |
-
logger.debug(f"Text too short for reliable detection: '{cleaned_text}'")
|
| 240 |
-
try:
|
| 241 |
-
return detect(cleaned_text)
|
| 242 |
-
except:
|
| 243 |
-
return "en"
|
| 244 |
-
|
| 245 |
-
lang = detect(cleaned_text)
|
| 246 |
-
|
| 247 |
-
# Expand supported languages list
|
| 248 |
-
supported_langs = [
|
| 249 |
-
# European languages
|
| 250 |
-
"en", "ru", "uk", "de", "fr", "es", "it", "pt", "nl", "pl", "cs", "sk", "hu",
|
| 251 |
-
# Nordic/Baltic
|
| 252 |
-
"sv", "no", "da", "lt", "lv", "et", "fi",
|
| 253 |
-
# Asian languages
|
| 254 |
-
"zh", "ja", "ko", "th", "vi",
|
| 255 |
-
# Middle Eastern
|
| 256 |
-
"ar", "fa", "he", "tr"
|
| 257 |
-
]
|
| 258 |
-
|
| 259 |
-
# Log detection result
|
| 260 |
-
if lang not in supported_langs:
|
| 261 |
-
logger.warning(f"Detected uncommon language: {lang} for text: '{cleaned_text[:50]}...'")
|
| 262 |
|
| 263 |
-
|
| 264 |
-
return lang
|
| 265 |
|
| 266 |
except Exception as e:
|
| 267 |
logger.error(f"Language detection error: {str(e)} for text: '{text[:50]}...'")
|
|
|
|
| 180 |
logger.error(f"Error getting context: {str(e)}")
|
| 181 |
return ""
|
| 182 |
|
| 183 |
+
def translate_with_llm(text: str, target_lang: str) -> str:
    """Translate ``text`` into ``target_lang`` using the active LLM.

    This is a best-effort helper: it never raises. Whenever a usable
    translation cannot be produced, the original ``text`` is returned
    unchanged so the caller always has something to show.

    Args:
        text: The text to translate.
        target_lang: Target language, interpolated verbatim into the prompt
            (callers in this file pass the code returned by
            ``detect_language``).

    Returns:
        The stripped model output, or ``text`` itself when the input is
        blank, the request fails, or the model returns no content.
    """
    # Nothing to translate: skip the LLM round-trip entirely.
    if not text or not text.strip():
        return text

    try:
        prompt = f"Translate this text to {target_lang}:\n\n{text}"

        response = client.chat_completion(
            messages=[
                {"role": "user", "content": prompt}
            ],
            max_tokens=ACTIVE_MODEL['parameters']['max_length'],
            # Low temperature keeps the translation close to the source text.
            temperature=0.3,
            top_p=0.9,
            stream=False
        )

        # ``content`` may be missing/None; treat that like an empty answer
        # instead of relying on an AttributeError to reach the fallback.
        translated = (response.choices[0].message.content or "").strip()
        if not translated:
            logger.error("Translation failed: model returned an empty response")
            return text

        return translated

    except Exception as e:
        # Broad on purpose: any client/network/shape error must degrade to
        # returning the untranslated text rather than breaking the reply.
        logger.error(f"Translation failed: {e}")
        return text
|
| 203 |
+
|
| 204 |
def post_process_response(user_message, bot_response):
    """Return ``bot_response``, translated into the user's language if needed.

    The languages of ``user_message`` and ``bot_response`` are detected with
    ``detect_language``. When they differ, the response is translated with
    ``translate_with_llm`` and the translation is accepted only if it is
    itself detected as the user's language.

    ``detect_language`` in this file falls back to ``"en"`` for text shorter
    than 5 characters, so such text carries no real language signal. To avoid
    acting on that fallback:

    * no translation is attempted when the user message is that short
      (otherwise a short non-English message would force a correct reply
      into English);
    * a translation that is that short is rejected (otherwise an empty or
      truncated translation would "verify" as English and replace the reply).

    Args:
        user_message: The text the user sent.
        bot_response: The text generated for the user.

    Returns:
        The verified translation, or the original ``bot_response`` when no
        translation is needed, possible, or trustworthy. Never raises.
    """
    # Mirrors the short-text cutoff used by detect_language.
    min_detectable_chars = 5

    try:
        # Cheap guards first: nothing below can change the outcome for a
        # short response or an undetectable user message.
        if len(bot_response.strip()) <= 20:
            return bot_response
        if len(user_message.strip()) < min_detectable_chars:
            return bot_response

        user_lang = detect_language(user_message)
        bot_lang = detect_language(bot_response)

        if user_lang == bot_lang:
            return bot_response

        logger.warning(f"Language mismatch detected! User: {user_lang}, Bot: {bot_lang}")

        translated_response = translate_with_llm(bot_response, user_lang)
        translated_lang = detect_language(translated_response)

        # Require enough text for the detection above to be meaningful.
        translation_is_detectable = (
            len(translated_response.strip()) >= min_detectable_chars
        )

        if translation_is_detectable and translated_lang == user_lang:
            logger.info(f"Response automatically translated from {bot_lang} to {user_lang}")
            return translated_response

        logger.error(f"Translation failed: got {translated_lang} instead of {user_lang}")
        return bot_response

    except Exception as e:
        # Post-processing is optional polish; fall back to the raw response.
        logger.error(f"Post-processing error: {e}")
        return bot_response
|
| 227 |
|
| 228 |
def load_vector_store():
|
|
|
|
| 251 |
logger.error(traceback.format_exc())
|
| 252 |
return None
|
| 253 |
|
| 254 |
+
def detect_language(text: str) -> str:
|
| 255 |
+
"""Detect language with fallback"""
|
| 256 |
try:
|
| 257 |
+
if len(text.strip()) < 5:
|
| 258 |
+
logger.debug(f"Text too short for reliable detection: '{text}'")
|
| 259 |
+
return "en"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
+
return detect(text.strip())
|
|
|
|
| 262 |
|
| 263 |
except Exception as e:
|
| 264 |
logger.error(f"Language detection error: {str(e)} for text: '{text[:50]}...'")
|