Rulga committed on
Commit
82cef5a
·
1 Parent(s): 7d60219

Add translation functionality and enhance language detection; improve error handling and logging

Browse files
Files changed (1) hide show
  1. app.py +39 -42
app.py CHANGED
@@ -180,26 +180,49 @@ def get_context(message, conversation_id):
180
  logger.error(f"Error getting context: {str(e)}")
181
  return ""
182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  def post_process_response(user_message, bot_response):
184
- """Check if the response language matches the user's language and fix if needed"""
185
  try:
186
- # Detect languages
187
  user_lang = detect_language(user_message)
188
  bot_lang = detect_language(bot_response)
189
 
190
- logger.debug(f"User language: {user_lang}, Bot response language: {bot_lang}")
191
-
192
- # If languages don't match and response is long enough to detect
193
  if user_lang != bot_lang and len(bot_response.strip()) > 20:
194
  logger.warning(f"Language mismatch detected! User: {user_lang}, Bot: {bot_lang}")
195
 
196
- # Add language mismatch warning
197
- warning = f"⚠️ [Language mismatch detected. Response should be in {user_lang}]\n\n"
198
- return warning + bot_response
199
-
 
 
 
 
 
200
  return bot_response
 
201
  except Exception as e:
202
- logger.error(f"Error in post_process_response: {str(e)}")
203
  return bot_response
204
 
205
  def load_vector_store():
@@ -228,40 +251,14 @@ def load_vector_store():
228
  logger.error(traceback.format_exc())
229
  return None
230
 
231
- def detect_language(text):
232
- """Detect language with fallback and enhanced logging"""
233
  try:
234
- # Strip text before checking length
235
- cleaned_text = text.strip()
236
-
237
- # Minimum text length for reliable detection - reduced to 5 characters
238
- if len(cleaned_text) < 5:
239
- logger.debug(f"Text too short for reliable detection: '{cleaned_text}'")
240
- try:
241
- return detect(cleaned_text)
242
- except:
243
- return "en"
244
-
245
- lang = detect(cleaned_text)
246
-
247
- # Expand supported languages list
248
- supported_langs = [
249
- # European languages
250
- "en", "ru", "uk", "de", "fr", "es", "it", "pt", "nl", "pl", "cs", "sk", "hu",
251
- # Nordic/Baltic
252
- "sv", "no", "da", "lt", "lv", "et", "fi",
253
- # Asian languages
254
- "zh", "ja", "ko", "th", "vi",
255
- # Middle Eastern
256
- "ar", "fa", "he", "tr"
257
- ]
258
-
259
- # Log detection result
260
- if lang not in supported_langs:
261
- logger.warning(f"Detected uncommon language: {lang} for text: '{cleaned_text[:50]}...'")
262
 
263
- # Return detected language even if not in supported list
264
- return lang
265
 
266
  except Exception as e:
267
  logger.error(f"Language detection error: {str(e)} for text: '{text[:50]}...'")
 
180
  logger.error(f"Error getting context: {str(e)}")
181
  return ""
182
 
183
+ def translate_with_llm(text: str, target_lang: str) -> str:
184
+ """Translate text using the active LLM"""
185
+ try:
186
+ prompt = f"Translate this text to {target_lang}:\n\n{text}"
187
+
188
+ response = client.chat_completion(
189
+ messages=[
190
+ {"role": "user", "content": prompt}
191
+ ],
192
+ max_tokens=ACTIVE_MODEL['parameters']['max_length'],
193
+ temperature=0.3,
194
+ top_p=0.9,
195
+ stream=False
196
+ )
197
+
198
+ return response.choices[0].message.content.strip()
199
+
200
+ except Exception as e:
201
+ logger.error(f"Translation failed: {e}")
202
+ return text
203
+
204
  def post_process_response(user_message, bot_response):
205
+ """Check if the response language matches the user's language and translate if needed"""
206
  try:
 
207
  user_lang = detect_language(user_message)
208
  bot_lang = detect_language(bot_response)
209
 
 
 
 
210
  if user_lang != bot_lang and len(bot_response.strip()) > 20:
211
  logger.warning(f"Language mismatch detected! User: {user_lang}, Bot: {bot_lang}")
212
 
213
+ translated_response = translate_with_llm(bot_response, user_lang)
214
+ translated_lang = detect_language(translated_response)
215
+
216
+ if translated_lang == user_lang:
217
+ logger.info(f"Response automatically translated from {bot_lang} to {user_lang}")
218
+ return translated_response
219
+ else:
220
+ logger.error(f"Translation failed: got {translated_lang} instead of {user_lang}")
221
+
222
  return bot_response
223
+
224
  except Exception as e:
225
+ logger.error(f"Post-processing error: {e}")
226
  return bot_response
227
 
228
  def load_vector_store():
 
251
  logger.error(traceback.format_exc())
252
  return None
253
 
254
+ def detect_language(text: str) -> str:
255
+ """Detect language with fallback"""
256
  try:
257
+ if len(text.strip()) < 5:
258
+ logger.debug(f"Text too short for reliable detection: '{text}'")
259
+ return "en"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
 
261
+ return detect(text.strip())
 
262
 
263
  except Exception as e:
264
  logger.error(f"Language detection error: {str(e)} for text: '{text[:50]}...'")