Rulga committed on
Commit
a19d0ca
·
1 Parent(s): b1627e8

Implement LanguageUtils class for enhanced language detection and response validation

Browse files
Files changed (2) hide show
  1. app.py +166 -150
  2. src/language_utils.py +98 -0
app.py CHANGED
@@ -325,24 +325,22 @@ def load_vector_store():
325
  return None
326
 
327
  def detect_language(text):
328
- """Detect language with detailed logging"""
329
  try:
330
- print("\n=== Language Detection Start ===", flush=True)
331
- print(f"Input text to analyze: '{text}'", flush=True)
332
- detected = detect(text)
333
- print(f"Detected language code: '{detected}'", flush=True)
334
- print("=== Language Detection End ===\n", flush=True)
335
- sys.stdout.flush()
336
- return detected
 
 
 
337
  except Exception as e:
338
- print("\n=== Language Detection Error ===", flush=True)
339
- print(f"Input text: '{text}'", flush=True)
340
- print(f"Error details: {str(e)}", flush=True)
341
- print("Defaulting to 'en'", flush=True)
342
- print("=== Language Detection End ===\n", flush=True)
343
- sys.stdout.flush()
344
  return "en"
345
-
346
  def respond(
347
  message,
348
  history,
@@ -353,176 +351,194 @@ def respond(
353
  top_p,
354
  attempt_fallback=True
355
  ):
356
- """Generate response using the current model with fallback option"""
357
  global fallback_model_attempted
358
 
359
- print("\n=== Response Generation Start ===", flush=True)
360
- sys.stdout.flush()
361
-
362
- # Detect language
363
- user_language = detect_language(message)
364
- print(f"Processing message: '{message}'", flush=True)
365
- print(f"Using detected language: '{user_language}'", flush=True)
366
- sys.stdout.flush()
367
-
368
- # Create stronger language instruction
369
- language_instruction = f"""
370
- CRITICAL: Message language detected as '{user_language}'
371
- YOU MUST RESPOND IN {user_language} LANGUAGE ONLY.
372
- ЗАПРЕЩЕНО ОТВЕЧАТЬ НА ЛЮБОМ ЯЗЫКЕ КРОМЕ ЯЗЫКА ВОПРОСА ({user_language}).
373
- THIS IS THE MOST IMPORTANT RULE.
374
- """
375
-
376
- print(f"Added language instruction for: {user_language}")
377
- print("=== Response Generation Setup Complete ===\n")
378
-
379
- # Create ID for new conversation
 
 
380
  if not conversation_id:
381
  import uuid
382
  conversation_id = str(uuid.uuid4())
 
 
 
 
 
 
 
 
 
383
 
384
- # Add stronger language instruction
385
- language_instruction = f"""
386
- CRITICAL INSTRUCTION: User message language is detected as '{user_language}'.
387
- YOU MUST RESPOND IN {user_language} LANGUAGE ONLY.
388
- ЗАПРЕЩЕНО ОТВЕЧАТЬ НА ЛЮБОМ ЯЗЫКЕ КРОМЕ ЯЗЫКА ВОПРОСА.
389
- THIS IS THE MOST IMPORTANT RULE.
390
-
391
- Original message: {message}
392
- Detected language: {user_language}
393
  """
394
-
395
- enhanced_system_message = language_instruction + system_message
396
-
397
- messages = [{"role": "system", "content": enhanced_system_message}]
398
-
399
- # Get context from knowledge base
400
- context = get_context(message, conversation_id)
401
-
402
- # Convert history from Gradio format to OpenAI format
403
- messages = [{"role": "system", "content": system_message}]
404
- if context:
405
- messages[0]["content"] += f"\n\nContext for response:\n{context}"
406
-
407
- # Debug: print the history format
408
- print("Debug - Processing history format:", history)
409
-
410
- # Convert history to OpenAI format for API call
 
 
 
 
 
 
 
 
 
 
411
  if history:
412
  try:
413
  for entry in history:
414
- # Check if we have messages in the expected format
415
  if isinstance(entry, dict) and 'role' in entry and 'content' in entry:
416
  messages.append(entry)
 
417
  except Exception as e:
418
  print(f"Error processing history: {str(e)}")
419
- # Continue with empty history if there was an error
420
-
421
- # Add current user message
422
- messages.append({"role": "user", "content": message})
423
-
424
- # Debug: print API messages
425
- print("Debug - API messages:", messages)
426
-
427
  try:
428
- # Non-streaming version for debugging
429
- full_response = client.chat_completion(
430
- messages,
 
 
 
 
431
  max_tokens=max_tokens,
432
- stream=False,
433
  temperature=temperature,
434
  top_p=top_p,
 
435
  )
436
 
437
- response = full_response.choices[0].message.content
438
- print(f"Debug - Full response from API: {response}")
 
 
 
 
439
 
440
- # Reset fallback flag on successful API call
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
  fallback_model_attempted = False
442
 
443
- # Return complete response in the new format
444
- final_history = history.copy() if history else []
445
- # Add user message
446
- final_history.append({"role": "user", "content": message})
447
- # Add assistant response
448
- final_history.append({"role": "assistant", "content": response})
449
 
450
- yield final_history, conversation_id
451
-
452
  except Exception as e:
453
- print(f"Debug - Error during API call: {str(e)}")
454
- error_message = str(e)
455
- current_model_key = None
456
-
457
- # Find current model key
458
- for key, model in MODELS.items():
459
- if model["id"] == ACTIVE_MODEL["id"]:
460
- current_model_key = key
461
- break
462
-
463
- # Try fallback model if appropriate
464
- if attempt_fallback and ("402" in error_message or "429" in error_message) and not fallback_model_attempted:
465
- fallback_model_key = get_fallback_model(current_model_key)
466
  if fallback_model_key:
 
467
  fallback_model_attempted = True
468
 
469
- # Log fallback attempt
470
- print(f"Attempting to fallback from {current_model_key} to {fallback_model_key}")
471
- log_api_error(message, error_message, ACTIVE_MODEL["id"], is_fallback=True)
472
-
473
  # Switch model temporarily
474
  original_model = ACTIVE_MODEL.copy()
475
  if switch_to_model(fallback_model_key):
476
- # Try with fallback model (but don't fallback again)
477
- fallback_generator = respond(
478
- message,
479
- history,
480
- conversation_id,
481
- system_message,
482
- max_tokens,
483
- temperature,
484
- top_p,
485
- attempt_fallback=False
486
- )
487
-
488
- yield from fallback_generator
489
-
490
- # Restore original model
491
- ACTIVE_MODEL.update(original_model)
492
- initialize_client(ACTIVE_MODEL["id"])
493
- return
494
-
495
- # Format user-friendly error message
496
- if "402" in error_message and "Payment Required" in error_message:
497
- friendly_error = (
498
- "⚠️ API Error: Free request limit exceeded for this model.\n\n"
499
- "Solutions:\n"
500
- "1. Switch to another model in the 'Model Settings' tab\n"
501
- "2. Use a local model version\n"
502
- "3. Subscribe to Hugging Face PRO for higher limits"
503
- )
504
- elif "401" in error_message and "Unauthorized" in error_message:
505
- friendly_error = (
506
- "⚠️ API Error: Authentication problem. Please check your API key."
507
- )
508
- elif "429" in error_message and "Too Many Requests" in error_message:
509
- friendly_error = (
510
- "⚠️ API Error: Too many requests. Please try again later."
511
- )
512
- else:
513
- friendly_error = f"⚠️ API Error: There was an error accessing the model. Details: {error_message}"
514
-
515
- # Log the error
516
- log_api_error(message, error_message, ACTIVE_MODEL["id"])
517
 
518
  error_history = history.copy() if history else []
519
- # Add user message
520
- error_history.append({"role": "user", "content": message})
521
- # Add error message as assistant response
522
- error_history.append({"role": "assistant", "content": friendly_error})
523
 
524
- yield error_history, conversation_id
525
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
  def log_api_error(user_message, error_message, model_id, is_fallback=False):
527
  """Log API errors to a separate file for monitoring"""
528
  try:
 
325
  return None
326
 
327
def detect_language(text):
    """Detect the language of ``text`` and return a supported language code.

    Falls back to ``"en"`` when the input is not a string, when the stripped
    text is shorter than 10 characters (too short for reliable detection),
    when the detector raises, or when the detected language is not in the
    supported list.

    Args:
        text: User or model text to classify.

    Returns:
        One of the supported language codes, ``"en"`` by default.
    """
    # Kept consistent with the SUPPORTED_LANGUAGES list that respond() checks:
    # previously "es" was accepted there but could never be returned from here.
    supported_langs = ("en", "ru", "uk", "de", "fr", "es")

    # Guard explicitly instead of relying on the broad except below to absorb
    # the AttributeError raised by None.strip().
    if not isinstance(text, str) or len(text.strip()) < 10:
        return "en"  # Default for short or unusable texts

    # Only the detector call can fail here, so keep the try body minimal.
    try:
        lang = detect(text)
    except Exception as e:
        print(f"Language detection error: {str(e)}. Defaulting to English.")
        return "en"

    return lang if lang in supported_langs else "en"
343
+
344
def respond(
    message,
    history,
    conversation_id,
    system_message,
    max_tokens,
    temperature,
    top_p,
    attempt_fallback=True
):
    """Generate a response using the current model with enhanced language handling.

    This function is a generator (it delegates to itself with ``yield from``
    for the fallback attempt), so every result is *yielded* as a
    ``(history, conversation_id)`` tuple. A plain ``return value`` inside a
    generator is never seen by a consumer that iterates it.

    NOTE(review): the diff view elides the four parameters between ``history``
    and ``top_p``; they are reconstructed here from the positional recursive
    call below - confirm against the full file.

    Args:
        message: Current user message.
        history: Previous messages as ``{"role", "content"}`` dicts, or empty.
        conversation_id: Existing conversation id; a new UUID is generated
            when it is falsy.
        system_message: Base system prompt.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.
        attempt_fallback: When True, one retry with a fallback model is
            allowed after an API failure.

    Yields:
        ``(updated_history, conversation_id)`` where the history has the user
        message and either the model answer or a friendly error appended.
    """
    global fallback_model_attempted

    # Local import (same style as uuid below) so the date line works whether
    # the module did `import datetime` or `from datetime import datetime`.
    import datetime as _dt

    # --- Setup and Initial Logging ---
    print("\n" + "="*50)
    print("=== NEW CHAT REQUEST ===")
    print(f"Input message: '{message}'")
    print(f"History length: {len(history) if history else 0}")
    print(f"Conversation ID: {conversation_id or 'New conversation'}")
    print("="*50 + "\n")

    # --- Language Detection with Fallback ---
    try:
        user_language = detect_language(message)
        print(f"Detected language: {user_language}")

        # Validate supported languages
        SUPPORTED_LANGUAGES = ["en", "ru", "uk", "de", "fr", "es"]  # Add more as needed
        if user_language not in SUPPORTED_LANGUAGES:
            user_language = "en"
            print("Unsupported language, defaulting to English")
    except Exception as e:
        user_language = "en"
        print(f"Language detection failed, defaulting to English. Error: {str(e)}")

    # --- Create Conversation ID if missing ---
    if not conversation_id:
        import uuid
        conversation_id = str(uuid.uuid4())
        print(f"Generated new conversation ID: {conversation_id}")

    # --- Enhanced Language Enforcement ---
    language_instruction = f"""
    [CRITICAL INSTRUCTION - MUST FOLLOW]
    - The user's message is in {user_language.upper()} language.
    - You MUST respond in {user_language.upper()} ONLY.
    - Never translate or switch to another language.
    - This is the highest priority rule above all others.

    [USER'S ORIGINAL MESSAGE]
    {message}
    """

    # --- Context Retrieval ---
    context = ""
    try:
        context = get_context(message, conversation_id)
        if context:
            print("Retrieved context from knowledge base")
            print(f"Context preview: {context[:200]}...")
        else:
            print("No context retrieved from knowledge base")
    except Exception as e:
        print(f"Context retrieval error: {str(e)}")

    # --- Prepare Messages for API ---
    messages = [
        {
            "role": "system",
            "content": (
                f"{system_message}\n\n"
                f"Current date: {_dt.datetime.now().strftime('%Y-%m-%d')}\n"
                f"Language requirement: Respond in {user_language} only\n"
                f"{'Additional context:' + context if context else ''}"
            )
        }
    ]

    # Add conversation history (only well-formed role/content dicts)
    if history:
        try:
            for entry in history:
                if isinstance(entry, dict) and 'role' in entry and 'content' in entry:
                    messages.append(entry)
            print(f"Added {len(history)} history messages")
        except Exception as e:
            print(f"Error processing history: {str(e)}")

    # Add current message with language enforcement
    messages.append({
        "role": "user",
        "content": language_instruction
    })

    # --- API Request with Error Handling ---
    error_msg = None
    new_history = None
    try:
        print("\nSending request to model API...")
        print(f"Model: {ACTIVE_MODEL['id']}")
        print(f"Parameters: temp={temperature}, top_p={top_p}, max_tokens={max_tokens}")

        # Non-streaming response for better error handling
        response = client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=False
        )

        # Extract and validate response
        if not response.choices:
            raise ValueError("Empty response from API")

        bot_response = response.choices[0].message.content
        print(f"\nRaw API response: {bot_response}")

        # Verify response language. This is diagnostic only: the reply is not
        # rewritten, so it must not be labelled as "corrected" (that label was
        # shown to the user and stored in history). detect_language() also
        # reports "en" for short replies, which makes mismatches unreliable.
        try:
            response_lang = detect_language(bot_response)
            if response_lang != user_language:
                print(f"WARNING: Response language mismatch! Expected {user_language}, got {response_lang}")
        except Exception as e:
            print(f"Couldn't verify response language: {str(e)}")

        # --- Format Final Output ---
        new_history = history.copy() if history else []
        new_history.extend([
            {"role": "user", "content": message},
            {"role": "assistant", "content": bot_response}
        ])
    except Exception as e:
        error_msg = str(e)
        print(f"\n!!! API ERROR: {error_msg}")

    if error_msg is None:
        # Reset fallback flag on success
        fallback_model_attempted = False

        print("\n=== SUCCESSFUL RESPONSE ===")
        yield new_history, conversation_id
        return

    # --- Fallback Logic ---
    if attempt_fallback and not fallback_model_attempted:
        # get_fallback_model() was called with the MODELS key before this
        # commit, not with the model id, so resolve the key first.
        current_model_key = next(
            (key for key, model in MODELS.items() if model["id"] == ACTIVE_MODEL["id"]),
            None,
        )
        fallback_model_key = get_fallback_model(current_model_key)
        if fallback_model_key:
            print(f"Attempting fallback to {fallback_model_key}")
            fallback_model_attempted = True
            log_api_error(message, error_msg, ACTIVE_MODEL["id"], is_fallback=True)

            # Switch model temporarily
            original_model = ACTIVE_MODEL.copy()
            if switch_to_model(fallback_model_key):
                try:
                    yield from respond(
                        message, history, conversation_id,
                        system_message, max_tokens,
                        temperature, top_p,
                        attempt_fallback=False  # Don't recurse infinitely
                    )
                    return
                except Exception as fallback_e:
                    print(f"Fallback also failed: {str(fallback_e)}")
                finally:
                    # Restore original model even if the fallback raised or
                    # the consumer stopped iterating early.
                    ACTIVE_MODEL.update(original_model)
                    initialize_client(ACTIVE_MODEL['id'])

    # --- Error Response Formatting ---
    friendly_error = format_friendly_error(error_msg)
    print(f"Returning error to user: {friendly_error}")
    log_api_error(message, error_msg, ACTIVE_MODEL["id"])

    error_history = history.copy() if history else []
    error_history.extend([
        {"role": "user", "content": message},
        {"role": "assistant", "content": friendly_error}
    ])

    yield error_history, conversation_id
522
 
523
+
524
def format_friendly_error(api_error):
    """Convert API errors to user-friendly messages.

    The error text is matched by substring: ``"402"`` or
    ``"Payment Required"`` selects the limit message, ``"429"`` the
    rate-limit message and ``"401"`` the authentication message. Anything
    else yields a generic message carrying the first 200 characters of the
    error text.

    Args:
        api_error: Error text, or any object (for example the exception
            itself) whose ``str()`` form describes the failure.

    Returns:
        A user-facing message string.
    """
    # This runs inside an error handler, so it must not raise on non-string
    # input (substring tests and slicing below need a str).
    error_text = api_error if isinstance(api_error, str) else str(api_error)

    if "402" in error_text or "Payment Required" in error_text:
        return ("⚠️ API Limit Reached\n\n"
                "Please try:\n"
                "1. Switching models in Settings\n"
                "2. Using local model version\n"
                "3. Waiting before next request")

    if "429" in error_text:
        return "⚠️ Too many requests. Please wait before sending another message."

    if "401" in error_text:
        return "⚠️ Authentication error. Please check your API key."

    return f"⚠️ Error processing request. Technical details: {error_text[:200]}"
541
+
542
  def log_api_error(user_message, error_message, model_id, is_fallback=False):
543
  """Log API errors to a separate file for monitoring"""
544
  try:
src/language_utils.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/language_utils.py
2
+ from langdetect import detect, DetectorFactory
3
+ from typing import Optional, List
4
+ import logging
5
+
6
+ # For more stable language detection
7
+ DetectorFactory.seed = 0
8
+
9
# Logger setup
logger = logging.getLogger(__name__)


class LanguageUtils:
    """Centralized helpers for language detection, prompting and validation."""

    # Supported languages (can be extended)
    SUPPORTED_LANGUAGES = ["en", "ru", "uk", "de", "fr", "es", "it", "pt"]

    # Stripped texts shorter than this are not passed to the detector.
    MIN_DETECTION_LENGTH = 15

    # Instruction text per language; the user's message is appended to it.
    # Built once here instead of formatting all eight strings on every call.
    _INSTRUCTION_PREFIXES = {
        "en": "CRITICAL: Respond in English only. Never switch languages.\n\nOriginal message: ",
        "ru": "ВАЖНО: Отвечайте только на русском. Не переключайтесь на другие языки.\n\nОригинальное сообщение: ",
        "uk": "ВАЖЛИВО: Відповідайте лише українською. Не змінюйте мову.\n\nОригінальне повідомлення: ",
        "de": "KRITISCH: Antworten Sie nur auf Deutsch. Wechseln Sie nie die Sprache.\n\nOriginalnachricht: ",
        "fr": "CRITIQUE: Répondez uniquement en français. Ne changez jamais de langue.\n\nMessage original: ",
        "es": "CRÍTICO: Responda sólo en español. Nunca cambie de idioma.\n\nMensaje original: ",
        "it": "IMPORTANTE: Rispondere solo in italiano. Non cambiare lingua.\n\nMessaggio originale: ",
        "pt": "CRÍTICO: Responda apenas em português. Nunca mude de idioma.\n\nMensagem original: ",
    }

    @classmethod
    def _detect_raw(cls, text) -> Optional[str]:
        """Return the detector's language code, or None when inconclusive.

        Inconclusive means: ``text`` is not a string, its stripped length is
        below ``MIN_DETECTION_LENGTH``, or the detector raised.
        """
        if not isinstance(text, str):
            logger.warning("Cannot detect language of non-string input: %r", type(text))
            return None

        if len(text.strip()) < cls.MIN_DETECTION_LENGTH:
            logger.warning("Text too short for reliable detection: '%s'", text)
            return None

        try:
            return detect(text)
        except Exception as e:
            logger.error("Language detection failed: %s. Text: '%s...'", e, text[:100])
            return None

    @classmethod
    def detect_language(cls, text: str, default: str = "en") -> str:
        """
        Detects text language with enhanced error handling

        Args:
            text: Text to analyze
            default: Language returned when the text is unusable, too short,
                detection fails, or the detected language is unsupported

        Returns:
            Language code (ISO 639-1)
        """
        lang = cls._detect_raw(text)
        if lang is None:
            return default

        # Check language support
        if lang not in cls.SUPPORTED_LANGUAGES:
            logger.warning("Unsupported language detected: %s. Defaulting to %s", lang, default)
            return default

        logger.debug("Detected language: %s for text: '%s...'", lang, text[:50])
        return lang

    @classmethod
    def get_language_instruction(cls, target_lang: str, user_message: str) -> str:
        """
        Generates strict response language instructions

        Args:
            target_lang: Language the bot should respond in
            user_message: Original user message

        Returns:
            String with prompt instructions; the English instruction is used
            for languages without a dedicated template
        """
        prefix = cls._INSTRUCTION_PREFIXES.get(target_lang, cls._INSTRUCTION_PREFIXES["en"])
        return f"{prefix}{user_message}"

    @classmethod
    def validate_response_language(cls, response: str, expected_lang: str) -> bool:
        """
        Validates if response language matches expected language

        The raw detector result is compared, not the defaulted one: otherwise
        a reply that is too short to classify is silently treated as English,
        which flags short non-English replies as mismatches and lets
        unsupported languages pass whenever English is expected.

        Args:
            response: Bot's response
            expected_lang: Expected language (ISO 639-1)

        Returns:
            False when a different language was positively detected; True
            when the language matches or could not be determined
        """
        detected_lang = cls._detect_raw(response)
        if detected_lang is None:
            # No evidence of a mismatch, so do not report one.
            logger.debug("Response language could not be determined; not flagging a mismatch")
            return True

        if detected_lang != expected_lang:
            logger.warning("Language mismatch! Expected %s, got %s", expected_lang, detected_lang)
            return False
        return True


# Create instance for convenient import
language_processor = LanguageUtils()