Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -293,15 +293,17 @@ async def transcribe_voice_with_openai(file_path: str) -> str:
|
|
| 293 |
system_prompt = """
|
| 294 |
You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
|
| 295 |
|
| 296 |
-
|
| 297 |
|
| 298 |
CONTEXT: Users can speak product names, menu selections, numbers, and general queries in English or Urdu ONLY.
|
| 299 |
|
| 300 |
-
LANGUAGE
|
| 301 |
-
- ONLY English (en) or Urdu (ur)
|
| 302 |
-
-
|
| 303 |
-
-
|
| 304 |
-
-
|
|
|
|
|
|
|
| 305 |
|
| 306 |
PRODUCT NAMES (Veterinary Products):
|
| 307 |
- Hydropex (electrolyte supplement)
|
|
@@ -3418,34 +3420,20 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
|
|
| 3418 |
logger.info(f"[Voice] Applied corrections: '{transcribed_text}' -> '{corrected_text}'")
|
| 3419 |
transcribed_text = corrected_text
|
| 3420 |
|
| 3421 |
-
|
| 3422 |
detected_lang = 'en' # Default to English
|
| 3423 |
try:
|
| 3424 |
detected_lang = detect(transcribed_text)
|
| 3425 |
logger.info(f"[Voice] Raw detected language: {detected_lang}")
|
| 3426 |
|
| 3427 |
-
#
|
| 3428 |
-
#
|
| 3429 |
-
|
| 3430 |
-
|
| 3431 |
-
|
| 3432 |
-
|
| 3433 |
-
|
| 3434 |
-
|
| 3435 |
-
'pa': 'ur', # Punjabi (treat as Urdu)
|
| 3436 |
-
'id': 'ur', # Indonesian (treat as Urdu)
|
| 3437 |
-
'ms': 'ur', # Malay (treat as Urdu)
|
| 3438 |
-
'tr': 'ur', # Turkish (treat as Urdu)
|
| 3439 |
-
'de': 'en', # German -> English
|
| 3440 |
-
'fr': 'en', # French -> English
|
| 3441 |
-
'es': 'en', # Spanish -> English
|
| 3442 |
-
'it': 'en', # Italian -> English
|
| 3443 |
-
'pt': 'en', # Portuguese -> English
|
| 3444 |
-
'ru': 'en', # Russian -> English
|
| 3445 |
-
'ja': 'en', # Japanese -> English
|
| 3446 |
-
'ko': 'en', # Korean -> English
|
| 3447 |
-
'zh': 'en', # Chinese -> English
|
| 3448 |
-
}
|
| 3449 |
|
| 3450 |
# Check if text contains Urdu/Arabic characters or Islamic greetings
|
| 3451 |
urdu_arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
|
|
@@ -3456,16 +3444,10 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
|
|
| 3456 |
|
| 3457 |
if has_urdu_chars or has_islamic_greeting:
|
| 3458 |
detected_lang = 'ur'
|
|
|
|
| 3459 |
logger.info(f"[Voice] Overriding language detection to Urdu due to Arabic/Urdu characters or Islamic greeting")
|
| 3460 |
|
| 3461 |
-
|
| 3462 |
-
reply_language = lang_mapping.get(detected_lang, 'en')
|
| 3463 |
-
logger.info(f"[Voice] Language '{detected_lang}' FORCED to: {reply_language}")
|
| 3464 |
-
|
| 3465 |
-
# Additional safety check - if still not English or Urdu, force to English
|
| 3466 |
-
if reply_language not in ['en', 'ur']:
|
| 3467 |
-
logger.warning(f"[Voice] Language '{reply_language}' not in allowed list, forcing to English")
|
| 3468 |
-
reply_language = 'en'
|
| 3469 |
|
| 3470 |
except Exception as e:
|
| 3471 |
logger.warning(f"[Voice] Language detection failed: {e}, defaulting to English")
|
|
|
|
| 293 |
system_prompt = """
|
| 294 |
You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
|
| 295 |
|
| 296 |
+
CRITICAL LANGUAGE RESTRICTION: ONLY ENGLISH OR URDU - NOTHING ELSE
|
| 297 |
|
| 298 |
CONTEXT: Users can speak product names, menu selections, numbers, and general queries in English or Urdu ONLY.
|
| 299 |
|
| 300 |
+
STRICT LANGUAGE RULES:
|
| 301 |
+
- ONLY transcribe English (en) or Urdu (ur) speech
|
| 302 |
+
- REJECT all other languages completely
|
| 303 |
+
- If you hear any language other than English or Urdu, transcribe as English
|
| 304 |
+
- Never transcribe German, French, Spanish, Italian, Portuguese, Russian, Chinese, Japanese, Korean, or any other language
|
| 305 |
+
- Always default to English if uncertain
|
| 306 |
+
- This is a veterinary assistant - users will speak in English or Urdu only
|
| 307 |
|
| 308 |
PRODUCT NAMES (Veterinary Products):
|
| 309 |
- Hydropex (electrolyte supplement)
|
|
|
|
| 3420 |
logger.info(f"[Voice] Applied corrections: '{transcribed_text}' -> '{corrected_text}'")
|
| 3421 |
transcribed_text = corrected_text
|
| 3422 |
|
| 3423 |
+
# Detect language of transcribed text - STRICTLY ENGLISH OR URDU ONLY
|
| 3424 |
detected_lang = 'en' # Default to English
|
| 3425 |
try:
|
| 3426 |
detected_lang = detect(transcribed_text)
|
| 3427 |
logger.info(f"[Voice] Raw detected language: {detected_lang}")
|
| 3428 |
|
| 3429 |
+
# STRICTLY ENGLISH OR URDU ONLY - NO OTHER LANGUAGES
|
| 3430 |
+
# Accept English and Urdu as detected; any other detected language falls back to English
|
| 3431 |
+
if detected_lang in ['en', 'ur']:
|
| 3432 |
+
reply_language = detected_lang
|
| 3433 |
+
else:
|
| 3434 |
+
# Force any other language to English
|
| 3435 |
+
reply_language = 'en'
|
| 3436 |
+
logger.warning(f"[Voice] Detected language '{detected_lang}' is not English or Urdu, forcing to English")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3437 |
|
| 3438 |
# Check if text contains Urdu/Arabic characters or Islamic greetings
|
| 3439 |
urdu_arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
|
|
|
|
| 3444 |
|
| 3445 |
if has_urdu_chars or has_islamic_greeting:
|
| 3446 |
detected_lang = 'ur'
|
| 3447 |
+
reply_language = 'ur'
|
| 3448 |
logger.info(f"[Voice] Overriding language detection to Urdu due to Arabic/Urdu characters or Islamic greeting")
|
| 3449 |
|
| 3450 |
+
logger.info(f"[Voice] Final language set to: {reply_language}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3451 |
|
| 3452 |
except Exception as e:
|
| 3453 |
logger.warning(f"[Voice] Language detection failed: {e}, defaulting to English")
|