DreamStream-1 committed on
Commit
34d3fc3
·
verified ·
1 Parent(s): 0d55cad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -20
app.py CHANGED
@@ -293,7 +293,15 @@ async def transcribe_voice_with_openai(file_path: str) -> str:
293
  system_prompt = """
294
  You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
295
 
296
- CONTEXT: Users can speak product names, menu selections, numbers, and general queries in English or Urdu.
 
 
 
 
 
 
 
 
297
 
298
  PRODUCT NAMES (Veterinary Products):
299
  - Hydropex (electrolyte supplement)
@@ -339,13 +347,15 @@ English: search, browse, download, catalog, contact, availability, main menu, op
339
  Urdu: تلاش, براؤز, ڈاؤن لوڈ, کیٹلاگ, رابطہ, دستیابی, مین مینو, آپشن, نمبر, اختیار
340
 
341
  TRANSCRIPTION RULES:
342
- 1. Transcribe product names exactly as listed above
343
- 2. Convert spoken numbers to digits (1, 2, 3, etc.)
344
- 3. Handle both English and Urdu speech
345
- 4. Preserve exact spelling for product names
346
- 5. Convert menu selections to numbers
347
- 6. Handle common transcription errors (opium->option, numara->number)
348
- 7. Maintain context for veterinary domain
 
 
349
 
350
  EXAMPLES:
351
  - "hydropex" -> "hydropex"
@@ -3408,13 +3418,14 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
3408
  logger.info(f"[Voice] Applied corrections: '{transcribed_text}' -> '{corrected_text}'")
3409
  transcribed_text = corrected_text
3410
 
3411
- # Detect language of transcribed text
3412
  detected_lang = 'en' # Default to English
3413
  try:
3414
  detected_lang = detect(transcribed_text)
3415
- logger.info(f"[Voice] Detected language: {detected_lang}")
3416
 
3417
- # Map language codes to supported languages
 
3418
  lang_mapping = {
3419
  'ur': 'ur', # Urdu
3420
  'ar': 'ur', # Arabic (treat as Urdu for Islamic greetings)
@@ -3422,9 +3433,18 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
3422
  'hi': 'ur', # Hindi (treat as Urdu)
3423
  'bn': 'ur', # Bengali (treat as Urdu)
3424
  'pa': 'ur', # Punjabi (treat as Urdu)
3425
- 'id': 'ur', # Indonesian (often misdetected for Urdu/Arabic)
3426
- 'ms': 'ur', # Malay (often misdetected for Urdu/Arabic)
3427
- 'tr': 'ur', # Turkish (often misdetected for Urdu/Arabic)
 
 
 
 
 
 
 
 
 
3428
  }
3429
 
3430
  # Check if text contains Urdu/Arabic characters or Islamic greetings
@@ -3438,15 +3458,17 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
3438
  detected_lang = 'ur'
3439
  logger.info(f"[Voice] Overriding language detection to Urdu due to Arabic/Urdu characters or Islamic greeting")
3440
 
 
3441
  reply_language = lang_mapping.get(detected_lang, 'en')
3442
- logger.info(f"[Voice] Language '{detected_lang}' mapped to: {reply_language}")
 
 
 
 
 
3443
 
3444
  except Exception as e:
3445
- logger.warning(f"[Voice] Language detection failed: {e}")
3446
- reply_language = 'en'
3447
-
3448
- if reply_language not in ['en', 'ur']:
3449
- logger.info(f"[Voice] Language '{reply_language}' not supported, defaulting to English")
3450
  reply_language = 'en'
3451
 
3452
  # For Urdu voice notes, translate to English for processing
 
293
  system_prompt = """
294
  You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
295
 
296
+ IMPORTANT: ONLY TRANSCRIBE ENGLISH OR URDU SPEECH. IGNORE ALL OTHER LANGUAGES.
297
+
298
+ CONTEXT: Users can speak product names, menu selections, numbers, and general queries in English or Urdu ONLY.
299
+
300
+ LANGUAGE RESTRICTION:
301
+ - ONLY English (en) or Urdu (ur) are allowed
302
+ - If you detect any other language, force it to English
303
+ - Never transcribe in German, French, Spanish, Italian, or any other language
304
+ - Always assume English or Urdu speech patterns
305
 
306
  PRODUCT NAMES (Veterinary Products):
307
  - Hydropex (electrolyte supplement)
 
347
  Urdu: تلاش, براؤز, ڈاؤن لوڈ, کیٹلاگ, رابطہ, دستیابی, مین مینو, آپشن, نمبر, اختیار
348
 
349
  TRANSCRIPTION RULES:
350
+ 1. ONLY transcribe English or Urdu speech
351
+ 2. Transcribe product names exactly as listed above
352
+ 3. Convert spoken numbers to digits (1, 2, 3, etc.)
353
+ 4. Handle both English and Urdu speech
354
+ 5. Preserve exact spelling for product names
355
+ 6. Convert menu selections to numbers
356
+ 7. Handle common transcription errors (opium->option, numara->number)
357
+ 8. Maintain context for veterinary domain
358
+ 9. If unsure about language, default to English
359
 
360
  EXAMPLES:
361
  - "hydropex" -> "hydropex"
 
3418
  logger.info(f"[Voice] Applied corrections: '{transcribed_text}' -> '{corrected_text}'")
3419
  transcribed_text = corrected_text
3420
 
3421
+ # Detect language of transcribed text - FORCE ENGLISH OR URDU ONLY
3422
  detected_lang = 'en' # Default to English
3423
  try:
3424
  detected_lang = detect(transcribed_text)
3425
+ logger.info(f"[Voice] Raw detected language: {detected_lang}")
3426
 
3427
+ # FORCE LANGUAGE TO ENGLISH OR URDU ONLY
3428
+ # Map all languages to either English or Urdu
3429
  lang_mapping = {
3430
  'ur': 'ur', # Urdu
3431
  'ar': 'ur', # Arabic (treat as Urdu for Islamic greetings)
 
3433
  'hi': 'ur', # Hindi (treat as Urdu)
3434
  'bn': 'ur', # Bengali (treat as Urdu)
3435
  'pa': 'ur', # Punjabi (treat as Urdu)
3436
+ 'id': 'ur', # Indonesian (treat as Urdu)
3437
+ 'ms': 'ur', # Malay (treat as Urdu)
3438
+ 'tr': 'ur', # Turkish (treat as Urdu)
3439
+ 'de': 'en', # German -> English
3440
+ 'fr': 'en', # French -> English
3441
+ 'es': 'en', # Spanish -> English
3442
+ 'it': 'en', # Italian -> English
3443
+ 'pt': 'en', # Portuguese -> English
3444
+ 'ru': 'en', # Russian -> English
3445
+ 'ja': 'en', # Japanese -> English
3446
+ 'ko': 'en', # Korean -> English
3447
+ 'zh': 'en', # Chinese -> English
3448
  }
3449
 
3450
  # Check if text contains Urdu/Arabic characters or Islamic greetings
 
3458
  detected_lang = 'ur'
3459
  logger.info(f"[Voice] Overriding language detection to Urdu due to Arabic/Urdu characters or Islamic greeting")
3460
 
3461
+ # Force language to English or Urdu only
3462
  reply_language = lang_mapping.get(detected_lang, 'en')
3463
+ logger.info(f"[Voice] Language '{detected_lang}' FORCED to: {reply_language}")
3464
+
3465
+ # Additional safety check - if still not English or Urdu, force to English
3466
+ if reply_language not in ['en', 'ur']:
3467
+ logger.warning(f"[Voice] Language '{reply_language}' not in allowed list, forcing to English")
3468
+ reply_language = 'en'
3469
 
3470
  except Exception as e:
3471
+ logger.warning(f"[Voice] Language detection failed: {e}, defaulting to English")
 
 
 
 
3472
  reply_language = 'en'
3473
 
3474
  # For Urdu voice notes, translate to English for processing