Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -293,7 +293,15 @@ async def transcribe_voice_with_openai(file_path: str) -> str:
|
|
| 293 |
system_prompt = """
|
| 294 |
You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
|
| 295 |
|
| 296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
|
| 298 |
PRODUCT NAMES (Veterinary Products):
|
| 299 |
- Hydropex (electrolyte supplement)
|
|
@@ -339,13 +347,15 @@ English: search, browse, download, catalog, contact, availability, main menu, op
|
|
| 339 |
Urdu: تلاش, براؤز, ڈاؤن لوڈ, کیٹلاگ, رابطہ, دستیابی, مین مینو, آپشن, نمبر, اختیار
|
| 340 |
|
| 341 |
TRANSCRIPTION RULES:
|
| 342 |
-
1.
|
| 343 |
-
2.
|
| 344 |
-
3.
|
| 345 |
-
4.
|
| 346 |
-
5.
|
| 347 |
-
6.
|
| 348 |
-
7.
|
|
|
|
|
|
|
| 349 |
|
| 350 |
EXAMPLES:
|
| 351 |
- "hydropex" -> "hydropex"
|
|
@@ -3408,13 +3418,14 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
|
|
| 3408 |
logger.info(f"[Voice] Applied corrections: '{transcribed_text}' -> '{corrected_text}'")
|
| 3409 |
transcribed_text = corrected_text
|
| 3410 |
|
| 3411 |
-
# Detect language of transcribed text
|
| 3412 |
detected_lang = 'en' # Default to English
|
| 3413 |
try:
|
| 3414 |
detected_lang = detect(transcribed_text)
|
| 3415 |
-
logger.info(f"[Voice]
|
| 3416 |
|
| 3417 |
-
#
|
|
|
|
| 3418 |
lang_mapping = {
|
| 3419 |
'ur': 'ur', # Urdu
|
| 3420 |
'ar': 'ur', # Arabic (treat as Urdu for Islamic greetings)
|
|
@@ -3422,9 +3433,18 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
|
|
| 3422 |
'hi': 'ur', # Hindi (treat as Urdu)
|
| 3423 |
'bn': 'ur', # Bengali (treat as Urdu)
|
| 3424 |
'pa': 'ur', # Punjabi (treat as Urdu)
|
| 3425 |
-
'id': 'ur', # Indonesian (
|
| 3426 |
-
'ms': 'ur', # Malay (
|
| 3427 |
-
'tr': 'ur', # Turkish (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3428 |
}
|
| 3429 |
|
| 3430 |
# Check if text contains Urdu/Arabic characters or Islamic greetings
|
|
@@ -3438,15 +3458,17 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
|
|
| 3438 |
detected_lang = 'ur'
|
| 3439 |
logger.info(f"[Voice] Overriding language detection to Urdu due to Arabic/Urdu characters or Islamic greeting")
|
| 3440 |
|
|
|
|
| 3441 |
reply_language = lang_mapping.get(detected_lang, 'en')
|
| 3442 |
-
logger.info(f"[Voice] Language '{detected_lang}'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3443 |
|
| 3444 |
except Exception as e:
|
| 3445 |
-
logger.warning(f"[Voice] Language detection failed: {e}")
|
| 3446 |
-
reply_language = 'en'
|
| 3447 |
-
|
| 3448 |
-
if reply_language not in ['en', 'ur']:
|
| 3449 |
-
logger.info(f"[Voice] Language '{reply_language}' not supported, defaulting to English")
|
| 3450 |
reply_language = 'en'
|
| 3451 |
|
| 3452 |
# For Urdu voice notes, translate to English for processing
|
|
|
|
| 293 |
system_prompt = """
|
| 294 |
You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
|
| 295 |
|
| 296 |
+
IMPORTANT: ONLY TRANSCRIBE ENGLISH OR URDU SPEECH. IGNORE ALL OTHER LANGUAGES.
|
| 297 |
+
|
| 298 |
+
CONTEXT: Users can speak product names, menu selections, numbers, and general queries in English or Urdu ONLY.
|
| 299 |
+
|
| 300 |
+
LANGUAGE RESTRICTION:
|
| 301 |
+
- ONLY English (en) or Urdu (ur) are allowed
|
| 302 |
+
- If you detect any other language, force it to English
|
| 303 |
+
- Never transcribe in German, French, Spanish, Italian, or any other language
|
| 304 |
+
- Always assume English or Urdu speech patterns
|
| 305 |
|
| 306 |
PRODUCT NAMES (Veterinary Products):
|
| 307 |
- Hydropex (electrolyte supplement)
|
|
|
|
| 347 |
Urdu: تلاش, براؤز, ڈاؤن لوڈ, کیٹلاگ, رابطہ, دستیابی, مین مینو, آپشن, نمبر, اختیار
|
| 348 |
|
| 349 |
TRANSCRIPTION RULES:
|
| 350 |
+
1. ONLY transcribe English or Urdu speech
|
| 351 |
+
2. Transcribe product names exactly as listed above
|
| 352 |
+
3. Convert spoken numbers to digits (1, 2, 3, etc.)
|
| 353 |
+
4. Handle both English and Urdu speech
|
| 354 |
+
5. Preserve exact spelling for product names
|
| 355 |
+
6. Convert menu selections to numbers
|
| 356 |
+
7. Handle common transcription errors (opium->option, numara->number)
|
| 357 |
+
8. Maintain context for veterinary domain
|
| 358 |
+
9. If unsure about language, default to English
|
| 359 |
|
| 360 |
EXAMPLES:
|
| 361 |
- "hydropex" -> "hydropex"
|
|
|
|
| 3418 |
logger.info(f"[Voice] Applied corrections: '{transcribed_text}' -> '{corrected_text}'")
|
| 3419 |
transcribed_text = corrected_text
|
| 3420 |
|
| 3421 |
+
# Detect language of transcribed text - FORCE ENGLISH OR URDU ONLY
|
| 3422 |
detected_lang = 'en' # Default to English
|
| 3423 |
try:
|
| 3424 |
detected_lang = detect(transcribed_text)
|
| 3425 |
+
logger.info(f"[Voice] Raw detected language: {detected_lang}")
|
| 3426 |
|
| 3427 |
+
# FORCE LANGUAGE TO ENGLISH OR URDU ONLY
|
| 3428 |
+
# Map all languages to either English or Urdu
|
| 3429 |
lang_mapping = {
|
| 3430 |
'ur': 'ur', # Urdu
|
| 3431 |
'ar': 'ur', # Arabic (treat as Urdu for Islamic greetings)
|
|
|
|
| 3433 |
'hi': 'ur', # Hindi (treat as Urdu)
|
| 3434 |
'bn': 'ur', # Bengali (treat as Urdu)
|
| 3435 |
'pa': 'ur', # Punjabi (treat as Urdu)
|
| 3436 |
+
'id': 'ur', # Indonesian (treat as Urdu)
|
| 3437 |
+
'ms': 'ur', # Malay (treat as Urdu)
|
| 3438 |
+
'tr': 'ur', # Turkish (treat as Urdu)
|
| 3439 |
+
'de': 'en', # German -> English
|
| 3440 |
+
'fr': 'en', # French -> English
|
| 3441 |
+
'es': 'en', # Spanish -> English
|
| 3442 |
+
'it': 'en', # Italian -> English
|
| 3443 |
+
'pt': 'en', # Portuguese -> English
|
| 3444 |
+
'ru': 'en', # Russian -> English
|
| 3445 |
+
'ja': 'en', # Japanese -> English
|
| 3446 |
+
'ko': 'en', # Korean -> English
|
| 3447 |
+
'zh': 'en', # Chinese -> English
|
| 3448 |
}
|
| 3449 |
|
| 3450 |
# Check if text contains Urdu/Arabic characters or Islamic greetings
|
|
|
|
| 3458 |
detected_lang = 'ur'
|
| 3459 |
logger.info(f"[Voice] Overriding language detection to Urdu due to Arabic/Urdu characters or Islamic greeting")
|
| 3460 |
|
| 3461 |
+
# Force language to English or Urdu only
|
| 3462 |
reply_language = lang_mapping.get(detected_lang, 'en')
|
| 3463 |
+
logger.info(f"[Voice] Language '{detected_lang}' FORCED to: {reply_language}")
|
| 3464 |
+
|
| 3465 |
+
# Additional safety check - if still not English or Urdu, force to English
|
| 3466 |
+
if reply_language not in ['en', 'ur']:
|
| 3467 |
+
logger.warning(f"[Voice] Language '{reply_language}' not in allowed list, forcing to English")
|
| 3468 |
+
reply_language = 'en'
|
| 3469 |
|
| 3470 |
except Exception as e:
|
| 3471 |
+
logger.warning(f"[Voice] Language detection failed: {e}, defaulting to English")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3472 |
reply_language = 'en'
|
| 3473 |
|
| 3474 |
# For Urdu voice notes, translate to English for processing
|