Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -293,81 +293,48 @@ async def transcribe_voice_with_openai(file_path: str) -> str:
|
|
| 293 |
system_prompt = """
|
| 294 |
You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
|
| 295 |
|
| 296 |
-
CRITICAL
|
| 297 |
-
|
| 298 |
-
CONTEXT: Users can speak product names, menu selections, numbers, and general queries in English or Urdu ONLY.
|
| 299 |
-
|
| 300 |
-
STRICT LANGUAGE RULES:
|
| 301 |
-
- ONLY transcribe English (en) or Urdu (ur) speech
|
| 302 |
-
- REJECT all other languages completely
|
| 303 |
-
- If you hear any language other than English or Urdu, transcribe as English
|
| 304 |
-
- Never transcribe German, French, Spanish, Italian, Portuguese, Russian, Chinese, Japanese, Korean, or any other language
|
| 305 |
-
- Always default to English if uncertain
|
| 306 |
-
- This is a veterinary assistant - users will speak in English or Urdu only
|
| 307 |
-
|
| 308 |
-
PRODUCT NAMES (Veterinary Products):
|
| 309 |
-
- Hydropex (electrolyte supplement)
|
| 310 |
-
- Respira Aid Plus (respiratory support)
|
| 311 |
-
- Heposel (liver tonic)
|
| 312 |
-
- Bromacid (respiratory/mucolytic)
|
| 313 |
-
- Hexatox (liver & kidney support)
|
| 314 |
-
- APMA Fort (mycotoxin binder)
|
| 315 |
-
- Para C.E (heat stress support)
|
| 316 |
-
- Tribiotic (antibiotic)
|
| 317 |
-
- PHYTO-SAL (phytogenic supplement)
|
| 318 |
-
- Mycopex Super (mycotoxin binder)
|
| 319 |
-
- Eflin KT-20 (antibiotic)
|
| 320 |
-
- Salcozine ST-30 (anticoccidial)
|
| 321 |
-
- Oftilex UA-10 (antibiotic)
|
| 322 |
-
- Biscomin 10 (injectable antibiotic)
|
| 323 |
-
- Apvita Plus (vitamin supplement)
|
| 324 |
-
- B-G Aspro-C (aspirin + vitamin C)
|
| 325 |
-
- EC-Immune (immune booster)
|
| 326 |
-
- Liverpex (liver tonic)
|
| 327 |
-
- Symodex (multivitamin)
|
| 328 |
-
- Respira Aid (respiratory support)
|
| 329 |
-
- Adek Gold (multivitamin)
|
| 330 |
-
- Immuno DX (immune enhancer)
|
| 331 |
-
|
| 332 |
-
MENU SELECTIONS:
|
| 333 |
-
- Main menu options: 1, 2, 3, 4
|
| 334 |
-
- Product numbers: 1-23
|
| 335 |
-
- Category numbers: 1-10
|
| 336 |
-
- Navigation: main, menu, back, home, start
|
| 337 |
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
|
|
|
|
|
|
|
|
|
| 346 |
|
| 347 |
MENU COMMANDS:
|
| 348 |
-
|
| 349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
|
| 351 |
TRANSCRIPTION RULES:
|
| 352 |
-
1.
|
| 353 |
-
2.
|
| 354 |
-
3.
|
| 355 |
-
4.
|
| 356 |
-
5.
|
| 357 |
-
6.
|
| 358 |
-
7. Handle common transcription errors (opium->option, numara->number)
|
| 359 |
-
8. Maintain context for veterinary domain
|
| 360 |
-
9. If unsure about language, default to English
|
| 361 |
|
| 362 |
EXAMPLES:
|
| 363 |
- "hydropex" -> "hydropex"
|
| 364 |
- "respira aid plus" -> "respira aid plus"
|
| 365 |
-
- "option
|
| 366 |
-
- "aik" -> "1"
|
| 367 |
-
- "do" -> "2"
|
| 368 |
- "main menu" -> "main"
|
| 369 |
- "salam" -> "salam"
|
| 370 |
- "search products" -> "search products"
|
|
|
|
| 371 |
"""
|
| 372 |
|
| 373 |
# First attempt with comprehensive system prompt
|
|
@@ -474,10 +441,21 @@ TRANSCRIPTION RULES:
|
|
| 474 |
transcribed_text = transcript.text.strip()
|
| 475 |
logger.info(f"[Transcribe] Third attempt (mixed) transcribed: '{transcribed_text}'")
|
| 476 |
|
| 477 |
-
# Final check for empty transcription
|
| 478 |
if not transcribed_text or len(transcribed_text.strip()) < 2:
|
| 479 |
logger.warning(f"[Transcribe] Very short or empty transcription: '{transcribed_text}'")
|
| 480 |
-
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
|
| 482 |
return transcribed_text
|
| 483 |
|
|
@@ -2599,8 +2577,8 @@ Response:
|
|
| 2599 |
if reply_language == 'ur':
|
| 2600 |
try:
|
| 2601 |
# Get all product and category names
|
| 2602 |
-
product_names = [p.get('Product Name', '') for p in all_products if p.get('Product Name')]
|
| 2603 |
-
category_names = list(set([p.get('Category', '') for p in all_products if p.get('Category')]))
|
| 2604 |
translated_response = GoogleTranslator(source='auto', target='ur').translate(ai_response)
|
| 2605 |
# Restore English terms
|
| 2606 |
translated_response = restore_english_terms(translated_response, ai_response, product_names, category_names)
|
|
@@ -3431,21 +3409,23 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
|
|
| 3431 |
except:
|
| 3432 |
pass
|
| 3433 |
|
| 3434 |
-
# Handle empty or failed transcription
|
| 3435 |
-
if not transcribed_text or transcribed_text.strip() == "":
|
| 3436 |
-
logger.warning(f"[Voice] Empty transcription for {from_number}")
|
| 3437 |
send_whatsjet_message(from_number,
|
| 3438 |
"🎤 *Voice Message Issue*\n\n"
|
| 3439 |
-
"I couldn't
|
| 3440 |
"• Very short voice note\n"
|
| 3441 |
"• Background noise\n"
|
| 3442 |
"• Microphone too far away\n"
|
| 3443 |
-
"• Audio quality issues\n\n"
|
|
|
|
| 3444 |
"💡 *Tips for better voice notes:*\n"
|
| 3445 |
"• Speak clearly and slowly\n"
|
| 3446 |
"• Keep phone close to mouth\n"
|
| 3447 |
"• Record in quiet environment\n"
|
| 3448 |
-
"• Make voice note at least 2-3 seconds\n\n"
|
|
|
|
| 3449 |
"💬 *You can also:*\n"
|
| 3450 |
"• Send a text message\n"
|
| 3451 |
"• Type 'main' to see menu options\n"
|
|
|
|
| 293 |
system_prompt = """
|
| 294 |
You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
|
| 295 |
|
| 296 |
+
CRITICAL: TRANSCRIBE ONLY ENGLISH OR URDU SPEECH - NOTHING ELSE
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
|
| 298 |
+
IMPORTANT RULES:
|
| 299 |
+
1. ONLY transcribe English or Urdu speech
|
| 300 |
+
2. If you hear unclear audio, transcribe as English
|
| 301 |
+
3. If you hear mixed languages, transcribe as English
|
| 302 |
+
4. Never transcribe gibberish or random characters
|
| 303 |
+
5. If audio is unclear, transcribe as "unclear audio"
|
| 304 |
+
6. Keep transcriptions simple and clean
|
| 305 |
|
| 306 |
+
PRODUCT NAMES (exact spelling required):
|
| 307 |
+
- Hydropex, Respira Aid Plus, Heposel, Bromacid, Hexatox
|
| 308 |
+
- APMA Fort, Para C.E, Tribiotic, PHYTO-SAL, Mycopex Super
|
| 309 |
+
- Eflin KT-20, Salcozine ST-30, Oftilex UA-10, Biscomin 10
|
| 310 |
+
- Apvita Plus, B-G Aspro-C, EC-Immune, Liverpex, Symodex
|
| 311 |
+
- Respira Aid, Adek Gold, Immuno DX
|
| 312 |
|
| 313 |
MENU COMMANDS:
|
| 314 |
+
- Numbers: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
|
| 315 |
+
- Navigation: main, menu, back, home, start
|
| 316 |
+
- Options: option, number, choice, select
|
| 317 |
+
|
| 318 |
+
GREETINGS:
|
| 319 |
+
- English: hi, hello, hey, good morning, good afternoon, good evening
|
| 320 |
+
- Urdu: salam, assalamu alaikum, adaab, namaste, khuda hafiz
|
| 321 |
|
| 322 |
TRANSCRIPTION RULES:
|
| 323 |
+
1. Transcribe exactly what you hear in English or Urdu
|
| 324 |
+
2. Convert numbers to digits (one->1, two->2, etc.)
|
| 325 |
+
3. Preserve product names exactly
|
| 326 |
+
4. If unclear, transcribe as "unclear audio"
|
| 327 |
+
5. Keep it simple and clean
|
| 328 |
+
6. No random characters or mixed languages
|
|
|
|
|
|
|
|
|
|
| 329 |
|
| 330 |
EXAMPLES:
|
| 331 |
- "hydropex" -> "hydropex"
|
| 332 |
- "respira aid plus" -> "respira aid plus"
|
| 333 |
+
- "option one" -> "1"
|
|
|
|
|
|
|
| 334 |
- "main menu" -> "main"
|
| 335 |
- "salam" -> "salam"
|
| 336 |
- "search products" -> "search products"
|
| 337 |
+
- Unclear audio -> "unclear audio"
|
| 338 |
"""
|
| 339 |
|
| 340 |
# First attempt with comprehensive system prompt
|
|
|
|
| 441 |
transcribed_text = transcript.text.strip()
|
| 442 |
logger.info(f"[Transcribe] Third attempt (mixed) transcribed: '{transcribed_text}'")
|
| 443 |
|
| 444 |
+
# Final check for empty transcription or unclear audio
|
| 445 |
if not transcribed_text or len(transcribed_text.strip()) < 2:
|
| 446 |
logger.warning(f"[Transcribe] Very short or empty transcription: '{transcribed_text}'")
|
| 447 |
+
return "unclear audio"
|
| 448 |
+
|
| 449 |
+
# Check for gibberish or mixed characters
|
| 450 |
+
if len(transcribed_text) > 10 and not re.search(r'[a-zA-Z\u0600-\u06FF]', transcribed_text):
|
| 451 |
+
logger.warning(f"[Transcribe] Gibberish detected: '{transcribed_text}'")
|
| 452 |
+
return "unclear audio"
|
| 453 |
+
|
| 454 |
+
# Check for too many special characters
|
| 455 |
+
special_char_ratio = len(re.findall(r'[^\w\s]', transcribed_text)) / len(transcribed_text)
|
| 456 |
+
if special_char_ratio > 0.3:
|
| 457 |
+
logger.warning(f"[Transcribe] Too many special characters: '{transcribed_text}'")
|
| 458 |
+
return "unclear audio"
|
| 459 |
|
| 460 |
return transcribed_text
|
| 461 |
|
|
|
|
| 2577 |
if reply_language == 'ur':
|
| 2578 |
try:
|
| 2579 |
# Get all product and category names
|
| 2580 |
+
product_names = [str(p.get('Product Name', '')) for p in all_products if p.get('Product Name')]
|
| 2581 |
+
category_names = list(set([str(p.get('Category', '')) for p in all_products if p.get('Category')]))
|
| 2582 |
translated_response = GoogleTranslator(source='auto', target='ur').translate(ai_response)
|
| 2583 |
# Restore English terms
|
| 2584 |
translated_response = restore_english_terms(translated_response, ai_response, product_names, category_names)
|
|
|
|
| 3409 |
except:
|
| 3410 |
pass
|
| 3411 |
|
| 3412 |
+
# Handle empty, failed, or unclear transcription
|
| 3413 |
+
if not transcribed_text or transcribed_text.strip() == "" or transcribed_text.lower() == "unclear audio":
|
| 3414 |
+
logger.warning(f"[Voice] Empty or unclear transcription for {from_number}: '{transcribed_text}'")
|
| 3415 |
send_whatsjet_message(from_number,
|
| 3416 |
"🎤 *Voice Message Issue*\n\n"
|
| 3417 |
+
"I couldn't understand your voice message clearly. This can happen due to:\n"
|
| 3418 |
"• Very short voice note\n"
|
| 3419 |
"• Background noise\n"
|
| 3420 |
"• Microphone too far away\n"
|
| 3421 |
+
"• Audio quality issues\n"
|
| 3422 |
+
"• Speaking too fast\n\n"
|
| 3423 |
"💡 *Tips for better voice notes:*\n"
|
| 3424 |
"• Speak clearly and slowly\n"
|
| 3425 |
"• Keep phone close to mouth\n"
|
| 3426 |
"• Record in quiet environment\n"
|
| 3427 |
+
"• Make voice note at least 2-3 seconds\n"
|
| 3428 |
+
"• Speak in English or Urdu only\n\n"
|
| 3429 |
"💬 *You can also:*\n"
|
| 3430 |
"• Send a text message\n"
|
| 3431 |
"• Type 'main' to see menu options\n"
|