Spaces:

DreamStream-1
/

chatbot

Sleeping

App Files Community

DreamStream-1 committed on Jul 1, 2025

Commit

ae9f70f

verified ·

1 Parent(s): 34d3fc3

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -37

app.py CHANGED Viewed

@@ -293,15 +293,17 @@ async def transcribe_voice_with_openai(file_path: str) -> str:
         system_prompt = """
 You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
-IMPORTANT: ONLY TRANSCRIBE ENGLISH OR URDU SPEECH. IGNORE ALL OTHER LANGUAGES.
 CONTEXT: Users can speak product names, menu selections, numbers, and general queries in English or Urdu ONLY.
-LANGUAGE RESTRICTION:
-- ONLY English (en) or Urdu (ur) are allowed
-- If you detect any other language, force it to English
-- Never transcribe in German, French, Spanish, Italian, or any other language
-- Always assume English or Urdu speech patterns
 PRODUCT NAMES (Veterinary Products):
 - Hydropex (electrolyte supplement)
@@ -3418,34 +3420,20 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
             logger.info(f"[Voice] Applied corrections: '{transcribed_text}' -> '{corrected_text}'")
             transcribed_text = corrected_text
-        # Detect language of transcribed text - FORCE ENGLISH OR URDU ONLY
         detected_lang = 'en'  # Default to English
         try:
             detected_lang = detect(transcribed_text)
             logger.info(f"[Voice] Raw detected language: {detected_lang}")
-            # FORCE LANGUAGE TO ENGLISH OR URDU ONLY
-            # Map all languages to either English or Urdu
-            lang_mapping = {
-                'ur': 'ur',  # Urdu
-                'ar': 'ur',  # Arabic (treat as Urdu for Islamic greetings)
-                'en': 'en',  # English
-                'hi': 'ur',  # Hindi (treat as Urdu)
-                'bn': 'ur',  # Bengali (treat as Urdu)
-                'pa': 'ur',  # Punjabi (treat as Urdu)
-                'id': 'ur',  # Indonesian (treat as Urdu)
-                'ms': 'ur',  # Malay (treat as Urdu)
-                'tr': 'ur',  # Turkish (treat as Urdu)
-                'de': 'en',  # German -> English
-                'fr': 'en',  # French -> English
-                'es': 'en',  # Spanish -> English
-                'it': 'en',  # Italian -> English
-                'pt': 'en',  # Portuguese -> English
-                'ru': 'en',  # Russian -> English
-                'ja': 'en',  # Japanese -> English
-                'ko': 'en',  # Korean -> English
-                'zh': 'en',  # Chinese -> English
-            }
             # Check if text contains Urdu/Arabic characters or Islamic greetings
             urdu_arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
@@ -3456,16 +3444,10 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
             if has_urdu_chars or has_islamic_greeting:
                 detected_lang = 'ur'
                 logger.info(f"[Voice] Overriding language detection to Urdu due to Arabic/Urdu characters or Islamic greeting")
-            # Force language to English or Urdu only
-            reply_language = lang_mapping.get(detected_lang, 'en')
-            logger.info(f"[Voice] Language '{detected_lang}' FORCED to: {reply_language}")
-            # Additional safety check - if still not English or Urdu, force to English
-            if reply_language not in ['en', 'ur']:
-                logger.warning(f"[Voice] Language '{reply_language}' not in allowed list, forcing to English")
-                reply_language = 'en'
         except Exception as e:
             logger.warning(f"[Voice] Language detection failed: {e}, defaulting to English")

         system_prompt = """
 You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
+CRITICAL LANGUAGE RESTRICTION: ONLY ENGLISH OR URDU - NOTHING ELSE
 CONTEXT: Users can speak product names, menu selections, numbers, and general queries in English or Urdu ONLY.
+STRICT LANGUAGE RULES:
+- ONLY transcribe English (en) or Urdu (ur) speech
+- REJECT all other languages completely
+- If you hear any language other than English or Urdu, transcribe as English
+- Never transcribe German, French, Spanish, Italian, Portuguese, Russian, Chinese, Japanese, Korean, or any other language
+- Always default to English if uncertain
+- This is a veterinary assistant - users will speak in English or Urdu only
 PRODUCT NAMES (Veterinary Products):
 - Hydropex (electrolyte supplement)
             logger.info(f"[Voice] Applied corrections: '{transcribed_text}' -> '{corrected_text}'")
             transcribed_text = corrected_text
+                # Detect language of transcribed text - STRICTLY ENGLISH OR URDU ONLY
         detected_lang = 'en'  # Default to English
         try:
             detected_lang = detect(transcribed_text)
             logger.info(f"[Voice] Raw detected language: {detected_lang}")
+            # STRICTLY ENGLISH OR URDU ONLY - NO OTHER LANGUAGES
+            # Only allow English and Urdu, reject everything else
+            if detected_lang in ['en', 'ur']:
+                reply_language = detected_lang
+            else:
+                # Force any other language to English
+                reply_language = 'en'
+                logger.warning(f"[Voice] Detected language '{detected_lang}' is not English or Urdu, forcing to English")
             # Check if text contains Urdu/Arabic characters or Islamic greetings
             urdu_arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
             if has_urdu_chars or has_islamic_greeting:
                 detected_lang = 'ur'
+                reply_language = 'ur'
                 logger.info(f"[Voice] Overriding language detection to Urdu due to Arabic/Urdu characters or Islamic greeting")
+            logger.info(f"[Voice] Final language set to: {reply_language}")
         except Exception as e:
             logger.warning(f"[Voice] Language detection failed: {e}, defaulting to English")