Spaces:

MedSwin
/

MedAI_Processing

Sleeping

App Files Files Community

LiamKhoaLe committed on Oct 8, 2025

Commit

915cc29

1 Parent(s): b0a3faf

Remove redundant conversational elements

Browse files

Files changed (3) hide show

test_conversational_cleaning.py +164 -0
utils/augment.py +82 -4
utils/processor.py +10 -1

test_conversational_cleaning.py ADDED Viewed

	@@ -0,0 +1,164 @@

+#!/usr/bin/env python3
+"""
+Test conversational element cleaning and failed response handling
+"""
+import os
+import sys
+import logging
+from pathlib import Path
+# Add the project root to Python path
+project_root = Path(__file__).parent
+sys.path.insert(0, str(project_root))
+from utils import augment as A
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def test_conversational_cleaning():
+    """Test conversational element cleaning"""
+    logger.info("Testing conversational element cleaning...")
+    test_cases = [
+        # (input, expected_contains, expected_not_contains, description)
+        ("Hi, I'm a doctor. Diabetes symptoms include...", "Diabetes symptoms", ["Hi", "I'm a doctor"], "English greeting + doctor intro"),
+        ("Xin chào, tôi là bác sĩ. Triệu chứng tiểu đường...", "Triệu chứng tiểu đường", ["Xin chào", "tôi là bác sĩ"], "Vietnamese greeting + doctor intro"),
+        ("If you are a doctor, please answer...", "answer", ["If you are a doctor", "please"], "Doctor conditional"),
+        ("Thank you for your question. The symptoms are...", "The symptoms are", ["Thank you", "for your question"], "Thank you prefix"),
+        ("I hope this helps. Best regards!", "helps", ["I hope this", "Best regards"], "Thank you suffix"),
+        ("Nếu bạn là bác sĩ, vui lòng trả lời...", "trả lời", ["Nếu bạn là bác sĩ", "vui lòng"], "Vietnamese doctor conditional"),
+        ("As a medical professional, I can tell you...", "I can tell you", ["As a medical professional"], "Medical professional intro"),
+        ("From a medical perspective, the answer is...", "the answer is", ["From a medical perspective"], "Medical perspective intro"),
+        ("Medically speaking, this condition...", "this condition", ["Medically speaking"], "Medically speaking intro"),
+        ("I'm here to help. The treatment is...", "The treatment is", ["I'm here to help"], "Helpful intro"),
+    ]
+    all_passed = True
+    for input_text, expected_contains, expected_not_contains, description in test_cases:
+        cleaned = A.clean_conversational_elements(input_text)
+        # Check that expected content is preserved
+        contains_expected = all(phrase in cleaned for phrase in expected_contains)
+        # Check that conversational elements are removed
+        not_contains_expected = all(phrase not in cleaned for phrase in expected_not_contains)
+        status = "✅" if contains_expected and not_contains_expected else "❌"
+        if not (contains_expected and not_contains_expected):
+            all_passed = False
+        logger.info(f"{status} {description}")
+        logger.info(f"  Input: '{input_text}'")
+        logger.info(f"  Cleaned: '{cleaned}'")
+        logger.info(f"  Contains expected: {contains_expected}, Removes unwanted: {not_contains_expected}")
+        logger.info("")
+    return all_passed
+def test_invalid_response_detection():
+    """Test invalid response detection"""
+    logger.info("Testing invalid response detection...")
+    test_cases = [
+        # (text, expected_invalid, description)
+        ("FAIL", True, "Simple fail response"),
+        ("I can't help you", True, "Can't help response"),
+        ("I don't know", True, "Don't know response"),
+        ("Sorry, I'm unable to", True, "Unable response"),
+        ("Diabetes symptoms include...", False, "Valid medical response"),
+        ("The treatment is...", False, "Valid treatment response"),
+        ("", True, "Empty response"),
+        ("Hi", True, "Too short response"),
+        ("I'm sorry, I cannot determine", True, "Cannot determine response"),
+    ]
+    all_passed = True
+    for text, expected_invalid, description in test_cases:
+        is_invalid = A.is_invalid_response(text)
+        status = "✅" if is_invalid == expected_invalid else "❌"
+        if is_invalid != expected_invalid:
+            all_passed = False
+        logger.info(f"{status} {description}: '{text}' -> {is_invalid} (expected {expected_invalid})")
+    return all_passed
+def test_retry_logic():
+    """Test retry logic for failed responses"""
+    logger.info("Testing retry logic...")
+    # Test that invalid responses are detected
+    invalid_responses = ["FAIL", "I can't help", "Sorry", ""]
+    for response in invalid_responses:
+        is_invalid = A.is_invalid_response(response)
+        if is_invalid:
+            logger.info(f"✅ Correctly detected invalid response: '{response}'")
+        else:
+            logger.error(f"❌ Failed to detect invalid response: '{response}'")
+            return False
+    # Test conversational cleaning
+    conversational_text = "Hi, I'm a doctor. Diabetes symptoms include increased thirst."
+    cleaned = A.clean_conversational_elements(conversational_text)
+    if "Diabetes symptoms include increased thirst" in cleaned and "Hi" not in cleaned:
+        logger.info("✅ Conversational cleaning working correctly")
+    else:
+        logger.error("❌ Conversational cleaning failed")
+        return False
+    return True
+def main():
+    """Run all tests"""
+    logger.info("Testing conversational cleaning and failed response handling...")
+    logger.info("=" * 70)
+    tests = [
+        ("Conversational Cleaning", test_conversational_cleaning),
+        ("Invalid Response Detection", test_invalid_response_detection),
+        ("Retry Logic", test_retry_logic),
+    ]
+    results = {}
+    for test_name, test_func in tests:
+        logger.info(f"\n--- {test_name} ---")
+        try:
+            result = test_func()
+            results[test_name] = result
+            status = "✅ PASSED" if result else "❌ FAILED"
+            logger.info(f"{test_name}: {status}")
+        except Exception as e:
+            logger.error(f"{test_name}: ❌ ERROR - {e}")
+            results[test_name] = False
+    # Summary
+    logger.info("\n" + "=" * 70)
+    logger.info("CONVERSATIONAL CLEANING TEST SUMMARY")
+    logger.info("=" * 70)
+    passed = sum(1 for result in results.values() if result)
+    total = len(results)
+    for test_name, result in results.items():
+        status = "✅ PASSED" if result else "❌ FAILED"
+        logger.info(f"{test_name}: {status}")
+    logger.info(f"\nOverall: {passed}/{total} tests passed")
+    if passed == total:
+        logger.info("🎉 All tests passed! Conversational cleaning is working correctly.")
+        logger.info("✅ Failed responses will be retried, not recorded!")
+        logger.info("✅ Conversational elements are properly cleaned!")
+    else:
+        logger.warning("⚠️ Some tests failed. Please check the logs above.")
+    return passed == total
+if __name__ == "__main__":
+    success = main()
+    sys.exit(0 if success else 1)

utils/augment.py CHANGED Viewed

@@ -142,6 +142,64 @@ def is_invalid_response(text: str) -> bool:
     return False
 def clean_invalid_response(text: str, fallback: str = "") -> str:
     """Clean invalid responses by returning fallback or empty string"""
     if is_invalid_response(text):
@@ -153,14 +211,34 @@ def retry_invalid_response(text: str, paraphraser, max_retries: int = 3) -> str:
     if not is_invalid_response(text):
         return text
     for attempt in range(max_retries):
         try:
-            # Try paraphrasing with different difficulty levels
-            difficulty = "easy" if attempt == 0 else "hard" if attempt == 1 else "easy"
-            retry_text = paraphraser.paraphrase(text, difficulty=difficulty)
             if retry_text and not is_invalid_response(retry_text):
-                return retry_text
         except Exception as e:
             logger.warning(f"Retry attempt {attempt + 1} failed: {e}")
             continue

     return False
+def clean_conversational_elements(text: str) -> str:
+    """Remove conversational elements and non-medical information smartly"""
+    if not text or not isinstance(text, str):
+        return text
+    # Remove common conversational prefixes
+    conversational_prefixes = [
+        r"^(hi|hello|hey|greetings?)\s*,?\s*",
+        r"^(xin chào|chào|chào bạn)\s*,?\s*",
+        r"^(if you are a doctor|if you're a doctor|as a doctor)\s*,?\s*",
+        r"^(nếu bạn là bác sĩ|nếu bạn là doctor)\s*,?\s*",
+        r"^(please|vui lòng)\s*,?\s*",
+        r"^(thank you|cảm ơn)\s*,?\s*",
+        r"^(thanks|cảm ơn)\s*,?\s*",
+        r"^(regards|best regards|cheers)\s*,?\s*",
+        r"^(i hope this helps|hy vọng điều này giúp ích)\s*,?\s*",
+        r"^(i'm sorry|tôi xin lỗi)\s*,?\s*",
+        r"^(let me help|để tôi giúp)\s*,?\s*",
+        r"^(i understand|tôi hiểu)\s*,?\s*",
+        r"^(i can help|tôi có thể giúp)\s*,?\s*",
+        r"^(i'll be happy to|tôi sẽ vui lòng)\s*,?\s*",
+        r"^(i would be glad to|tôi sẽ rất vui)\s*,?\s*",
+        r"^(i'm here to help|tôi ở đây để giúp)\s*,?\s*",
+        r"^(i'm a doctor|tôi là bác sĩ)\s*,?\s*",
+        r"^(as a medical professional|như một chuyên gia y tế)\s*,?\s*",
+        r"^(from a medical perspective|từ góc độ y tế)\s*,?\s*",
+        r"^(medically speaking|nói về mặt y tế)\s*,?\s*",
+    ]
+    cleaned_text = text
+    for pattern in conversational_prefixes:
+        import re
+        cleaned_text = re.sub(pattern, "", cleaned_text, flags=re.IGNORECASE)
+    # Remove common conversational suffixes
+    conversational_suffixes = [
+        r"\s*,?\s*(hope this helps|hy vọng điều này giúp ích).*$",
+        r"\s*,?\s*(let me know if you need more|hãy cho tôi biết nếu bạn cần thêm).*$",
+        r"\s*,?\s*(feel free to ask|đừng ngại hỏi).*$",
+        r"\s*,?\s*(if you have any questions|nếu bạn có câu hỏi).*$",
+        r"\s*,?\s*(please let me know|vui lòng cho tôi biết).*$",
+        r"\s*,?\s*(i'm here to help|tôi ở đây để giúp).*$",
+        r"\s*,?\s*(best regards|trân trọng).*$",
+        r"\s*,?\s*(take care|chúc sức khỏe).*$",
+        r"\s*,?\s*(good luck|chúc may mắn).*$",
+        r"\s*,?\s*(wishing you well|chúc bạn khỏe mạnh).*$",
+    ]
+    for pattern in conversational_suffixes:
+        import re
+        cleaned_text = re.sub(pattern, "", cleaned_text, flags=re.IGNORECASE)
+    # Clean up extra whitespace and punctuation
+    cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
+    cleaned_text = re.sub(r'^[,\s]+|[,\s]+$', '', cleaned_text)
+    return cleaned_text if cleaned_text else text
 def clean_invalid_response(text: str, fallback: str = "") -> str:
     """Clean invalid responses by returning fallback or empty string"""
     if is_invalid_response(text):
     if not is_invalid_response(text):
         return text
+    # Clean conversational elements first
+    cleaned_text = clean_conversational_elements(text)
+    if cleaned_text != text and not is_invalid_response(cleaned_text):
+        return cleaned_text
     for attempt in range(max_retries):
         try:
+            # Try different strategies based on attempt
+            if attempt == 0:
+                # First try: Simple paraphrasing
+                retry_text = paraphraser.paraphrase(text, difficulty="easy")
+            elif attempt == 1:
+                # Second try: More aggressive paraphrasing with medical focus
+                medical_prompt = f"Rewrite this medical response to be more professional and accurate:\n\n{text}"
+                retry_text = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=medical_prompt)
+            else:
+                # Third try: Direct medical content generation
+                medical_prompt = f"Provide a professional medical response to this question:\n\n{text}"
+                retry_text = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=medical_prompt)
             if retry_text and not is_invalid_response(retry_text):
+                # Clean conversational elements from retry
+                cleaned_retry = clean_conversational_elements(retry_text)
+                if cleaned_retry and not is_invalid_response(cleaned_retry):
+                    return cleaned_retry
+                elif retry_text:  # Use original retry if cleaning fails
+                    return retry_text
         except Exception as e:
             logger.warning(f"Retry attempt {attempt + 1} failed: {e}")
             continue

utils/processor.py CHANGED Viewed

@@ -141,6 +141,8 @@ def _build_enriched_variants(user: str, out: str, paraphraser, opts: Dict, stats
                 enhanced_out = paraphraser.paraphrase(out, difficulty="hard", custom_prompt=style_prompt)
                 if enhanced_out and not A.is_invalid_response(enhanced_out):
                     if opts.get("style_standardize", True):
                         enhanced_out = A.style_standardize_answer(enhanced_out)
                     enhanced_out = A.ensure_terminal_punct(enhanced_out)
@@ -170,6 +172,8 @@ def _build_enriched_variants(user: str, out: str, paraphraser, opts: Dict, stats
                 enhanced_user = paraphraser.paraphrase(user, difficulty="hard", custom_prompt=style_prompt)
                 if enhanced_user and not A.is_invalid_response(enhanced_user):
                     enhanced_user = A.ensure_terminal_punct(enhanced_user)
                     question_variants.append((enhanced_user, tags))
                     stats["paraphrased_input"] += 1
@@ -237,6 +241,10 @@ def _apply_aug(instr: str, user: str, out: str, source: str, opts: Dict, paraphr
     # Stack list of entries that has been applied augmentation and stylings
     applied = []
     # Clean invalid responses with retry logic
     if A.is_invalid_response(out):
         out = A.retry_invalid_response(out, paraphraser, max_retries=3)
@@ -306,9 +314,10 @@ def _proc_med_dialog(source, path, writer, paraphraser, opts, sample_limit, stat
         try:
             instr, user, out, applied = _apply_aug(instr, user, out, source, opts, paraphraser, stats)
-            # Skip if retry failed (empty output)
             if not out:
                 stats["dropped_invalid"] = stats.get("dropped_invalid", 0) + 1
                 continue
             # 1) ALWAYS write the original (cleaned/style-standardised only)

                 enhanced_out = paraphraser.paraphrase(out, difficulty="hard", custom_prompt=style_prompt)
                 if enhanced_out and not A.is_invalid_response(enhanced_out):
+                    # Clean conversational elements
+                    enhanced_out = A.clean_conversational_elements(enhanced_out)
                     if opts.get("style_standardize", True):
                         enhanced_out = A.style_standardize_answer(enhanced_out)
                     enhanced_out = A.ensure_terminal_punct(enhanced_out)
                 enhanced_user = paraphraser.paraphrase(user, difficulty="hard", custom_prompt=style_prompt)
                 if enhanced_user and not A.is_invalid_response(enhanced_user):
+                    # Clean conversational elements
+                    enhanced_user = A.clean_conversational_elements(enhanced_user)
                     enhanced_user = A.ensure_terminal_punct(enhanced_user)
                     question_variants.append((enhanced_user, tags))
                     stats["paraphrased_input"] += 1
     # Stack list of entries that has been applied augmentation and stylings
     applied = []
+    # Clean conversational elements first
+    out = A.clean_conversational_elements(out)
+    user = A.clean_conversational_elements(user)
     # Clean invalid responses with retry logic
     if A.is_invalid_response(out):
         out = A.retry_invalid_response(out, paraphraser, max_retries=3)
         try:
             instr, user, out, applied = _apply_aug(instr, user, out, source, opts, paraphraser, stats)
+            # Skip if retry failed (empty output) - DO NOT RECORD FAILED RESPONSES
             if not out:
                 stats["dropped_invalid"] = stats.get("dropped_invalid", 0) + 1
+                logger.warning(f"[PROC] {source} dropped invalid response for item {i} - will retry in next batch")
                 continue
             # 1) ALWAYS write the original (cleaned/style-standardised only)