LiamKhoaLe committed on
Commit
915cc29
·
1 Parent(s): b0a3faf

Remove redundant conversational elements

Browse files
test_conversational_cleaning.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test conversational element cleaning and failed response handling
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ import logging
9
+ from pathlib import Path
10
+
11
+ # Add the project root to Python path
12
+ project_root = Path(__file__).parent
13
+ sys.path.insert(0, str(project_root))
14
+
15
+ from utils import augment as A
16
+
17
+ # Set up logging
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
20
+
21
def test_conversational_cleaning():
    """Check that ``A.clean_conversational_elements`` drops filler but keeps content.

    Every case supplies an input string, the phrase(s) that must still be
    present after cleaning, the phrases that must be gone, and a description
    used in the log output. The outcome of each case is logged.

    Returns:
        bool: True when every case passes, False as soon as any case fails.
    """
    logger.info("Testing conversational element cleaning...")

    test_cases = [
        # (input, expected_contains, expected_not_contains, description)
        ("Hi, I'm a doctor. Diabetes symptoms include...", "Diabetes symptoms", ["Hi", "I'm a doctor"], "English greeting + doctor intro"),
        ("Xin chào, tôi là bác sĩ. Triệu chứng tiểu đường...", "Triệu chứng tiểu đường", ["Xin chào", "tôi là bác sĩ"], "Vietnamese greeting + doctor intro"),
        ("If you are a doctor, please answer...", "answer", ["If you are a doctor", "please"], "Doctor conditional"),
        ("Thank you for your question. The symptoms are...", "The symptoms are", ["Thank you", "for your question"], "Thank you prefix"),
        ("I hope this helps. Best regards!", "helps", ["I hope this", "Best regards"], "Thank you suffix"),
        ("Nếu bạn là bác sĩ, vui lòng trả lời...", "trả lời", ["Nếu bạn là bác sĩ", "vui lòng"], "Vietnamese doctor conditional"),
        ("As a medical professional, I can tell you...", "I can tell you", ["As a medical professional"], "Medical professional intro"),
        ("From a medical perspective, the answer is...", "the answer is", ["From a medical perspective"], "Medical perspective intro"),
        ("Medically speaking, this condition...", "this condition", ["Medically speaking"], "Medically speaking intro"),
        ("I'm here to help. The treatment is...", "The treatment is", ["I'm here to help"], "Helpful intro"),
    ]

    all_passed = True
    for input_text, expected_contains, expected_not_contains, description in test_cases:
        cleaned = A.clean_conversational_elements(input_text)

        # A bare string is ONE phrase. Iterating it directly would test each
        # character on its own, which passes almost regardless of the output.
        required = [expected_contains] if isinstance(expected_contains, str) else expected_contains
        forbidden = [expected_not_contains] if isinstance(expected_not_contains, str) else expected_not_contains

        # Check that expected content is preserved
        contains_expected = all(phrase in cleaned for phrase in required)

        # Check that conversational elements are removed
        not_contains_expected = all(phrase not in cleaned for phrase in forbidden)

        case_ok = contains_expected and not_contains_expected
        status = "✅" if case_ok else "❌"
        if not case_ok:
            all_passed = False

        logger.info(f"{status} {description}")
        logger.info(f" Input: '{input_text}'")
        logger.info(f" Cleaned: '{cleaned}'")
        logger.info(f" Contains expected: {contains_expected}, Removes unwanted: {not_contains_expected}")
        logger.info("")

    return all_passed
60
+
61
def test_invalid_response_detection():
    """Compare ``A.is_invalid_response`` against a table of expected verdicts.

    Each row is logged with a pass/fail marker.

    Returns:
        bool: True when every verdict matches its expectation.
    """
    logger.info("Testing invalid response detection...")

    # (text, expected_invalid, description)
    cases = (
        ("FAIL", True, "Simple fail response"),
        ("I can't help you", True, "Can't help response"),
        ("I don't know", True, "Don't know response"),
        ("Sorry, I'm unable to", True, "Unable response"),
        ("Diabetes symptoms include...", False, "Valid medical response"),
        ("The treatment is...", False, "Valid treatment response"),
        ("", True, "Empty response"),
        ("Hi", True, "Too short response"),
        ("I'm sorry, I cannot determine", True, "Cannot determine response"),
    )

    mismatches = 0
    for text, expected_invalid, description in cases:
        is_invalid = A.is_invalid_response(text)
        if is_invalid == expected_invalid:
            status = "✅"
        else:
            status = "❌"
            mismatches += 1

        logger.info(f"{status} {description}: '{text}' -> {is_invalid} (expected {expected_invalid})")

    return mismatches == 0
88
+
89
def test_retry_logic():
    """Sanity-check the two helpers exercised here: detection and cleaning.

    First confirms that a handful of failure strings are flagged by
    ``A.is_invalid_response``, then that ``A.clean_conversational_elements``
    keeps the medical sentence while dropping the greeting. Stops at the
    first failure.

    Returns:
        bool: True when all checks pass, False on the first failing check.
    """
    logger.info("Testing retry logic...")

    # Every one of these must be recognised as an invalid response.
    for response in ("FAIL", "I can't help", "Sorry", ""):
        if not A.is_invalid_response(response):
            logger.error(f"❌ Failed to detect invalid response: '{response}'")
            return False
        logger.info(f"✅ Correctly detected invalid response: '{response}'")

    # The cleaned text must keep the content and lose the greeting.
    conversational_text = "Hi, I'm a doctor. Diabetes symptoms include increased thirst."
    cleaned = A.clean_conversational_elements(conversational_text)

    content_kept = "Diabetes symptoms include increased thirst" in cleaned
    if not (content_kept and "Hi" not in cleaned):
        logger.error("❌ Conversational cleaning failed")
        return False

    logger.info("✅ Conversational cleaning working correctly")
    return True
115
+
116
def main():
    """Run each test group, log a per-group and overall summary.

    A group that raises is logged as an error and counted as failed.

    Returns:
        bool: True when every group reported success.
    """
    rule = "=" * 70
    logger.info("Testing conversational cleaning and failed response handling...")
    logger.info(rule)

    suite = (
        ("Conversational Cleaning", test_conversational_cleaning),
        ("Invalid Response Detection", test_invalid_response_detection),
        ("Retry Logic", test_retry_logic),
    )

    outcomes = {}
    for test_name, test_func in suite:
        logger.info(f"\n--- {test_name} ---")
        try:
            outcome = test_func()
            outcomes[test_name] = outcome
            verdict = "✅ PASSED" if outcome else "❌ FAILED"
            logger.info(f"{test_name}: {verdict}")
        except Exception as e:
            # A crashing group counts as a failure rather than aborting the run.
            logger.error(f"{test_name}: ❌ ERROR - {e}")
            outcomes[test_name] = False

    # Summary
    logger.info("\n" + rule)
    logger.info("CONVERSATIONAL CLEANING TEST SUMMARY")
    logger.info(rule)

    total = len(outcomes)
    passed = len([name for name, outcome in outcomes.items() if outcome])

    for test_name, outcome in outcomes.items():
        verdict = "✅ PASSED" if outcome else "❌ FAILED"
        logger.info(f"{test_name}: {verdict}")

    logger.info(f"\nOverall: {passed}/{total} tests passed")

    everything_passed = passed == total
    if everything_passed:
        logger.info("🎉 All tests passed! Conversational cleaning is working correctly.")
        logger.info("✅ Failed responses will be retried, not recorded!")
        logger.info("✅ Conversational elements are properly cleaned!")
    else:
        logger.warning("⚠️ Some tests failed. Please check the logs above.")

    return everything_passed
161
+
162
if __name__ == "__main__":
    # Exit status 0 when every test group passed, 1 otherwise.
    exit_code = 0 if main() else 1
    sys.exit(exit_code)
utils/augment.py CHANGED
@@ -142,6 +142,64 @@ def is_invalid_response(text: str) -> bool:
142
 
143
  return False
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  def clean_invalid_response(text: str, fallback: str = "") -> str:
146
  """Clean invalid responses by returning fallback or empty string"""
147
  if is_invalid_response(text):
@@ -153,14 +211,34 @@ def retry_invalid_response(text: str, paraphraser, max_retries: int = 3) -> str:
153
  if not is_invalid_response(text):
154
  return text
155
 
 
 
 
 
 
156
  for attempt in range(max_retries):
157
  try:
158
- # Try paraphrasing with different difficulty levels
159
- difficulty = "easy" if attempt == 0 else "hard" if attempt == 1 else "easy"
160
- retry_text = paraphraser.paraphrase(text, difficulty=difficulty)
 
 
 
 
 
 
 
 
 
161
 
162
  if retry_text and not is_invalid_response(retry_text):
163
- return retry_text
 
 
 
 
 
 
164
  except Exception as e:
165
  logger.warning(f"Retry attempt {attempt + 1} failed: {e}")
166
  continue
 
142
 
143
  return False
144
 
145
def clean_conversational_elements(text: str) -> str:
    """Strip greetings, self-introductions and sign-offs from a response.

    Leading filler phrases (English and Vietnamese) are removed repeatedly
    until none is left, so stacked openers such as "Hi, I'm a doctor. ..."
    are fully stripped. A trailing sign-off is removed together with
    everything after it on the final line. Whitespace is then collapsed and
    stray leading/trailing commas are dropped.

    A phrase only counts as a prefix when it is a whole word followed by
    whitespace, punctuation or end of text, so medical terms that merely
    start with the same letters ("HIV", "History", "Hives") are left intact.

    Args:
        text: The response to clean.

    Returns:
        The cleaned text. ``text`` is returned unchanged when it is empty,
        not a string, or when cleaning would leave nothing behind.
    """
    import re  # local import keeps this function self-contained

    if not text or not isinstance(text, str):
        return text

    # Longer alternatives come first where one phrase is a prefix of another
    # ("chào bạn" before "chào"), otherwise the shorter one always wins.
    prefix_phrases = [
        r"hi|hello|hey|greetings?",
        r"xin chào|chào bạn|chào",
        r"if you are a doctor|if you're a doctor|as a doctor",
        r"nếu bạn là bác sĩ|nếu bạn là doctor",
        r"please|vui lòng",
        r"thank you|thanks|cảm ơn",
        r"best regards|regards|cheers",
        r"i hope this helps|hy vọng điều này giúp ích",
        r"i'm sorry|tôi xin lỗi",
        r"let me help|để tôi giúp",
        r"i understand|tôi hiểu",
        r"i can help|tôi có thể giúp",
        r"i'll be happy to|tôi sẽ vui lòng",
        r"i would be glad to|tôi sẽ rất vui",
        r"i'm here to help|tôi ở đây để giúp",
        r"i'm a doctor|tôi là bác sĩ",
        r"as a medical professional|như một chuyên gia y tế",
        r"from a medical perspective|từ góc độ y tế",
        r"medically speaking|nói về mặt y tế",
    ]
    # The lookahead demands a separator (or end of text) right after the
    # phrase; the trailing class then swallows that separator, including the
    # sentence punctuation that used to be left behind ("Hi. X" -> ". X").
    prefix_re = re.compile(
        r"^\s*(?:" + "|".join(prefix_phrases) + r")(?=[\s,.!?;:]|$)[\s,.!?;:]*",
        re.IGNORECASE,
    )

    cleaned_text = text
    while True:
        # Each successful match consumes at least one character, so this ends.
        stripped = prefix_re.sub("", cleaned_text, count=1)
        if stripped == cleaned_text:
            break
        cleaned_text = stripped

    # NOTE(review): suffix phrases match anywhere on the last line and delete
    # through to its end, so genuine content such as "take care of the wound"
    # is truncated too. Kept as in the original; confirm this is intended.
    suffix_phrases = [
        # Optional leading "I" so "... I hope this helps" leaves no dangling "I".
        r"(?:i\s+)?hope this helps|hy vọng điều này giúp ích",
        r"let me know if you need more|hãy cho tôi biết nếu bạn cần thêm",
        r"feel free to ask|đừng ngại hỏi",
        r"if you have any questions|nếu bạn có câu hỏi",
        r"please let me know|vui lòng cho tôi biết",
        r"i'm here to help|tôi ở đây để giúp",
        r"best regards|trân trọng",
        r"take care|chúc sức khỏe",
        r"good luck|chúc may mắn",
        r"wishing you well|chúc bạn khỏe mạnh",
    ]
    # Lookarounds stop a phrase from matching inside a longer word
    # (e.g. "take care" inside "intake care").
    suffix_re = re.compile(
        r"\s*,?\s*(?<!\w)(?:" + "|".join(suffix_phrases) + r")(?!\w).*$",
        re.IGNORECASE,
    )
    cleaned_text = suffix_re.sub("", cleaned_text)

    # Clean up extra whitespace and punctuation
    cleaned_text = re.sub(r"\s+", " ", cleaned_text).strip()
    cleaned_text = re.sub(r"^[,\s]+|[,\s]+$", "", cleaned_text)

    # Never hand back an empty string for non-empty input.
    return cleaned_text if cleaned_text else text
202
+
203
  def clean_invalid_response(text: str, fallback: str = "") -> str:
204
  """Clean invalid responses by returning fallback or empty string"""
205
  if is_invalid_response(text):
 
211
  if not is_invalid_response(text):
212
  return text
213
 
214
+ # Clean conversational elements first
215
+ cleaned_text = clean_conversational_elements(text)
216
+ if cleaned_text != text and not is_invalid_response(cleaned_text):
217
+ return cleaned_text
218
+
219
  for attempt in range(max_retries):
220
  try:
221
+ # Try different strategies based on attempt
222
+ if attempt == 0:
223
+ # First try: Simple paraphrasing
224
+ retry_text = paraphraser.paraphrase(text, difficulty="easy")
225
+ elif attempt == 1:
226
+ # Second try: More aggressive paraphrasing with medical focus
227
+ medical_prompt = f"Rewrite this medical response to be more professional and accurate:\n\n{text}"
228
+ retry_text = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=medical_prompt)
229
+ else:
230
+ # Third try: Direct medical content generation
231
+ medical_prompt = f"Provide a professional medical response to this question:\n\n{text}"
232
+ retry_text = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=medical_prompt)
233
 
234
  if retry_text and not is_invalid_response(retry_text):
235
+ # Clean conversational elements from retry
236
+ cleaned_retry = clean_conversational_elements(retry_text)
237
+ if cleaned_retry and not is_invalid_response(cleaned_retry):
238
+ return cleaned_retry
239
+ elif retry_text: # Use original retry if cleaning fails
240
+ return retry_text
241
+
242
  except Exception as e:
243
  logger.warning(f"Retry attempt {attempt + 1} failed: {e}")
244
  continue
utils/processor.py CHANGED
@@ -141,6 +141,8 @@ def _build_enriched_variants(user: str, out: str, paraphraser, opts: Dict, stats
141
  enhanced_out = paraphraser.paraphrase(out, difficulty="hard", custom_prompt=style_prompt)
142
 
143
  if enhanced_out and not A.is_invalid_response(enhanced_out):
 
 
144
  if opts.get("style_standardize", True):
145
  enhanced_out = A.style_standardize_answer(enhanced_out)
146
  enhanced_out = A.ensure_terminal_punct(enhanced_out)
@@ -170,6 +172,8 @@ def _build_enriched_variants(user: str, out: str, paraphraser, opts: Dict, stats
170
  enhanced_user = paraphraser.paraphrase(user, difficulty="hard", custom_prompt=style_prompt)
171
 
172
  if enhanced_user and not A.is_invalid_response(enhanced_user):
 
 
173
  enhanced_user = A.ensure_terminal_punct(enhanced_user)
174
  question_variants.append((enhanced_user, tags))
175
  stats["paraphrased_input"] += 1
@@ -237,6 +241,10 @@ def _apply_aug(instr: str, user: str, out: str, source: str, opts: Dict, paraphr
237
  # Stack list of entries that has been applied augmentation and stylings
238
  applied = []
239
 
 
 
 
 
240
  # Clean invalid responses with retry logic
241
  if A.is_invalid_response(out):
242
  out = A.retry_invalid_response(out, paraphraser, max_retries=3)
@@ -306,9 +314,10 @@ def _proc_med_dialog(source, path, writer, paraphraser, opts, sample_limit, stat
306
  try:
307
  instr, user, out, applied = _apply_aug(instr, user, out, source, opts, paraphraser, stats)
308
 
309
- # Skip if retry failed (empty output)
310
  if not out:
311
  stats["dropped_invalid"] = stats.get("dropped_invalid", 0) + 1
 
312
  continue
313
 
314
  # 1) ALWAYS write the original (cleaned/style-standardised only)
 
141
  enhanced_out = paraphraser.paraphrase(out, difficulty="hard", custom_prompt=style_prompt)
142
 
143
  if enhanced_out and not A.is_invalid_response(enhanced_out):
144
+ # Clean conversational elements
145
+ enhanced_out = A.clean_conversational_elements(enhanced_out)
146
  if opts.get("style_standardize", True):
147
  enhanced_out = A.style_standardize_answer(enhanced_out)
148
  enhanced_out = A.ensure_terminal_punct(enhanced_out)
 
172
  enhanced_user = paraphraser.paraphrase(user, difficulty="hard", custom_prompt=style_prompt)
173
 
174
  if enhanced_user and not A.is_invalid_response(enhanced_user):
175
+ # Clean conversational elements
176
+ enhanced_user = A.clean_conversational_elements(enhanced_user)
177
  enhanced_user = A.ensure_terminal_punct(enhanced_user)
178
  question_variants.append((enhanced_user, tags))
179
  stats["paraphrased_input"] += 1
 
241
  # Stack list of entries that has been applied augmentation and stylings
242
  applied = []
243
 
244
+ # Clean conversational elements first
245
+ out = A.clean_conversational_elements(out)
246
+ user = A.clean_conversational_elements(user)
247
+
248
  # Clean invalid responses with retry logic
249
  if A.is_invalid_response(out):
250
  out = A.retry_invalid_response(out, paraphraser, max_retries=3)
 
314
  try:
315
  instr, user, out, applied = _apply_aug(instr, user, out, source, opts, paraphraser, stats)
316
 
317
+ # Skip if retry failed (empty output) - DO NOT RECORD FAILED RESPONSES
318
  if not out:
319
  stats["dropped_invalid"] = stats.get("dropped_invalid", 0) + 1
320
+ logger.warning(f"[PROC] {source} dropped invalid response for item {i} - will retry in next batch")
321
  continue
322
 
323
  # 1) ALWAYS write the original (cleaned/style-standardised only)