LiamKhoaLe committed on
Commit
235b116
·
1 Parent(s): 915cc29
Files changed (2) hide show
  1. review.md +40 -0
  2. test_conversational_cleaning.py +0 -164
review.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ a. LLM-Based Paraphrasing
2
+ - **Multi-model approach**: Llama-8B (same architecture) and Gemini (Flash/Pro) models for reliability
3
+ - **Difficulty levels**: Easy and Hard paraphrasing modes that make effective use of different models, with auditing.
4
+ - **Medical context preservation**: Maintains clinical terminology accuracy
5
+ - **Configurable ratios**: User-defined augmentation percentages
6
+
7
+ b. Back-Translation Augmentation
8
+ - **Pivot languages**: EN → VI → EN → VI → ... (repeated English–Vietnamese round trips)
9
+ - **Quality control**: Length and semantic similarity validation
10
+ - **Meaning preservation**: Maintains semantic accuracy through translation cycles
11
+
12
+ c. Style Standardization
13
+ - **Clinical voice enforcement**: Neutral, professional medical tone
14
+ - **Absolute language removal**: Replaces guarantees with probabilistic language
15
+ - **Forum sign-off removal**: Eliminates informal communication patterns
16
+
17
+ d. Multi-Variant Generation (for reasoning)
18
+ - **Answer variants**: Concise, detailed, clinical, patient-friendly styles
19
+ - **Question variants**: Clarifying, follow-up, symptom-focused, treatment-focused
20
+ - **Cross combinations**: All question × answer variant combinations (up to 9 per sample)
+
+ e. Clinical Scenario Creation
21
+ - **Context variations**: Emergency room, routine checkup, chronic conditions, family member perspectives
22
+ - **Enhanced diversity**: Multiple reasoning paths for improved model training
23
+
24
+ f. Quality Assurance
25
+ f1. Data Cleaning
26
+ - **PHI removal**: Email, phone, URL, IP address redaction
27
+ - **Deduplication**: MD5-based content hashing with normalized comparison
28
+ - **Invalid response handling**: Detection and retry logic for failed responses
29
+ - **Conversational element cleaning**: Removal of greetings and non-medical content
30
+
31
+ f2. Validation
32
+ - **Medical accuracy validation**: LLM-based consistency checking
33
+ - **Length control**: Configurable maximum character limits
34
+ - **Language detection**: English validation for content quality
35
+
36
+ g. Output Formats: SFT Format
37
+ - **Instruction**: Task description
38
+ - **Input**: User question/context
39
+ - **Output**: Model response
40
+ - **Metadata**: Augmentation tags and source information
test_conversational_cleaning.py DELETED
@@ -1,164 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Test conversational element cleaning and failed response handling
4
- """
5
-
6
- import os
7
- import sys
8
- import logging
9
- from pathlib import Path
10
-
11
- # Add the project root to Python path
12
- project_root = Path(__file__).parent
13
- sys.path.insert(0, str(project_root))
14
-
15
- from utils import augment as A
16
-
17
- # Set up logging
18
- logging.basicConfig(level=logging.INFO)
19
- logger = logging.getLogger(__name__)
20
-
21
def test_conversational_cleaning():
    """Check that conversational filler is stripped while the real content survives.

    Each case is ``(input, expected_contains, expected_not_contains, description)``.
    Either expectation may be a single phrase (``str``) or a collection of
    phrases. Every phrase in ``expected_contains`` must appear in the cleaned
    text, and no phrase in ``expected_not_contains`` may appear in it.

    Returns:
        bool: True when every case passes, False as soon as any case fails.
    """
    logger.info("Testing conversational element cleaning...")

    test_cases = [
        # (input, expected_contains, expected_not_contains, description)
        ("Hi, I'm a doctor. Diabetes symptoms include...", "Diabetes symptoms", ["Hi", "I'm a doctor"], "English greeting + doctor intro"),
        ("Xin chào, tôi là bác sĩ. Triệu chứng tiểu đường...", "Triệu chứng tiểu đường", ["Xin chào", "tôi là bác sĩ"], "Vietnamese greeting + doctor intro"),
        ("If you are a doctor, please answer...", "answer", ["If you are a doctor", "please"], "Doctor conditional"),
        ("Thank you for your question. The symptoms are...", "The symptoms are", ["Thank you", "for your question"], "Thank you prefix"),
        ("I hope this helps. Best regards!", "helps", ["I hope this", "Best regards"], "Thank you suffix"),
        ("Nếu bạn là bác sĩ, vui lòng trả lời...", "trả lời", ["Nếu bạn là bác sĩ", "vui lòng"], "Vietnamese doctor conditional"),
        ("As a medical professional, I can tell you...", "I can tell you", ["As a medical professional"], "Medical professional intro"),
        ("From a medical perspective, the answer is...", "the answer is", ["From a medical perspective"], "Medical perspective intro"),
        ("Medically speaking, this condition...", "this condition", ["Medically speaking"], "Medically speaking intro"),
        ("I'm here to help. The treatment is...", "The treatment is", ["I'm here to help"], "Helpful intro"),
    ]

    def _as_phrases(value):
        # A bare string is ONE phrase. Iterating it directly would test each
        # character separately, which passes for almost any output.
        return [value] if isinstance(value, str) else list(value)

    all_passed = True
    for input_text, expected_contains, expected_not_contains, description in test_cases:
        cleaned = A.clean_conversational_elements(input_text)

        # Check that expected content is preserved (whole phrases, not characters)
        contains_expected = all(
            phrase in cleaned for phrase in _as_phrases(expected_contains)
        )

        # Check that conversational elements are removed
        not_contains_expected = all(
            phrase not in cleaned for phrase in _as_phrases(expected_not_contains)
        )

        case_passed = contains_expected and not_contains_expected
        status = "✅" if case_passed else "❌"
        if not case_passed:
            all_passed = False

        logger.info(f"{status} {description}")
        logger.info(f" Input: '{input_text}'")
        logger.info(f" Cleaned: '{cleaned}'")
        logger.info(f" Contains expected: {contains_expected}, Removes unwanted: {not_contains_expected}")
        logger.info("")

    return all_passed
60
-
61
def test_invalid_response_detection():
    """Compare ``A.is_invalid_response`` against a table of labelled responses.

    Each table row is ``(text, expected_invalid, description)``. One line is
    logged per row with a pass/fail marker.

    Returns:
        bool: True when the detector agrees with every expectation.
    """
    logger.info("Testing invalid response detection...")

    # (text, expected_invalid, description)
    cases = (
        ("FAIL", True, "Simple fail response"),
        ("I can't help you", True, "Can't help response"),
        ("I don't know", True, "Don't know response"),
        ("Sorry, I'm unable to", True, "Unable response"),
        ("Diabetes symptoms include...", False, "Valid medical response"),
        ("The treatment is...", False, "Valid treatment response"),
        ("", True, "Empty response"),
        ("Hi", True, "Too short response"),
        ("I'm sorry, I cannot determine", True, "Cannot determine response"),
    )

    mismatches = 0
    for text, expected_invalid, description in cases:
        is_invalid = A.is_invalid_response(text)
        if is_invalid == expected_invalid:
            status = "✅"
        else:
            status = "❌"
            mismatches += 1

        logger.info(f"{status} {description}: '{text}' -> {is_invalid} (expected {expected_invalid})")

    return mismatches == 0
88
-
89
def test_retry_logic():
    """Smoke-test the two checks the retry path relies on.

    First confirms that a handful of failure-style responses are flagged by
    ``A.is_invalid_response``, then confirms that
    ``A.clean_conversational_elements`` keeps the medical sentence while
    dropping the greeting. Stops at the first failure.

    Returns:
        bool: True if both checks succeed, False otherwise.
    """
    logger.info("Testing retry logic...")

    # Every one of these must be reported as invalid.
    for candidate in ("FAIL", "I can't help", "Sorry", ""):
        if not A.is_invalid_response(candidate):
            logger.error(f"❌ Failed to detect invalid response: '{candidate}'")
            return False
        logger.info(f"✅ Correctly detected invalid response: '{candidate}'")

    # The medical content must survive cleaning; the greeting must not.
    sample = "Hi, I'm a doctor. Diabetes symptoms include increased thirst."
    cleaned = A.clean_conversational_elements(sample)

    if "Diabetes symptoms include increased thirst" not in cleaned or "Hi" in cleaned:
        logger.error("❌ Conversational cleaning failed")
        return False

    logger.info("✅ Conversational cleaning working correctly")
    return True
115
-
116
def main():
    """Run every test group and log per-group results plus an overall summary.

    A group that raises is logged as an error and counted as failed, so one
    broken group does not stop the remaining groups from running.

    Returns:
        bool: True only when every group returned a truthy result.
    """
    rule = "=" * 70

    def label(ok):
        # Shared pass/fail marker used in both the live log and the summary.
        return "✅ PASSED" if ok else "❌ FAILED"

    logger.info("Testing conversational cleaning and failed response handling...")
    logger.info(rule)

    # Insertion order is the execution order.
    suite = {
        "Conversational Cleaning": test_conversational_cleaning,
        "Invalid Response Detection": test_invalid_response_detection,
        "Retry Logic": test_retry_logic,
    }

    results = {}
    for test_name, test_func in suite.items():
        logger.info(f"\n--- {test_name} ---")
        try:
            outcome = test_func()
            results[test_name] = outcome
            logger.info(f"{test_name}: {label(outcome)}")
        except Exception as e:
            logger.error(f"{test_name}: ❌ ERROR - {e}")
            results[test_name] = False

    # Summary
    logger.info("\n" + rule)
    logger.info("CONVERSATIONAL CLEANING TEST SUMMARY")
    logger.info(rule)

    for test_name, outcome in results.items():
        logger.info(f"{test_name}: {label(outcome)}")

    total = len(results)
    passed = sum(bool(outcome) for outcome in results.values())
    logger.info(f"\nOverall: {passed}/{total} tests passed")

    everything_ok = passed == total
    if not everything_ok:
        logger.warning("⚠️ Some tests failed. Please check the logs above.")
    else:
        logger.info("🎉 All tests passed! Conversational cleaning is working correctly.")
        logger.info("✅ Failed responses will be retried, not recorded!")
        logger.info("✅ Conversational elements are properly cleaned!")

    return everything_ok
161
-
162
if __name__ == "__main__":
    # Exit status mirrors the suite result: 0 when all groups pass, 1 otherwise.
    success = main()
    raise SystemExit(0 if success else 1)