File size: 18,362 Bytes
13d5ab4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
"""
Confidence Gating System Test - Phase 4 Validation
Tests the confidence gating and validation system functionality.

Author: MiniMax Agent
Date: 2025-10-29
Version: 1.0.0
"""

import logging
import asyncio
import sys
from pathlib import Path
from typing import Dict, Any
from dataclasses import dataclass
from datetime import datetime

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class ConfidenceGatingSystemTester:
    """Tests confidence gating system functionality"""
    
    def __init__(self):
        """Initialize tester"""
        self.test_results = {
            "confidence_calculation": False,
            "validation_decisions": False,
            "review_priority": False,
            "queue_management": False,
            "statistics_tracking": False,
            "audit_logging": False
        }
    
    def test_confidence_calculation(self) -> bool:
        """Test composite confidence calculation"""
        logger.info("๐Ÿงฎ Testing confidence calculation...")
        
        try:
            from confidence_gating_system import ConfidenceGatingSystem
            from medical_schemas import ConfidenceScore
            
            # Initialize system
            system = ConfidenceGatingSystem()
            
            # Test confidence score calculation
            confidence = ConfidenceScore(
                extraction_confidence=0.90,
                model_confidence=0.85,
                data_quality=0.80
            )
            
            # Verify weighted formula: 0.5 * 0.90 + 0.3 * 0.85 + 0.2 * 0.80 = 0.865
            expected = 0.5 * 0.90 + 0.3 * 0.85 + 0.2 * 0.80
            actual = confidence.overall_confidence
            
            if abs(actual - expected) < 0.001:
                logger.info(f"โœ… Confidence calculation correct: {actual:.3f}")
                self.test_results["confidence_calculation"] = True
                return True
            else:
                logger.error(f"โŒ Confidence calculation failed: expected {expected:.3f}, got {actual:.3f}")
                self.test_results["confidence_calculation"] = False
                return False
                
        except Exception as e:
            logger.error(f"โŒ Confidence calculation test failed: {e}")
            self.test_results["confidence_calculation"] = False
            return False
    
    def test_validation_decisions(self) -> bool:
        """Test validation decision logic"""
        logger.info("โš–๏ธ Testing validation decisions...")
        
        try:
            from confidence_gating_system import ConfidenceGatingSystem, ValidationDecision
            from medical_schemas import ConfidenceScore
            
            system = ConfidenceGatingSystem()
            
            # Test cases for different confidence levels
            test_cases = [
                {
                    "name": "High Confidence (Auto Approve)",
                    "confidence": ConfidenceScore(extraction_confidence=0.95, model_confidence=0.90, data_quality=0.85),
                    "expected_decision": ValidationDecision.AUTO_APPROVE
                },
                {
                    "name": "Medium-High Confidence (Review Recommended)",
                    "confidence": ConfidenceScore(extraction_confidence=0.80, model_confidence=0.75, data_quality=0.70),
                    "expected_decision": ValidationDecision.REVIEW_RECOMMENDED
                },
                {
                    "name": "Medium Confidence (Review Recommended)",
                    "confidence": ConfidenceScore(extraction_confidence=0.70, model_confidence=0.65, data_quality=0.60),
                    "expected_decision": ValidationDecision.REVIEW_RECOMMENDED
                },
                {
                    "name": "Low Confidence (Manual Required)",
                    "confidence": ConfidenceScore(extraction_confidence=0.55, model_confidence=0.50, data_quality=0.45),
                    "expected_decision": ValidationDecision.MANUAL_REQUIRED
                },
                {
                    "name": "Very Low Confidence (Blocked)",
                    "confidence": ConfidenceScore(extraction_confidence=0.30, model_confidence=0.25, data_quality=0.20),
                    "expected_decision": ValidationDecision.BLOCKED
                }
            ]
            
            all_passed = True
            for case in test_cases:
                decision = system._make_validation_decision(case["confidence"])
                overall = case["confidence"].overall_confidence
                
                if decision == case["expected_decision"]:
                    logger.info(f"โœ… {case['name']}: {decision.value} (confidence: {overall:.3f})")
                else:
                    logger.error(f"โŒ {case['name']}: expected {case['expected_decision'].value}, got {decision.value} (confidence: {overall:.3f})")
                    all_passed = False
            
            if all_passed:
                logger.info("โœ… All validation decision tests passed")
                self.test_results["validation_decisions"] = True
                return True
            else:
                logger.error("โŒ Some validation decision tests failed")
                self.test_results["validation_decisions"] = False
                return False
                
        except Exception as e:
            logger.error(f"โŒ Validation decisions test failed: {e}")
            self.test_results["validation_decisions"] = False
            return False
    
    def test_review_priority(self) -> bool:
        """Test review priority assignment"""
        logger.info("๐Ÿ“‹ Testing review priority assignment...")
        
        try:
            from confidence_gating_system import ConfidenceGatingSystem, ReviewPriority
            from medical_schemas import ConfidenceScore
            
            system = ConfidenceGatingSystem()
            
            # Test priority assignment
            test_cases = [
                {
                    "confidence": ConfidenceScore(extraction_confidence=0.50, model_confidence=0.45, data_quality=0.40),
                    "expected_priority": ReviewPriority.CRITICAL
                },
                {
                    "confidence": ConfidenceScore(extraction_confidence=0.65, model_confidence=0.60, data_quality=0.55),
                    "expected_priority": ReviewPriority.HIGH  
                },
                {
                    "confidence": ConfidenceScore(extraction_confidence=0.75, model_confidence=0.70, data_quality=0.65),
                    "expected_priority": ReviewPriority.MEDIUM
                },
                {
                    "confidence": ConfidenceScore(extraction_confidence=0.85, model_confidence=0.80, data_quality=0.75),
                    "expected_priority": ReviewPriority.LOW
                },
                {
                    "confidence": ConfidenceScore(extraction_confidence=0.95, model_confidence=0.90, data_quality=0.85),
                    "expected_priority": ReviewPriority.NONE
                }
            ]
            
            all_passed = True
            for case in test_cases:
                priority = system._determine_review_priority(case["confidence"])
                overall = case["confidence"].overall_confidence
                
                if priority == case["expected_priority"]:
                    logger.info(f"โœ… Priority {priority.value} assigned for confidence {overall:.3f}")
                else:
                    logger.error(f"โŒ Expected {case['expected_priority'].value}, got {priority.value} for confidence {overall:.3f}")
                    all_passed = False
            
            if all_passed:
                logger.info("โœ… Review priority assignment tests passed")
                self.test_results["review_priority"] = True
                return True
            else:
                logger.error("โŒ Review priority assignment tests failed")
                self.test_results["review_priority"] = False
                return False
                
        except Exception as e:
            logger.error(f"โŒ Review priority test failed: {e}")
            self.test_results["review_priority"] = False
            return False
    
    def test_queue_management(self) -> bool:
        """Test review queue management"""
        logger.info("๐Ÿ“Š Testing review queue management...")
        
        try:
            from confidence_gating_system import ConfidenceGatingSystem, ReviewQueueItem, ReviewPriority, ValidationDecision
            from medical_schemas import ConfidenceScore
            
            system = ConfidenceGatingSystem()
            
            # Test queue status when empty
            status = system.get_review_queue_status()
            if status["total_pending"] == 0:
                logger.info("โœ… Empty queue status correct")
            else:
                logger.error(f"โŒ Empty queue should have 0 pending, got {status['total_pending']}")
                self.test_results["queue_management"] = False
                return False
            
            # Create mock queue items
            test_item = ReviewQueueItem(
                item_id="test_123",
                document_id="doc_123",
                priority=ReviewPriority.HIGH,
                confidence_score=ConfidenceScore(extraction_confidence=0.70, model_confidence=0.65, data_quality=0.60),
                processing_result=None,  # Simplified for test
                model_inference=None,    # Simplified for test
                review_decision=ValidationDecision.REVIEW_RECOMMENDED,
                created_timestamp=datetime.now(),
                review_deadline=datetime.now()  # Immediate deadline for testing
            )
            
            # Add to queue
            system.review_queue[test_item.item_id] = test_item
            
            # Test queue status with items
            status = system.get_review_queue_status()
            if status["total_pending"] == 1 and status["overdue_count"] >= 0:
                logger.info(f"โœ… Queue with items: {status['total_pending']} pending, {status['overdue_count']} overdue")
                self.test_results["queue_management"] = True
                return True
            else:
                logger.error(f"โŒ Queue status incorrect: {status}")
                self.test_results["queue_management"] = False
                return False
                
        except Exception as e:
            logger.error(f"โŒ Queue management test failed: {e}")
            self.test_results["queue_management"] = False
            return False
    
    def test_statistics_tracking(self) -> bool:
        """Test statistics tracking"""
        logger.info("๐Ÿ“ˆ Testing statistics tracking...")
        
        try:
            from confidence_gating_system import ConfidenceGatingSystem, ValidationDecision
            from medical_schemas import ConfidenceScore
            
            system = ConfidenceGatingSystem()
            
            # Test initial statistics
            stats = system.get_system_statistics()
            if stats["total_processed"] == 0:
                logger.info("โœ… Initial statistics correct (no processing)")
            else:
                logger.error(f"โŒ Initial statistics should show 0 processed, got {stats['total_processed']}")
                self.test_results["statistics_tracking"] = False
                return False
            
            # Simulate some processing
            test_confidence = ConfidenceScore(extraction_confidence=0.85, model_confidence=0.80, data_quality=0.75)
            system._update_statistics(ValidationDecision.AUTO_APPROVE, test_confidence, 2.5)
            
            # Test updated statistics
            stats = system.get_system_statistics()
            if (stats["total_processed"] == 1 and 
                stats["distribution"]["auto_approved"]["count"] == 1 and
                abs(stats["confidence_metrics"]["average_confidence"] - test_confidence.overall_confidence) < 0.001):
                logger.info("โœ… Statistics tracking working correctly")
                logger.info(f"  - Total processed: {stats['total_processed']}")
                logger.info(f"  - Auto approved: {stats['distribution']['auto_approved']['count']}")
                logger.info(f"  - Average confidence: {stats['confidence_metrics']['average_confidence']:.3f}")
                self.test_results["statistics_tracking"] = True
                return True
            else:
                logger.error(f"โŒ Statistics tracking failed: {stats}")
                self.test_results["statistics_tracking"] = False
                return False
                
        except Exception as e:
            logger.error(f"โŒ Statistics tracking test failed: {e}")
            self.test_results["statistics_tracking"] = False
            return False
    
    async def test_audit_logging(self) -> bool:
        """Test audit logging functionality"""
        logger.info("๐Ÿ“ Testing audit logging...")
        
        try:
            from confidence_gating_system import ConfidenceGatingSystem
            
            system = ConfidenceGatingSystem()
            
            # Test audit logging
            await system._log_audit_event(
                document_id="test_doc_123",
                event_type="test_event",
                user_id="test_user",
                confidence_scores={"overall": 0.85, "extraction": 0.90, "model": 0.80, "data_quality": 0.75},
                decision="auto_approved",
                reasoning="Test audit log entry"
            )
            
            # Check if audit log file was created
            log_files = list(system.audit_log_path.glob("audit_*.jsonl"))
            if log_files:
                logger.info(f"โœ… Audit log created: {log_files[0].name}")
                
                # Read the log entry
                with open(log_files[0], 'r') as f:
                    log_content = f.read().strip()
                    if "test_doc_123" in log_content and "auto_approved" in log_content:
                        logger.info("โœ… Audit log content verified")
                        self.test_results["audit_logging"] = True
                        return True
                    else:
                        logger.error("โŒ Audit log content incorrect")
                        self.test_results["audit_logging"] = False
                        return False
            else:
                logger.error("โŒ Audit log file not created")
                self.test_results["audit_logging"] = False
                return False
                
        except Exception as e:
            logger.error(f"โŒ Audit logging test failed: {e}")
            self.test_results["audit_logging"] = False
            return False
    
    async def run_all_tests(self) -> Dict[str, bool]:
        """Run all confidence gating system tests"""
        logger.info("๐Ÿš€ Starting Confidence Gating System Tests - Phase 4")
        logger.info("=" * 70)
        
        # Run tests in sequence
        self.test_confidence_calculation()
        self.test_validation_decisions()
        self.test_review_priority()
        self.test_queue_management()
        self.test_statistics_tracking()
        await self.test_audit_logging()
        
        # Generate test report
        logger.info("=" * 70)
        logger.info("๐Ÿ“Š CONFIDENCE GATING SYSTEM TEST RESULTS")
        logger.info("=" * 70)
        
        for test_name, result in self.test_results.items():
            status = "โœ… PASS" if result else "โŒ FAIL"
            logger.info(f"{test_name.replace('_', ' ').title()}: {status}")
        
        total_tests = len(self.test_results)
        passed_tests = sum(self.test_results.values())
        success_rate = (passed_tests / total_tests) * 100
        
        logger.info("-" * 70)
        logger.info(f"Overall Success Rate: {passed_tests}/{total_tests} ({success_rate:.1f}%)")
        
        if success_rate >= 80:
            logger.info("๐ŸŽ‰ CONFIDENCE GATING SYSTEM TESTS PASSED - Phase 4 Complete!")
            logger.info("")
            logger.info("โœ… VALIDATED COMPONENTS:")
            logger.info("  โ€ข Composite confidence calculation with weighted formula")
            logger.info("  โ€ข Validation decision logic with configurable thresholds")
            logger.info("  โ€ข Review priority assignment (Critical/High/Medium/Low/None)")
            logger.info("  โ€ข Review queue management with deadline tracking")
            logger.info("  โ€ข Statistics tracking for performance monitoring")
            logger.info("  โ€ข Audit logging for compliance and traceability")
            logger.info("")
            logger.info("๐ŸŽฏ CONFIDENCE THRESHOLDS IMPLEMENTED:")
            logger.info("  โ€ข โ‰ฅ0.85: Auto-approve (no human review needed)")
            logger.info("  โ€ข 0.60-0.85: Review recommended (quality assurance)")
            logger.info("  โ€ข <0.60: Manual review required (safety check)")
            logger.info("  โ€ข Critical errors: Blocked (immediate intervention)")
            logger.info("")
            logger.info("๐Ÿ”„ COMPLETE PIPELINE ESTABLISHED:")
            logger.info("  File Detection โ†’ PHI Removal โ†’ Structured Extraction โ†’ Model Routing โ†’ Confidence Gating โ†’ Review Queue/Auto-Approval")
            logger.info("")
            logger.info("๐Ÿš€ READY FOR PHASE 5: Enhanced Frontend with Structured Data Display")
        else:
            logger.warning("โš ๏ธ CONFIDENCE GATING SYSTEM TESTS FAILED - Phase 4 Issues Detected")
        
        return self.test_results


async def main():
    """Main test execution"""
    try:
        tester = ConfidenceGatingSystemTester()
        results = await tester.run_all_tests()
        
        # Return appropriate exit code
        success_rate = sum(results.values()) / len(results)
        exit_code = 0 if success_rate >= 0.8 else 1
        sys.exit(exit_code)
        
    except Exception as e:
        logger.error(f"โŒ Confidence gating system test execution failed: {e}")
        sys.exit(1)


if __name__ == "__main__":
    asyncio.run(main())