"""Simplified production system audit without complex imports.""" import json import logging import sys from pathlib import Path logging.basicConfig( level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s', ) logger = logging.getLogger('medcare_ddi.quick_audit') BASE_DIR = Path(__file__).resolve().parents[2] DATA_PATH = BASE_DIR / 'data' / 'processed' / 'ddinter_combined.parquet' MODEL_DIR = BASE_DIR / 'models' FEATURE_PIPELINE_MULTISOURCE_PATH = MODEL_DIR / 'feature_pipeline_multisource.pkl' def audit_files(): """Check that all critical files exist.""" logger.info('='*60) logger.info('FILE EXISTENCE CHECK') logger.info('='*60) critical_files = { 'Feature Pipeline (11MB)': FEATURE_PIPELINE_MULTISOURCE_PATH, 'Model Checkpoint (318KB)': MODEL_DIR / 'ddi_mlp_best.pt', 'Data File (13MB)': DATA_PATH, 'Metadata': MODEL_DIR / 'multisource_metadata.json', 'Training Config': MODEL_DIR / 'training_config.json', 'FastAPI Backend': BASE_DIR / 'src' / 'inference' / 'app_production.py', 'Production Training': BASE_DIR / 'src' / 'training' / 'train_production_simple.py', 'Smoke Tests': BASE_DIR / 'src' / 'validation' / 'smoke_test.py', } all_good = True for name, path in critical_files.items(): exists = path.exists() status = '✓' if exists else '✗' if path.suffix in ['.pkl', '.pt', '.csv']: try: size_mb = path.stat().st_size / (1024 * 1024) logger.info(f'{status} {name}: {size_mb:.1f}MB') except: logger.info(f'{status} {name}') else: logger.info(f'{status} {name}') all_good = all_good and exists return all_good def audit_metadata(): """Check metadata schema.""" logger.info('') logger.info('='*60) logger.info('METADATA & SCHEMA CHECK') logger.info('='*60) try: with open(MODEL_DIR / 'multisource_metadata.json') as f: metadata = json.load(f) # Check for both possible field names total_dim = metadata.get('total_dim') or metadata.get('vector_dim', 0) logger.info(f'✓ Multisource metadata loaded') logger.info(f' - Total dimension: {total_dim}') if total_dim != 560: logger.error(f'✗ SCHEMA MISMATCH: Expected 560, got {total_dim}') return False # Check feature groups feature_groups = metadata.get('feature_groups') or metadata.get('group_keep_counts', {}) if feature_groups: for group, dim_or_count in feature_groups.items(): # Handle both dict and int values dim = dim_or_count if isinstance(dim_or_count, int) else dim_or_count.get('dim', 0) logger.info(f' - {group}: {dim}') logger.info(f'✓ 560-dimensional schema confirmed') return True except Exception as e: logger.error(f'✗ Metadata check failed: {e}') return False def audit_model_config(): """Check training config.""" logger.info('') logger.info('='*60) logger.info('MODEL TRAINING CONFIG') logger.info('='*60) try: with open(MODEL_DIR / 'training_config.json') as f: config = json.load(f) logger.info(f'✓ Training config loaded') logger.info(f' - Loss type: {config.get("loss_type")}') logger.info(f' - Sampler: {config.get("sampler")}') logger.info(f' - Hidden dim: {config.get("hidden_dim")}') logger.info(f' - Learning rate: {config.get("lr")}') if config.get('loss_type') == 'focal' and config.get('sampler') == 'weighted': logger.info(f'✓ Healthcare optimization features enabled') return True else: logger.warning(f'⚠ Some optimization features may not be enabled') return True except Exception as e: logger.error(f'✗ Config check failed: {e}') return False def audit_summary_metrics(): """Check metrics from previous training.""" logger.info('') logger.info('='*60) logger.info('PREVIOUS MODEL METRICS') logger.info('='*60) try: with open(MODEL_DIR / 'ddi_mlp_best.summary.json') as f: summary = json.load(f) logger.info(f'✓ Model summary loaded') logger.info(f' - Accuracy: {summary.get("best_validation_accuracy", 0):.2%}') logger.info(f' - Dataset size: {summary.get("dataset_size", 0):,}') logger.info(f' - Training epochs: {len(summary.get("training_history", []))}') return True except Exception as e: logger.error(f'✗ Metrics check failed: {e}') return False def audit_code_structure(): """Check that production code files exist and have content.""" logger.info('') logger.info('='*60) logger.info('PRODUCTION CODE STRUCTURE') logger.info('='*60) code_files = { 'FastAPI Backend': BASE_DIR / 'src' / 'inference' / 'app_production.py', 'Training Pipeline': BASE_DIR / 'src' / 'training' / 'train_production_simple.py', 'Smoke Tests': BASE_DIR / 'src' / 'validation' / 'smoke_test.py', 'Predictor': BASE_DIR / 'src' / 'inference' / 'predictor.py', } all_good = True for name, path in code_files.items(): if not path.exists(): logger.error(f'✗ {name} missing') all_good = False continue try: with open(path) as f: lines = len(f.readlines()) logger.info(f'✓ {name}: {lines} lines') except Exception as e: logger.error(f'✗ {name}: {e}') all_good = False return all_good def main(): """Run quick audit.""" logger.info('') logger.info('╔' + '═'*58 + '╗') logger.info('║ MEDCARE-DDI QUICK PRODUCTION AUDIT' + ' '*24 + '║') logger.info('╚' + '═'*58 + '╝') results = { 'Files': audit_files(), 'Metadata': audit_metadata(), 'Config': audit_model_config(), 'Metrics': audit_summary_metrics(), 'Code': audit_code_structure(), } logger.info('') logger.info('='*60) logger.info('AUDIT SUMMARY') logger.info('='*60) all_passed = all(results.values()) status = '✓ READY' if all_passed else '⚠ NEEDS_ATTENTION' logger.info(f'{status} - Production system status') for check, passed in results.items(): status = '✓' if passed else '✗' logger.info(f'{status} {check}') logger.info('') # Save report report = { 'timestamp': __import__('datetime').datetime.now().isoformat(), 'checks': results, 'status': 'READY' if all_passed else 'NEEDS_ATTENTION', } report_path = MODEL_DIR / 'reports' / 'quick_audit.json' report_path.parent.mkdir(parents=True, exist_ok=True) with open(report_path, 'w') as f: json.dump(report, f, indent=2) logger.info(f'✓ Report saved to {report_path}') logger.info('') if __name__ == '__main__': main()