Spaces:
Running
Running
| """Simplified production system audit without complex imports.""" | |
| import json | |
| import logging | |
| import sys | |
| from pathlib import Path | |
# Emit INFO-and-above records with a timestamp, level and logger name.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] %(name)s: %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger('medcare_ddi.quick_audit')

# Project locations are resolved relative to this file: parents[2] is the
# directory two levels above the one that contains this script.
BASE_DIR = Path(__file__).resolve().parents[2]
MODEL_DIR = BASE_DIR / 'models'
DATA_PATH = BASE_DIR.joinpath('data', 'processed', 'ddinter_combined.parquet')
FEATURE_PIPELINE_MULTISOURCE_PATH = MODEL_DIR / 'feature_pipeline_multisource.pkl'
def audit_files():
    """Check that every critical artifact and source file exists on disk.

    One line is logged per file. Files that exist and carry a binary/data
    suffix (.pkl, .pt, .csv, .parquet) are logged together with their size
    in megabytes; everything else is logged by name only.

    Returns:
        bool: True when every listed path exists, False otherwise.
    """
    # Written as escapes so the glyphs survive re-encoding of the source; the
    # two markers must differ or the log cannot tell present from missing.
    ok_mark, missing_mark = '\u2713', '\u2717'
    # Suffixes whose on-disk size is worth reporting. '.parquet' is included
    # so the data file's size is reported like the other large artifacts.
    sized_suffixes = {'.pkl', '.pt', '.csv', '.parquet'}

    logger.info('='*60)
    logger.info('FILE EXISTENCE CHECK')
    logger.info('='*60)

    critical_files = {
        'Feature Pipeline (11MB)': FEATURE_PIPELINE_MULTISOURCE_PATH,
        'Model Checkpoint (318KB)': MODEL_DIR / 'ddi_mlp_best.pt',
        'Data File (13MB)': DATA_PATH,
        'Metadata': MODEL_DIR / 'multisource_metadata.json',
        'Training Config': MODEL_DIR / 'training_config.json',
        'FastAPI Backend': BASE_DIR / 'src' / 'inference' / 'app_production.py',
        'Production Training': BASE_DIR / 'src' / 'training' / 'train_production_simple.py',
        'Smoke Tests': BASE_DIR / 'src' / 'validation' / 'smoke_test.py',
    }

    all_good = True
    for name, path in critical_files.items():
        exists = path.exists()
        all_good = all_good and exists
        mark = ok_mark if exists else missing_mark

        size_mb = None
        if exists and path.suffix in sized_suffixes:
            try:
                size_mb = path.stat().st_size / (1024 * 1024)
            except OSError:
                # The file became unreadable between exists() and stat();
                # fall back to the size-less line instead of aborting.
                size_mb = None

        if size_mb is None:
            logger.info(f'{mark} {name}')
        else:
            logger.info(f'{mark} {name}: {size_mb:.1f}MB')

    return all_good
def audit_metadata(expected_dim=560):
    """Check that the multisource metadata declares the expected vector size.

    Reads ``multisource_metadata.json`` from MODEL_DIR, takes the dimension
    from ``total_dim`` (falling back to ``vector_dim``), compares it with
    ``expected_dim`` and logs the per-group sizes found under
    ``feature_groups`` (falling back to ``group_keep_counts``).

    Args:
        expected_dim: Dimension the metadata must declare. Defaults to 560.

    Returns:
        bool: True when the file loads and the dimension matches; False on a
        mismatch or on any error while reading or interpreting the file.
    """
    # Escapes keep the source ASCII-only; success and failure markers must be
    # distinct for the log to be readable.
    ok_mark, fail_mark = '\u2713', '\u2717'

    logger.info('')
    logger.info('='*60)
    logger.info('METADATA & SCHEMA CHECK')
    logger.info('='*60)

    try:
        with open(MODEL_DIR / 'multisource_metadata.json', encoding='utf-8') as f:
            metadata = json.load(f)

        # The dimension may be stored under either field name.
        total_dim = metadata.get('total_dim') or metadata.get('vector_dim', 0)
        logger.info(f'{ok_mark} Multisource metadata loaded')
        logger.info(f' - Total dimension: {total_dim}')

        if total_dim != expected_dim:
            logger.error(
                f'{fail_mark} SCHEMA MISMATCH: Expected {expected_dim}, got {total_dim}'
            )
            return False

        feature_groups = metadata.get('feature_groups') or metadata.get('group_keep_counts', {})
        if isinstance(feature_groups, dict):
            for group, spec in feature_groups.items():
                # A group is either a plain count or a mapping with a 'dim'
                # entry; any other value is logged as-is rather than failing
                # a schema whose dimension already matched.
                dim = spec.get('dim', 0) if isinstance(spec, dict) else spec
                logger.info(f' - {group}: {dim}')

        logger.info(f'{ok_mark} {expected_dim}-dimensional schema confirmed')
        return True
    except Exception as e:
        # Audit boundary: any failure is reported as a failed check so the
        # remaining checks still run.
        logger.error(f'{fail_mark} Metadata check failed: {e}')
        return False
def audit_model_config():
    """Load the training config and log its key hyper-parameters.

    Reads ``training_config.json`` from MODEL_DIR and logs the loss type,
    sampler, hidden dimension and learning rate. A warning is logged when the
    config does not combine ``loss_type == 'focal'`` with
    ``sampler == 'weighted'``; that situation does not fail the check.

    Returns:
        bool: True when the config could be loaded and inspected, False when
        reading or interpreting it raised an error.
    """
    # Escapes keep the source ASCII-only; the three markers must be distinct
    # so success, warning and failure lines can be told apart.
    ok_mark, warn_mark, fail_mark = '\u2713', '\u26a0', '\u2717'

    logger.info('')
    logger.info('='*60)
    logger.info('MODEL TRAINING CONFIG')
    logger.info('='*60)

    try:
        with open(MODEL_DIR / 'training_config.json', encoding='utf-8') as f:
            config = json.load(f)

        logger.info(f'{ok_mark} Training config loaded')
        logger.info(f' - Loss type: {config.get("loss_type")}')
        logger.info(f' - Sampler: {config.get("sampler")}')
        logger.info(f' - Hidden dim: {config.get("hidden_dim")}')
        logger.info(f' - Learning rate: {config.get("lr")}')

        if config.get('loss_type') == 'focal' and config.get('sampler') == 'weighted':
            logger.info(f'{ok_mark} Healthcare optimization features enabled')
        else:
            # Advisory only: the check still passes without these settings.
            logger.warning(f'{warn_mark} Some optimization features may not be enabled')
        return True
    except Exception as e:
        # Audit boundary: report the failure and let the other checks run.
        logger.error(f'{fail_mark} Config check failed: {e}')
        return False
def audit_summary_metrics():
    """Load the previous training summary and log its headline metrics.

    Reads ``ddi_mlp_best.summary.json`` from MODEL_DIR and logs the best
    validation accuracy, the dataset size and the number of entries in the
    training history. Missing or null fields are reported as zero.

    Returns:
        bool: True when the summary could be loaded and logged, False when
        reading or interpreting it raised an error.
    """
    # Escapes keep the source ASCII-only; success and failure markers must be
    # distinct for the log to be readable.
    ok_mark, fail_mark = '\u2713', '\u2717'

    logger.info('')
    logger.info('='*60)
    logger.info('PREVIOUS MODEL METRICS')
    logger.info('='*60)

    try:
        with open(MODEL_DIR / 'ddi_mlp_best.summary.json', encoding='utf-8') as f:
            summary = json.load(f)

        # `or` also covers keys that are present but null in the JSON, which
        # would otherwise break the numeric format specs below.
        accuracy = summary.get('best_validation_accuracy') or 0
        dataset_size = summary.get('dataset_size') or 0
        history = summary.get('training_history') or []

        logger.info(f'{ok_mark} Model summary loaded')
        logger.info(f' - Accuracy: {accuracy:.2%}')
        logger.info(f' - Dataset size: {dataset_size:,}')
        logger.info(f' - Training epochs: {len(history)}')
        return True
    except Exception as e:
        # Audit boundary: report the failure and let the other checks run.
        logger.error(f'{fail_mark} Metrics check failed: {e}')
        return False
def audit_code_structure():
    """Check that the production code files exist and have content.

    For each listed source file the line count is logged. A file that is
    missing, cannot be read, or contains no lines fails the check.

    Returns:
        bool: True when every listed file exists, is readable and is
        non-empty; False otherwise.
    """
    # Escapes keep the source ASCII-only; success and failure markers must be
    # distinct for the log to be readable.
    ok_mark, fail_mark = '\u2713', '\u2717'

    logger.info('')
    logger.info('='*60)
    logger.info('PRODUCTION CODE STRUCTURE')
    logger.info('='*60)

    code_files = {
        'FastAPI Backend': BASE_DIR / 'src' / 'inference' / 'app_production.py',
        'Training Pipeline': BASE_DIR / 'src' / 'training' / 'train_production_simple.py',
        'Smoke Tests': BASE_DIR / 'src' / 'validation' / 'smoke_test.py',
        'Predictor': BASE_DIR / 'src' / 'inference' / 'predictor.py',
    }

    all_good = True
    for name, path in code_files.items():
        if not path.exists():
            logger.error(f'{fail_mark} {name} missing')
            all_good = False
            continue

        try:
            # Stream the file instead of materialising every line. Undecodable
            # bytes are replaced so a stray byte cannot fail a line count.
            with open(path, encoding='utf-8', errors='replace') as f:
                line_count = sum(1 for _ in f)
        except OSError as e:
            logger.error(f'{fail_mark} {name}: {e}')
            all_good = False
            continue

        if line_count == 0:
            # The docstring promises files "have content": an empty file fails.
            logger.error(f'{fail_mark} {name}: file is empty')
            all_good = False
        else:
            logger.info(f'{ok_mark} {name}: {line_count} lines')

    return all_good
def main():
    """Run every quick-audit check, log a summary and save a JSON report.

    The checks run in a fixed order (files, metadata, config, metrics, code)
    and all of them run even when an earlier one fails. The report is written
    to ``reports/quick_audit.json`` under MODEL_DIR; the directory is created
    when missing.

    Returns:
        bool: True when every check passed, False otherwise.

    Raises:
        OSError: If the report directory or file cannot be written.
    """
    # Local import: only the report timestamp needs it.
    from datetime import datetime

    # Escapes keep the source ASCII-only. Pass/fail markers must be distinct.
    ok_mark, fail_mark = '\u2713', '\u2717'
    inner_width = 58
    title = ' MEDCARE-DDI QUICK PRODUCTION AUDIT'

    logger.info('')
    logger.info('\u2554' + '\u2550'*inner_width + '\u2557')
    # Pad to the border width so the right edge lines up with the frame.
    logger.info('\u2551' + title.ljust(inner_width) + '\u2551')
    logger.info('\u255a' + '\u2550'*inner_width + '\u255d')

    results = {
        'Files': audit_files(),
        'Metadata': audit_metadata(),
        'Config': audit_model_config(),
        'Metrics': audit_summary_metrics(),
        'Code': audit_code_structure(),
    }

    logger.info('')
    logger.info('='*60)
    logger.info('AUDIT SUMMARY')
    logger.info('='*60)

    all_passed = all(results.values())
    overall = f'{ok_mark} READY' if all_passed else f'{fail_mark} NEEDS_ATTENTION'
    logger.info(f'{overall} - Production system status')

    for check, passed in results.items():
        mark = ok_mark if passed else fail_mark
        logger.info(f'{mark} {check}')

    logger.info('')

    # Save report
    report = {
        'timestamp': datetime.now().isoformat(),
        'checks': results,
        'status': 'READY' if all_passed else 'NEEDS_ATTENTION',
    }
    report_path = MODEL_DIR / 'reports' / 'quick_audit.json'
    report_path.parent.mkdir(parents=True, exist_ok=True)
    with open(report_path, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2)

    logger.info(f'{ok_mark} Report saved to {report_path}')
    logger.info('')
    return all_passed


if __name__ == '__main__':
    # NOTE(review): the process exit status does not reflect the audit result;
    # main()'s return value is available for callers that need it.
    main()