Spaces:
Sleeping
Sleeping
| """ | |
| Generate a compact quality report for the clinical case bank. | |
| Usage: | |
| python scripts/data_quality_report.py | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import sys | |
| from pathlib import Path | |
| from typing import Any, Dict, List | |
| ROOT = Path(__file__).resolve().parent.parent | |
| if str(ROOT) not in sys.path: | |
| sys.path.insert(0, str(ROOT)) | |
| from tasks.case_bank import AE_CASES, DEVIATION_CASES, NARRATIVE_CASES | |
| REQUIRED_KEYS = { | |
| "ae": [ | |
| "case_id", | |
| "narrative", | |
| "ground_truth", | |
| "patient_age", | |
| "study_drug", | |
| "lab_values", | |
| ], | |
| "deviation": [ | |
| "site_id", | |
| "site_name", | |
| "findings", | |
| "ground_truth", | |
| ], | |
| "narrative": [ | |
| "case_id", | |
| "patient_demographics", | |
| "adverse_event", | |
| "ground_truth", | |
| ], | |
| } | |
| def _missing_keys(case: Dict[str, Any], required_keys: List[str]) -> List[str]: | |
| return [key for key in required_keys if key not in case or case[key] in (None, "", [])] | |
| def _analyze_cases(cases: List[Dict[str, Any]], required: List[str], label: str) -> Dict[str, Any]: | |
| missing = [] | |
| for case in cases: | |
| gaps = _missing_keys(case, required) | |
| if gaps: | |
| missing.append({"id": case.get("case_id") or case.get("site_id"), "missing": gaps}) | |
| return { | |
| "task": label, | |
| "count": len(cases), | |
| "missing_case_count": len(missing), | |
| "missing_examples": missing[:5], | |
| } | |
| def main() -> None: | |
| report = { | |
| "summary": { | |
| "ae_cases": len(AE_CASES), | |
| "deviation_cases": len(DEVIATION_CASES), | |
| "narrative_cases": len(NARRATIVE_CASES), | |
| "total_cases": len(AE_CASES) + len(DEVIATION_CASES) + len(NARRATIVE_CASES), | |
| }, | |
| "quality": [ | |
| _analyze_cases(AE_CASES, REQUIRED_KEYS["ae"], "adverse_event_triage"), | |
| _analyze_cases(DEVIATION_CASES, REQUIRED_KEYS["deviation"], "protocol_deviation_audit"), | |
| _analyze_cases(NARRATIVE_CASES, REQUIRED_KEYS["narrative"], "safety_narrative_generation"), | |
| ], | |
| } | |
| output_path = ROOT / "outputs" / "data_quality_report.json" | |
| output_path.parent.mkdir(parents=True, exist_ok=True) | |
| output_path.write_text(json.dumps(report, indent=2), encoding="utf-8") | |
| print(json.dumps(report, indent=2)) | |
| print(f"\nSaved report to: {output_path}") | |
| if __name__ == "__main__": | |
| main() | |