Spaces:
Running
Running
github-actions[bot] committed on
Commit ·
ee28bd3
0
Parent(s):
Deploy 1.15.12
Browse files. This view is limited to 50 files because it contains too many changes. See the raw diff.
- .gitignore +2 -0
- Dockerfile +34 -0
- README.md +12 -0
- app/__init__.py +1 -0
- app/main.py +79 -0
- app/models/__init__.py +1 -0
- app/models/explain_schemas.py +185 -0
- app/models/ml_schemas.py +194 -0
- app/models/schemas.py +73 -0
- app/routers/__init__.py +1 -0
- app/routers/data_router.py +184 -0
- app/routers/explain_router.py +454 -0
- app/routers/ml_router.py +92 -0
- app/services/__init__.py +1 -0
- app/services/certificate_service.py +690 -0
- app/services/data_service.py +1272 -0
- app/services/ethics_service.py +500 -0
- app/services/explain_service.py +665 -0
- app/services/insight_service.py +607 -0
- app/services/ml_service.py +855 -0
- app/services/specialty_registry.py +559 -0
- app/utils/__init__.py +1 -0
- arena/__init__.py +0 -0
- arena/router.py +72 -0
- arena/schemas.py +64 -0
- arena/service.py +199 -0
- data_cache/cardiology_arrhythmia.csv +0 -0
- data_cache/cardiology_hf.csv +300 -0
- data_cache/depression_data.csv +0 -0
- data_cache/dermatology.csv +0 -0
- data_cache/endocrinology_diabetes.csv +768 -0
- data_cache/hepatology_liver.csv +583 -0
- data_cache/icu_sepsis.csv +0 -0
- data_cache/nephrology_ckd.csv +363 -0
- data_cache/neurology_parkinsons.csv +196 -0
- data_cache/obstetrics_fetal.csv +0 -0
- data_cache/oncology_cervical.csv +0 -0
- data_cache/ophthalmology.arff +0 -0
- data_cache/orthopaedics.arff +322 -0
- data_cache/pharmacy_readmission.csv +0 -0
- data_cache/pulmonology_copd.csv +102 -0
- data_cache/radiology_pneumonia.csv +0 -0
- data_cache/thyroid.csv +215 -0
- datasets/.gitkeep +0 -0
- main_hf.py +72 -0
- requirements.txt +19 -0
- static/.gitkeep +0 -0
- static/apple-touch-icon.png +0 -0
- static/assets/ArenaPage-C8SsT3v3.js +0 -0
- static/assets/ArenaPage-C8SsT3v3.js.map +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.pyc
|
| 2 |
+
__pycache__/
|
Dockerfile
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Stage 1 — install dependencies
|
| 2 |
+
FROM python:3.12-slim AS builder
|
| 3 |
+
|
| 4 |
+
WORKDIR /build
|
| 5 |
+
|
| 6 |
+
COPY requirements.txt .
|
| 7 |
+
# Install into an isolated target directory. This is its own RUN step so that a
# failed install fails the build: in the previous single-command form
# (`pip ... && find ... || true && find ... || true`) the `|| true` also
# swallowed a non-zero exit status from pip.
RUN pip install --no-cache-dir --no-compile --target=/build/deps -r requirements.txt
# Strip bytecode caches and bundled test suites to shrink the copied layer.
# `-prune` stops find from descending into directories it is about to delete,
# so no "No such file or directory" errors need to be suppressed.
RUN find /build/deps -type d \( -name "__pycache__" -o -name "tests" \) -prune -exec rm -rf {} +
|
| 10 |
+
|
| 11 |
+
## Stage 2 — slim runtime
|
| 12 |
+
FROM python:3.12-slim
|
| 13 |
+
|
| 14 |
+
# Native libs required by scikit-learn, xgboost, lightgbm, scipy, shap
|
| 15 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 16 |
+
libgomp1 \
|
| 17 |
+
libopenblas0 \
|
| 18 |
+
&& apt-get clean \
|
| 19 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 20 |
+
|
| 21 |
+
WORKDIR /app
|
| 22 |
+
|
| 23 |
+
COPY --from=builder /build/deps /usr/local/lib/python3.12/site-packages
|
| 24 |
+
|
| 25 |
+
COPY app ./app
|
| 26 |
+
COPY datasets ./datasets
|
| 27 |
+
COPY data_cache ./data_cache
|
| 28 |
+
COPY arena ./arena
|
| 29 |
+
COPY static ./static
|
| 30 |
+
COPY main_hf.py .
|
| 31 |
+
|
| 32 |
+
EXPOSE 7860
|
| 33 |
+
|
| 34 |
+
CMD ["python", "-m", "uvicorn", "main_hf:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: HealthWithSevgi
|
| 3 |
+
emoji: 🏥
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
# HealthWithSevgi — ML Learning Tool for Healthcare Professionals
|
| 11 |
+
|
| 12 |
+
A 7-step ML visualization tool for healthcare professionals to explore clinical datasets, prepare data, train models, and interpret predictions.
|
app/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""HealthWithSevgi FastAPI backend package."""
|
app/main.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""HealthWithSevgi — FastAPI Backend Entry Point"""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import logging
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
load_dotenv(Path(__file__).resolve().parent.parent / ".env")
|
| 9 |
+
|
| 10 |
+
from fastapi import FastAPI
|
| 11 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
+
|
| 13 |
+
from app.services.certificate_service import CertificateService
|
| 14 |
+
from app.services.data_service import DataService
|
| 15 |
+
from app.services.ethics_service import EthicsService
|
| 16 |
+
from app.services.explain_service import ExplainService
|
| 17 |
+
from app.services.insight_service import InsightService
|
| 18 |
+
from app.services.ml_service import MLService
|
| 19 |
+
|
| 20 |
+
logging.basicConfig(level=logging.INFO, format="%(levelname)s | %(name)s | %(message)s")
|
| 21 |
+
|
| 22 |
+
app = FastAPI(
|
| 23 |
+
title="HealthWithSevgi API",
|
| 24 |
+
description="ML Visualization Tool for Healthcare — REST API",
|
| 25 |
+
version="1.3.1",
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
# CORS — allow frontend dev server
|
| 29 |
+
app.add_middleware(
|
| 30 |
+
CORSMiddleware,
|
| 31 |
+
allow_origins=["http://localhost:5173", "http://127.0.0.1:5173"],
|
| 32 |
+
allow_credentials=True,
|
| 33 |
+
allow_methods=["*"],
|
| 34 |
+
allow_headers=["*"],
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
# Singleton service instances
|
| 38 |
+
app.state.data_service = DataService()
|
| 39 |
+
app.state.ml_service = MLService()
|
| 40 |
+
app.state.explain_service = ExplainService()
|
| 41 |
+
app.state.ethics_service = EthicsService()
|
| 42 |
+
app.state.insight_service = InsightService()
|
| 43 |
+
app.state.certificate_service = CertificateService()
|
| 44 |
+
|
| 45 |
+
# Routers
|
| 46 |
+
from app.routers.data_router import router as data_router # noqa: E402
|
| 47 |
+
from app.routers.explain_router import router as explain_router # noqa: E402
|
| 48 |
+
from app.routers.ml_router import router as ml_router # noqa: E402
|
| 49 |
+
|
| 50 |
+
app.include_router(data_router)
|
| 51 |
+
app.include_router(ml_router)
|
| 52 |
+
app.include_router(explain_router)
|
| 53 |
+
|
| 54 |
+
# Model Arena extension
|
| 55 |
+
import sys
|
| 56 |
+
from pathlib import Path
|
| 57 |
+
_arena_path = str(Path(__file__).resolve().parent.parent.parent / "local" / "model-arena")
|
| 58 |
+
if _arena_path not in sys.path:
|
| 59 |
+
sys.path.insert(0, _arena_path)
|
| 60 |
+
from arena.router import router as arena_router # noqa: E402
|
| 61 |
+
from arena.service import ArenaService # noqa: E402
|
| 62 |
+
|
| 63 |
+
app.state.arena_service = ArenaService(app.state.ml_service)
|
| 64 |
+
app.include_router(arena_router)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
@app.get("/")
async def root() -> dict:
    """Root endpoint — returns a small JSON status object so health checks have a 200 target.

    The payload carries a fixed ``status`` of ``"ok"``, the project name, and the
    API version. The version is read from the application object so it cannot
    drift from the value passed to ``FastAPI(version=...)``.
    """
    return {"status": "ok", "project": "HealthWithSevgi", "version": app.version}
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
@app.get("/health")
async def health_check() -> dict:
    """
    Liveness probe — returns a static ``{"status": "healthy"}`` payload.

    NOTE(review): this handler performs no checks of its own; in particular it
    does not import or exercise sklearn, xgboost, lightgbm, shap, or scipy. A
    200 response only shows that the application process is serving requests.
    """
    return {"status": "healthy"}
|
app/models/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Pydantic request/response schemas used by the routers."""
|
app/models/explain_schemas.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pydantic schemas for explainability, ethics, and certificate endpoints."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from typing import Literal
|
| 5 |
+
|
| 6 |
+
from pydantic import BaseModel, Field
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class FeatureImportanceItem(BaseModel):
|
| 10 |
+
"""One row of global SHAP importance — feature name + mean |SHAP value|."""
|
| 11 |
+
feature_name: str
|
| 12 |
+
clinical_name: str
|
| 13 |
+
importance: float
|
| 14 |
+
direction: Literal["positive", "negative", "neutral"]
|
| 15 |
+
clinical_note: str
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class GlobalExplainabilityResponse(BaseModel):
|
| 19 |
+
"""
|
| 20 |
+
Payload for `/api/explain/global-importance` — the ranked feature list with the method
|
| 21 |
+
used (tree or kernel SHAP) and a textual description for the UI.
|
| 22 |
+
"""
|
| 23 |
+
model_id: str
|
| 24 |
+
method: str
|
| 25 |
+
feature_importances: list[FeatureImportanceItem]
|
| 26 |
+
top_feature_clinical_note: str
|
| 27 |
+
explained_variance_pct: float
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class SHAPWaterfallPoint(BaseModel):
|
| 31 |
+
"""
|
| 32 |
+
Single bar in the per-patient SHAP waterfall: which feature pushed the probability in
|
| 33 |
+
which direction and by how much.
|
| 34 |
+
"""
|
| 35 |
+
feature_name: str
|
| 36 |
+
clinical_name: str
|
| 37 |
+
feature_value: float | str
|
| 38 |
+
shap_value: float
|
| 39 |
+
direction: Literal["increases_risk", "decreases_risk"]
|
| 40 |
+
plain_language: str
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class SinglePatientExplainResponse(BaseModel):
|
| 44 |
+
"""
|
| 45 |
+
Payload for `/api/explain/single-patient` — base value, final prediction, and the
|
| 46 |
+
ordered waterfall points.
|
| 47 |
+
"""
|
| 48 |
+
model_id: str
|
| 49 |
+
patient_index: int
|
| 50 |
+
predicted_class: str
|
| 51 |
+
predicted_probability: float
|
| 52 |
+
base_value: float
|
| 53 |
+
waterfall: list[SHAPWaterfallPoint]
|
| 54 |
+
clinical_summary: str
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class SubgroupMetrics(BaseModel):
    """
    Metrics for one subgroup of a sensitive attribute: sample size, accuracy,
    sensitivity, specificity, precision, and F1, plus a review status with an
    optional free-text reason.
    """

    group_name: str
    group_label: str
    sample_size: int
    accuracy: float
    sensitivity: float
    specificity: float
    precision: float
    f1_score: float
    status: Literal["acceptable", "review", "action_needed"]
    status_reason: str = ""
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
class BiasWarning(BaseModel):
|
| 75 |
+
"""
|
| 76 |
+
Machine-readable flag emitted when a subgroup metric falls outside the configured
|
| 77 |
+
tolerance relative to the overall cohort.
|
| 78 |
+
"""
|
| 79 |
+
detected: bool
|
| 80 |
+
message: str
|
| 81 |
+
affected_group: str
|
| 82 |
+
metric: str
|
| 83 |
+
gap: float
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
class CaseStudy(BaseModel):
|
| 87 |
+
"""
|
| 88 |
+
One narrative case study from the ethics LLM pass — a real-world regulatory/clinical
|
| 89 |
+
incident with a short lesson.
|
| 90 |
+
"""
|
| 91 |
+
id: str
|
| 92 |
+
title: str
|
| 93 |
+
specialty: str
|
| 94 |
+
year: int
|
| 95 |
+
what_happened: str
|
| 96 |
+
impact: str
|
| 97 |
+
lesson: str
|
| 98 |
+
severity: Literal["failure", "near_miss", "prevention"]
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
class RepresentationWarning(BaseModel):
|
| 102 |
+
"""Flags a demographic group whose training-data proportion differs
|
| 103 |
+
from the population norm by more than the configured threshold."""
|
| 104 |
+
|
| 105 |
+
group: str
|
| 106 |
+
attribute: str
|
| 107 |
+
dataset_pct: float
|
| 108 |
+
population_pct: float
|
| 109 |
+
gap_pp: float
|
| 110 |
+
message: str
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
class EthicsResponse(BaseModel):
    """
    Payload for `/api/explain/ethics` — per-subgroup metrics, bias and
    representation warnings, training-set representation, overall sensitivity,
    EU AI Act checklist items, case studies, and whether demographic columns
    were available (with an explanatory note).
    """

    model_id: str
    subgroup_metrics: list[SubgroupMetrics]
    bias_warnings: list[BiasWarning]
    training_representation: dict
    representation_warnings: list[RepresentationWarning] = Field(default_factory=list)
    overall_sensitivity: float
    eu_ai_act_items: list[dict]
    case_studies: list[CaseStudy]
    demographics_available: bool = True
    demographics_note: str = ""
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
class WhatIfRequest(BaseModel):
    """
    Request body for `/api/explain/what-if` — identifies a model and a patient
    row by index, and names a single feature together with the new numeric
    value to try for it.
    """

    model_id: str
    patient_index: int
    feature_name: str
    new_value: float
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
class WhatIfResponse(BaseModel):
    """
    Response for `/api/explain/what-if` — the edited feature, its original and
    new values, the predicted probability before and after the edit, the
    resulting `shift`, and a categorical direction for that shift.
    """

    feature_name: str
    original_value: float
    new_value: float
    original_prob: float
    new_prob: float
    shift: float
    direction: Literal["increased_risk", "decreased_risk", "no_change"]
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
class ChecklistUpdate(BaseModel):
|
| 153 |
+
"""Toggle payload used to persist a single EU AI Act checklist item for the active session."""
|
| 154 |
+
model_id: str
|
| 155 |
+
item_id: str
|
| 156 |
+
checked: bool
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
class SamplePatient(BaseModel):
|
| 160 |
+
"""
|
| 161 |
+
A single patient row picked from the trained dataset for use in Step 6 explainability
|
| 162 |
+
or Step 7 ethics demos.
|
| 163 |
+
"""
|
| 164 |
+
index: int
|
| 165 |
+
risk_level: Literal["low", "medium", "high"]
|
| 166 |
+
probability: float
|
| 167 |
+
summary: str
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
class SamplePatientsResponse(BaseModel):
|
| 171 |
+
"""Wraps a small list of `SamplePatient` rows used to seed the Step 6 "single patient" picker."""
|
| 172 |
+
model_id: str
|
| 173 |
+
patients: list[SamplePatient]
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
class CertificateRequest(BaseModel):
|
| 177 |
+
"""
|
| 178 |
+
Request body for `/api/explain/certificate` — the session id plus user-selected
|
| 179 |
+
checklist items to embed in the EU AI Act PDF.
|
| 180 |
+
"""
|
| 181 |
+
model_id: str
|
| 182 |
+
session_id: str
|
| 183 |
+
checklist_state: dict[str, bool] = Field(default_factory=dict)
|
| 184 |
+
clinician_name: str = "Healthcare Professional"
|
| 185 |
+
institution: str = "Healthcare Institution"
|
app/models/ml_schemas.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pydantic schemas for ML training and evaluation endpoints."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from enum import Enum
|
| 5 |
+
from typing import Any, Literal
|
| 6 |
+
|
| 7 |
+
from pydantic import BaseModel, Field, model_validator
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class ModelType(str, Enum):
|
| 11 |
+
"""Enum of the eight classifiers the backend can train."""
|
| 12 |
+
KNN = "knn"
|
| 13 |
+
SVM = "svm"
|
| 14 |
+
DECISION_TREE = "decision_tree"
|
| 15 |
+
RANDOM_FOREST = "random_forest"
|
| 16 |
+
LOGISTIC_REGRESSION = "logistic_regression"
|
| 17 |
+
NAIVE_BAYES = "naive_bayes"
|
| 18 |
+
XGBOOST = "xgboost"
|
| 19 |
+
LIGHTGBM = "lightgbm"
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class KNNParams(BaseModel):
|
| 23 |
+
"""Hyperparameters for K-Nearest-Neighbours (neighbour count, distance metric)."""
|
| 24 |
+
n_neighbors: int = Field(5, ge=1, le=25)
|
| 25 |
+
metric: Literal["euclidean", "manhattan"] = "euclidean"
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class SVMParams(BaseModel):
    """Hyperparameters for Support Vector Machine (kernel, regularisation strength C)."""

    kernel: Literal["linear", "rbf", "poly", "sigmoid"] = "rbf"
    C: float = Field(1.0, ge=0.01, le=100.0)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class DecisionTreeParams(BaseModel):
|
| 35 |
+
"""Hyperparameters for a single Decision Tree (max depth, split criterion)."""
|
| 36 |
+
max_depth: int = Field(5, ge=1, le=20)
|
| 37 |
+
criterion: Literal["gini", "entropy"] = "gini"
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class RandomForestParams(BaseModel):
|
| 41 |
+
"""Hyperparameters for Random Forest ensemble (n_estimators, max depth)."""
|
| 42 |
+
n_estimators: int = Field(100, ge=10, le=500)
|
| 43 |
+
max_depth: int = Field(5, ge=1, le=20)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class LogisticRegressionParams(BaseModel):
    """Hyperparameters for Logistic Regression (regularisation strength C, iteration cap)."""

    C: float = Field(1.0, ge=0.001, le=100.0)
    max_iter: int = Field(200, ge=50, le=2000)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class NaiveBayesParams(BaseModel):
|
| 53 |
+
"""Hyperparameters for Gaussian Naive Bayes (variance smoothing)."""
|
| 54 |
+
var_smoothing: float = Field(1e-9, ge=1e-12, le=1e-3)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class XGBoostParams(BaseModel):
|
| 58 |
+
"""Hyperparameters for XGBoost (n_estimators, max depth, learning rate)."""
|
| 59 |
+
n_estimators: int = Field(100, ge=10, le=500)
|
| 60 |
+
max_depth: int = Field(5, ge=1, le=15)
|
| 61 |
+
learning_rate: float = Field(0.1, ge=0.01, le=0.5)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class LightGBMParams(BaseModel):
    """Hyperparameters for LightGBM (n_estimators, max depth, learning rate)."""

    n_estimators: int = Field(100, ge=10, le=500)
    # -1 is accepted as the lower bound and is the default.
    max_depth: int = Field(-1, ge=-1, le=15)
    learning_rate: float = Field(0.1, ge=0.01, le=0.5)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
PARAM_SCHEMAS: dict[str, type[BaseModel]] = {
|
| 72 |
+
"knn": KNNParams,
|
| 73 |
+
"svm": SVMParams,
|
| 74 |
+
"decision_tree": DecisionTreeParams,
|
| 75 |
+
"random_forest": RandomForestParams,
|
| 76 |
+
"logistic_regression": LogisticRegressionParams,
|
| 77 |
+
"naive_bayes": NaiveBayesParams,
|
| 78 |
+
"xgboost": XGBoostParams,
|
| 79 |
+
"lightgbm": LightGBMParams,
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
class TrainRequest(BaseModel):
    """Request body for `/api/ml/train` — session id + model type + its hyperparameter bundle."""

    session_id: str
    model_type: ModelType
    # Raw hyperparameters; replaced by the schema-normalised dict when they validate.
    params: dict[str, Any] = Field(default_factory=dict)
    tune: bool = False
    use_feature_selection: bool = False

    @model_validator(mode='after')
    def validate_params(self) -> 'TrainRequest':
        """
        Normalise `params` against the schema registered for `model_type`.

        When a schema exists in `PARAM_SCHEMAS` and `params` is non-empty, the
        params are validated and replaced by the schema's `model_dump()`. If
        validation fails the raw params are deliberately let through unchanged
        (best effort, as before), but the failure is now logged instead of
        being silently discarded.

        Returns:
            The same request instance, possibly with `params` normalised.
        """
        # Local imports keep this block self-contained; the module does not
        # import these names at file level.
        import logging

        from pydantic import ValidationError

        schema = PARAM_SCHEMAS.get(self.model_type.value)
        if schema and self.params:
            try:
                validated = schema(**self.params)
            except ValidationError as exc:
                # Allow through with raw params; build_model has its own defaults.
                logging.getLogger(__name__).warning(
                    "Hyperparameters for %s failed schema validation; "
                    "passing raw params through: %s",
                    self.model_type.value,
                    exc,
                )
            else:
                self.params = validated.model_dump()
        return self
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
class ConfusionMatrixData(BaseModel):
|
| 105 |
+
"""Confusion matrix counts plus labels, ready for the Step-5 chart."""
|
| 106 |
+
tn: int = 0
|
| 107 |
+
fp: int = 0
|
| 108 |
+
fn: int = 0
|
| 109 |
+
tp: int = 0
|
| 110 |
+
matrix: list[list[int]]
|
| 111 |
+
labels: list[str]
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
class ROCPoint(BaseModel):
|
| 115 |
+
"""One threshold sample of the ROC curve (FPR, TPR, threshold)."""
|
| 116 |
+
fpr: float
|
| 117 |
+
tpr: float
|
| 118 |
+
threshold: float
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
class MetricsResponse(BaseModel):
|
| 122 |
+
"""
|
| 123 |
+
Bundle of evaluation metrics returned after a training run (accuracy, precision,
|
| 124 |
+
recall, F1, AUC, confusion matrix, ROC/PR points).
|
| 125 |
+
"""
|
| 126 |
+
accuracy: float
|
| 127 |
+
sensitivity: float
|
| 128 |
+
specificity: float
|
| 129 |
+
precision: float
|
| 130 |
+
f1_score: float
|
| 131 |
+
auc_roc: float
|
| 132 |
+
confusion_matrix: ConfusionMatrixData
|
| 133 |
+
roc_curve: list[ROCPoint]
|
| 134 |
+
pr_curve: list[dict[str, float]]
|
| 135 |
+
train_accuracy: float
|
| 136 |
+
cross_val_scores: list[float]
|
| 137 |
+
low_sensitivity_warning: bool
|
| 138 |
+
mcc: float = 0.0
|
| 139 |
+
overfitting_warning: bool = False
|
| 140 |
+
optimal_threshold: float = 0.5
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
class ScatterPoint(BaseModel):
|
| 144 |
+
"""Single 2-D point used by the KNN scatter visualisation in Step 4."""
|
| 145 |
+
x: float
|
| 146 |
+
y: float
|
| 147 |
+
label: int
|
| 148 |
+
label_name: str
|
| 149 |
+
split: str # "train" or "test"
|
| 150 |
+
predicted: int | None = None # only for test points
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
class DecisionMesh(BaseModel):
|
| 154 |
+
"""Grid of predictions used to shade the KNN decision boundary in Step 4."""
|
| 155 |
+
x_values: list[float] # unique x coordinates of the grid
|
| 156 |
+
y_values: list[float] # unique y coordinates of the grid
|
| 157 |
+
predictions: list[list[int]] # 2D array [y][x] of predicted class indices
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
class KNNScatterData(BaseModel):
|
| 161 |
+
"""Bundle of scatter points + decision mesh shipped to the KNN visualisation."""
|
| 162 |
+
scatter_points: list[ScatterPoint]
|
| 163 |
+
decision_mesh: DecisionMesh
|
| 164 |
+
pca_explained_variance: list[float]
|
| 165 |
+
classes: list[str]
|
| 166 |
+
k: int
|
| 167 |
+
metric: str
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
class TrainResponse(BaseModel):
|
| 171 |
+
"""Complete payload returned by `/api/ml/train` — session id, model id, metrics, ROC/PR, scatter data."""
|
| 172 |
+
model_id: str
|
| 173 |
+
session_id: str
|
| 174 |
+
model_type: ModelType
|
| 175 |
+
params: dict[str, Any]
|
| 176 |
+
metrics: MetricsResponse
|
| 177 |
+
training_time_ms: float
|
| 178 |
+
feature_names: list[str]
|
| 179 |
+
knn_scatter: KNNScatterData | None = None
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
class CompareEntry(BaseModel):
|
| 183 |
+
"""A single model entry in the cross-model comparison list (Step 4 "Add to comparison")."""
|
| 184 |
+
model_id: str
|
| 185 |
+
model_type: ModelType
|
| 186 |
+
params: dict[str, Any]
|
| 187 |
+
metrics: MetricsResponse
|
| 188 |
+
training_time_ms: float
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
class CompareResponse(BaseModel):
|
| 192 |
+
"""Response for `/api/ml/comparison` — the current list of compared models for the session."""
|
| 193 |
+
entries: list[CompareEntry]
|
| 194 |
+
best_model_id: str
|
app/models/schemas.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pydantic schemas for data exploration and preparation endpoints."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from typing import Any, Literal
|
| 5 |
+
|
| 6 |
+
from pydantic import BaseModel, Field
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class SpecialtyInfo(BaseModel):
    """
    Descriptor for one medical specialty — id, name, description, prediction
    target (variable and binary/multiclass type), feature names, clinical
    context, data source, and dataset licensing details.
    """

    id: str
    name: str
    description: str
    target_variable: str
    target_type: Literal["binary", "multiclass"]
    feature_names: list[str]
    clinical_context: str
    data_source: str
    what_ai_predicts: str
    # Licensing metadata is optional and defaults to empty / no attribution.
    license_type: str = ""
    license_url: str = ""
    requires_attribution: bool = False
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class ColumnStat(BaseModel):
    """
    Per-column summary: column name, dtype, missing-value count and percentage,
    number of unique values, and a list of sample values.
    """

    name: str
    dtype: str
    missing_count: int
    missing_pct: float
    unique_count: int
    sample_values: list[Any]
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class DataExplorationResponse(BaseModel):
|
| 39 |
+
"""
|
| 40 |
+
Response for `/api/data/explore` — column stats, row count, warnings, and the detected
|
| 41 |
+
target column.
|
| 42 |
+
"""
|
| 43 |
+
columns: list[ColumnStat]
|
| 44 |
+
row_count: int
|
| 45 |
+
class_distribution: dict[str, int]
|
| 46 |
+
imbalance_warning: bool
|
| 47 |
+
imbalance_ratio: float
|
| 48 |
+
target_col: str
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class PrepSettings(BaseModel):
|
| 52 |
+
"""
|
| 53 |
+
Step-3 preparation settings (test split, normalisation, missing-value handling, SMOTE
|
| 54 |
+
flag, outlier treatment).
|
| 55 |
+
"""
|
| 56 |
+
test_size: float = Field(0.2, ge=0.1, le=0.4)
|
| 57 |
+
missing_strategy: Literal["median", "mode", "drop"] = "median"
|
| 58 |
+
normalization: Literal["zscore", "minmax", "none"] = "zscore"
|
| 59 |
+
use_smote: bool = False
|
| 60 |
+
outlier_handling: Literal["none", "iqr", "zscore_clip"] = "none"
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class PrepResponse(BaseModel):
|
| 64 |
+
"""Response for `/api/data/prepare` — session id, train/test shapes, and any applied transformations."""
|
| 65 |
+
session_id: str
|
| 66 |
+
train_size: int
|
| 67 |
+
test_size: int
|
| 68 |
+
features_count: int
|
| 69 |
+
class_distribution_before: dict[str, int]
|
| 70 |
+
class_distribution_after: dict[str, int]
|
| 71 |
+
smote_applied: bool
|
| 72 |
+
normalization_applied: str
|
| 73 |
+
norm_samples: list[dict[str, object]] = Field(default_factory=list) # [{feature, before, after}, ...]
|
app/routers/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""FastAPI routers split by wizard concern (data, ml, explain)."""
|
app/routers/data_router.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Data exploration and preparation REST endpoints."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import io
|
| 5 |
+
import logging
|
| 6 |
+
import uuid
|
| 7 |
+
|
| 8 |
+
import pandas as pd
|
| 9 |
+
from fastapi import APIRouter, File, Form, HTTPException, Request, UploadFile, status
|
| 10 |
+
from fastapi.responses import JSONResponse
|
| 11 |
+
|
| 12 |
+
from app.models.schemas import (
|
| 13 |
+
DataExplorationResponse,
|
| 14 |
+
PrepResponse,
|
| 15 |
+
PrepSettings,
|
| 16 |
+
SpecialtyInfo,
|
| 17 |
+
)
|
| 18 |
+
from app.services.data_service import DatasetUnavailableError
|
| 19 |
+
from app.services.specialty_registry import get_specialty, list_specialties
|
| 20 |
+
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
router = APIRouter(prefix="/api", tags=["data"])
|
| 23 |
+
|
| 24 |
+
_MAX_UPLOAD_BYTES = 50 * 1024 * 1024 # 50 MB
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def _get_data_service(request: Request):
    """Return the `data_service` object stored on the request's `app.state`."""
    app_state = request.app.state
    return app_state.data_service
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _get_ml_service(request: Request):
    """Return the `ml_service` object stored on the request's `app.state`."""
    app_state = request.app.state
    return app_state.ml_service
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _load_df(file: UploadFile | None, specialty_id: str, data_service) -> pd.DataFrame:
    """Load the working DataFrame from an uploaded CSV or the specialty's bundled dataset.

    The upload is used when ``file`` is given and carries a non-empty filename;
    otherwise ``data_service.get_example_dataset(specialty_id)`` is returned
    without further validation.

    Raises:
        HTTPException: 422 when the upload is not a ``.csv`` file, cannot be
            parsed, has fewer than 10 rows or fewer than 2 columns, or when the
            bundled dataset is unavailable; 413 when the upload is larger than
            ``_MAX_UPLOAD_BYTES``.
    """
    if file is None or not file.filename:
        try:
            return data_service.get_example_dataset(specialty_id)
        except DatasetUnavailableError as exc:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail=str(exc),
            ) from exc

    # Bug #6: Validate file extension
    if not file.filename.lower().endswith(".csv"):
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"Only .csv files are accepted (got: {file.filename})",
        )

    content = file.file.read()
    # Enforce 50 MB limit
    if len(content) > _MAX_UPLOAD_BYTES:
        raise HTTPException(
            status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
            detail=f"File exceeds 50 MB limit (uploaded: {len(content) // (1024 * 1024)} MB)",
        )

    try:
        df = pd.read_csv(io.BytesIO(content))
    except Exception as exc:
        # Chain the parser error so the traceback keeps the root cause,
        # matching the DatasetUnavailableError handler above.
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"Could not parse CSV file: {exc}",
        ) from exc

    # Bug #7: Minimum dataset size validation
    if len(df) < 10:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"Dataset must have at least 10 rows (got {len(df)})",
        )
    if len(df.columns) < 2:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"Dataset must have at least 2 columns (got {len(df.columns)})",
        )
    return df
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# ------------------------------------------------------------------
|
| 85 |
+
# Specialties
|
| 86 |
+
# ------------------------------------------------------------------
|
| 87 |
+
|
| 88 |
+
@router.get("/specialties", response_model=list[SpecialtyInfo])
def get_specialties() -> list[SpecialtyInfo]:
    """List endpoint — returns every entry of the specialty registry."""
    registry = list_specialties()
    return registry
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
@router.get("/specialties/{specialty_id}", response_model=SpecialtyInfo)
def get_specialty_by_id(specialty_id: str) -> SpecialtyInfo:
    """Look up one specialty by its id, answering 404 when the registry has no match."""
    found = get_specialty(specialty_id)
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail=f"Specialty '{specialty_id}' not found")
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# ------------------------------------------------------------------
|
| 104 |
+
# Exploration
|
| 105 |
+
# ------------------------------------------------------------------
|
| 106 |
+
|
| 107 |
+
@router.post("/explore", response_model=DataExplorationResponse)
def explore_data(
    request: Request,
    specialty_id: str = Form(...),
    target_col: str = Form(...),
    file: UploadFile | None = File(None),
) -> DataExplorationResponse:
    """Step-2 exploration endpoint — explores the uploaded or bundled dataset.

    When ``target_col`` is not a column of the loaded frame, the specialty's
    registered ``target_variable`` is used instead if it is present; otherwise
    the request is rejected with 422.
    """
    data_service = _get_data_service(request)
    frame = _load_df(file, specialty_id, data_service)

    # Fast path: the requested target exists as given.
    if target_col in frame.columns:
        return data_service.explore_dataframe(frame, target_col)

    # Fall back to the target registered for the specialty.
    registry_entry = get_specialty(specialty_id)
    if not registry_entry or registry_entry.target_variable not in frame.columns:
        raise HTTPException(
            status_code=422,
            detail=f"Target column '{target_col}' not found. Available: {list(frame.columns)}",
        )
    return data_service.explore_dataframe(frame, registry_entry.target_variable)
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
# ------------------------------------------------------------------
|
| 133 |
+
# Preparation
|
| 134 |
+
# ------------------------------------------------------------------
|
| 135 |
+
|
| 136 |
+
@router.post("/prepare", response_model=PrepResponse)
def prepare_data(
    request: Request,
    specialty_id: str = Form(...),
    target_col: str = Form(...),
    test_size: float = Form(0.2),
    missing_strategy: str = Form("median"),
    normalization: str = Form("zscore"),
    use_smote: bool = Form(False),
    outlier_handling: str = Form("none"),
    session_id: str | None = Form(None),
    file: UploadFile | None = File(None),
) -> PrepResponse:
    """Step-3 preparation endpoint — splits, normalises, imputes missing values, optionally applies SMOTE.

    The dataset comes from the upload or the specialty's bundled data. When
    ``target_col`` is missing from the frame, the specialty's registered
    ``target_variable`` is used if present. A new session id is generated when
    the client does not send one. After preparation the session is tagged with
    ``specialty_id`` and handed to the ML service.

    Raises:
        HTTPException: 422 when the target column cannot be resolved, or when
            building ``PrepSettings`` or running the preparation fails.
    """
    ds = _get_data_service(request)
    ml_service = _get_ml_service(request)
    df = _load_df(file, specialty_id, ds)

    if target_col not in df.columns:
        spec = get_specialty(specialty_id)
        if spec and spec.target_variable in df.columns:
            target_col = spec.target_variable
        else:
            raise HTTPException(status_code=422, detail=f"Target column '{target_col}' not found")

    new_session_id = session_id or str(uuid.uuid4())

    try:
        settings = PrepSettings(
            test_size=test_size,
            missing_strategy=missing_strategy,  # type: ignore[arg-type]
            normalization=normalization,  # type: ignore[arg-type]
            use_smote=use_smote,
            outlier_handling=outlier_handling,  # type: ignore[arg-type]
        )
        # Only the response is needed here; the split arrays are read back
        # from the data service's session store below.
        _, _, _, _, response, _ = ds.prepare_data(df, target_col, settings, new_session_id)
    except Exception as exc:
        logger.exception("Data preparation failed")
        raise HTTPException(status_code=422, detail=str(exc)) from exc

    # Share prepared data with ML service, including specialty_id for certificate generation
    session_data = ds.get_session(new_session_id)
    if session_data:
        session_data["specialty_id"] = specialty_id  # Fix: store for certificate generation
        ml_service.store_session_data(new_session_id, session_data)
    else:
        # Without this hand-off the ML service has nothing to train on.
        logger.warning(
            "Prepared session %s not found in data service; ML service was not updated",
            new_session_id,
        )

    return response
|
app/routers/explain_router.py
ADDED
|
@@ -0,0 +1,454 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Explainability, ethics, and certificate REST endpoints."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
from fastapi import APIRouter, HTTPException, Request
|
| 7 |
+
from fastapi.responses import StreamingResponse
|
| 8 |
+
|
| 9 |
+
from app.models.explain_schemas import (
|
| 10 |
+
CertificateRequest,
|
| 11 |
+
ChecklistUpdate,
|
| 12 |
+
EthicsResponse,
|
| 13 |
+
GlobalExplainabilityResponse,
|
| 14 |
+
SamplePatientsResponse,
|
| 15 |
+
SinglePatientExplainResponse,
|
| 16 |
+
WhatIfRequest,
|
| 17 |
+
WhatIfResponse,
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
logger = logging.getLogger(__name__)
|
| 21 |
+
router = APIRouter(prefix="/api", tags=["explain"])
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def _get_services(request: Request):
    """Resolve the shared services off ``request.app.state`` as a 5-tuple.

    Callers unpack the result positionally, so the order is part of the
    contract: ``(ml_service, explain_service, ethics_service,
    certificate_service, insight_service)``.
    """
    state = request.app.state
    return (
        state.ml_service,
        state.explain_service,
        state.ethics_service,
        state.certificate_service,
        state.insight_service,
    )
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _get_model_data(ml_service, model_id: str) -> dict:
    """Fetch the stored entry for ``model_id`` via ``ml_service.get_model``; 404 if it is ``None``."""
    entry = ml_service.get_model(model_id)
    if entry is not None:
        return entry
    raise HTTPException(status_code=404, detail=f"Model '{model_id}' not found. Train a model first.")
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
@router.get("/explain/global/{model_id}", response_model=GlobalExplainabilityResponse)
def global_importance(request: Request, model_id: str) -> GlobalExplainabilityResponse:
    """Step-6 endpoint — computes global feature importance for the given model.

    Raises:
        HTTPException: 404 when the model is unknown, 500 when the explain
            service fails (the original exception is chained as the cause).
    """
    ml, explain, *_ = _get_services(request)
    data = _get_model_data(ml, model_id)
    try:
        return explain.global_importance(
            model_id=model_id,
            model=data["model"],
            X_test=data["X_test"],
            y_test=data["y_test"],
            feature_names=data["feature_names"],
            X_train=data["X_train"],
            model_type=str(data["model_type"]),
            classes=data["classes"],
        )
    except Exception as exc:
        logger.exception("Global explainability failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
@router.get("/explain/patient/{model_id}/{patient_index}", response_model=SinglePatientExplainResponse)
def single_patient_explain(
    request: Request, model_id: str, patient_index: int
) -> SinglePatientExplainResponse:
    """Step-6 endpoint — explains the prediction for one row of the model's test split.

    Raises:
        HTTPException: 404 when the model is unknown, 422 when
            ``patient_index`` is outside the test split, 500 when the explain
            service fails (the original exception is chained as the cause).
    """
    ml, explain, *_ = _get_services(request)
    data = _get_model_data(ml, model_id)

    n_test = len(data["X_test"])
    if not 0 <= patient_index < n_test:
        raise HTTPException(status_code=422, detail=f"Patient index {patient_index} out of range [0, {n_test-1}]")

    try:
        return explain.single_patient(
            model_id=model_id,
            model=data["model"],
            patient_idx=patient_index,
            X_test=data["X_test"],
            feature_names=data["feature_names"],
            X_train=data["X_train"],
            model_type=str(data["model_type"]),
            classes=data["classes"],
            y_test=data["y_test"],
            scaler=data.get("scaler"),
        )
    except Exception as exc:
        logger.exception("Single-patient explanation failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
@router.post("/explain/what-if", response_model=WhatIfResponse)
def what_if(request: Request, body: WhatIfRequest) -> WhatIfResponse:
    """Step-6 endpoint — re-scores one test-split row after changing a single feature value.

    Raises:
        HTTPException: 404 when the model is unknown, 400 when the patient
            index is out of range or the feature name is not one of the
            model's features, 500 when the explain service fails (the
            original exception is chained as the cause).
    """
    ml, explain, *_ = _get_services(request)
    data = _get_model_data(ml, body.model_id)

    n_test = len(data["X_test"])
    if not 0 <= body.patient_index < n_test:
        raise HTTPException(
            status_code=400,
            detail=f"Patient index {body.patient_index} out of range [0, {n_test - 1}]",
        )
    if body.feature_name not in data["feature_names"]:
        raise HTTPException(
            status_code=400,
            detail=f"Feature '{body.feature_name}' not found. Available: {data['feature_names']}",
        )

    try:
        return explain.what_if(
            model_id=body.model_id,
            model=data["model"],
            patient_index=body.patient_index,
            feature_name=body.feature_name,
            new_value=body.new_value,
            X_test=data["X_test"],
            feature_names=data["feature_names"],
            scaler=data.get("scaler"),
        )
    except Exception as exc:
        logger.exception("What-if analysis failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
@router.get("/explain/sample-patients/{model_id}", response_model=SamplePatientsResponse)
def sample_patients(request: Request, model_id: str) -> SamplePatientsResponse:
    """Step-6 helper — asks the explain service for sample rows from the model's test split.

    Raises:
        HTTPException: 404 when the model is unknown, 500 when the explain
            service fails (the original exception is chained as the cause).
    """
    ml, explain, *_ = _get_services(request)
    data = _get_model_data(ml, model_id)
    try:
        return explain.sample_patients(
            model_id=model_id,
            model=data["model"],
            X_test=data["X_test"],
        )
    except Exception as exc:
        logger.exception("Sample patients retrieval failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
@router.get("/ethics/{model_id}", response_model=EthicsResponse)
def get_ethics(request: Request, model_id: str) -> EthicsResponse:
    """Step-7 endpoint — runs the ethics service's bias analysis for the given model.

    Raises:
        HTTPException: 404 when the model is unknown, 500 when the analysis
            fails (the original exception is chained as the cause).
    """
    ml, _, ethics, _, _ = _get_services(request)
    data = _get_model_data(ml, model_id)
    try:
        return ethics.analyze_bias(
            model_id=model_id,
            model=data["model"],
            X_test=data["X_test"],
            y_test=data["y_test"],
            feature_names=data["feature_names"],
            classes=data["classes"],
            X_train=data["X_train"],
            scaler=data.get("scaler"),
        )
    except Exception as exc:
        logger.exception("Ethics analysis failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
@router.post("/ethics/checklist")
def update_checklist(request: Request, body: ChecklistUpdate) -> dict:
    """Step-7 endpoint — forwards one checklist toggle to the ethics service and returns its result."""
    ethics = _get_services(request)[2]
    return ethics.update_checklist(body.model_id, body.item_id, body.checked)
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
@router.get("/insights/{model_id}")
async def get_insights(request: Request, model_id: str) -> dict:
    """Generate LLM-powered clinical insights for a trained model.

    Builds one context dict (specialty metadata, model and dataset info,
    performance metrics, feature importances, fairness audit, compared models,
    per-feature statistics and a few test-split rows) and passes it to three
    insight-service coroutines that are awaited together.

    Returns:
        A dict with the keys ``ethics_insight``, ``case_studies`` and
        ``eu_ai_act_insights``.

    Raises:
        HTTPException: 404 when the model is unknown, 422 when the model has
            no stored metrics, 500 when insight generation fails.
    """
    import asyncio
    import numpy as np

    ml, explain, ethics, _, insight_svc = _get_services(request)
    data = _get_model_data(ml, model_id)

    metrics = data.get("metrics")
    if metrics is None:
        raise HTTPException(status_code=422, detail="Model metrics not available.")

    def _m(attr: str):
        """Read one metric by name from an attribute-style object or a mapping."""
        if hasattr(metrics, attr):
            return getattr(metrics, attr)
        getter = getattr(metrics, "get", None)
        # Objects that are neither attribute- nor mapping-style yield None
        # instead of raising AttributeError.
        return getter(attr) if callable(getter) else None

    def _column(table, j: int):
        """Return column ``j`` by position; DataFrames also expose ``.shape``, so test ``.iloc`` first."""
        return table.iloc[:, j] if hasattr(table, "iloc") else table[:, j]

    def _cell(table, i: int, j: int):
        """Return the value at row ``i``, column ``j`` by position."""
        return table.iloc[i, j] if hasattr(table, "iloc") else table[i, j]

    # NOTE(review): the bias audit and SHAP calls below are synchronous and
    # run on the event loop; confirm whether they should be moved to a worker
    # thread.

    # --- Gather all data sources ---
    ethics_data = ethics.analyze_bias(
        model_id=model_id,
        model=data["model"],
        X_test=data["X_test"],
        y_test=data["y_test"],
        feature_names=data["feature_names"],
        classes=data["classes"],
        X_train=data["X_train"],
        scaler=data.get("scaler"),
    )

    # SHAP / Feature importance (best-effort: insights are still produced without it)
    shap_data = None
    try:
        shap_data = explain.global_importance(
            model_id=model_id,
            model=data["model"],
            X_test=data["X_test"],
            y_test=data["y_test"],
            feature_names=data["feature_names"],
            X_train=data["X_train"],
            model_type=str(data["model_type"]),
            classes=data["classes"],
        )
    except Exception as exc:
        logger.warning("SHAP for insights failed: %s", exc)

    # Specialty metadata
    session_id = data.get("session_id", "")
    ml_session = ml.get_session(session_id)
    specialty_info = None
    if ml_session:
        from app.services.specialty_registry import SPECIALTIES
        specialty_info = SPECIALTIES.get(ml_session.get("specialty_id", ""))

    # Confusion matrix
    cm_summary = {}
    cm_data = _m("confusion_matrix")
    if cm_data and hasattr(cm_data, "matrix"):
        matrix = cm_data.matrix
        if len(matrix) == 2:
            cm_summary = {"TN": matrix[0][0], "FP": matrix[0][1], "FN": matrix[1][0], "TP": matrix[1][1]}
        else:
            cm_summary = {"matrix_size": f"{len(matrix)}x{len(matrix)}", "classes": data["classes"]}

    # Class distribution
    class_dist = {}
    if ml_session:
        y_train = ml_session.get("y_train")
        if y_train is not None:
            unique, counts = np.unique(y_train, return_counts=True)
            classes_list = data["classes"]
            class_dist = {
                classes_list[int(u)] if int(u) < len(classes_list) else str(u): int(c)
                for u, c in zip(unique, counts)
            }

    # Feature importance from SHAP (top 10)
    feature_importance_data = []
    if shap_data:
        feature_importance_data = [
            {
                "feature": fi.feature_name,
                "clinical_name": fi.clinical_name,
                "importance": round(fi.importance, 4),
                "direction": fi.direction,
                "clinical_note": fi.clinical_note,
            }
            for fi in shap_data.feature_importances[:10]
        ]

    cv_scores = _m("cross_val_scores") or []

    raw_model_type = data.get("model_type", "unknown")
    if hasattr(raw_model_type, "value"):
        model_type_label = raw_model_type.value.replace("_", " ").title()
    else:
        model_type_label = str(raw_model_type)

    context = {
        # Specialty & clinical domain
        "specialty_name": specialty_info.name if specialty_info else "Unknown",
        "what_ai_predicts": specialty_info.what_ai_predicts if specialty_info else "clinical outcome",
        "clinical_context": specialty_info.clinical_context if specialty_info else "",
        "target_variable": specialty_info.target_variable if specialty_info else "target",
        "data_source": specialty_info.data_source if specialty_info else "unknown",
        # Model info
        "model_type": model_type_label,
        "model_params": data.get("params", {}),
        "training_time_ms": data.get("training_time_ms"),
        # Dataset info
        "feature_names": data["feature_names"],
        "classes": data["classes"],
        "train_size": len(data["X_train"]),
        "test_size": len(data["X_test"]),
        "class_distribution_train": class_dist,
        "use_smote": ml_session.get("smote_applied", False) if ml_session else False,
        "normalization": ml_session.get("normalization", "N/A") if ml_session else "N/A",
        "raw_column_meta": ml_session.get("raw_column_meta", []) if ml_session else [],
        "row_count_original": ml_session.get("row_count", 0) if ml_session else 0,
        # Performance metrics
        "accuracy": _m("accuracy"),
        "sensitivity": _m("sensitivity"),
        "specificity": _m("specificity"),
        "precision": _m("precision"),
        "f1_score": _m("f1_score"),
        "auc_roc": _m("auc_roc"),
        "mcc": _m("mcc"),
        "train_accuracy": _m("train_accuracy"),
        "cv_scores": cv_scores,
        "cv_mean": float(sum(cv_scores) / max(len(cv_scores), 1)),
        "cv_std": float(np.std(cv_scores)) if cv_scores else 0.0,
        "overfitting_warning": _m("overfitting_warning"),
        "optimal_threshold": _m("optimal_threshold"),
        "low_sensitivity_warning": _m("low_sensitivity_warning"),
        "confusion_matrix": cm_summary,
        # Explainability / SHAP
        "shap_method": shap_data.method if shap_data else "unavailable",
        "feature_importances": feature_importance_data,
        "top_feature_clinical_note": shap_data.top_feature_clinical_note if shap_data else "",
        "explained_variance_top5_pct": shap_data.explained_variance_pct if shap_data else 0,
        # Fairness data
        "overall_sensitivity": ethics_data.overall_sensitivity,
        "bias_warnings": [
            {"group": w.affected_group, "metric": w.metric, "gap": w.gap}
            for w in ethics_data.bias_warnings
        ],
        "subgroup_details": [
            {
                "group": sm.group_label,
                "sensitivity": sm.sensitivity,
                "accuracy": sm.accuracy,
                "specificity": sm.specificity,
                "precision": sm.precision,
                "f1_score": sm.f1_score,
                "sample_size": sm.sample_size,
                "status": sm.status,
                "status_reason": sm.status_reason,
            }
            for sm in ethics_data.subgroup_metrics
        ],
    }

    # Compared models (if user trained multiple models)
    compared_models = []
    if session_id:
        try:
            compare_data = ml.get_comparison(session_id)
            for entry in compare_data.entries:
                compared_models.append({
                    "model_type": entry.model_type.value.replace("_", " ").title(),
                    "model_id": entry.model_id,
                    "accuracy": entry.metrics.accuracy,
                    "sensitivity": entry.metrics.sensitivity,
                    "specificity": entry.metrics.specificity,
                    "auc_roc": entry.metrics.auc_roc,
                    "f1_score": entry.metrics.f1_score,
                    "mcc": entry.metrics.mcc,
                    "training_time_ms": entry.training_time_ms,
                })
        except Exception as exc:
            logger.warning("Comparison data unavailable: %s", exc)
    logger.info("Insights context: %d compared models", len(compared_models))
    context["compared_models"] = compared_models

    # Feature column statistics (distributions for clinical grounding)
    column_stats = []
    X_train = data["X_train"]
    for i, fname in enumerate(data["feature_names"]):
        col_info: dict[str, object] = {"name": fname}
        try:
            col = _column(X_train, i)
            col_info["mean"] = round(float(np.mean(col)), 3)
            col_info["std"] = round(float(np.std(col)), 3)
            col_info["min"] = round(float(np.min(col)), 3)
            col_info["max"] = round(float(np.max(col)), 3)
        except Exception as exc:
            # Best-effort: the column is still listed by name without stats.
            logger.debug("Column statistics unavailable for %r: %s", fname, exc)
        column_stats.append(col_info)
    context["column_statistics"] = column_stats

    # Sample rows from test set (real patient data for LLM grounding)
    feature_names = data["feature_names"]
    classes = data["classes"]
    X_test = data["X_test"]
    sample_rows = []
    n_samples = min(5, len(X_test))
    # Pick diverse samples: some positive, some negative
    try:
        # Positional view of the labels; indexing a pandas Series with an
        # int would otherwise be a label lookup.
        y_arr = np.asarray(data["y_test"])
        pos_idx = [i for i, y in enumerate(y_arr) if int(y) == 1]
        neg_idx = [i for i, y in enumerate(y_arr) if int(y) == 0]
        if pos_idx and neg_idx:
            pick = (pos_idx[:3] + neg_idx[:2])[:n_samples]
        else:
            pick = list(range(n_samples))
        for idx in pick:
            row = {
                fname: round(float(_cell(X_test, idx, j)), 3)
                for j, fname in enumerate(feature_names)
            }
            label = int(y_arr[idx])
            row["_actual_outcome"] = classes[label] if label < len(classes) else str(y_arr[idx])
            sample_rows.append(row)
    except Exception as exc:
        # Best-effort: insights proceed with whatever rows were collected.
        logger.warning("Sample patient extraction for insights failed: %s", exc)
    context["sample_patients"] = sample_rows

    # EU AI Act static items for enrichment
    from app.services.ethics_service import EU_AI_ACT_ITEMS
    context["eu_ai_act_items"] = EU_AI_ACT_ITEMS

    try:
        ethics_result, cases_result, eu_act_result = await asyncio.gather(
            insight_svc.generate_ethics_insight(context),
            insight_svc.generate_case_studies(context),
            insight_svc.generate_eu_ai_act_insights(context),
        )
    except Exception as exc:
        logger.exception("Insight generation failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    return {
        "ethics_insight": ethics_result,
        "case_studies": cases_result,
        "eu_ai_act_insights": eu_act_result,
    }
|
| 404 |
+
|
| 405 |
+
|
| 406 |
+
@router.post("/generate-certificate")
def generate_certificate(request: Request, body: CertificateRequest) -> StreamingResponse:
    """Step-7 endpoint — renders the certificate PDF via the certificate service and streams it back.

    The PDF is built from the model's stored metrics, a fresh bias analysis
    and the specialty name recorded on the model's session (falling back to
    ``"Healthcare ML"``).

    Raises:
        HTTPException: 404 when the model is unknown, 422 when it has no
            stored metrics, 500 when PDF generation fails (the original
            exception is chained as the cause).
    """
    ml, _, ethics, cert_svc, _ = _get_services(request)
    data = _get_model_data(ml, body.model_id)

    # Rebuild metrics from stored model
    metrics = data.get("metrics")
    if metrics is None:
        raise HTTPException(status_code=422, detail="Model metrics not available. Train the model first.")

    ethics_data = ethics.analyze_bias(
        model_id=body.model_id,
        model=data["model"],
        X_test=data["X_test"],
        y_test=data["y_test"],
        feature_names=data["feature_names"],
        classes=data["classes"],
        X_train=data["X_train"],
        scaler=data.get("scaler"),
    )

    # Resolve the display name of the specialty from the session, if any.
    specialty_name = "Healthcare ML"
    ml_session = ml.get_session(data.get("session_id", ""))
    if ml_session:
        from app.services.specialty_registry import SPECIALTIES
        spec = SPECIALTIES.get(ml_session.get("specialty_id", ""))
        if spec:
            specialty_name = spec.name

    try:
        pdf_bytes = cert_svc.generate_pdf(
            cert_request=body,
            metrics=metrics,
            ethics=ethics_data,
            specialty_name=specialty_name,
            model_type=data["model_type"],
        )
    except Exception as exc:
        logger.exception("Certificate generation failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    return StreamingResponse(
        iter([pdf_bytes]),
        media_type="application/pdf",
        headers={"Content-Disposition": f'attachment; filename="ml_certificate_{body.model_id[:8]}.pdf"'},
    )
|
app/routers/ml_router.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""ML model training and evaluation REST endpoints."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
from fastapi import APIRouter, HTTPException, Request, status
|
| 7 |
+
from fastapi.responses import Response
|
| 8 |
+
|
| 9 |
+
from app.models.ml_schemas import (
|
| 10 |
+
CompareResponse,
|
| 11 |
+
ModelType,
|
| 12 |
+
TrainRequest,
|
| 13 |
+
TrainResponse,
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
router = APIRouter(prefix="/api", tags=["ml"])
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def _get_ml_service(request: Request):
    """Return the shared ML service stored on ``request.app.state.ml_service``."""
    app_state = request.app.state
    return app_state.ml_service
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@router.post("/train", response_model=TrainResponse)
def train_model(request: Request, body: TrainRequest) -> TrainResponse:
    """Step-4 endpoint — trains the chosen classifier on the prepared session data and returns metrics.

    Raises:
        HTTPException: 404 when the session has not been prepared, 422 when
            training or evaluation fails (the original exception is chained
            as the cause).
    """
    ml = _get_ml_service(request)
    if ml.get_session(body.session_id) is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Session '{body.session_id}' not found. Run /api/prepare first.",
        )

    try:
        response = ml.train_and_evaluate(
            body.session_id,
            body.model_type,
            body.params,
            tune=body.tune,
            use_feature_selection=body.use_feature_selection,
        )
    except Exception as exc:
        logger.exception("Model training failed")
        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(exc)) from exc

    # Cache metrics for comparison
    ml.store_train_response_in_model(response.model_id, response)
    return response
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@router.post("/compare/{model_id}", response_model=CompareResponse)
def add_to_comparison(request: Request, model_id: str) -> CompareResponse:
    """Step-4 endpoint — add the model identified by `model_id` to its session's comparison list.

    The session is taken from the stored model record (`session_id`, falling
    back to an empty string when the record has none).

    Raises:
        HTTPException: 404 when no model is stored under `model_id`;
            422 when the ML service fails to add it, with the error text as `detail`.
    """
    ml = _get_ml_service(request)
    model_data = ml.get_model(model_id)
    if model_data is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Model '{model_id}' not found",
        )

    session_id = model_data.get("session_id", "")
    try:
        return ml.add_to_comparison(session_id, model_id)
    except Exception as exc:
        # Mirror train_model: keep a server-side traceback instead of
        # silently turning the failure into a 422.
        logger.exception("Adding model to comparison failed")
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=str(exc),
        ) from exc
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
@router.get("/compare/{session_id}", response_model=CompareResponse)
def get_comparison(request: Request, session_id: str) -> CompareResponse:
    """Step-4 endpoint — fetch the comparison list the ML service holds for `session_id`."""
    return _get_ml_service(request).get_comparison(session_id)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
@router.delete("/compare/{session_id}", status_code=204, response_model=None)
def clear_comparison(request: Request, session_id: str):
    """Step-4 endpoint — ask the ML service to clear the session's comparison list.

    Always answers with an empty 204 response.
    """
    ml = _get_ml_service(request)
    ml.clear_comparison(session_id)
    return Response(status_code=204)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
@router.get("/models/{model_id}")
def get_model_info(request: Request, model_id: str) -> dict:
    """Step-4 endpoint — return selected fields of the stored record for `model_id`.

    The payload contains `model_id` plus the record's `model_type`, `params`,
    `session_id`, `feature_names` and `classes` (each `None` when absent).

    Raises:
        HTTPException: 404 when no model is stored under `model_id`.
    """
    data = _get_ml_service(request).get_model(model_id)
    if data is None:
        raise HTTPException(status_code=404, detail=f"Model '{model_id}' not found")

    exposed_fields = ("model_type", "params", "session_id", "feature_names", "classes")
    payload = {"model_id": model_id}
    for field in exposed_fields:
        payload[field] = data.get(field)
    return payload
|
app/services/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Service layer — one singleton per concern, attached to `app.state` in `main.py`."""
|
app/services/certificate_service.py
ADDED
|
@@ -0,0 +1,690 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""PDF certificate generation using ReportLab."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import datetime
|
| 5 |
+
import math
|
| 6 |
+
from io import BytesIO
|
| 7 |
+
from typing import Optional
|
| 8 |
+
|
| 9 |
+
from reportlab.lib import colors
|
| 10 |
+
from reportlab.lib.pagesizes import A4
|
| 11 |
+
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
|
| 12 |
+
from reportlab.lib.units import cm
|
| 13 |
+
from reportlab.platypus import (
|
| 14 |
+
HRFlowable,
|
| 15 |
+
Paragraph,
|
| 16 |
+
SimpleDocTemplate,
|
| 17 |
+
Spacer,
|
| 18 |
+
Table,
|
| 19 |
+
TableStyle,
|
| 20 |
+
)
|
| 21 |
+
from reportlab.platypus.flowables import Flowable
|
| 22 |
+
|
| 23 |
+
from app.models.explain_schemas import CertificateRequest, EthicsResponse
|
| 24 |
+
from app.models.ml_schemas import MetricsResponse, ModelType
|
| 25 |
+
|
| 26 |
+
# Colour palette — using the app's green as PRIMARY.
# SUCCESS / WARNING / DANGER are the status colours returned by `_metric_colour`;
# SUCCESS_BG / WARNING_BG / DANGER_BG are the matching tints returned by `_row_bg`.
# Note that SUCCESS has the same hex value as PRIMARY (#1A7A4C).
|
| 27 |
+
PRIMARY = colors.HexColor("#1A7A4C")
|
| 28 |
+
PRIMARY_DARK = colors.HexColor("#145E39")
|
| 29 |
+
PRIMARY_LIGHT = colors.HexColor("#E8F5EE")
|
| 30 |
+
SUCCESS = colors.HexColor("#1A7A4C")
|
| 31 |
+
SUCCESS_BG = colors.HexColor("#F0FDF4")
|
| 32 |
+
WARNING = colors.HexColor("#92400E")
|
| 33 |
+
WARNING_BG = colors.HexColor("#FFF7ED")
|
| 34 |
+
DANGER = colors.HexColor("#991B1B")
|
| 35 |
+
DANGER_BG = colors.HexColor("#FFF1F2")
|
| 36 |
+
LIGHT_GREY = colors.HexColor("#F4F7FB")
|
| 37 |
+
MID_GREY = colors.HexColor("#DDE3EC")
|
| 38 |
+
DARK_TEXT = colors.HexColor("#172B4D")
|
| 39 |
+
ACCENT = colors.HexColor("#0EA5E9")
|
| 40 |
+
|
| 41 |
+
# Human-readable display name for each ModelType member.
# Code in this module looks labels up with `.get(model_type, str(model_type))`,
# so a type missing from this table falls back to its string form.
MODEL_LABELS = {
|
| 42 |
+
ModelType.KNN: "K-Nearest Neighbours (KNN)",
|
| 43 |
+
ModelType.SVM: "Support Vector Machine (SVM)",
|
| 44 |
+
ModelType.DECISION_TREE: "Decision Tree",
|
| 45 |
+
ModelType.RANDOM_FOREST: "Random Forest",
|
| 46 |
+
ModelType.LOGISTIC_REGRESSION: "Logistic Regression",
|
| 47 |
+
ModelType.NAIVE_BAYES: "Naïve Bayes",
|
| 48 |
+
ModelType.XGBOOST: "XGBoost (Extreme Gradient Boosting)",
|
| 49 |
+
ModelType.LIGHTGBM: "LightGBM (Light Gradient Boosting)",
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
# ---------------------------------------------------------------------------
|
| 54 |
+
# Custom flowable: full-width coloured banner block
|
| 55 |
+
# ---------------------------------------------------------------------------
|
| 56 |
+
|
| 57 |
+
class _BannerBlock(Flowable):
    """Header banner flowable: a solid colour bar carrying a centred white title.

    The bar is `width` x `height`, filled with `bg_color`. A 3-unit-high
    PRIMARY_DARK strip is drawn starting at y = 0 and the title is set in
    Helvetica-Bold 22.
    """

    def __init__(self, width: float, height: float, bg_color: colors.Color,
                 title: str):
        """Record the banner geometry, fill colour and title text for `draw`."""
        super().__init__()
        self.width, self.height = width, height
        self.bg_color, self.title = bg_color, title

    def draw(self):
        """Paint the background, the accent strip and the title onto `self.canv`."""
        canvas = self.canv
        banner_w, banner_h = self.width, self.height

        # Background fill covering the whole banner.
        canvas.setFillColor(self.bg_color)
        canvas.rect(0, 0, banner_w, banner_h, fill=1, stroke=0)

        # Thin darker accent strip starting at y = 0.
        canvas.setFillColor(PRIMARY_DARK)
        canvas.rect(0, 0, banner_w, 3, fill=1, stroke=0)

        # Title, horizontally centred and nudged 2 units above the vertical midpoint.
        canvas.setFillColor(colors.white)
        canvas.setFont("Helvetica-Bold", 22)
        canvas.drawCentredString(banner_w / 2, banner_h / 2 + 2, self.title)
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
class _BorderFrame(Flowable):
    """Zero-size flowable that strokes a decorative double-line border.

    Two unfilled rectangles are drawn from the stored page size and margin:
    a 2.5-wide outer line offset 8 units outside the margin and a 0.8-wide
    inner line offset 4 units outside it.
    """

    def __init__(self, page_width: float, page_height: float,
                 margin: float, color: colors.Color):
        """Store the page dimensions, margin and stroke colour; report a 0 x 0 size."""
        super().__init__()
        self.page_width, self.page_height = page_width, page_height
        self.margin = margin
        self.color = color
        # The flowable claims no layout space of its own.
        self.width = 0
        self.height = 0

    def draw(self):
        """Stroke the outer and inner border rectangles onto `self.canv`."""
        canvas = self.canv
        margin = self.margin
        page_w, page_h = self.page_width, self.page_height

        canvas.setStrokeColor(self.color)
        # (line width, offset of the rectangle's corner) — outer border first, then inner.
        for line_width, offset in ((2.5, margin - 8), (0.8, margin - 4)):
            canvas.setLineWidth(line_width)
            canvas.rect(offset, offset, page_w - 2 * offset, page_h - 2 * offset,
                        fill=0, stroke=1)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
# ---------------------------------------------------------------------------
|
| 112 |
+
# Helpers
|
| 113 |
+
# ---------------------------------------------------------------------------
|
| 114 |
+
|
| 115 |
+
def _metric_colour(value: float, green: float, amber: float) -> colors.Color:
    """Map a metric value to a status colour using two thresholds.

    Returns SUCCESS when `value >= green`, otherwise WARNING when
    `value >= amber`, otherwise DANGER.
    """
    return SUCCESS if value >= green else (WARNING if value >= amber else DANGER)
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def _pct(value: float) -> str:
|
| 125 |
+
"""Format a 0..1 number as a one-decimal percentage string."""
|
| 126 |
+
return f"{value * 100:.1f}%"
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def _row_bg(val: float, green: float, amber: float) -> colors.Color:
    """Pick a table-row background tint for a metric value using two thresholds.

    Returns SUCCESS_BG when `val >= green`, otherwise WARNING_BG when
    `val >= amber`, otherwise DANGER_BG.
    """
    for threshold, tint in ((green, SUCCESS_BG), (amber, WARNING_BG)):
        if val >= threshold:
            return tint
    return DANGER_BG
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def _compute_mcc(tp: int, tn: int, fp: int, fn: int) -> Optional[float]:
|
| 139 |
+
"""Compute Matthews Correlation Coefficient from a confusion matrix row."""
|
| 140 |
+
denom = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
|
| 141 |
+
if denom == 0:
|
| 142 |
+
return None
|
| 143 |
+
return (tp * tn - fp * fn) / denom
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def _iter_takeaways(metrics: MetricsResponse, model_type: ModelType):
    """Yield the takeaway sentences for `metrics`, one string per bullet, in display order."""
    model_label = MODEL_LABELS.get(model_type, str(model_type))

    # --- Sensitivity (clinical priority): always produces one bullet ---
    if metrics.sensitivity >= 0.85:
        yield (
            f"Excellent sensitivity ({_pct(metrics.sensitivity)}): the model correctly identifies the "
            "large majority of positive cases, making it well-suited for clinical screening."
        )
    elif metrics.sensitivity >= 0.70:
        yield (
            f"Acceptable sensitivity ({_pct(metrics.sensitivity)}): most positive cases are detected, "
            "though some missed diagnoses remain possible."
        )
    else:
        yield (
            f"Low sensitivity ({_pct(metrics.sensitivity)}): the model misses a substantial proportion "
            "of positive cases — not recommended for screening without further tuning."
        )

    # --- Specificity: values from 0.65 up to (but excluding) 0.85 produce no bullet ---
    if metrics.specificity >= 0.85:
        yield (
            f"High specificity ({_pct(metrics.specificity)}): very few healthy patients are incorrectly "
            "flagged, reducing unnecessary follow-up burden."
        )
    elif metrics.specificity < 0.65:
        yield (
            f"Below-average specificity ({_pct(metrics.specificity)}): a notable false-positive rate "
            "could lead to unnecessary investigations in healthy patients."
        )

    # --- AUC: always produces one bullet ---
    if metrics.auc_roc >= 0.90:
        yield (
            f"Outstanding discrimination (AUC = {_pct(metrics.auc_roc)}): the model reliably ranks "
            "positive cases above negative ones across all decision thresholds."
        )
    elif metrics.auc_roc >= 0.75:
        yield (
            f"Good discriminative ability (AUC = {_pct(metrics.auc_roc)}): the model provides useful "
            "separation between classes across operating points."
        )
    else:
        yield (
            f"Weak discrimination (AUC = {_pct(metrics.auc_roc)}): the model struggles to separate "
            "positive from negative cases and should be improved before deployment."
        )

    # --- Overfitting: driven by the flag already present on the metrics object ---
    if metrics.overfitting_warning:
        gap = metrics.train_accuracy - metrics.accuracy
        yield (
            f"Overfitting detected: training accuracy ({_pct(metrics.train_accuracy)}) is considerably "
            f"higher than test accuracy ({_pct(metrics.accuracy)}, gap = {gap * 100:.1f} pp). "
            "Consider regularisation, pruning, or collecting more data."
        )
    else:
        yield (
            f"Generalisation is healthy: the gap between training ({_pct(metrics.train_accuracy)}) "
            f"and test accuracy ({_pct(metrics.accuracy)}) is within acceptable bounds."
        )

    # --- MCC: only when the metrics object carries a non-None `mcc` ---
    mcc = getattr(metrics, "mcc", None)
    if mcc is not None:
        if mcc >= 0.6:
            yield (
                f"Strong overall balance (MCC = {mcc:.3f}): the model performs well even if class "
                "sizes are imbalanced."
            )
        elif mcc >= 0.3:
            yield (
                f"Moderate overall balance (MCC = {mcc:.3f}): the model shows some robustness to "
                "class imbalance, but there is room for improvement."
            )
        else:
            yield (
                f"Poor balance score (MCC = {mcc:.3f}): the model may be biased toward the majority "
                "class. Consider resampling or adjusted class weights."
            )

    # --- Cross-validation stability: population std-dev of the fold scores ---
    if metrics.cross_val_scores:
        cv_mean = sum(metrics.cross_val_scores) / len(metrics.cross_val_scores)
        squared_deviations = sum((x - cv_mean) ** 2 for x in metrics.cross_val_scores)
        cv_std = math.sqrt(squared_deviations / len(metrics.cross_val_scores))
        if cv_std <= 0.03:
            yield (
                f"{len(metrics.cross_val_scores)}-fold cross-validation shows very stable performance "
                f"(mean {_pct(cv_mean)} ± {cv_std * 100:.1f} pp), indicating the result is unlikely "
                "to be a lucky split."
            )
        elif cv_std <= 0.06:
            yield (
                f"{len(metrics.cross_val_scores)}-fold cross-validation shows moderate variability "
                f"(mean {_pct(cv_mean)} ± {cv_std * 100:.1f} pp). "
                "The model is reasonably stable across data splits."
            )
        else:
            yield (
                f"{len(metrics.cross_val_scores)}-fold cross-validation shows high variability "
                f"(mean {_pct(cv_mean)} ± {cv_std * 100:.1f} pp). "
                "Performance may depend heavily on how the data is split."
            )

    # --- Model-specific note: other model types get none ---
    if model_type in (ModelType.RANDOM_FOREST, ModelType.XGBOOST, ModelType.LIGHTGBM):
        yield (
            f"{model_label} is an ensemble method that aggregates many weak learners; "
            "feature-importance outputs are available for clinical interpretability."
        )
    elif model_type == ModelType.LOGISTIC_REGRESSION:
        yield (
            "Logistic Regression produces calibrated probabilities and fully interpretable "
            "coefficients, making it a strong baseline for clinical audit."
        )
    elif model_type == ModelType.DECISION_TREE:
        yield (
            "Decision Trees are highly interpretable but prone to overfitting on small datasets; "
            "examine the max-depth parameter if overfitting is observed."
        )


def _generate_takeaways(metrics: MetricsResponse, model_type: ModelType) -> list[str]:
    """Build the list of plain-language takeaway bullets for a trained model.

    Bullets cover, in order: sensitivity, specificity, AUC, overfitting,
    MCC (when available), cross-validation stability (when scores exist) and
    a model-specific note for ensemble, logistic-regression and decision-tree models.
    """
    return list(_iter_takeaways(metrics, model_type))
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
# ---------------------------------------------------------------------------
|
| 277 |
+
# Certificate service
|
| 278 |
+
# ---------------------------------------------------------------------------
|
| 279 |
+
|
| 280 |
+
class CertificateService:
|
| 281 |
+
"""
|
| 282 |
+
Produces the EU AI Act compliance PDF (overview, fairness, explainability, checklist,
|
| 283 |
+
signatures) via reportlab.
|
| 284 |
+
"""
|
| 285 |
+
def generate_pdf(
|
| 286 |
+
self,
|
| 287 |
+
cert_request: CertificateRequest,
|
| 288 |
+
metrics: MetricsResponse,
|
| 289 |
+
ethics: EthicsResponse,
|
| 290 |
+
specialty_name: str,
|
| 291 |
+
model_type: ModelType,
|
| 292 |
+
training_time_ms: Optional[float] = None,
|
| 293 |
+
) -> bytes:
|
| 294 |
+
"""Main entrypoint — build the full PDF for a session and return it as bytes."""
|
| 295 |
+
buf = BytesIO()
|
| 296 |
+
PAGE_W, PAGE_H = A4
|
| 297 |
+
MARGIN = 2 * cm
|
| 298 |
+
|
| 299 |
+
doc = SimpleDocTemplate(
|
| 300 |
+
buf,
|
| 301 |
+
pagesize=A4,
|
| 302 |
+
leftMargin=MARGIN,
|
| 303 |
+
rightMargin=MARGIN,
|
| 304 |
+
topMargin=MARGIN,
|
| 305 |
+
bottomMargin=2.2 * cm,
|
| 306 |
+
)
|
| 307 |
+
CONTENT_W = PAGE_W - 2 * MARGIN
|
| 308 |
+
|
| 309 |
+
styles = getSampleStyleSheet()
|
| 310 |
+
|
| 311 |
+
h2 = ParagraphStyle(
|
| 312 |
+
"H2", parent=styles["Heading2"],
|
| 313 |
+
fontSize=13, textColor=PRIMARY_DARK, spaceBefore=16, spaceAfter=5,
|
| 314 |
+
borderPad=3,
|
| 315 |
+
)
|
| 316 |
+
body = ParagraphStyle(
|
| 317 |
+
"Body", parent=styles["Normal"],
|
| 318 |
+
fontSize=10, textColor=DARK_TEXT, leading=14,
|
| 319 |
+
)
|
| 320 |
+
body_center = ParagraphStyle(
|
| 321 |
+
"BodyCenter", parent=body,
|
| 322 |
+
alignment=1,
|
| 323 |
+
)
|
| 324 |
+
small = ParagraphStyle(
|
| 325 |
+
"Small", parent=styles["Normal"],
|
| 326 |
+
fontSize=8, textColor=colors.HexColor("#6B7280"), leading=11,
|
| 327 |
+
)
|
| 328 |
+
small_center = ParagraphStyle(
|
| 329 |
+
"SmallCenter", parent=small,
|
| 330 |
+
alignment=1,
|
| 331 |
+
)
|
| 332 |
+
disclaimer_style = ParagraphStyle(
|
| 333 |
+
"Disclaimer", parent=small,
|
| 334 |
+
textColor=DANGER, alignment=1, leading=11,
|
| 335 |
+
)
|
| 336 |
+
bullet_style = ParagraphStyle(
|
| 337 |
+
"Bullet", parent=styles["Normal"],
|
| 338 |
+
fontSize=9, textColor=DARK_TEXT, leading=13,
|
| 339 |
+
leftIndent=14, firstLineIndent=-10,
|
| 340 |
+
)
|
| 341 |
+
cell8 = ParagraphStyle(
|
| 342 |
+
"Cell8", parent=styles["Normal"],
|
| 343 |
+
fontSize=8, textColor=DARK_TEXT, leading=10,
|
| 344 |
+
)
|
| 345 |
+
|
| 346 |
+
story = []
|
| 347 |
+
|
| 348 |
+
# ---- PAGE BORDER (drawn via canvas callback — we approximate with a table border) ----
|
| 349 |
+
# We'll use a single-cell table at the very start to act as a framing border.
|
| 350 |
+
# This works because SimpleDocTemplate renders top to bottom.
|
| 351 |
+
# A more robust approach uses page templates; here we use a thin top-rule trick.
|
| 352 |
+
|
| 353 |
+
# ---- GREEN HEADER BANNER ----
|
| 354 |
+
banner = _BannerBlock(
|
| 355 |
+
width=CONTENT_W,
|
| 356 |
+
height=1.8 * cm,
|
| 357 |
+
bg_color=PRIMARY,
|
| 358 |
+
title="HEALTH-AI · ML Learning Tool",
|
| 359 |
+
)
|
| 360 |
+
story.append(banner)
|
| 361 |
+
story.append(Spacer(1, 0.4 * cm))
|
| 362 |
+
|
| 363 |
+
issued_to = cert_request.clinician_name or "Healthcare Professional"
|
| 364 |
+
institution = cert_request.institution or "Healthcare Institution"
|
| 365 |
+
today = datetime.date.today().strftime("%d %B %Y")
|
| 366 |
+
|
| 367 |
+
story.append(Paragraph(
|
| 368 |
+
f"This certificate is issued to <b>{issued_to}</b> of <b>{institution}</b> "
|
| 369 |
+
f"for completing the HEALTH-AI ML Learning Tool educational exercise on <b>{today}</b>.",
|
| 370 |
+
body_center,
|
| 371 |
+
))
|
| 372 |
+
story.append(Spacer(1, 0.4 * cm))
|
| 373 |
+
|
| 374 |
+
# ---- SECTION 1: Specialty & Model ----
|
| 375 |
+
story.append(Paragraph("1. Clinical Specialty & AI Model", h2))
|
| 376 |
+
|
| 377 |
+
info_data = [
|
| 378 |
+
["Medical Specialty", specialty_name],
|
| 379 |
+
["AI Model Type", MODEL_LABELS.get(model_type, str(model_type))],
|
| 380 |
+
["Model ID", cert_request.model_id[:24] + ("…" if len(cert_request.model_id) > 24 else "")],
|
| 381 |
+
]
|
| 382 |
+
if training_time_ms is not None:
|
| 383 |
+
if training_time_ms >= 1000:
|
| 384 |
+
time_str = f"{training_time_ms / 1000:.2f} s"
|
| 385 |
+
else:
|
| 386 |
+
time_str = f"{training_time_ms:.0f} ms"
|
| 387 |
+
info_data.append(["Training Time", time_str])
|
| 388 |
+
|
| 389 |
+
info_table = Table(info_data, colWidths=[5.5 * cm, 11.5 * cm])
|
| 390 |
+
info_table.setStyle(TableStyle([
|
| 391 |
+
("BACKGROUND", (0, 0), (0, -1), PRIMARY_LIGHT),
|
| 392 |
+
("TEXTCOLOR", (0, 0), (-1, -1), DARK_TEXT),
|
| 393 |
+
("FONTNAME", (0, 0), (0, -1), "Helvetica-Bold"),
|
| 394 |
+
("FONTSIZE", (0, 0), (-1, -1), 9),
|
| 395 |
+
("GRID", (0, 0), (-1, -1), 0.5, MID_GREY),
|
| 396 |
+
("ROWBACKGROUNDS", (0, 0), (-1, -1), [colors.white, LIGHT_GREY]),
|
| 397 |
+
("LEFTPADDING", (0, 0), (-1, -1), 8),
|
| 398 |
+
("RIGHTPADDING", (0, 0), (-1, -1), 8),
|
| 399 |
+
("TOPPADDING", (0, 0), (-1, -1), 5),
|
| 400 |
+
("BOTTOMPADDING", (0, 0), (-1, -1), 5),
|
| 401 |
+
("LINEBELOW", (0, -1), (-1, -1), 1.5, PRIMARY),
|
| 402 |
+
]))
|
| 403 |
+
story.append(info_table)
|
| 404 |
+
story.append(Spacer(1, 0.4 * cm))
|
| 405 |
+
|
| 406 |
+
# ---- SECTION 2: Performance Metrics ----
|
| 407 |
+
story.append(Paragraph("2. Model Performance Summary", h2))
|
| 408 |
+
story.append(Paragraph(
|
| 409 |
+
"Performance measured on held-out test patients the model had never seen during training.",
|
| 410 |
+
body,
|
| 411 |
+
))
|
| 412 |
+
story.append(Spacer(1, 0.2 * cm))
|
| 413 |
+
|
| 414 |
+
# Resolve MCC: prefer the field on MetricsResponse, fall back to computing from CM
|
| 415 |
+
mcc_value: Optional[float] = getattr(metrics, "mcc", None)
|
| 416 |
+
cm_data = metrics.confusion_matrix
|
| 417 |
+
if mcc_value is None or mcc_value == 0.0:
|
| 418 |
+
mcc_value = _compute_mcc(cm_data.tp, cm_data.tn, cm_data.fp, cm_data.fn)
|
| 419 |
+
|
| 420 |
+
metric_rows = [
|
| 421 |
+
["Metric", "Value", "Threshold", "Status"],
|
| 422 |
+
["Accuracy", _pct(metrics.accuracy), "≥ 65 %",
|
| 423 |
+
"✓ Acceptable" if metrics.accuracy >= 0.65 else "✗ Below threshold"],
|
| 424 |
+
["Sensitivity ★", _pct(metrics.sensitivity), "≥ 70 %",
|
| 425 |
+
"✓ Acceptable" if metrics.sensitivity >= 0.70 else "✗ Below threshold"],
|
| 426 |
+
["Specificity", _pct(metrics.specificity), "≥ 65 %",
|
| 427 |
+
"✓ Acceptable" if metrics.specificity >= 0.65 else "✗ Below threshold"],
|
| 428 |
+
["Precision (PPV)", _pct(metrics.precision), "≥ 60 %",
|
| 429 |
+
"✓ Acceptable" if metrics.precision >= 0.60 else "✗ Below threshold"],
|
| 430 |
+
["F1 Score", _pct(metrics.f1_score), "≥ 65 %",
|
| 431 |
+
"✓ Acceptable" if metrics.f1_score >= 0.65 else "✗ Below threshold"],
|
| 432 |
+
["AUC-ROC", _pct(metrics.auc_roc), "≥ 75 %",
|
| 433 |
+
"✓ Acceptable" if metrics.auc_roc >= 0.75 else "✗ Below threshold"],
|
| 434 |
+
]
|
| 435 |
+
|
| 436 |
+
if mcc_value is not None:
|
| 437 |
+
metric_rows.append([
|
| 438 |
+
"MCC †", f"{mcc_value:.3f}", "≥ 0.30",
|
| 439 |
+
"✓ Acceptable" if mcc_value >= 0.30 else "✗ Below threshold",
|
| 440 |
+
])
|
| 441 |
+
|
| 442 |
+
# Build per-row background colours
|
| 443 |
+
perf_vals_thresholds = [
|
| 444 |
+
(metrics.accuracy, 0.65, 0.55),
|
| 445 |
+
(metrics.sensitivity, 0.70, 0.50),
|
| 446 |
+
(metrics.specificity, 0.65, 0.55),
|
| 447 |
+
(metrics.precision, 0.60, 0.50),
|
| 448 |
+
(metrics.f1_score, 0.65, 0.55),
|
| 449 |
+
(metrics.auc_roc, 0.75, 0.65),
|
| 450 |
+
]
|
| 451 |
+
if mcc_value is not None:
|
| 452 |
+
perf_vals_thresholds.append((mcc_value, 0.30, 0.10))
|
| 453 |
+
|
| 454 |
+
row_bgs = [PRIMARY] # header row
|
| 455 |
+
for val, gt, at in perf_vals_thresholds:
|
| 456 |
+
row_bgs.append(_row_bg(val, gt, at))
|
| 457 |
+
|
| 458 |
+
perf_table = Table(metric_rows, colWidths=[5 * cm, 2.8 * cm, 3.2 * cm, 6 * cm])
|
| 459 |
+
ts = [
|
| 460 |
+
("BACKGROUND", (0, 0), (-1, 0), PRIMARY),
|
| 461 |
+
("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
|
| 462 |
+
("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
|
| 463 |
+
("FONTSIZE", (0, 0), (-1, -1), 9),
|
| 464 |
+
("GRID", (0, 0), (-1, -1), 0.5, MID_GREY),
|
| 465 |
+
("LEFTPADDING", (0, 0), (-1, -1), 8),
|
| 466 |
+
("TOPPADDING", (0, 0), (-1, -1), 5),
|
| 467 |
+
("BOTTOMPADDING", (0, 0), (-1, -1), 5),
|
| 468 |
+
("ALIGN", (1, 0), (2, -1), "CENTER"),
|
| 469 |
+
]
|
| 470 |
+
for i, bg in enumerate(row_bgs):
|
| 471 |
+
ts.append(("BACKGROUND", (0, i), (-1, i), bg))
|
| 472 |
+
# Colour the Value and Status columns
|
| 473 |
+
for i, (val, gt, at) in enumerate(perf_vals_thresholds, start=1):
|
| 474 |
+
col = SUCCESS if val >= gt else (WARNING if val >= at else DANGER)
|
| 475 |
+
ts.append(("TEXTCOLOR", (1, i), (1, i), col))
|
| 476 |
+
ts.append(("FONTNAME", (1, i), (1, i), "Helvetica-Bold"))
|
| 477 |
+
ts.append(("TEXTCOLOR", (3, i), (3, i), col))
|
| 478 |
+
ts.append(("FONTNAME", (3, i), (3, i), "Helvetica-Bold"))
|
| 479 |
+
perf_table.setStyle(TableStyle(ts))
|
| 480 |
+
story.append(perf_table)
|
| 481 |
+
story.append(Spacer(1, 0.2 * cm))
|
| 482 |
+
story.append(Paragraph(
|
| 483 |
+
"★ Sensitivity (recall) is the most critical metric for clinical screening tools. "
|
| 484 |
+
"† MCC (Matthews Correlation Coefficient) accounts for class imbalance.",
|
| 485 |
+
small,
|
| 486 |
+
))
|
| 487 |
+
story.append(Spacer(1, 0.3 * cm))
|
| 488 |
+
|
| 489 |
+
# ---- Confusion matrix summary ----
|
| 490 |
+
story.append(Paragraph(
|
| 491 |
+
"<b>Confusion Matrix Summary</b>",
|
| 492 |
+
ParagraphStyle("CMHead", parent=body, textColor=PRIMARY_DARK, spaceAfter=4),
|
| 493 |
+
))
|
| 494 |
+
cm_rows = [
|
| 495 |
+
["", "Predicted Positive", "Predicted Negative"],
|
| 496 |
+
[
|
| 497 |
+
"Actual Positive",
|
| 498 |
+
f"TP = {cm_data.tp}",
|
| 499 |
+
f"FN = {cm_data.fn}",
|
| 500 |
+
],
|
| 501 |
+
[
|
| 502 |
+
"Actual Negative",
|
| 503 |
+
f"FP = {cm_data.fp}",
|
| 504 |
+
f"TN = {cm_data.tn}",
|
| 505 |
+
],
|
| 506 |
+
]
|
| 507 |
+
cm_table = Table(cm_rows, colWidths=[4.5 * cm, 4.5 * cm, 4.5 * cm])
|
| 508 |
+
cm_ts = [
|
| 509 |
+
("BACKGROUND", (0, 0), (-1, 0), PRIMARY),
|
| 510 |
+
("BACKGROUND", (0, 0), (0, -1), PRIMARY_LIGHT),
|
| 511 |
+
("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
|
| 512 |
+
("TEXTCOLOR", (0, 1), (0, -1), PRIMARY_DARK),
|
| 513 |
+
("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
|
| 514 |
+
("FONTNAME", (0, 1), (0, -1), "Helvetica-Bold"),
|
| 515 |
+
("FONTSIZE", (0, 0), (-1, -1), 9),
|
| 516 |
+
("ALIGN", (1, 0), (-1, -1), "CENTER"),
|
| 517 |
+
("ALIGN", (0, 0), (0, -1), "RIGHT"),
|
| 518 |
+
("GRID", (0, 0), (-1, -1), 0.5, MID_GREY),
|
| 519 |
+
("TOPPADDING", (0, 0), (-1, -1), 5),
|
| 520 |
+
("BOTTOMPADDING", (0, 0), (-1, -1), 5),
|
| 521 |
+
("LEFTPADDING", (0, 0), (-1, -1), 8),
|
| 522 |
+
# TP cell — green
|
| 523 |
+
("BACKGROUND", (1, 1), (1, 1), SUCCESS_BG),
|
| 524 |
+
("TEXTCOLOR", (1, 1), (1, 1), SUCCESS),
|
| 525 |
+
("FONTNAME", (1, 1), (1, 1), "Helvetica-Bold"),
|
| 526 |
+
# TN cell — green
|
| 527 |
+
("BACKGROUND", (2, 2), (2, 2), SUCCESS_BG),
|
| 528 |
+
("TEXTCOLOR", (2, 2), (2, 2), SUCCESS),
|
| 529 |
+
("FONTNAME", (2, 2), (2, 2), "Helvetica-Bold"),
|
| 530 |
+
# FP cell — amber
|
| 531 |
+
("BACKGROUND", (1, 2), (1, 2), WARNING_BG),
|
| 532 |
+
("TEXTCOLOR", (1, 2), (1, 2), WARNING),
|
| 533 |
+
("FONTNAME", (1, 2), (1, 2), "Helvetica-Bold"),
|
| 534 |
+
# FN cell — red
|
| 535 |
+
("BACKGROUND", (2, 1), (2, 1), DANGER_BG),
|
| 536 |
+
("TEXTCOLOR", (2, 1), (2, 1), DANGER),
|
| 537 |
+
("FONTNAME", (2, 1), (2, 1), "Helvetica-Bold"),
|
| 538 |
+
]
|
| 539 |
+
cm_table.setStyle(TableStyle(cm_ts))
|
| 540 |
+
story.append(cm_table)
|
| 541 |
+
story.append(Spacer(1, 0.2 * cm))
|
| 542 |
+
|
| 543 |
+
# Cross-val summary
|
| 544 |
+
if metrics.cross_val_scores:
|
| 545 |
+
cv = metrics.cross_val_scores
|
| 546 |
+
cv_mean = sum(cv) / len(cv)
|
| 547 |
+
cv_std = math.sqrt(sum((x - cv_mean) ** 2 for x in cv) / len(cv))
|
| 548 |
+
cv_min = min(cv)
|
| 549 |
+
cv_max = max(cv)
|
| 550 |
+
story.append(Paragraph(
|
| 551 |
+
f"<b>{len(cv)}-Fold Cross-Validation:</b> "
|
| 552 |
+
f"mean accuracy = <b>{_pct(cv_mean)}</b> | "
|
| 553 |
+
f"std = {cv_std * 100:.1f} pp | "
|
| 554 |
+
f"range [{_pct(cv_min)} – {_pct(cv_max)}]",
|
| 555 |
+
ParagraphStyle("CVLine", parent=small,
|
| 556 |
+
textColor=DARK_TEXT, leading=12),
|
| 557 |
+
))
|
| 558 |
+
story.append(Spacer(1, 0.1 * cm))
|
| 559 |
+
|
| 560 |
+
story.append(Spacer(1, 0.4 * cm))
|
| 561 |
+
|
| 562 |
+
# ---- SECTION 3: Bias Findings ----
|
| 563 |
+
story.append(Paragraph("3. Bias & Fairness Findings", h2))
|
| 564 |
+
if ethics.bias_warnings:
|
| 565 |
+
for w in ethics.bias_warnings:
|
| 566 |
+
story.append(Paragraph(f"⚠ {w.message}", ParagraphStyle(
|
| 567 |
+
"Warn", parent=body, textColor=DANGER, spaceAfter=3,
|
| 568 |
+
)))
|
| 569 |
+
else:
|
| 570 |
+
story.append(Paragraph(
|
| 571 |
+
"✓ No significant bias detected across patient subgroups.",
|
| 572 |
+
ParagraphStyle("OK", parent=body, textColor=SUCCESS),
|
| 573 |
+
))
|
| 574 |
+
story.append(Spacer(1, 0.2 * cm))
|
| 575 |
+
|
| 576 |
+
subgroup_data = [["Subgroup", "n", "Accuracy", "Sens.", "Spec.", "F1", "Status"]]
|
| 577 |
+
for sm in ethics.subgroup_metrics:
|
| 578 |
+
status_sym = {"acceptable": "✓", "review": "⚠", "action_needed": "✗"}.get(sm.status, "?")
|
| 579 |
+
subgroup_data.append([
|
| 580 |
+
Paragraph(sm.group_label, cell8),
|
| 581 |
+
str(sm.sample_size),
|
| 582 |
+
_pct(sm.accuracy), _pct(sm.sensitivity), _pct(sm.specificity),
|
| 583 |
+
_pct(sm.f1_score),
|
| 584 |
+
f"{status_sym} {sm.status.replace('_', ' ').title()}",
|
| 585 |
+
])
|
| 586 |
+
sg_table = Table(
|
| 587 |
+
subgroup_data,
|
| 588 |
+
colWidths=[3.2 * cm, 1.2 * cm, 2.1 * cm, 2.1 * cm, 2.1 * cm, 2.1 * cm, 4.2 * cm],
|
| 589 |
+
)
|
| 590 |
+
sg_ts = [
|
| 591 |
+
("BACKGROUND", (0, 0), (-1, 0), PRIMARY),
|
| 592 |
+
("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
|
| 593 |
+
("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
|
| 594 |
+
("FONTSIZE", (0, 0), (-1, -1), 8),
|
| 595 |
+
("GRID", (0, 0), (-1, -1), 0.4, MID_GREY),
|
| 596 |
+
("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, LIGHT_GREY]),
|
| 597 |
+
("LEFTPADDING", (0, 0), (-1, -1), 6),
|
| 598 |
+
("TOPPADDING", (0, 0), (-1, -1), 4),
|
| 599 |
+
("BOTTOMPADDING", (0, 0), (-1, -1), 4),
|
| 600 |
+
("ALIGN", (1, 0), (-1, -1), "CENTER"),
|
| 601 |
+
]
|
| 602 |
+
for i, sm in enumerate(ethics.subgroup_metrics, 1):
|
| 603 |
+
col = (SUCCESS if sm.status == "acceptable"
|
| 604 |
+
else WARNING if sm.status == "review" else DANGER)
|
| 605 |
+
sg_ts.append(("TEXTCOLOR", (6, i), (6, i), col))
|
| 606 |
+
sg_ts.append(("FONTNAME", (6, i), (6, i), "Helvetica-Bold"))
|
| 607 |
+
sg_table.setStyle(TableStyle(sg_ts))
|
| 608 |
+
story.append(sg_table)
|
| 609 |
+
story.append(Spacer(1, 0.4 * cm))
|
| 610 |
+
|
| 611 |
+
# ---- SECTION 4: EU AI Act Checklist ----
|
| 612 |
+
story.append(Paragraph("4. EU AI Act Compliance Checklist", h2))
|
| 613 |
+
checklist_state = cert_request.checklist_state or {}
|
| 614 |
+
checklist_data = [["#", "Requirement", "Status"]]
|
| 615 |
+
for i, item in enumerate(ethics.eu_ai_act_items, 1):
|
| 616 |
+
is_checked = item.get("pre_checked") or checklist_state.get(item["id"], False)
|
| 617 |
+
checklist_data.append([
|
| 618 |
+
str(i),
|
| 619 |
+
Paragraph(item["text"], cell8),
|
| 620 |
+
"✓ Complete" if is_checked else "○ Pending",
|
| 621 |
+
])
|
| 622 |
+
cl_table = Table(checklist_data, colWidths=[1 * cm, 14 * cm, 2 * cm])
|
| 623 |
+
cl_ts = [
|
| 624 |
+
("BACKGROUND", (0, 0), (-1, 0), PRIMARY),
|
| 625 |
+
("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
|
| 626 |
+
("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
|
| 627 |
+
("FONTSIZE", (0, 0), (-1, -1), 8),
|
| 628 |
+
("GRID", (0, 0), (-1, -1), 0.4, MID_GREY),
|
| 629 |
+
("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, LIGHT_GREY]),
|
| 630 |
+
("LEFTPADDING", (0, 0), (-1, -1), 6),
|
| 631 |
+
("TOPPADDING", (0, 0), (-1, -1), 4),
|
| 632 |
+
("BOTTOMPADDING", (0, 0), (-1, -1), 4),
|
| 633 |
+
]
|
| 634 |
+
for i, item in enumerate(ethics.eu_ai_act_items, 1):
|
| 635 |
+
is_checked = item.get("pre_checked") or checklist_state.get(item["id"], False)
|
| 636 |
+
if is_checked:
|
| 637 |
+
cl_ts.append(("TEXTCOLOR", (2, i), (2, i), SUCCESS))
|
| 638 |
+
cl_ts.append(("FONTNAME", (2, i), (2, i), "Helvetica-Bold"))
|
| 639 |
+
else:
|
| 640 |
+
cl_ts.append(("TEXTCOLOR", (2, i), (2, i), colors.HexColor("#9CA3AF")))
|
| 641 |
+
cl_table.setStyle(TableStyle(cl_ts))
|
| 642 |
+
story.append(cl_table)
|
| 643 |
+
story.append(Spacer(1, 0.4 * cm))
|
| 644 |
+
|
| 645 |
+
# ---- SECTION 5: Key Takeaways ----
|
| 646 |
+
story.append(Paragraph("5. Key Takeaways", h2))
|
| 647 |
+
story.append(Paragraph(
|
| 648 |
+
"Auto-generated insights based on this model's performance metrics:",
|
| 649 |
+
ParagraphStyle("TkIntro", parent=body, textColor=colors.HexColor("#4B5563"),
|
| 650 |
+
spaceAfter=5),
|
| 651 |
+
))
|
| 652 |
+
takeaways = _generate_takeaways(metrics, model_type)
|
| 653 |
+
for idx, bullet in enumerate(takeaways, 1):
|
| 654 |
+
story.append(Paragraph(f"<b>{idx}.</b> {bullet}", bullet_style))
|
| 655 |
+
story.append(Spacer(1, 0.1 * cm))
|
| 656 |
+
story.append(Spacer(1, 0.3 * cm))
|
| 657 |
+
|
| 658 |
+
# ---- FOOTER ----
|
| 659 |
+
story.append(HRFlowable(width="100%", thickness=1.5, color=PRIMARY,
|
| 660 |
+
spaceAfter=4))
|
| 661 |
+
story.append(HRFlowable(width="100%", thickness=0.5, color=MID_GREY,
|
| 662 |
+
spaceAfter=5))
|
| 663 |
+
|
| 664 |
+
story.append(Paragraph(
|
| 665 |
+
f"Generated: <b>{today}</b> · HEALTH-AI ML Learning Tool v1.5 "
|
| 666 |
+
"· Prepared by the HealthWithSevgi Team",
|
| 667 |
+
small_center,
|
| 668 |
+
))
|
| 669 |
+
story.append(Spacer(1, 0.15 * cm))
|
| 670 |
+
story.append(Paragraph(
|
| 671 |
+
"<b>IMPORTANT DISCLAIMER:</b> This certificate confirms completion of an educational "
|
| 672 |
+
"exercise only. The AI model described herein is <b>NOT</b> validated for clinical use "
|
| 673 |
+
"and must <b>NOT</b> be used to inform patient management decisions without appropriate "
|
| 674 |
+
"prospective clinical validation and regulatory clearance.",
|
| 675 |
+
disclaimer_style,
|
| 676 |
+
))
|
| 677 |
+
|
| 678 |
+
def _add_page_number(canvas, doc_template):
    """Inner canvas callback that stamps `Page X` on every page.

    Only the current page number is drawn; no total page count is rendered.
    The text is centred horizontally on ``PAGE_W / 2`` at a height of 1 cm.

    Args:
        canvas: Canvas being drawn on; its state is saved and restored so the
            font and fill colour set here do not affect other drawing.
        doc_template: Supplied by the document build callback; not used here.
    """
    canvas.saveState()
    canvas.setFont("Helvetica", 7)
    canvas.setFillColor(colors.HexColor("#9CA3AF"))
    canvas.drawCentredString(
        PAGE_W / 2, 1.0 * cm,
        f"Page {canvas.getPageNumber()}"
    )
    canvas.restoreState()
|
| 688 |
+
|
| 689 |
+
doc.build(story, onFirstPage=_add_page_number, onLaterPages=_add_page_number)
|
| 690 |
+
return buf.getvalue()
|
app/services/data_service.py
ADDED
|
@@ -0,0 +1,1272 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Data exploration and preparation service."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import io
|
| 5 |
+
import logging
|
| 6 |
+
import pathlib
|
| 7 |
+
import uuid
|
| 8 |
+
import zipfile
|
| 9 |
+
from typing import Any
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
import pandas as pd
|
| 13 |
+
import requests
|
| 14 |
+
from imblearn.over_sampling import SMOTE
|
| 15 |
+
from sklearn.model_selection import train_test_split
|
| 16 |
+
from sklearn.preprocessing import MinMaxScaler, StandardScaler
|
| 17 |
+
|
| 18 |
+
from app.models.schemas import (
|
| 19 |
+
ColumnStat,
|
| 20 |
+
DataExplorationResponse,
|
| 21 |
+
PrepResponse,
|
| 22 |
+
PrepSettings,
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
logger = logging.getLogger(__name__)
|
| 26 |
+
|
| 27 |
+
IMBALANCE_RATIO_THRESHOLD = 1.5
|
| 28 |
+
MIN_ROWS = 10
|
| 29 |
+
MAX_UPLOAD_MB = 50
|
| 30 |
+
MAX_TARGET_CLASSES = 20
|
| 31 |
+
|
| 32 |
+
_CACHE_DIR = pathlib.Path(__file__).parent.parent.parent / "data_cache"
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class DatasetUnavailableError(Exception):
    """Raised when a real dataset cannot be loaded and no fallback is allowed."""

    def __init__(self, name: str, reason: str) -> None:
        """Record which dataset failed and why, and build the error message.

        Args:
            name: Identifier of the dataset that could not be loaded; stored
                on the instance as ``dataset_name``.
            reason: Short explanation of the failure; stored as ``reason`` and
                embedded in the exception message.
        """
        self.dataset_name = name
        self.reason = reason
        super().__init__(
            f"Dataset '{name}' is unavailable: {reason}. "
            "Please upload your own CSV file or ensure the dataset cache is populated."
        )
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class DataService:
|
| 52 |
+
"""
|
| 53 |
+
Owns CSV ingestion, column exploration, and per-specialty preparation
|
| 54 |
+
(split/normalise/impute/SMOTE).
|
| 55 |
+
"""
|
| 56 |
+
def __init__(self) -> None:
|
| 57 |
+
"""
|
| 58 |
+
Load and return the bundled dataset for the `_init__` specialty. Used internally
|
| 59 |
+
by `DataService._load_specialty_dataset`.
|
| 60 |
+
"""
|
| 61 |
+
self._session_store: dict[str, dict[str, Any]] = {}
|
| 62 |
+
|
| 63 |
+
# ------------------------------------------------------------------
|
| 64 |
+
# Real-data download helper
|
| 65 |
+
# ------------------------------------------------------------------
|
| 66 |
+
def _fetch_cached(
    self,
    name: str,
    url: str,
    read_kwargs: dict | None = None,
) -> pd.DataFrame:
    """Download a dataset from URL, cache locally, return DataFrame.

    The cache file ``<name>.csv`` under ``_CACHE_DIR`` is used when it exists;
    otherwise the URL is fetched, parsed, and only then written to the cache.

    Args:
        name: Cache key; also the dataset name reported in errors.
        url: Location to download from when no cached file exists.
        read_kwargs: Extra keyword arguments forwarded to ``pd.read_csv``.

    Returns:
        The parsed dataset.

    Raises:
        DatasetUnavailableError: If the cached file cannot be read, or the
            download, parse, or cache write fails.
    """
    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
    cache_path = _CACHE_DIR / f"{name}.csv"
    rk = read_kwargs or {}

    # Try from cache first
    if cache_path.exists():
        try:
            return pd.read_csv(cache_path, **rk)
        except Exception as exc:
            raise DatasetUnavailableError(
                name, f"Cached file exists but failed to read: {exc}"
            ) from exc

    # Download
    try:
        resp = requests.get(url, timeout=20, headers={"User-Agent": "HealthWithSevgi/1.0"})
        resp.raise_for_status()
        # Parse BEFORE persisting: a payload that is not valid CSV (e.g. an
        # HTML error page served with status 200) must never reach the cache,
        # otherwise every later call would fail on the cached-read path above.
        df = pd.read_csv(io.BytesIO(resp.content), **rk)
        cache_path.write_bytes(resp.content)
        logger.info("Downloaded real dataset: %s (%d bytes)", name, len(resp.content))
        return df
    except Exception as exc:
        raise DatasetUnavailableError(
            name, f"Failed to download from {url}: {exc}"
        ) from exc
|
| 100 |
+
|
| 101 |
+
# ------------------------------------------------------------------
|
| 102 |
+
# Exploration
|
| 103 |
+
# ------------------------------------------------------------------
|
| 104 |
+
def explore_dataframe(
    self, df: pd.DataFrame, target_col: str
) -> DataExplorationResponse:
    """Build per-column statistics for the Step-2 exploration panel.

    Args:
        df: Dataframe to summarise. An empty frame is accepted and reports
            0 % missing for every column.
        target_col: Name of the label column. When it is not a column of
            ``df`` the class distribution is left empty and no imbalance
            warning is raised.

    Returns:
        A `DataExplorationResponse` carrying one `ColumnStat` per column, the
        row count, the class distribution of ``target_col``, and the ratio of
        the most frequent to the least frequent observed class.
    """
    row_count = len(df)
    columns: list[ColumnStat] = []
    for col in df.columns:
        series = df[col]
        missing = int(series.isna().sum())
        columns.append(
            ColumnStat(
                name=col,
                dtype=str(series.dtype),
                missing_count=missing,
                # Guard the empty frame: nothing is missing, and 0 / 0 would raise.
                missing_pct=round(missing / row_count * 100, 2) if row_count else 0.0,
                unique_count=int(series.nunique()),
                sample_values=series.dropna().head(5).tolist(),
            )
        )

    class_counts: dict[str, int] = {}
    imbalance_ratio = 1.0
    imbalance_warning = False
    if target_col in df.columns:
        vc = df[target_col].value_counts()
        class_counts = {str(k): int(v) for k, v in vc.items()}
        # value_counts() lists unobserved categories of a categorical column
        # with a count of 0; exclude them so the ratio never divides by zero.
        observed = vc[vc > 0]
        if len(observed) >= 2:
            # value_counts() sorts descending, so first/last are max/min.
            # Cast to builtins so the response holds plain float/bool values.
            imbalance_ratio = round(float(observed.iloc[0] / observed.iloc[-1]), 2)
            imbalance_warning = bool(imbalance_ratio >= IMBALANCE_RATIO_THRESHOLD)

    return DataExplorationResponse(
        columns=columns,
        row_count=row_count,
        class_distribution=class_counts,
        imbalance_warning=imbalance_warning,
        imbalance_ratio=imbalance_ratio,
        target_col=target_col,
    )
|
| 141 |
+
|
| 142 |
+
# ------------------------------------------------------------------
|
| 143 |
+
# Preparation
|
| 144 |
+
# ------------------------------------------------------------------
|
| 145 |
+
def prepare_data(
    self,
    df: pd.DataFrame,
    target_col: str,
    settings: PrepSettings,
    session_id: str | None = None,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, PrepResponse, list[str]]:
    """
    Step-3 preparation endpoint — splits, imputes missing values, handles
    outliers, normalises, and optionally applies SMOTE.

    Imputation values, outlier bounds and scaler parameters are all derived
    from the training split only and then applied to the test split, so no
    test-set statistics leak into training. The prepared arrays and metadata
    are also stored in the session store under ``session_id``.

    Args:
        df: Raw dataframe. Rows whose target is missing are dropped; only
            numeric columns are used as features.
        target_col: Name of the label column.
        settings: Preparation options. This method reads ``missing_strategy``
            ("drop", "median", anything else is treated as mode),
            ``test_size``, ``outlier_handling`` ("iqr" / "zscore_clip"),
            ``normalization`` ("zscore" / "minmax") and ``use_smote``.
        session_id: Key for the session store; a UUID4 is generated when
            omitted.

    Returns:
        ``(X_train, X_test, y_train, y_test, response, feature_names)``.

    Raises:
        ValueError: If the target has more than ``MAX_TARGET_CLASSES``
            distinct values.
    """
    if session_id is None:
        session_id = str(uuid.uuid4())

    # Drop rows where target is NaN
    df = df.dropna(subset=[target_col]).copy()

    # Guard: reject continuous / high-cardinality target columns
    n_unique = df[target_col].nunique()
    if n_unique > MAX_TARGET_CLASSES:
        raise ValueError(
            f"Target column '{target_col}' has {n_unique} unique values, "
            f"which looks like a continuous measurement rather than a "
            f"classification label. Choose a column with at most "
            f"{MAX_TARGET_CLASSES} distinct classes (e.g. a binary "
            f"outcome like 0/1)."
        )

    # Encode target
    y_raw = df[target_col]
    classes = sorted(y_raw.unique().tolist(), key=str)
    class_to_int = {c: i for i, c in enumerate(classes)}
    y = y_raw.map(class_to_int).values.astype(int)

    # Keep only numeric features (drop target + non-numeric)
    feature_df = df.drop(columns=[target_col])
    feature_df = feature_df.select_dtypes(include=[np.number])
    feature_names = list(feature_df.columns)

    dist_before = {str(k): int((y == v).sum()) for k, v in class_to_int.items()}

    # Dropping incomplete rows uses no dataset-wide statistics, so it is safe
    # to do before the split. Median/mode imputation is deliberately NOT done
    # here: computing fill values on the full dataset would leak test-set
    # information into training.
    if settings.missing_strategy == "drop":
        complete_rows = ~feature_df.isna().any(axis=1).to_numpy()
        feature_df = feature_df[complete_rows]
        y = y[complete_rows]
    X = feature_df.values.astype(float)

    # --- Train / test split (BEFORE imputation to avoid data leakage) ---
    # Use stratified split only when every class has at least 2 samples;
    # otherwise fall back to non-stratified to avoid ValueError.
    _, class_sizes = np.unique(y, return_counts=True)
    can_stratify = bool(class_sizes.size > 0 and class_sizes.min() >= 2)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=settings.test_size, random_state=42,
        stratify=y if can_stratify else None,
    )

    # --- Handle missing values AFTER split (train-only statistics) ---
    if settings.missing_strategy == "median":
        train_df = pd.DataFrame(X_train, columns=feature_names)
        fill_values = train_df.median()
        X_train = train_df.fillna(fill_values).values
        X_test = pd.DataFrame(X_test, columns=feature_names).fillna(fill_values).values
    elif settings.missing_strategy != "drop":  # mode
        train_df = pd.DataFrame(X_train, columns=feature_names)
        mode_rows = train_df.mode()
        # mode() is empty when there are no features or every value is NaN;
        # fall back to the median instead of failing on .iloc[0].
        fill_values = mode_rows.iloc[0] if not mode_rows.empty else train_df.median()
        X_train = train_df.fillna(fill_values).values
        X_test = pd.DataFrame(X_test, columns=feature_names).fillna(fill_values).values

    # --- Outlier handling (train statistics applied to test) ---
    if settings.outlier_handling == "iqr":
        train_df = pd.DataFrame(X_train, columns=feature_names)
        Q1 = train_df.quantile(0.25)
        Q3 = train_df.quantile(0.75)
        IQR = Q3 - Q1
        lower = Q1 - 1.5 * IQR
        upper = Q3 + 1.5 * IQR
        X_train = train_df.clip(lower=lower, upper=upper, axis=1).values
        X_test = pd.DataFrame(X_test, columns=feature_names).clip(lower=lower, upper=upper, axis=1).values
    elif settings.outlier_handling == "zscore_clip":
        train_df = pd.DataFrame(X_train, columns=feature_names)
        mean = train_df.mean()
        # A constant column has std 0; use 1 so the bounds stay finite.
        std = train_df.std().replace(0, 1)
        lower = mean - 3 * std
        upper = mean + 3 * std
        X_train = train_df.clip(lower=lower, upper=upper, axis=1).values
        X_test = pd.DataFrame(X_test, columns=feature_names).clip(lower=lower, upper=upper, axis=1).values

    # Capture raw (pre-scaling) arrays for session storage
    X_train_raw = X_train.copy()
    X_test_raw = X_test.copy()

    # --- Normalisation ---
    scaler = None
    normalization_applied = settings.normalization
    if settings.normalization == "zscore":
        scaler = StandardScaler()
    elif settings.normalization == "minmax":
        scaler = MinMaxScaler()

    if scaler is not None:
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    # --- Class filtering ---
    # Classes with fewer than 2 training samples are removed to prevent a
    # SMOTE ValueError.
    train_labels, train_counts = np.unique(y_train, return_counts=True)
    valid_classes = train_labels[train_counts >= 2]
    if len(valid_classes) < len(train_labels):
        logger.warning(
            "Dropped %d classes with only 1 sample before SMOTE/training.",
            len(train_labels) - len(valid_classes)
        )
    train_mask = np.isin(y_train, valid_classes)
    X_train = X_train[train_mask]
    X_train_raw = X_train_raw[train_mask]
    y_train = y_train[train_mask]

    # Always restrict the test set to classes present in training. With a
    # non-stratified split a rare class can land in the test split only, which
    # would otherwise leave a gap in the training labels after re-encoding.
    test_mask = np.isin(y_test, valid_classes)
    if not test_mask.all():
        logger.warning(
            "Dropped %d test samples whose class is absent from training.",
            int((~test_mask).sum()),
        )
    X_test = X_test[test_mask]
    X_test_raw = X_test_raw[test_mask]
    y_test = y_test[test_mask]

    # Re-encode labels to be contiguous (0..n-1) after any class filtering.
    # This prevents XGBoost/LightGBM "Invalid classes" errors when label
    # values have gaps (e.g. [0, 2, 5] instead of [0, 1, 2]).
    remaining_labels = np.unique(np.concatenate([y_train, y_test]))
    # Sorted unique non-negative ints are contiguous from 0 exactly when the
    # largest label equals len - 1.
    if len(remaining_labels) > 0 and remaining_labels[-1] != len(remaining_labels) - 1:
        label_map = {old: new for new, old in enumerate(remaining_labels)}
        y_train = np.array([label_map[v] for v in y_train], dtype=int)
        y_test = np.array([label_map[v] for v in y_test], dtype=int)
        # Rebuild classes list and mapping with new contiguous labels
        classes = [classes[old] for old in remaining_labels]
        class_to_int = {c: i for i, c in enumerate(classes)}
        logger.info(
            "Re-encoded %d classes to contiguous labels 0..%d",
            len(remaining_labels), len(remaining_labels) - 1,
        )

    # Preserve pre-SMOTE labels for leak-free CV (after filtering and re-encoding)
    y_train_original = y_train.copy()

    # --- SMOTE (training only, supports multi-class) ---
    smote_applied = False
    present_labels, present_counts = np.unique(y_train, return_counts=True)
    if settings.use_smote and len(present_labels) >= 2:
        try:
            # Cap the neighbour count at (smallest class size - 1), max 5.
            min_class_count = int(present_counts.min())
            k_neighbors = max(1, min(5, min_class_count - 1))
            smote = SMOTE(k_neighbors=k_neighbors, random_state=42)
            X_train, y_train = smote.fit_resample(X_train, y_train)
            smote_applied = True
            logger.info("SMOTE applied — training set resampled to %d samples", len(X_train))
        except Exception as exc:
            # Best effort: resampling is optional, so continue with the
            # unresampled training set rather than failing preparation.
            logger.warning("SMOTE failed: %s — proceeding without resampling", exc)

    dist_after = {str(k): int((y_train == v).sum()) for k, v in class_to_int.items()}

    # Normalisation preview: first training row, before vs after scaling,
    # for up to the first five features.
    norm_samples: list[dict[str, object]] = []
    sample_count = min(5, len(feature_names))
    for i in range(sample_count):
        before_val = float(X_train_raw[0, i]) if len(X_train_raw) > 0 else 0.0
        after_val = float(X_train[0, i]) if len(X_train) > 0 else 0.0
        norm_samples.append({
            "feature": feature_names[i],
            "before": round(before_val, 2),
            "after": round(after_val, 3),
        })

    response = PrepResponse(
        session_id=session_id,
        train_size=int(len(X_train)),
        test_size=int(len(X_test)),
        features_count=len(feature_names),
        class_distribution_before=dist_before,
        class_distribution_after=dist_after,
        smote_applied=smote_applied,
        normalization_applied=normalization_applied,
        norm_samples=norm_samples,
    )

    # Column metadata from raw DataFrame (before preprocessing)
    raw_column_meta = []
    for col in df.columns:
        series = df[col]
        raw_column_meta.append({
            "name": col,
            "dtype": str(series.dtype),
            "missing_count": int(series.isna().sum()),
            "missing_pct": round(series.isna().sum() / len(df) * 100, 2),
            "unique_count": int(series.nunique()),
            "sample_values": [str(v) for v in series.dropna().head(3).tolist()],
            "is_target": col == target_col,
        })

    # Persist to session store
    self._session_store[session_id] = {
        "X_train": X_train,
        "X_test": X_test,
        "y_train": y_train,
        "y_test": y_test,
        "feature_names": feature_names,
        "classes": [str(c) for c in classes],
        "scaler": scaler,
        "X_train_raw": X_train_raw,
        "X_test_raw": X_test_raw,
        "normalization": settings.normalization,
        "y_train_original": y_train_original,
        "smote_applied": smote_applied,
        "raw_column_meta": raw_column_meta,
        "row_count": len(df),
    }
    logger.info(
        "Session %s prepared — train=%d, test=%d, features=%d",
        session_id,
        len(X_train),
        len(X_test),
        len(feature_names),
    )

    return X_train, X_test, y_train, y_test, response, feature_names
|
| 388 |
+
|
| 389 |
+
def get_session(self, session_id: str) -> dict[str, Any] | None:
    """Look up a prepared session bundle; unknown ids yield `None`."""
    try:
        return self._session_store[session_id]
    except KeyError:
        return None
|
| 392 |
+
|
| 393 |
+
# ------------------------------------------------------------------
|
| 394 |
+
# Built-in example datasets
|
| 395 |
+
# ------------------------------------------------------------------
|
| 396 |
+
def get_example_dataset(self, specialty_id: str) -> pd.DataFrame:
    """Return the example dataframe for a specialty.

    Dispatches on ``specialty_id`` to the matching private loader method and
    returns whatever that loader produces.

    Raises:
        DatasetUnavailableError: If ``specialty_id`` has no registered loader.
    """
    loaders: dict[str, Any] = {
        "cardiology_hf": self._heart_failure,
        "radiology_pneumonia": self._pneumonia,
        "nephrology_ckd": self._ckd,
        "oncology_breast": self._breast_cancer,
        "neurology_parkinsons": self._parkinsons,
        "endocrinology_diabetes": self._diabetes,
        "hepatology_liver": self._liver,
        "cardiology_stroke": self._stroke,
        "mental_health": self._mental_health,
        "pulmonology_copd": self._copd,
        "haematology_anaemia": self._anaemia,
        "dermatology": self._dermatology,
        "ophthalmology": self._ophthalmology,
        "orthopaedics": self._orthopaedics,
        "icu_sepsis": self._sepsis,
        "obstetrics_fetal": self._fetal_health,
        "cardiology_arrhythmia": self._arrhythmia,
        "oncology_cervical": self._cervical,
        "thyroid": self._thyroid,
        "pharmacy_readmission": self._readmission,
    }
    # Guard clause: reject ids that have no loader before doing any work.
    if specialty_id not in loaders:
        raise DatasetUnavailableError(specialty_id, f"Unknown specialty ID '{specialty_id}'")
    frame = loaders[specialty_id]()
    logger.info("Example dataset generated for '%s': %d rows", specialty_id, len(frame))
    return frame
|
| 426 |
+
|
| 427 |
+
# ------ Dataset generators ------
|
| 428 |
+
|
| 429 |
+
def _heart_failure(self) -> pd.DataFrame:
    """
    Fetch the heart-failure clinical records table (cache key ``cardiology_hf``).

    Registered under ``"cardiology_hf"`` in the loader table of
    `get_example_dataset`. Raises `DatasetUnavailableError` when the target
    column ``DEATH_EVENT`` is absent from the loaded frame.
    """
    source_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00519/heart_failure_clinical_records_dataset.csv"
    records = self._fetch_cached("cardiology_hf", source_url)
    if "DEATH_EVENT" in records.columns:
        return records
    raise DatasetUnavailableError("cardiology_hf", "Missing required column 'DEATH_EVENT'")
|
| 441 |
+
|
| 442 |
+
def _breast_cancer(self) -> pd.DataFrame:
    """
    Build the breast-cancer example frame from scikit-learn's bundled dataset.

    Registered under ``"oncology_breast"`` in the loader table of
    `get_example_dataset`. The numeric target is exposed as a ``diagnosis``
    column holding ``"B"`` (target 1) or ``"M"`` (target 0), column names have
    spaces replaced by underscores, and only a fixed subset of columns is kept.
    """
    from sklearn.datasets import load_breast_cancer

    bunch = load_breast_cancer(as_frame=True)
    table = bunch.frame.copy()
    # Replace the 0/1 target with letter labels in a separate column.
    table["diagnosis"] = bunch.target.map({1: "B", 0: "M"})
    table = table.drop(columns=["target"])
    # Column names contain spaces ("mean radius"); switch to underscores.
    table.columns = [name.replace(" ", "_") for name in table.columns]
    # 14 feature columns (mean + worst geometry/texture only) plus the label.
    feature_names = [
        "mean_radius", "mean_texture", "mean_perimeter", "mean_area",
        "mean_smoothness", "mean_compactness", "mean_concavity",
        "mean_concave_points", "mean_symmetry", "worst_radius",
        "worst_texture", "worst_perimeter", "worst_area", "worst_smoothness",
    ]
    wanted = [*feature_names, "diagnosis"]
    present = [name for name in wanted if name in table.columns]
    return table[present]
|
| 464 |
+
|
| 465 |
+
def _diabetes(self) -> pd.DataFrame:
    """
    Fetch the Pima diabetes table (cache key ``endocrinology_diabetes``).

    Registered under ``"endocrinology_diabetes"`` in the loader table of
    `get_example_dataset`. The source file is read without a header row, so
    the column names are supplied here. Raises `DatasetUnavailableError` when
    the ``Outcome`` target column is absent.
    """
    column_names = [
        "pregnancies", "glucose", "blood_pressure", "skin_thickness",
        "insulin", "bmi", "diabetes_pedigree_function", "age", "Outcome",
    ]
    source_url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
    frame = self._fetch_cached(
        "endocrinology_diabetes",
        source_url,
        read_kwargs={"header": None, "names": column_names},
    )
    if "Outcome" in frame.columns:
        return frame
    raise DatasetUnavailableError("endocrinology_diabetes", "Missing required column 'Outcome'")
|
| 482 |
+
|
| 483 |
+
def _ckd(self) -> pd.DataFrame:
    """
    Read the chronic-kidney-disease table from ``nephrology_ckd.csv`` in the cache dir.

    Registered under ``"nephrology_ckd"`` in the loader table of
    `get_example_dataset`. Abbreviated column names are expanded, the
    ``classification`` label is cleaned and restricted to ``ckd``/``notckd``,
    and every other column is coerced to numeric (unparseable values become NaN).

    Raises `DatasetUnavailableError` when the cache file is missing, the
    label column is absent, or fewer than 100 rows remain.
    """
    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
    cached_csv = _CACHE_DIR / "nephrology_ckd.csv"
    if not cached_csv.exists():
        raise DatasetUnavailableError("nephrology_ckd", f"Cache file not found: {cached_csv}")

    abbreviations = {
        "bp": "blood_pressure", "sg": "specific_gravity",
        "al": "albumin", "su": "sugar",
        "rbc": "red_blood_cells", "pc": "pus_cell",
        "bgr": "blood_glucose_random", "bu": "blood_urea",
        "sc": "serum_creatinine", "sod": "sodium",
        "pot": "potassium", "hemo": "haemoglobin",
        "pcv": "packed_cell_volume", "wc": "white_blood_cell_count",
        "rc": "red_blood_cell_count",
        "htn": "hypertension", "dm": "diabetes_mellitus",
        "cad": "coronary_artery_disease",
        "appet": "appetite", "pe": "pedal_oedema", "ane": "anemia",
        "class": "classification",
    }
    # rename() ignores keys that are not present, so no pre-filtering is needed.
    table = pd.read_csv(cached_csv).rename(columns=abbreviations)
    if "classification" not in table.columns:
        raise DatasetUnavailableError("nephrology_ckd", "Missing required column 'classification'")

    # Strip surrounding whitespace and trailing dots before filtering labels.
    labels = table["classification"].astype(str).str.strip().str.rstrip(".")
    table["classification"] = labels
    table = table[labels.isin(["ckd", "notckd"])].copy()

    for name in table.columns:
        if name == "classification":
            continue
        table[name] = pd.to_numeric(table[name], errors="coerce")

    n_rows = len(table)
    if n_rows < 100:
        raise DatasetUnavailableError("nephrology_ckd", f"Dataset too small ({n_rows} rows)")
    return table
|
| 520 |
+
|
| 521 |
+
def _parkinsons(self) -> pd.DataFrame:
    """
    Fetch the Parkinson's voice-measurement table (cache key ``neurology_parkinsons``).

    Registered under ``"neurology_parkinsons"`` in the loader table of
    `get_example_dataset`. The ``name`` column is dropped when present and
    column names containing ``:``, ``(`` or ``%`` are replaced by
    identifier-friendly equivalents. Raises `DatasetUnavailableError` when
    the ``status`` target column is absent.
    """
    source_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data"
    frame = self._fetch_cached("neurology_parkinsons", source_url)
    frame = frame.drop(columns=["name"], errors="ignore")

    friendly_names = {
        "MDVP:Fo(Hz)": "MDVP_Fo_Hz",
        "MDVP:Fhi(Hz)": "MDVP_Fhi_Hz",
        "MDVP:Flo(Hz)": "MDVP_Flo_Hz",
        "MDVP:Jitter(%)": "MDVP_Jitter_pct",
        "MDVP:Jitter(Abs)": "MDVP_Jitter_Abs",
        "MDVP:RAP": "MDVP_RAP",
        "MDVP:PPQ": "MDVP_PPQ",
        "Jitter:DDP": "Jitter_DDP",
        "MDVP:Shimmer": "MDVP_Shimmer",
        "MDVP:Shimmer(dB)": "MDVP_Shimmer_dB",
        "Shimmer:APQ3": "Shimmer_APQ3",
        "Shimmer:APQ5": "Shimmer_APQ5",
        "MDVP:APQ": "MDVP_APQ",
        "Shimmer:DDA": "Shimmer_DDA",
    }
    # rename() skips keys that are not present in the frame.
    frame = frame.rename(columns=friendly_names)

    if "status" in frame.columns:
        return frame
    raise DatasetUnavailableError("neurology_parkinsons", "Missing required column 'status'")
|
| 552 |
+
|
| 553 |
+
def _liver(self) -> pd.DataFrame:
    """
    Fetch the Indian Liver Patient table (cache key ``hepatology_liver``).

    Registered under ``"hepatology_liver"`` in the loader table of
    `get_example_dataset`. The source has no header row, so column names are
    supplied here. A textual ``gender`` column is encoded as 1 for ``"Male"``
    and 0 otherwise, and missing ``albumin_globulin_ratio`` values are filled
    with the column median. Raises `DatasetUnavailableError` when the
    ``Dataset`` target column is absent.
    """
    column_names = [
        "age", "gender", "total_bilirubin", "direct_bilirubin",
        "alkaline_phosphotase", "alamine_aminotransferase",
        "aspartate_aminotransferase", "total_proteins",
        "albumin", "albumin_globulin_ratio", "Dataset",
    ]
    source_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00225/Indian%20Liver%20Patient%20Dataset%20(ILPD).csv"
    frame = self._fetch_cached(
        "hepatology_liver",
        source_url,
        read_kwargs={"header": None, "names": column_names},
    )
    if "Dataset" not in frame.columns:
        raise DatasetUnavailableError("hepatology_liver", "Missing required column 'Dataset'")

    gender = frame["gender"]
    if gender.dtype == object:
        frame["gender"] = gender.eq("Male").astype(int)

    ratio = frame["albumin_globulin_ratio"]
    frame["albumin_globulin_ratio"] = ratio.fillna(ratio.median())
    return frame
|
| 577 |
+
|
| 578 |
+
def _stroke(self) -> pd.DataFrame:
    """
    Fetch and encode the stroke-prediction table (cache key ``cardiology_stroke``).

    Registered under ``"cardiology_stroke"`` in the loader table of
    `get_example_dataset`. The ``id`` column is dropped, textual categorical
    columns are mapped to integer codes (unknown categories become 0),
    ``Residence_type`` is renamed to ``residence_type`` and encoded as
    1 for ``"Urban"``, and ``bmi`` / ``stroke`` are coerced to numeric. Rows
    without a usable ``stroke`` label are removed.

    Raises:
        DatasetUnavailableError: when the fetch fails, the ``stroke`` column is
            absent, or fewer than 100 labelled rows remain.
    """
    try:
        df = self._fetch_cached(
            "cardiology_stroke",
            "https://raw.githubusercontent.com/04-aditya/Stroke-Prediction-using-R/main/healthcare-dataset-stroke-data.csv",
        )
    except DatasetUnavailableError as exc:
        # Re-raise with the licensing explanation, keeping the root cause chained.
        raise DatasetUnavailableError(
            "cardiology_stroke",
            "This dataset has no formal open license and cannot be bundled. "
            "It must be downloaded at runtime for educational use only, "
            "but the download failed. Check your network connection.",
        ) from exc
    if "stroke" not in df.columns:
        raise DatasetUnavailableError(
            "cardiology_stroke",
            "Missing required column 'stroke'. "
            "This dataset has no formal open license and cannot be bundled. "
            "It will be downloaded at runtime for educational use only.",
        )
    if "id" in df.columns:
        df = df.drop(columns=["id"])
    cat_encodings: dict[str, dict] = {
        "gender": {"Male": 1, "Female": 0, "Other": 0},
        "ever_married": {"Yes": 1, "No": 0},
        "work_type": {"children": 0, "Govt_job": 1, "Never_worked": 2, "Private": 3, "Self-employed": 4},
        "smoking_status": {"never smoked": 0, "Unknown": 1, "formerly smoked": 2, "smokes": 3},
    }
    for col, mapping in cat_encodings.items():
        # Only encode columns that are still textual; values outside the mapping become 0.
        if col in df.columns and df[col].dtype == object:
            df[col] = df[col].map(mapping).fillna(0).astype(int)
    if "Residence_type" in df.columns:
        df = df.rename(columns={"Residence_type": "residence_type"})
    if "residence_type" in df.columns and df["residence_type"].dtype == object:
        df["residence_type"] = (df["residence_type"] == "Urban").astype(int)
    # Guarded like the other optional columns so a missing 'bmi' does not
    # escape as a raw KeyError.
    if "bmi" in df.columns:
        df["bmi"] = pd.to_numeric(df["bmi"], errors="coerce")
    df["stroke"] = pd.to_numeric(df["stroke"], errors="coerce")
    df = df.dropna(subset=["stroke"])
    if len(df) < 100:
        raise DatasetUnavailableError(
            "cardiology_stroke",
            f"Dataset too small ({len(df)} rows). "
            "This dataset has no formal open license and cannot be bundled. "
            "It will be downloaded at runtime for educational use only.",
        )
    return df
|
| 628 |
+
|
| 629 |
+
def _mental_health(self) -> pd.DataFrame:
    """
    Load the depression/mental-health table from a CSV in the cache directory.

    Registered under ``"mental_health"`` in the loader table of
    `get_example_dataset`. Two candidate file names are tried in order; the
    first one that yields at least 100 labelled rows is returned. For each
    candidate the name column is dropped, ordinal and yes/no text columns are
    encoded as integers, the target is derived into ``severity_class``
    (``has_condition`` / ``no_condition``), columns are renamed to snake_case,
    remaining text columns are replaced by categorical codes, and frames above
    5000 rows are down-sampled to 5000 (stratified on the target when it has
    more than one class).

    A candidate that fails to load, lacks a target column, or is too small is
    skipped with a warning so that the next candidate can be tried.

    Raises:
        DatasetUnavailableError: when no candidate file produced a usable frame.
    """
    # Lookup tables are loop-invariant, so build them once.
    ordinal_maps = {
        "Dietary Habits": {"Healthy": 2, "Moderate": 1, "Unhealthy": 0},
        "Sleep Patterns": {"Good": 2, "Fair": 1, "Poor": 0},
        "Alcohol Consumption": {"Low": 0, "Moderate": 1, "High": 2},
        "Physical Activity Level": {"Active": 2, "Moderate": 1, "Sedentary": 0},
        "Smoking Status": {"Non-smoker": 0, "Former": 1, "Current": 2},
        "Employment Status": {"Employed": 1, "Unemployed": 0},
    }
    yes_no_cols = [
        "History of Substance Abuse", "Family History of Depression",
        "Chronic Medical Conditions",
    ]
    col_rename = {
        "Age": "age",
        "Number of Children": "number_of_children",
        "Income": "income",
        "Dietary Habits": "dietary_habits",
        "Sleep Patterns": "sleep_patterns",
        "Alcohol Consumption": "alcohol_consumption",
        "Physical Activity Level": "physical_activity_level",
        "Smoking Status": "smoking_status",
        "Employment Status": "employment_status",
        "History of Substance Abuse": "history_substance_abuse",
        "Family History of Depression": "family_history_depression",
        "Chronic Medical Conditions": "chronic_medical_conditions",
        "Marital Status": "marital_status",
        "Education Level": "education_level",
    }

    for candidate in ("depression_data.csv", "mental_health_depression.csv"):
        csv_cache = _CACHE_DIR / candidate
        if not csv_cache.exists():
            continue
        try:
            df = pd.read_csv(csv_cache)
            df = df.drop(columns=[c for c in ["Name", "name"] if c in df.columns])
            for col, mapping in ordinal_maps.items():
                if col in df.columns:
                    # Unmapped categories fall back to the middle code 1.
                    df[col] = df[col].map(mapping).fillna(1).astype(int)
            for col in yes_no_cols:
                if col in df.columns and df[col].dtype == object:
                    df[col] = (df[col].str.lower() == "yes").astype(int)
            if "History of Mental Illness" in df.columns:
                df["severity_class"] = df["History of Mental Illness"].map(
                    {"Yes": "has_condition", "No": "no_condition"}
                )
                df = df.drop(columns=["History of Mental Illness"])
            elif "Depression" in df.columns:
                df["severity_class"] = df["Depression"].map({1: "has_condition", 0: "no_condition"})
                df = df.drop(columns=["Depression"])
            # Check for the target BEFORE dropna(subset=...), which would
            # otherwise raise an opaque KeyError on a missing column.
            if "severity_class" not in df.columns:
                logger.warning(
                    "Mental health CSV %s has no usable target column; skipping", candidate
                )
                continue
            df = df.rename(columns={k: v for k, v in col_rename.items() if k in df.columns})
            for col in df.columns:
                if col != "severity_class" and df[col].dtype == object:
                    df[col] = pd.Categorical(df[col]).codes
            df = df.dropna(subset=["severity_class"])
            if len(df) < 100:
                logger.warning(
                    "Mental health CSV %s has only %d labelled rows; skipping", candidate, len(df)
                )
                continue
            if len(df) > 5000:
                from sklearn.model_selection import train_test_split as _tts
                # Keep the "test" side of the split as a 5000-row sample.
                _, df = _tts(
                    df, test_size=5000, random_state=42,
                    stratify=df["severity_class"] if df["severity_class"].nunique() > 1 else None,
                )
            df = df.reset_index(drop=True)
            logger.info("Loaded real mental health dataset (%d rows) from %s", len(df), candidate)
            return df
        except Exception as exc:
            # Deliberate best-effort: a broken candidate must not block the next one.
            logger.warning("Mental health CSV load failed (%s): %s", candidate, exc)

    raise DatasetUnavailableError(
        "mental_health",
        "Real mental health dataset not found in data_cache/. "
        "Download from kaggle.com/datasets/anthonytherrien/depression-dataset "
        "and save as depression_data.csv in data_cache/",
    )
|
| 706 |
+
|
| 707 |
+
def _copd(self) -> pd.DataFrame:
    """
    Load the COPD table from ``pulmonology_copd.csv`` in the cache directory.

    Registered under ``"pulmonology_copd"`` in the loader table of
    `get_example_dataset`. Known column-name variants are normalised to
    snake_case, a textual ``sex`` column is encoded as 1 for m/male/1 and 0
    otherwise, all columns are coerced to numeric, rows without an
    ``exacerbation`` label are dropped, and only a fixed set of columns is kept.

    Raises:
        DatasetUnavailableError: when the cache file is missing, the
            ``exacerbation`` target column is absent, or fewer than 100
            labelled rows remain.
    """
    csv_cache = _CACHE_DIR / "pulmonology_copd.csv"
    if not csv_cache.exists():
        raise DatasetUnavailableError(
            "pulmonology_copd",
            f"Real COPD dataset not found at {csv_cache}. "
            "Download from kaggle.com/datasets/prakharrathi25/copd-student-dataset "
            "or physionet.org/content/copd-ehr/1.0.0/ "
            "and save as pulmonology_copd.csv in data_cache/",
        )

    df = pd.read_csv(csv_cache)
    col_rename = {
        "AGE": "age", "Age": "age",
        "SEX": "sex", "Sex": "sex", "GENDER": "sex", "Gender": "sex",
        "SMOKING_PACK_YEARS": "smoking_pack_years", "PackYears": "smoking_pack_years",
        "FEV1": "fev1_litres", "FEV1_LITRES": "fev1_litres",
        "FVC": "fvc_litres", "FVC_LITRES": "fvc_litres",
        "FEV1_FVC": "fev1_fvc_ratio", "FEV1FVC": "fev1_fvc_ratio",
        "PRIOR_EXAC": "prior_exacerbations_year", "ExacerbationRate": "prior_exacerbations_year",
        "BMI": "bmi",
        "MRC": "mrc_dyspnea_scale", "MRCScore": "mrc_dyspnea_scale",
        "SGRQ": "sgrq_score", "SGRQTotal": "sgrq_score",
        "GOLD_STAGE": "copd_gold_stage", "GOLDStage": "copd_gold_stage",
        "EXACERBATION": "exacerbation", "Exacerbation": "exacerbation",
        "EXAC": "exacerbation",
    }
    df = df.rename(columns={k: v for k, v in col_rename.items() if k in df.columns})
    # Validate the target up front: dropna(subset=...) below would raise a raw
    # KeyError on a missing column instead of the documented error type.
    if "exacerbation" not in df.columns:
        raise DatasetUnavailableError(
            "pulmonology_copd", "Missing required column 'exacerbation'"
        )
    if "sex" in df.columns and df["sex"].dtype == object:
        df["sex"] = (df["sex"].str.lower().isin(["m", "male", "1"])).astype(int)
    for col in df.columns:
        if col != "exacerbation":
            df[col] = pd.to_numeric(df[col], errors="coerce")
    if df["exacerbation"].dtype == object:
        df["exacerbation"] = pd.to_numeric(df["exacerbation"], errors="coerce")
    df = df.dropna(subset=["exacerbation"])
    keep = [
        "age", "sex", "smoking_pack_years", "fev1_litres", "fvc_litres",
        "fev1_fvc_ratio", "prior_exacerbations_year", "bmi",
        "mrc_dyspnea_scale", "sgrq_score", "copd_gold_stage", "exacerbation",
    ]
    available = [c for c in keep if c in df.columns]
    df = df[available]
    if len(df) < 100:
        raise DatasetUnavailableError("pulmonology_copd", f"Dataset too small or missing target ({len(df)} rows)")
    logger.info("Loaded real COPD dataset (%d rows)", len(df))
    return df
|
| 758 |
+
|
| 759 |
+
def _anaemia(self) -> pd.DataFrame:
    """
    Fetch the anaemia table (cache key ``haematology_anaemia``).

    Registered under ``"haematology_anaemia"`` in the loader table of
    `get_example_dataset`. Source columns are renamed (``Result`` becomes the
    ``anemia_type`` target), every column is coerced to numeric, and rows
    without a target value are dropped.

    Raises `DatasetUnavailableError` when the fetch fails or the target
    column is absent.
    """
    source_url = "https://raw.githubusercontent.com/maladeep/anemia-detection-with-machine-learning/master/anemia%20data%20from%20Kaggle.csv"
    try:
        raw = self._fetch_cached("haematology_anaemia", source_url)
    except DatasetUnavailableError:
        raise DatasetUnavailableError(
            "haematology_anaemia",
            "This dataset has an unknown license and cannot be bundled. "
            "It must be downloaded at runtime for educational use only, "
            "but the download failed. Check your network connection.",
        )

    # rename() skips keys that are not present in the frame.
    table = raw.rename(columns={
        "Gender": "gender", "Hemoglobin": "haemoglobin",
        "MCH": "mch", "MCHC": "mchc", "MCV": "mcv",
        "Result": "anemia_type",
    })
    # Gender is already 0/1 in the source CSV; the numeric coercion also
    # absorbs stray whitespace or string variants in any column.
    for name in table.columns:
        table[name] = pd.to_numeric(table[name], errors="coerce")

    if "anemia_type" in table.columns:
        return table.dropna(subset=["anemia_type"])
    raise DatasetUnavailableError(
        "haematology_anaemia",
        "Missing required column 'anemia_type'. "
        "This dataset has an unknown license and cannot be bundled. "
        "It will be downloaded at runtime for educational use only.",
    )
|
| 795 |
+
|
| 796 |
+
def _dermatology(self) -> pd.DataFrame:
    """
    Load the dermatology lesion metadata and reduce it to a binary task.

    Registered under ``"dermatology"`` in the loader table of
    `get_example_dataset`. ``dermatology.csv`` in the cache directory is used
    when it can be read and has a ``dx`` column; otherwise the table is
    fetched through ``_fetch_cached`` as tab-separated text. The ``dx`` code is
    collapsed into ``dx_type`` (``malignant`` for mel/bcc/akiec, else
    ``benign``), ``sex`` is encoded as 1 for ``"male"``, ``localization`` is
    replaced by the index of its value in sorted order, and only
    age/sex/localization/dx_type are kept.

    Raises:
        DatasetUnavailableError: when ``dx`` is absent or fewer than 100 rows remain.
    """
    csv_cache = _CACHE_DIR / "dermatology.csv"
    df = None
    if csv_cache.exists():
        try:
            df = pd.read_csv(csv_cache)
        except Exception as exc:
            # Best-effort: fall back to the download below, but leave a trace
            # instead of swallowing the failure silently.
            logger.warning("Dermatology CSV load failed (%s): %s", csv_cache, exc)
    if df is None or "dx" not in df.columns:
        df = self._fetch_cached(
            "dermatology_tsv",
            "https://dataverse.harvard.edu/api/access/datafile/4338392",
            read_kwargs={"sep": "\t", "quotechar": '"'},
        )
    if "dx" not in df.columns:
        raise DatasetUnavailableError("dermatology", "Missing required column 'dx'")
    malignant = {"mel", "bcc", "akiec"}
    df["dx_type"] = df["dx"].apply(
        lambda x: "malignant" if str(x).strip() in malignant else "benign"
    )
    if "sex" in df.columns and df["sex"].dtype == object:
        df["sex"] = (df["sex"] == "male").astype(int)
    if "localization" in df.columns and df["localization"].dtype == object:
        # Exclude NaN before sorting (str vs float comparison would raise);
        # missing values are mapped to 0 by the fillna below.
        locs = df["localization"].dropna().unique()
        loc_map = {v: i for i, v in enumerate(sorted(locs))}
        df["localization"] = df["localization"].map(loc_map).fillna(0).astype(int)
    # Guarded like the other optional columns so a missing 'age' does not
    # escape as a raw KeyError.
    if "age" in df.columns:
        df["age"] = pd.to_numeric(df["age"], errors="coerce")
    keep = ["age", "sex", "localization", "dx_type"]
    df = df[[c for c in keep if c in df.columns]].dropna(subset=["dx_type"])
    if len(df) < 100:
        raise DatasetUnavailableError("dermatology", f"Dataset too small ({len(df)} rows)")
    return df
|
| 832 |
+
|
| 833 |
+
def _ophthalmology(self) -> pd.DataFrame:
    """
    Load the diabetic-retinopathy ARFF table from the cache directory.

    Registered under ``"ophthalmology"`` in the loader table of
    `get_example_dataset`. When ``ophthalmology.arff`` is not cached, the
    source ZIP is downloaded and its first ``.arff`` member is written to the
    cache. The ARFF is parsed with SciPy, byte-string columns are decoded, all
    columns are coerced to numeric, the last column becomes the integer
    ``severity_grade`` target, and the feature columns receive descriptive
    names when their count matches the expected 19.

    Raises:
        DatasetUnavailableError: when the download fails, no ARFF file is
            available afterwards, or fewer than 100 labelled rows remain.
    """
    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
    arff_cache = _CACHE_DIR / "ophthalmology.arff"
    if not arff_cache.exists():
        try:
            resp = requests.get(
                "https://archive.ics.uci.edu/static/public/329/diabetic+retinopathy+debrecen+data+set.zip",
                timeout=30, headers={"User-Agent": "HealthWithSevgi/1.0"},
            )
            resp.raise_for_status()
            with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
                arff_names = [n for n in zf.namelist() if n.endswith(".arff")]
                if arff_names:
                    arff_cache.write_bytes(zf.read(arff_names[0]))
                    logger.info("Extracted Debrecen DR ARFF (%d bytes)", arff_cache.stat().st_size)
        except Exception as exc:
            raise DatasetUnavailableError(
                "ophthalmology", f"Failed to download Debrecen DR ARFF: {exc}"
            ) from exc

    # Still missing here means the archive contained no .arff member.
    if not arff_cache.exists():
        raise DatasetUnavailableError("ophthalmology", f"ARFF file not found: {arff_cache}")

    from scipy.io import arff as scipy_arff
    data, _meta = scipy_arff.loadarff(str(arff_cache))
    df = pd.DataFrame(data)
    for col in df.columns:
        # Object columns hold byte strings after loadarff; decode before coercion.
        if df[col].dtype == object:
            df[col] = df[col].str.decode("utf-8").str.strip()
    for col in df.columns:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    cols = list(df.columns)
    feature_cols = cols[:-1]
    target_col = cols[-1]
    df = df.rename(columns={target_col: "severity_grade"})
    # Drop unlabelled rows BEFORE the integer cast: astype(int) raises on NaN,
    # so casting first would crash instead of discarding bad rows.
    df = df.dropna(subset=["severity_grade"])
    df["severity_grade"] = df["severity_grade"].astype(int)
    named_features = [
        "quality_assessment", "pre_screening", "ma_detection_0.5",
        "ma_detection_0.6", "ma_detection_0.7", "ma_detection_0.8",
        "ma_detection_0.9", "ma_detection_1.0",
        "exudate_1", "exudate_2", "exudate_3", "exudate_4",
        "exudate_5", "exudate_6", "exudate_7", "exudate_8",
        "macula_od_distance", "optic_disc_diameter", "am_fm_classification",
    ]
    # Only rename positionally when the feature count matches exactly.
    if len(feature_cols) == len(named_features):
        df = df.rename(columns=dict(zip(feature_cols, named_features)))
    if len(df) < 100:
        raise DatasetUnavailableError("ophthalmology", f"Dataset too small ({len(df)} rows)")
    return df
|
| 888 |
+
|
| 889 |
+
def _orthopaedics(self) -> pd.DataFrame:
    """
    Load the vertebral-column ARFF table from the cache directory.

    Registered under ``"orthopaedics"`` in the loader table of
    `get_example_dataset`. When ``orthopaedics.arff`` is not cached, the source
    ZIP is downloaded and its first member ending in ``_weka.arff`` is written
    to the cache. The ARFF is parsed with SciPy, byte-string columns are
    decoded, and the columns are renamed positionally when exactly seven are
    present.

    Raises `DatasetUnavailableError` when the download fails, no ARFF file is
    available afterwards, or the ``class`` column is absent.
    """
    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
    arff_path = _CACHE_DIR / "orthopaedics.arff"

    if not arff_path.exists():
        try:
            response = requests.get(
                "https://archive.ics.uci.edu/static/public/212/vertebral+column.zip",
                timeout=30, headers={"User-Agent": "HealthWithSevgi/1.0"},
            )
            response.raise_for_status()
            with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
                weka_members = [m for m in archive.namelist() if m.endswith("_weka.arff")]
                if weka_members:
                    arff_path.write_bytes(archive.read(weka_members[0]))
                    logger.info("Extracted vertebral column ARFF (%d bytes)", arff_path.stat().st_size)
        except Exception as exc:
            raise DatasetUnavailableError(
                "orthopaedics", f"Failed to download vertebral column ARFF: {exc}"
            ) from exc

    if not arff_path.exists():
        raise DatasetUnavailableError("orthopaedics", f"ARFF file not found: {arff_path}")

    from scipy.io import arff as scipy_arff
    records, _ = scipy_arff.loadarff(str(arff_path))
    frame = pd.DataFrame(records)
    for name in frame.columns:
        # Object columns hold byte strings after loadarff.
        if frame[name].dtype == object:
            frame[name] = frame[name].str.decode("utf-8")

    expected_names = [
        "pelvic_incidence", "pelvic_tilt", "lumbar_lordosis_angle",
        "sacral_slope", "pelvic_radius", "degree_spondylolisthesis", "class",
    ]
    if len(frame.columns) == len(expected_names):
        frame.columns = expected_names

    if "class" in frame.columns:
        return frame
    raise DatasetUnavailableError("orthopaedics", "Missing required column 'class'")
|
| 932 |
+
|
| 933 |
+
def _sepsis(self) -> pd.DataFrame:
    """
    Load the ICU sepsis table from ``icu_sepsis.csv`` in the cache directory.

    Registered under ``"icu_sepsis"`` in the loader table of
    `get_example_dataset`. The file is re-read with ``|`` as separator when a
    comma parse yields a single column. A fixed set of columns is kept, the
    ``SepsisLabel`` target is coerced to nullable integers and unlabelled rows
    are dropped. Frames above 5000 rows are capped while keeping every
    positive row.

    Raises:
        DatasetUnavailableError: when the cache file is missing, the
            ``SepsisLabel`` column is absent, or fewer than 100 labelled rows remain.
    """
    csv_cache = _CACHE_DIR / "icu_sepsis.csv"
    if not csv_cache.exists():
        raise DatasetUnavailableError(
            "icu_sepsis",
            f"Real ICU/Sepsis dataset not found at {csv_cache}. "
            "Download from physionet.org/content/challenge-2019/1.0.0/, "
            "merge PSV files into one CSV, and save as icu_sepsis.csv in data_cache/",
        )

    df = pd.read_csv(csv_cache)
    if len(df.columns) == 1:
        # A single column suggests the file is pipe-separated rather than CSV.
        df = pd.read_csv(csv_cache, sep="|")
    # Validate the target up front: dropna(subset=...) below would raise a raw
    # KeyError on a missing column instead of the documented error type.
    if "SepsisLabel" not in df.columns:
        raise DatasetUnavailableError("icu_sepsis", "Missing required column 'SepsisLabel'")
    keep = [
        "HR", "O2Sat", "Temp", "SBP", "MAP", "Resp",
        "BaseExcess", "pH", "PaCO2", "Lactate", "Creatinine",
        "Bilirubin_total", "WBC", "Platelets", "Age", "Gender", "SepsisLabel",
    ]
    available = [c for c in keep if c in df.columns]
    df = df[available].copy()
    df["SepsisLabel"] = pd.to_numeric(df["SepsisLabel"], errors="coerce").astype("Int64")
    df = df.dropna(subset=["SepsisLabel"])
    if len(df) < 100:
        raise DatasetUnavailableError("icu_sepsis", f"Dataset too small ({len(df)} rows)")
    if len(df) > 5000:
        # Stratified cap: guarantee all positive (sepsis=1) cases are retained,
        # then fill the remaining budget with negatives. A random cap at 5000 rows
        # would yield only ~100-250 positives at 2-5% prevalence, making the
        # imbalance effectively 20-50:1. This preserves every real sepsis case.
        sep_pos = df[df["SepsisLabel"] == 1]
        sep_neg = df[df["SepsisLabel"] == 0]
        n_neg = max(0, 5000 - len(sep_pos))
        if len(sep_neg) > n_neg:
            sep_neg = sep_neg.sample(n_neg, random_state=42)
        # Shuffle so positives and negatives are interleaved.
        df = pd.concat([sep_pos, sep_neg]).sample(frac=1, random_state=42).reset_index(drop=True)
    logger.info("Loaded real ICU sepsis dataset (%d rows, %d positive)", len(df), int((df["SepsisLabel"] == 1).sum()))
    return df
|
| 974 |
+
|
| 975 |
+
def _fetal_health(self) -> pd.DataFrame:
    """
    Load the fetal-health table from ``obstetrics_fetal.csv`` in the cache directory.

    Registered under ``"obstetrics_fetal"`` in the loader table of
    `get_example_dataset`. The file is re-read with ``;`` as separator when a
    comma parse yields at most two columns. Abbreviated column names are
    expanded (``NSP`` becomes the ``fetal_health`` target), the target is
    coerced to integers with unparseable rows dropped, and only the mapped
    columns are kept.

    Raises `DatasetUnavailableError` when the cache file is missing, the
    target column is absent, or fewer than 100 rows remain.
    """
    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
    cached_csv = _CACHE_DIR / "obstetrics_fetal.csv"
    if not cached_csv.exists():
        raise DatasetUnavailableError("obstetrics_fetal", f"Cache file not found: {cached_csv}")

    table = pd.read_csv(cached_csv)
    if len(table.columns) <= 2:
        table = pd.read_csv(cached_csv, sep=";")

    short_to_long = {
        "LB": "baseline_value", "AC": "accelerations", "FM": "fetal_movement",
        "UC": "uterine_contractions", "DL": "light_decelerations",
        "DS": "severe_decelerations", "DP": "prolongued_decelerations",
        "ASTV": "abnormal_short_term_variability",
        "MSTV": "mean_value_short_term_variability",
        "ALTV": "pct_time_abnormal_long_term_variability",
        "MLTV": "mean_value_long_term_variability",
        "Mode": "histogram_mode",
        "NSP": "fetal_health",
    }
    # rename() skips keys that are not present in the frame.
    table = table.rename(columns=short_to_long)
    if "fetal_health" not in table.columns:
        raise DatasetUnavailableError("obstetrics_fetal", "Missing required column 'fetal_health'")

    # Coerce, drop unlabelled rows, and only then cast to int.
    table["fetal_health"] = pd.to_numeric(table["fetal_health"], errors="coerce")
    table = table.dropna(subset=["fetal_health"])
    table["fetal_health"] = table["fetal_health"].astype(int)

    selected = [long_name for long_name in short_to_long.values() if long_name in table.columns]
    table = table[selected].dropna(subset=["fetal_health"])

    n_rows = len(table)
    if n_rows < 100:
        raise DatasetUnavailableError("obstetrics_fetal", f"Dataset too small ({n_rows} rows)")
    return table
|
| 1011 |
+
|
| 1012 |
+
def _arrhythmia(self) -> pd.DataFrame:
    """
    Fetch the arrhythmia table (cache key ``cardiology_arrhythmia``).

    Registered under ``"cardiology_arrhythmia"`` in the loader table of
    `get_example_dataset`. The source has no header row: 279 feature columns
    plus a class column are named here and ``?`` is read as missing. The class
    is collapsed into a binary ``arrhythmia`` target (0 when the class equals
    1, otherwise 1), the first 15 feature columns receive descriptive names,
    and every column is coerced to numeric.

    Raises `DatasetUnavailableError` when the class column is absent or fewer
    than 100 rows remain.
    """
    n_features = 279
    column_names = [f"feature_{idx}" for idx in range(n_features)]
    column_names.append("arrhythmia_class")
    frame = self._fetch_cached(
        "cardiology_arrhythmia",
        "https://archive.ics.uci.edu/ml/machine-learning-databases/arrhythmia/arrhythmia.data",
        read_kwargs={"header": None, "names": column_names, "na_values": "?"},
    )
    if "arrhythmia_class" not in frame.columns:
        raise DatasetUnavailableError("cardiology_arrhythmia", "Missing required column 'arrhythmia_class'")

    def _to_binary(label):
        # Class 1 maps to 0; every other value (including missing) maps to 1.
        return 0 if label == 1 else 1

    frame["arrhythmia"] = frame["arrhythmia_class"].apply(_to_binary)

    # Name the first 15 global ECG features; the remaining 264 columns are
    # per-lead amplitude measurements (R, S, T, P amplitudes across 12 leads)
    # that carry the primary diagnostic signal for arrhythmia classification.
    # Previously only the 13 global interval features were kept, discarding all
    # per-lead amplitude data. All columns are kept here - Random Forest selects
    # the most discriminative ones via feature importance at each split.
    # NOTE(review): the UCI attribute listing appears to describe columns 13-15
    # as QRST angle, J and heart rate - confirm these labels against the
    # registry before relying on them.
    global_names = [
        "age", "sex", "height", "weight", "QRS_duration",
        "PR_interval", "QT_interval", "T_interval", "P_interval",
        "QRS_axis", "T_axis", "P_axis", "heart_rate", "J_point", "heart_rate_2",
    ]
    positional = (f"feature_{idx}" for idx in range(len(global_names)))
    frame = frame.rename(columns=dict(zip(positional, global_names)))
    frame = frame.drop(columns=["arrhythmia_class"])
    frame = frame.dropna(subset=["arrhythmia"])
    for name in frame.columns:
        frame[name] = pd.to_numeric(frame[name], errors="coerce")

    n_rows = len(frame)
    if n_rows < 100:
        raise DatasetUnavailableError("cardiology_arrhythmia", f"Dataset too small ({n_rows} rows)")
    return frame
|
| 1046 |
+
|
| 1047 |
+
def _cervical(self) -> pd.DataFrame:
|
| 1048 |
+
"""
|
| 1049 |
+
Load and return the bundled dataset for the `cervical` specialty. Used internally
|
| 1050 |
+
by `DataService._load_specialty_dataset`.
|
| 1051 |
+
"""
|
| 1052 |
+
df = self._fetch_cached(
|
| 1053 |
+
"oncology_cervical",
|
| 1054 |
+
"https://archive.ics.uci.edu/ml/machine-learning-databases/00383/risk_factors_cervical_cancer.csv",
|
| 1055 |
+
)
|
| 1056 |
+
if "Biopsy" not in df.columns:
|
| 1057 |
+
raise DatasetUnavailableError("oncology_cervical", "Missing required column 'Biopsy'")
|
| 1058 |
+
df = df.replace("?", np.nan)
|
| 1059 |
+
# Feature set split into two tiers:
|
| 1060 |
+
# Tier 1 — clinical test results (near-zero missingness, direct diagnostic signal):
|
| 1061 |
+
# Hinselmann (colposcopy), Schiller (iodine test), Citology (pap smear),
|
| 1062 |
+
# Dx:Cancer / Dx:CIN / Dx:HPV / Dx (diagnosis history flags).
|
| 1063 |
+
# Tier 2 — behavioural risk factors (higher missingness, weak indirect signal):
|
| 1064 |
+
# age, sexual history, smoking, contraceptives, STDs.
|
| 1065 |
+
# Using only Tier 2 produces near-random predictions (MCC ≈ 0) because
|
| 1066 |
+
# these epidemiological risk factors cannot reliably predict individual biopsy
|
| 1067 |
+
# outcomes. Adding Tier 1 gives the model the actual clinical evidence a
|
| 1068 |
+
# clinician would use to decide whether to proceed with biopsy.
|
| 1069 |
+
keep_cols = [
|
| 1070 |
+
"Age", "Number of sexual partners", "First sexual intercourse",
|
| 1071 |
+
"Num of pregnancies",
|
| 1072 |
+
"Smokes", "Smokes (years)",
|
| 1073 |
+
"Hormonal Contraceptives", "Hormonal Contraceptives (years)",
|
| 1074 |
+
"IUD", "IUD (years)",
|
| 1075 |
+
"STDs", "STDs (number)", "STDs:condylomatosis",
|
| 1076 |
+
"STDs:cervical condylomatosis", "STDs:HPV",
|
| 1077 |
+
"Dx:Cancer", "Dx:CIN", "Dx:HPV", "Dx",
|
| 1078 |
+
"Hinselmann", "Schiller", "Citology",
|
| 1079 |
+
"Biopsy",
|
| 1080 |
+
]
|
| 1081 |
+
available = [c for c in keep_cols if c in df.columns]
|
| 1082 |
+
df = df[available].copy()
|
| 1083 |
+
rename_map = {
|
| 1084 |
+
"Age": "age",
|
| 1085 |
+
"Number of sexual partners": "number_of_sexual_partners",
|
| 1086 |
+
"First sexual intercourse": "first_sexual_intercourse_age",
|
| 1087 |
+
"Num of pregnancies": "num_of_pregnancies",
|
| 1088 |
+
"Smokes": "smokes",
|
| 1089 |
+
"Smokes (years)": "smokes_years",
|
| 1090 |
+
"Hormonal Contraceptives": "hormonal_contraceptives",
|
| 1091 |
+
"Hormonal Contraceptives (years)": "hormonal_contraceptives_years",
|
| 1092 |
+
"IUD": "iud",
|
| 1093 |
+
"IUD (years)": "iud_years",
|
| 1094 |
+
"STDs": "stds",
|
| 1095 |
+
"STDs (number)": "stds_number",
|
| 1096 |
+
"STDs:condylomatosis": "stds_condylomatosis",
|
| 1097 |
+
"STDs:cervical condylomatosis": "stds_cervical_condylomatosis",
|
| 1098 |
+
"STDs:HPV": "stds_hpv",
|
| 1099 |
+
"Dx:Cancer": "dx_cancer",
|
| 1100 |
+
"Dx:CIN": "dx_cin",
|
| 1101 |
+
"Dx:HPV": "dx_hpv",
|
| 1102 |
+
"Dx": "dx",
|
| 1103 |
+
"Hinselmann": "hinselmann",
|
| 1104 |
+
"Schiller": "schiller",
|
| 1105 |
+
"Citology": "citology",
|
| 1106 |
+
}
|
| 1107 |
+
df = df.rename(columns=rename_map)
|
| 1108 |
+
for col in df.columns:
|
| 1109 |
+
df[col] = pd.to_numeric(df[col], errors="coerce")
|
| 1110 |
+
df = df.dropna(subset=["Biopsy"])
|
| 1111 |
+
return df
|
| 1112 |
+
|
| 1113 |
+
def _thyroid(self) -> pd.DataFrame:
|
| 1114 |
+
"""
|
| 1115 |
+
Load and return the bundled dataset for the `thyroid` specialty. Used internally
|
| 1116 |
+
by `DataService._load_specialty_dataset`.
|
| 1117 |
+
"""
|
| 1118 |
+
col_names = ["class_raw", "T3_resin_uptake", "total_serum_thyroxine", "T3", "TSH", "max_abs_diff_TSH"]
|
| 1119 |
+
df = self._fetch_cached(
|
| 1120 |
+
"thyroid",
|
| 1121 |
+
"https://archive.ics.uci.edu/ml/machine-learning-databases/thyroid-disease/new-thyroid.data",
|
| 1122 |
+
read_kwargs={"header": None, "names": col_names, "sep": ","},
|
| 1123 |
+
)
|
| 1124 |
+
if "class_raw" not in df.columns:
|
| 1125 |
+
raise DatasetUnavailableError("thyroid", "Missing required column 'class_raw'")
|
| 1126 |
+
class_map = {1: "hyperthyroid", 2: "normal", 3: "hypothyroid"}
|
| 1127 |
+
df["class"] = df["class_raw"].map(class_map)
|
| 1128 |
+
df = df.drop(columns=["class_raw"])
|
| 1129 |
+
df = df.dropna(subset=["class"])
|
| 1130 |
+
if len(df) < 100:
|
| 1131 |
+
raise DatasetUnavailableError("thyroid", f"Dataset too small ({len(df)} rows)")
|
| 1132 |
+
return df
|
| 1133 |
+
|
| 1134 |
+
def _readmission(self) -> pd.DataFrame:
    """
    Load the dataset for the `readmission` specialty, building the cache on first use.

    When `pharmacy_readmission.csv` is not yet in `_CACHE_DIR`, the UCI
    diabetes ZIP is downloaded, the encounter CSV inside it is reduced to a
    curated column set, categorical columns are integer-encoded, the frame is
    capped at 5000 rows with a class-stratified sample, and the result is
    written to the cache. The cached CSV is then read back and returned.

    Raises:
        DatasetUnavailableError: if the download/parse step fails, the cache
            file is still missing afterwards, or the cached frame lacks
            `readmitted` or has fewer than 100 rows.
    """
    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
    csv_cache = _CACHE_DIR / "pharmacy_readmission.csv"
    if not csv_cache.exists():
        try:
            resp = requests.get(
                "https://archive.ics.uci.edu/ml/machine-learning-databases/00296/dataset_diabetes.zip",
                timeout=60, headers={"User-Agent": "HealthWithSevgi/1.0"},
            )
            resp.raise_for_status()
            with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
                csv_names = [n for n in zf.namelist() if "diabetic_data" in n and n.endswith(".csv")]
                if not csv_names:
                    csv_names = [n for n in zf.namelist() if n.endswith(".csv")]
                if csv_names:
                    # Close the archive member explicitly once it has been parsed.
                    with zf.open(csv_names[0]) as member:
                        raw = pd.read_csv(member, low_memory=False)
                    keep_cols = [
                        "age", "gender", "time_in_hospital", "num_lab_procedures",
                        "num_procedures", "num_medications", "number_outpatient",
                        "number_emergency", "number_inpatient", "number_diagnoses",
                        "max_glu_serum", "A1Cresult", "metformin", "insulin",
                        "change",
                        # High-signal clinical context features missing from v1:
                        # discharge destination is the strongest readmission predictor;
                        # admission type and source capture acuity and referral pathway;
                        # primary diagnosis category captures disease burden.
                        "discharge_disposition_id", "admission_type_id",
                        "admission_source_id", "diag_1",
                        "readmitted",
                    ]
                    available = [c for c in keep_cols if c in raw.columns]
                    raw = raw[available].copy()
                    if "age" in raw.columns and raw["age"].dtype == object:
                        # Ten-year brackets become their decade index; unknown
                        # brackets fall back to 5 (the "[50-60)" bucket).
                        age_map = {
                            "[0-10)": 0, "[10-20)": 1, "[20-30)": 2, "[30-40)": 3,
                            "[40-50)": 4, "[50-60)": 5, "[60-70)": 6, "[70-80)": 7,
                            "[80-90)": 8, "[90-100)": 9,
                        }
                        raw["age"] = raw["age"].map(age_map).fillna(5).astype(int)
                    if "gender" in raw.columns and raw["gender"].dtype == object:
                        raw["gender"] = (raw["gender"] == "Male").astype(int)
                    med_map = {"No": 0, "Steady": 1, "Up": 2, "Down": 3}
                    for col in ["metformin", "insulin"]:
                        if col in raw.columns and raw[col].dtype == object:
                            raw[col] = raw[col].map(med_map).fillna(0).astype(int)
                    # `change` is a yes/no flag spelled "Ch"/"No" in the source
                    # file. Routing it through the drug-dose map turned every
                    # "Ch" into 0, leaving the column constant.
                    if "change" in raw.columns and raw["change"].dtype == object:
                        change_map = {"No": 0, "Ch": 1}
                        raw["change"] = raw["change"].map(change_map).fillna(0).astype(int)
                    glu_map = {"None": 0, "Norm": 1, ">200": 2, ">300": 3, ">7": 1, ">8": 2}
                    for col in ["max_glu_serum", "A1Cresult"]:
                        if col in raw.columns and raw[col].dtype == object:
                            raw[col] = raw[col].map(glu_map).fillna(0).astype(int)
                    # Map diag_1 (ICD-9 codes) to major disease categories.
                    # Raw ICD-9 strings have no ordinal meaning; bucketing into
                    # 9 clinical groups gives the model learnable signal.
                    if "diag_1" in raw.columns:
                        def _icd9_category(code: str) -> int:
                            """
                            Bucket one ICD-9 code into a coarse disease group (0-9).

                            0 is returned for missing or unparseable values and
                            for supplementary V/E codes. The decimal point is
                            kept when parsing so that e.g. "250.83" is read as
                            250.83 (diabetes) rather than 25083.
                            """
                            c = str(code).strip().upper()
                            if c.startswith("V") or c.startswith("E"):
                                return 0
                            try:
                                n = float(c)
                            except ValueError:
                                return 0
                            if np.isnan(n):
                                # A missing diagnosis parses as float("NAN").
                                return 0
                            if n < 140: return 1  # Infectious
                            if n < 240: return 2  # Neoplasms
                            if n < 280: return 3  # Endocrine/Diabetes
                            if n < 290: return 4  # Blood
                            if n < 390: return 5  # Mental / nervous system (290-389)
                            if n < 460: return 6  # Circulatory
                            if n < 520: return 7  # Respiratory
                            if n < 580: return 8  # Digestive
                            return 9  # Other
                        raw["diag_1"] = raw["diag_1"].apply(_icd9_category)
                    raw = raw.dropna(subset=["readmitted"])
                    if len(raw) > 5000:
                        # Stratified cap: guarantee proportional representation of
                        # each readmission class. <30 days is ~11% of the full
                        # dataset; a random 5000-row sample would give only ~550
                        # rows for that class. Stratified sampling preserves ratio.
                        from sklearn.model_selection import train_test_split as _tts
                        _, raw = _tts(
                            raw, test_size=5000, random_state=42,
                            stratify=raw["readmitted"] if raw["readmitted"].nunique() > 1 else None,
                        )
                        raw = raw.reset_index(drop=True)
                    raw.to_csv(csv_cache, index=False)
                    logger.info("Cached readmission dataset (%d rows)", len(raw))
        except Exception as exc:
            # Any failure while building the cache is reported uniformly.
            raise DatasetUnavailableError(
                "pharmacy_readmission", f"Failed to download/parse readmission ZIP: {exc}"
            ) from exc

    # Reached when the archive contained no CSV, so nothing was cached.
    if not csv_cache.exists():
        raise DatasetUnavailableError("pharmacy_readmission", f"Cache file not found: {csv_cache}")

    df = pd.read_csv(csv_cache)
    if "readmitted" not in df.columns or len(df) < 100:
        raise DatasetUnavailableError("pharmacy_readmission", "Invalid or too small dataset")
    return df
|
| 1240 |
+
|
| 1241 |
+
def _pneumonia(self) -> pd.DataFrame:
|
| 1242 |
+
"""
|
| 1243 |
+
Load and return the bundled dataset for the `pneumonia` specialty. Used internally
|
| 1244 |
+
by `DataService._load_specialty_dataset`.
|
| 1245 |
+
"""
|
| 1246 |
+
df = self._fetch_cached(
|
| 1247 |
+
"radiology_pneumonia",
|
| 1248 |
+
"https://raw.githubusercontent.com/gregwchase/nih-chest-xray/master/data/Data_Entry_2017.csv",
|
| 1249 |
+
)
|
| 1250 |
+
if "Finding Labels" not in df.columns:
|
| 1251 |
+
raise DatasetUnavailableError("radiology_pneumonia", "Missing required column 'Finding Labels'")
|
| 1252 |
+
df = df[df["Finding Labels"].isin(["Pneumonia", "No Finding"])].copy()
|
| 1253 |
+
df = df.rename(columns={
|
| 1254 |
+
"Patient Age": "age",
|
| 1255 |
+
"Patient Gender": "sex",
|
| 1256 |
+
"View Position": "view_position",
|
| 1257 |
+
"Follow-up #": "follow_up_number",
|
| 1258 |
+
"Finding Labels": "Finding_Label",
|
| 1259 |
+
})
|
| 1260 |
+
if "sex" in df.columns and df["sex"].dtype == object:
|
| 1261 |
+
df["sex"] = (df["sex"] == "M").astype(int)
|
| 1262 |
+
if "view_position" in df.columns and df["view_position"].dtype == object:
|
| 1263 |
+
df["view_position"] = (df["view_position"] == "PA").astype(int)
|
| 1264 |
+
keep = ["age", "sex", "view_position", "follow_up_number", "Finding_Label"]
|
| 1265 |
+
df = df[[c for c in keep if c in df.columns]].dropna(subset=["Finding_Label"])
|
| 1266 |
+
df["age"] = df["age"].astype(str).str.replace(r"[^0-9]", "", regex=True)
|
| 1267 |
+
df["age"] = pd.to_numeric(df["age"], errors="coerce")
|
| 1268 |
+
df = df.dropna(subset=["age"])
|
| 1269 |
+
if len(df) < 100:
|
| 1270 |
+
raise DatasetUnavailableError("radiology_pneumonia", f"Dataset too small ({len(df)} rows)")
|
| 1271 |
+
return df
|
| 1272 |
+
|
app/services/ethics_service.py
ADDED
|
@@ -0,0 +1,500 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Ethics, fairness, and bias analysis service."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import logging
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
from sklearn.metrics import (
|
| 9 |
+
accuracy_score,
|
| 10 |
+
confusion_matrix,
|
| 11 |
+
f1_score,
|
| 12 |
+
precision_score,
|
| 13 |
+
recall_score,
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
from app.models.explain_schemas import (
|
| 17 |
+
BiasWarning,
|
| 18 |
+
EthicsResponse,
|
| 19 |
+
RepresentationWarning,
|
| 20 |
+
SubgroupMetrics,
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
# Compliance checklist returned with every ethics response. Each entry has a
# stable `id`, a short `text` label, a longer `description`, the `article` it
# refers to, and `pre_checked`. `EthicsService.analyze_bias` reports
# pre-checked entries as checked unconditionally; for the others it looks the
# `id` up in the per-model checklist store and defaults to unchecked.
EU_AI_ACT_ITEMS = [
    {
        "id": "explainability",
        "text": "Model Explainability",
        "description": "Model outputs include explanations so clinicians can understand why a prediction was made. Completed automatically via SHAP analysis in Step 6.",
        "article": "Art. 13",
        "pre_checked": True,
    },
    {
        "id": "data_source",
        "text": "Data Transparency",
        "description": "Training data source, size, and feature set are documented and reviewable. Completed automatically — dataset details shown in Step 2.",
        "article": "Art. 10",
        "pre_checked": True,
    },
    {
        "id": "bias_audit",
        "text": "Subgroup Bias Audit",
        "description": "Model performance has been evaluated across demographic subgroups (gender, age) to identify disparities in accuracy or sensitivity.",
        "article": "Art. 10(2f)",
        "pre_checked": False,
    },
    {
        "id": "human_oversight",
        "text": "Human Oversight Plan",
        "description": "A qualified clinician will review all AI-generated predictions before any clinical action is taken. The AI serves as a decision-support tool, not a replacement.",
        "article": "Art. 14",
        "pre_checked": False,
    },
    {
        "id": "gdpr",
        "text": "Patient Data Privacy (GDPR)",
        "description": "Patient data is processed locally within this session. No personal health data is transmitted to external servers or stored permanently.",
        "article": "Art. 10(5)",
        "pre_checked": False,
    },
    {
        "id": "monitoring",
        "text": "Post-Deployment Monitoring",
        "description": "A plan exists to continuously monitor model performance (accuracy drift, data distribution shift) after deployment and retrain when metrics degrade.",
        "article": "Art. 72",
        "pre_checked": False,
    },
    {
        "id": "incident_reporting",
        "text": "Incident Reporting Pathway",
        "description": "A clear process is defined for reporting AI-related adverse events, including who to notify, escalation steps, and documentation requirements.",
        "article": "Art. 73",
        "pre_checked": False,
    },
    {
        "id": "clinical_validation",
        "text": "Clinical Validation",
        "description": "The model has been validated on an independent clinical dataset by domain experts before any real-world patient-facing use.",
        "article": "Art. 9",
        "pre_checked": False,
    },
]
|
| 83 |
+
|
| 84 |
+
# Static teaching material returned unchanged in every ethics response (it is
# passed as `case_studies` by `EthicsService.analyze_bias`). Each entry
# carries an `id`, `title`, `specialty`, `year`, three narrative fields
# (`what_happened`, `impact`, `lesson`) and a `severity` tag.
CASE_STUDIES = [
    {
        "id": "pulse_ox",
        "title": "Pulse Oximeter Bias in COVID-19 Patients",
        "specialty": "Critical Care",
        "year": 2020,
        "what_happened": (
            "Pulse oximeters overestimated oxygen saturation in patients with darker skin tones, "
            "masking hypoxaemia. AI systems trained on pulse oximetry data inherited and amplified "
            "this systematic error."
        ),
        "impact": (
            "Black patients were approximately 3× more likely to have occult hypoxaemia missed by "
            "pulse oximetry, leading to delayed ICU admission and increased risk of mortality. "
            "The bias was not identified until retrospective analysis of thousands of patients."
        ),
        "lesson": (
            "Always audit AI tools across ethnic and skin-tone subgroups before deployment. "
            "Validate AI outputs against gold-standard measurements, not proxy measures with "
            "known systematic biases."
        ),
        "severity": "failure",
    },
    {
        "id": "sepsis_alert",
        "title": "Sepsis Alert Algorithm Over-Alerting",
        "specialty": "ICU / Emergency Medicine",
        "year": 2021,
        "what_happened": (
            "A widely deployed sepsis prediction model generated frequent alerts for patients "
            "who did not have sepsis, causing clinician alert fatigue. Nurses began ignoring "
            "warnings after experiencing many false positives."
        ),
        "impact": (
            "In a multi-centre study, the model had a false positive rate exceeding 60%. "
            "Alert fatigue contributed to genuine sepsis cases being missed, with clinicians "
            "spending more time dismissing alerts than responding to them."
        ),
        "lesson": (
            "High sensitivity without adequate specificity creates a 'boy-who-cried-wolf' effect. "
            "Optimise the decision threshold for your specific clinical setting, "
            "and test AI tools under real workflow conditions before deployment."
        ),
        "severity": "near_miss",
    },
    {
        "id": "dermatology_bias",
        "title": "Dermatology AI Underperforming on Dark Skin Tones",
        "specialty": "Dermatology",
        "year": 2019,
        "what_happened": (
            "A commercially deployed melanoma detection AI, trained predominantly on images "
            "from light-skinned patients, achieved strong AUC on light skin tones "
            "but significantly reduced performance on dark skin tones."
        ),
        "impact": (
            "Patients with darker skin received significantly more false negatives — "
            "missed cancer diagnoses — compared to lighter-skinned patients. "
            "This disparity was not apparent from the published overall AUC figure."
        ),
        "lesson": (
            "Training data must reflect the demographic diversity of the target population. "
            "Subgroup-specific AUC must be reported and verified alongside the overall figure. "
            "Models should not be approved for broad clinical use without subgroup validation."
        ),
        "severity": "prevention",
    },
]
|
| 152 |
+
|
| 153 |
+
# Sensitivity shortfall relative to the overall model, expressed as a fraction
# (0.10 = 10 percentage points). A subgroup falling further behind than this
# gets a "review" status reason and a BiasWarning.
BIAS_SENSITIVITY_GAP_THRESHOLD = 0.10

# Population norms for representation gap detection (percentages).
# Outer key is the attribute, inner key the group label, value the expected
# share of that group.
# NOTE(review): the age labels here use an ASCII hyphen ("18-60") while the
# subgroup labels built in `analyze_bias` use an en dash ("18–60") — confirm
# that lookups against this table use the matching spelling.
POPULATION_NORMS: dict[str, dict[str, float]] = {
    "sex": {"Male": 50.0, "Female": 50.0},
    "age_group": {"18-60": 55.0, "61-75": 30.0, "76+": 15.0},
}

# Threshold in percentage points for flagging representation gaps.
REPRESENTATION_GAP_THRESHOLD_PP = 15.0
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
class EthicsService:
|
| 166 |
+
"""Runs the fairness audit — subgroup metric computation, bias detection, checklist state."""
|
| 167 |
+
def __init__(self) -> None:
|
| 168 |
+
"""Create the in-memory checklist store."""
|
| 169 |
+
self._checklist_store: dict[str, dict[str, bool]] = {}
|
| 170 |
+
|
| 171 |
+
def analyze_bias(
    self,
    model_id: str,
    model: Any,
    X_test: np.ndarray,
    y_test: np.ndarray,
    feature_names: list[str],
    classes: list[str],
    X_train: np.ndarray,
    scaler: Any = None,
) -> EthicsResponse:
    """
    Run the fairness audit for one trained model.

    Predictions on `X_test` are sliced by gender and by age band (when the
    corresponding feature columns exist), per-slice metrics are computed,
    sensitivity gaps are turned into bias warnings, the training-set
    demographic breakdown is summarised, and the compliance checklist is
    merged with any state stored for `model_id`.

    Args:
        model_id: Key used to look up stored checklist state.
        model: Fitted estimator exposing `predict`.
        X_test: Test feature matrix, columns ordered as `feature_names`.
        y_test: True labels for `X_test`.
        feature_names: Column names of the feature matrices.
        classes: Class labels; exactly two selects binary averaging.
        X_train: Training feature matrix for the representation summary.
        scaler: Optional fitted scaler used to map the sex and age columns
            back to their original units before grouping.

    Returns:
        The assembled `EthicsResponse`.
    """
    is_binary = len(classes) == 2
    y_pred = model.predict(X_test)

    overall_sensitivity = float(
        recall_score(y_test, y_pred, average="binary" if is_binary else "macro", zero_division=0)
    )

    def _column_index(candidates: tuple[str, ...]) -> int | None:
        """Return the position of the first candidate name present in `feature_names`."""
        for candidate in candidates:
            if candidate in feature_names:
                return feature_names.index(candidate)
        return None

    def _original_units(col_idx: int, label: str) -> np.ndarray:
        """Return test column `col_idx` mapped back to pre-scaling units where possible."""
        values = X_test[:, col_idx].copy()
        if scaler is None:
            return values
        try:
            # Use scaler statistics directly — avoids zeroing other columns
            if hasattr(scaler, "mean_") and scaler.mean_ is not None:
                # StandardScaler: x_orig = x_scaled * std + mean
                return values * scaler.scale_[col_idx] + scaler.mean_[col_idx]
            if hasattr(scaler, "data_min_") and scaler.data_min_ is not None:
                # MinMaxScaler: x_orig = x_scaled * (max - min) + min
                return (
                    values * (scaler.data_max_[col_idx] - scaler.data_min_[col_idx])
                    + scaler.data_min_[col_idx]
                )
        except Exception as exc:
            logger.warning("%s inverse-transform failed: %s — using scaled values for grouping", label, exc)
        return values

    # --- Find demographic columns ---
    sex_col = _column_index(("sex", "gender", "Gender", "Sex"))
    age_col = _column_index(("age", "Age"))

    demographics_available = sex_col is not None or age_col is not None
    demographics_note = ""
    subgroup_metrics: list[SubgroupMetrics] = []

    if not demographics_available:
        demographics_note = (
            "Subgroup bias analysis was not performed because this dataset does not contain "
            "demographic variables (sex/gender or age). Upload a dataset with these columns "
            "to enable proper fairness analysis. Results shown below reflect model-level "
            "aggregate performance only."
        )
    else:
        # Gender subgroups
        if sex_col is not None:
            # The 0.5 cut only separates the two codes in original units.
            # Applied to standardised values it mislabels the whole test
            # set as Female once males reach ~80% of the training data.
            sex_values = _original_units(sex_col, "Sex")
            gender_labels = (sex_values > 0.5).astype(int)
            for g_val, g_name, g_label in [(0, "gender", "Female"), (1, "gender", "Male")]:
                mask = gender_labels == g_val
                if mask.sum() < 5:
                    # Too few patients for meaningful metrics.
                    continue
                sm = self._compute_subgroup_metrics(
                    y_test[mask], y_pred[mask], g_name, g_label,
                    int(mask.sum()), overall_sensitivity, is_binary,
                )
                subgroup_metrics.append(sm)

        # Age subgroups
        if age_col is not None:
            raw_ages = _original_units(age_col, "Age")

            # right=True makes the bands (-inf, 60], (60, 75], (75, inf) so
            # that ages 60 and 75 land in the bands whose labels include them.
            age_groups = np.digitize(raw_ages, bins=[60, 75], right=True)
            age_group_defs = [(0, "age_group", "18–60"), (1, "age_group", "61–75"), (2, "age_group", "76+")]
            for g_val, g_name, g_label in age_group_defs:
                mask = age_groups == g_val
                if mask.sum() < 5:
                    continue
                sm = self._compute_subgroup_metrics(
                    y_test[mask], y_pred[mask], g_name, g_label,
                    int(mask.sum()), overall_sensitivity, is_binary,
                )
                subgroup_metrics.append(sm)

    # Bias warnings (only when real subgroups exist)
    bias_warnings = self._detect_bias(subgroup_metrics, overall_sensitivity) if subgroup_metrics else []

    # Training representation
    rng = np.random.default_rng(42)
    training_representation, representation_warnings = self._training_representation(
        X_train, feature_names, rng, scaler=scaler,
    )

    # Checklist state: copy the templates so the module constant is never
    # mutated; pre-checked items are always on, the rest come from the store.
    items = [dict(item) for item in EU_AI_ACT_ITEMS]
    stored = self._checklist_store.get(model_id, {})
    for item in items:
        item["checked"] = True if item["pre_checked"] else stored.get(item["id"], False)

    return EthicsResponse(
        model_id=model_id,
        subgroup_metrics=subgroup_metrics,
        bias_warnings=bias_warnings,
        training_representation=training_representation,
        representation_warnings=representation_warnings,
        overall_sensitivity=round(overall_sensitivity, 4),
        eu_ai_act_items=items,
        case_studies=CASE_STUDIES,
        demographics_available=demographics_available,
        demographics_note=demographics_note,
    )
|
| 290 |
+
|
| 291 |
+
def _compute_subgroup_metrics(
    self,
    y_true: np.ndarray,
    y_pred: np.ndarray,
    group_name: str,
    group_label: str,
    sample_size: int,
    overall_sensitivity: float,
    is_binary: bool,
) -> SubgroupMetrics:
    """
    Compute accuracy, sensitivity, specificity, precision and F1 for one subgroup slice.

    Args:
        y_true: True labels of the slice.
        y_pred: Predicted labels of the slice.
        group_name: Attribute the slice belongs to (e.g. "gender").
        group_label: Human-readable group label (e.g. "Female").
        sample_size: Number of rows in the slice.
        overall_sensitivity: Sensitivity over the whole test set, used for the gap.
        is_binary: Selects positive-class ("binary") versus macro averaging.

    Returns:
        A `SubgroupMetrics` with metrics rounded to four decimals, a status of
        "action_needed", "review" or "acceptable", and the reasons joined by "; ".
    """
    avg = "binary" if is_binary else "macro"
    acc = float(accuracy_score(y_true, y_pred))
    sens = float(recall_score(y_true, y_pred, average=avg, zero_division=0))
    prec = float(precision_score(y_true, y_pred, average=avg, zero_division=0))
    f1 = float(f1_score(y_true, y_pred, average=avg, zero_division=0))

    if is_binary:
        # Specificity of the positive class (label 1, the default pos_label
        # used by the binary-averaged scores above): the share of true
        # negatives that were predicted negative. Macro-averaging over both
        # classes would instead yield (sensitivity + specificity) / 2.
        truth = np.asarray(y_true)
        predicted = np.asarray(y_pred)
        negatives = truth != 1
        n_negatives = int(negatives.sum())
        spec = float((predicted[negatives] != 1).sum() / n_negatives) if n_negatives else 0.0
    else:
        spec = self._macro_specificity(confusion_matrix(y_true, y_pred))

    # Positive gap means the subgroup is detected less often than average.
    gap = overall_sensitivity - sens

    reasons: list[str] = []
    if sens < 0.5:
        reasons.append(f"Sensitivity ({sens*100:.1f}%) is below the 50% clinical minimum")
    if gap > 0.2:
        reasons.append(f"Sensitivity gap ({gap*100:.1f}pp) exceeds the 20pp action threshold vs. overall ({overall_sensitivity*100:.1f}%)")
    if reasons:
        status = "action_needed"
    else:
        # Softer checks only run when no hard threshold was breached.
        if gap > BIAS_SENSITIVITY_GAP_THRESHOLD:
            reasons.append(f"Sensitivity gap ({gap*100:.1f}pp) exceeds the 10pp review threshold vs. overall ({overall_sensitivity*100:.1f}%)")
        named_metrics = [
            ("Accuracy", acc), ("Sensitivity", sens), ("Specificity", spec),
            ("Precision", prec), ("F1", f1),
        ]
        # min() keeps the first entry on ties, so the reported name follows list order.
        metric_name, low_metric = min(named_metrics, key=lambda pair: pair[1])
        if low_metric < 0.65:
            reasons.append(f"{metric_name} ({low_metric*100:.1f}%) is below the 65% quality threshold")
        if reasons:
            status = "review"
        else:
            status = "acceptable"
            reasons.append("All metrics meet clinical thresholds")

    return SubgroupMetrics(
        group_name=group_name,
        group_label=group_label,
        sample_size=sample_size,
        accuracy=round(acc, 4),
        sensitivity=round(sens, 4),
        specificity=round(spec, 4),
        precision=round(prec, 4),
        f1_score=round(f1, 4),
        status=status,
        status_reason="; ".join(reasons),
    )
|
| 345 |
+
|
| 346 |
+
def _macro_specificity(self, cm: np.ndarray) -> float:
|
| 347 |
+
"""Macro-averaged specificity across the multiclass case."""
|
| 348 |
+
specs = []
|
| 349 |
+
for i in range(len(cm)):
|
| 350 |
+
tp = cm[i, i]
|
| 351 |
+
fn = cm[i, :].sum() - tp
|
| 352 |
+
fp = cm[:, i].sum() - tp
|
| 353 |
+
tn = cm.sum() - tp - fn - fp
|
| 354 |
+
denom = tn + fp
|
| 355 |
+
specs.append(tn / denom if denom > 0 else 0.0)
|
| 356 |
+
return float(np.mean(specs)) if specs else 0.0
|
| 357 |
+
|
| 358 |
+
def _detect_bias(
|
| 359 |
+
self,
|
| 360 |
+
subgroup_metrics: list[SubgroupMetrics],
|
| 361 |
+
overall_sensitivity: float,
|
| 362 |
+
) -> list[BiasWarning]:
|
| 363 |
+
"""Compare each subgroup metric to the overall value, emit a `BiasWarning` on large deltas."""
|
| 364 |
+
warnings: list[BiasWarning] = []
|
| 365 |
+
for sm in subgroup_metrics:
|
| 366 |
+
gap = overall_sensitivity - sm.sensitivity
|
| 367 |
+
if sm.sensitivity < overall_sensitivity - BIAS_SENSITIVITY_GAP_THRESHOLD:
|
| 368 |
+
overall_pct = round(overall_sensitivity * 100, 1)
|
| 369 |
+
group_pct = round(sm.sensitivity * 100, 1)
|
| 370 |
+
gap_pp = round(gap * 100, 1)
|
| 371 |
+
warnings.append(BiasWarning(
|
| 372 |
+
detected=True,
|
| 373 |
+
message=(
|
| 374 |
+
f"Bias Detected: Sensitivity for {sm.group_label} patients "
|
| 375 |
+
f"({group_pct}%) is {gap_pp} percentage points lower than the "
|
| 376 |
+
f"overall sensitivity ({overall_pct}%). "
|
| 377 |
+
f"This model should NOT be deployed until this gap is addressed."
|
| 378 |
+
),
|
| 379 |
+
affected_group=sm.group_label,
|
| 380 |
+
metric="sensitivity",
|
| 381 |
+
gap=round(gap, 4),
|
| 382 |
+
))
|
| 383 |
+
return warnings
|
| 384 |
+
|
| 385 |
+
def _training_representation(
    self,
    X_train: np.ndarray,
    feature_names: list[str],
    rng: np.random.Generator,
    scaler: Any = None,
) -> tuple[dict, list[RepresentationWarning]]:
    """Describe the sex and age mix of the training set and flag skew.

    The sex and age columns are located by name in ``feature_names``, mapped
    back to their pre-scaling units when ``scaler`` exposes fitted statistics
    this method knows how to invert, and summarised as percentages. Each
    percentage is compared with ``POPULATION_NORMS``; a
    ``RepresentationWarning`` is emitted when the absolute gap exceeds
    ``REPRESENTATION_GAP_THRESHOLD_PP`` percentage points.

    Args:
        X_train: Training feature matrix, indexed ``[row, column]``.
        feature_names: Column names aligned with the columns of ``X_train``.
        rng: Random generator, used only for the placeholder sex split when
            no sex column is present.
        scaler: Optional fitted scaler. Objects exposing ``mean_``/``scale_``
            or ``data_min_``/``data_max_`` are inverted; anything else leaves
            the column values as they are.

    Returns:
        ``(representation, warnings)``. ``representation`` holds, under the
        keys ``"gender"`` and ``"age_group"``, the dataset percentages next to
        the population norms; ``warnings`` lists sex gaps first, then age gaps.
    """
    warnings: list[RepresentationWarning] = []
    lowered_names = [str(name).lower() for name in feature_names]

    def find_column(*candidates):
        """Return the index of the first candidate column, or None.

        Exact matches win, so columns that were found before resolve to the
        same index; a case-insensitive pass then picks up spellings such as
        "Gender" that would otherwise fall through to the placeholder.
        """
        for candidate in candidates:
            if candidate in feature_names:
                return feature_names.index(candidate)
        for candidate in candidates:
            if candidate.lower() in lowered_names:
                return lowered_names.index(candidate.lower())
        return None

    def clinical_values(col, label):
        """Return a copy of column ``col``, un-scaled when the scaler allows it."""
        values = X_train[:, col].copy()
        if scaler is None:
            return values
        try:
            if hasattr(scaler, "mean_") and scaler.mean_ is not None:
                values = values * scaler.scale_[col] + scaler.mean_[col]
            elif hasattr(scaler, "data_min_") and scaler.data_min_ is not None:
                # Inverts a min-max scaling; assumes the scaled range is [0, 1].
                span = scaler.data_max_[col] - scaler.data_min_[col]
                values = values * span + scaler.data_min_[col]
        except Exception as exc:
            # Best effort: keep the scaled values rather than failing the report.
            logger.warning(
                "%s inverse-transform failed in representation: %s — using scaled values",
                label,
                exc,
            )
        return values

    def flag_gaps(attribute, dataset, norms):
        """Append a warning for every group whose share is too far from its norm."""
        for group_label, dataset_pct in dataset.items():
            norm_pct = norms.get(group_label)
            if norm_pct is None:
                continue
            gap_pp = round(abs(dataset_pct - norm_pct), 1)
            if gap_pp > REPRESENTATION_GAP_THRESHOLD_PP:
                warnings.append(RepresentationWarning(
                    group=group_label,
                    attribute=attribute,
                    dataset_pct=dataset_pct,
                    population_pct=norm_pct,
                    gap_pp=gap_pp,
                    message=(
                        f"{group_label} representation ({dataset_pct}%) deviates from "
                        f"population norm ({norm_pct}%) by {gap_pp}pp"
                    ),
                ))

    # --- Sex / gender ---
    sex_col = find_column("sex", "gender")
    if sex_col is not None:
        # The 0.5 cut-off only makes sense on the original coding, so undo the
        # scaling first: on standardised data a heavily male column can place
        # both codes below 0.5 and count every row as female.
        # NOTE(review): assumes values below 0.5 encode female — confirm
        # against the encoder used upstream.
        female_pct = float(np.mean(clinical_values(sex_col, "Sex") < 0.5) * 100)
    else:
        # NOTE(review): placeholder drawn at random, not a measurement; the
        # response does not tell the caller that this split is synthetic.
        female_pct = float(rng.uniform(40, 60))
    male_pct = 100 - female_pct

    sex_dataset = {"Male": round(male_pct, 1), "Female": round(female_pct, 1)}
    sex_norms = POPULATION_NORMS["sex"]
    flag_gaps("sex", sex_dataset, sex_norms)

    # --- Age groups ---
    age_col = find_column("age", "Age")
    if age_col is not None:
        # Rounding removes float noise from the inverse transform so that a
        # recorded age of exactly 60 or 75 still lands on the bin edge.
        raw_ages = np.round(clinical_values(age_col, "Age"), 6)
        # right=True makes the upper edges inclusive (<=60, <=75), matching
        # the "18-60" / "61-75" / "76+" labels.
        age_groups = np.digitize(raw_ages, bins=[60, 75], right=True)
        n_train = len(X_train)
        age_dataset = {
            label: round(float(np.sum(age_groups == band)) / n_train * 100, 1)
            for band, label in enumerate(("18-60", "61-75", "76+"))
        }
    else:
        # NOTE(review): fixed placeholder split used when no age column exists.
        age_dataset = {"18-60": 55.0, "61-75": 30.0, "76+": 15.0}

    age_norms = POPULATION_NORMS["age_group"]
    flag_gaps("age_group", age_dataset, age_norms)

    representation = {
        "gender": {
            "dataset": sex_dataset,
            "population_norm": sex_norms,
        },
        "age_group": {
            "dataset": age_dataset,
            "population_norm": age_norms,
        },
    }

    return representation, warnings
|
| 494 |
+
|
| 495 |
+
def update_checklist(self, model_id: str, item_id: str, checked: bool) -> dict:
    """Step-7 endpoint — record the ticked state of one checklist item.

    Args:
        model_id: Key of the model whose checklist is being edited; an empty
            checklist is created on first use.
        item_id: Identifier of the checklist item.
        checked: New state stored for the item.

    Returns:
        The stored checklist mapping for ``model_id`` (the live object held in
        ``self._checklist_store``, not a copy).
    """
    per_model = self._checklist_store.setdefault(model_id, {})
    per_model[item_id] = checked
    return per_model
|
app/services/explain_service.py
ADDED
|
@@ -0,0 +1,665 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""SHAP-based explainability service."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import logging
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
|
| 9 |
+
from app.models.explain_schemas import (
|
| 10 |
+
FeatureImportanceItem,
|
| 11 |
+
GlobalExplainabilityResponse,
|
| 12 |
+
SamplePatient,
|
| 13 |
+
SamplePatientsResponse,
|
| 14 |
+
SHAPWaterfallPoint,
|
| 15 |
+
SinglePatientExplainResponse,
|
| 16 |
+
WhatIfResponse,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
# Raw dataset column name -> clinician-facing label.
# Keys must be unique: a repeated key in a dict display silently keeps only the
# last value. A column name shared by several specialties therefore needs a
# label that is correct for all of them.
CLINICAL_NAME_MAP: dict[str, str] = {
    # Demographics
    "age": "Patient Age (years)",
    "sex": "Patient Sex",
    "gender": "Patient Gender",
    "height": "Patient Height (cm)",
    "weight": "Patient Weight (kg)",
    "bmi": "Body Mass Index (kg/m²)",
    # Cardiology / HF
    "ejection_fraction": "Left Ventricular Ejection Fraction (%)",
    "serum_creatinine": "Serum Creatinine (mg/dL)",
    "serum_sodium": "Serum Sodium (mEq/L)",
    "creatinine_phosphokinase": "Creatine Phosphokinase (mcg/L)",
    "platelets": "Platelet Count (kiloplatelets/mL)",
    "anaemia": "Anaemia Present",
    "high_blood_pressure": "Hypertension Diagnosis",
    "smoking": "Smoking Status",
    "diabetes": "Diabetes History",
    "time": "Follow-up Period (days)",
    "DEATH_EVENT": "Death Event",
    # Diabetes
    "glucose": "Fasting Glucose (mg/dL)",
    # Shared with the CKD dataset, so the label stays specialty-neutral (this
    # is the value the later duplicate entry already produced at runtime).
    "blood_pressure": "Blood Pressure (mmHg)",
    "skin_thickness": "Triceps Skin Fold Thickness (mm)",
    "insulin": "Serum Insulin (mu U/mL)",
    "diabetes_pedigree_function": "Diabetes Pedigree Function",
    "pregnancies": "Number of Pregnancies",
    # Breast cancer
    "mean_radius": "Mean Tumour Radius (mm)",
    "mean_texture": "Mean Texture Score",
    "mean_perimeter": "Mean Tumour Perimeter (mm)",
    "mean_area": "Mean Tumour Area (mm²)",
    "mean_smoothness": "Mean Surface Smoothness",
    "mean_compactness": "Mean Compactness",
    "mean_concavity": "Mean Concavity",
    "mean_concave_points": "Mean Concave Points",
    "mean_symmetry": "Mean Symmetry",
    "worst_radius": "Worst Tumour Radius (mm)",
    "worst_texture": "Worst Texture Score",
    "worst_perimeter": "Worst Tumour Perimeter (mm)",
    "worst_area": "Worst Tumour Area (mm²)",
    "worst_smoothness": "Worst Surface Smoothness",
    # Parkinson's
    "MDVP_Fo_Hz": "Avg Vocal Fundamental Frequency (Hz)",
    "MDVP_Fhi_Hz": "Max Vocal Fundamental Frequency (Hz)",
    "MDVP_Flo_Hz": "Min Vocal Fundamental Frequency (Hz)",
    "MDVP_Jitter_pct": "Vocal Jitter (%)",
    "MDVP_Jitter_Abs": "Absolute Vocal Jitter",
    "MDVP_RAP": "Relative Average Perturbation",
    "MDVP_PPQ": "Five-Point Period Perturbation Quotient",
    "Jitter_DDP": "Average Absolute Difference of Differences (Jitter)",
    "MDVP_Shimmer": "Vocal Shimmer",
    "MDVP_Shimmer_dB": "Vocal Shimmer (dB)",
    "Shimmer_APQ3": "Three-Point Amplitude Perturbation Quotient",
    "Shimmer_APQ5": "Five-Point Amplitude Perturbation Quotient",
    "MDVP_APQ": "MDVP Amplitude Perturbation Quotient",
    "Shimmer_DDA": "Average Absolute Differences of Consecutive Shimmer",
    "NHR": "Noise-to-Harmonics Ratio",
    "HNR": "Harmonics-to-Noise Ratio",
    "RPDE": "Recurrence Period Density Entropy",
    "DFA": "Detrended Fluctuation Analysis",
    "spread1": "Nonlinear Frequency Variation (spread1)",
    "spread2": "Nonlinear Frequency Variation (spread2)",
    "D2": "D2 Nonlinear Dynamical Complexity",
    "PPE": "Pitch Period Entropy",
    # Liver
    "total_bilirubin": "Total Bilirubin (mg/dL)",
    "direct_bilirubin": "Direct Bilirubin (mg/dL)",
    "alkaline_phosphotase": "Alkaline Phosphatase (U/L)",
    "alamine_aminotransferase": "Alanine Aminotransferase / ALT (U/L)",
    "aspartate_aminotransferase": "Aspartate Aminotransferase / AST (U/L)",
    "total_proteins": "Total Proteins (g/dL)",
    # Shared with the CKD dataset. The two former entries ("Serum Albumin
    # (g/dL)" here, "Urine Albumin" under CKD) collided and the CKD wording
    # was shown for liver data as well, so a neutral label is used.
    "albumin": "Albumin",
    "albumin_globulin_ratio": "Albumin/Globulin Ratio",
    # Stroke
    "hypertension": "Hypertension",
    "heart_disease": "Heart Disease History",
    "avg_glucose_level": "Average Glucose Level (mg/dL)",
    "smoking_status": "Smoking Status",
    "work_type": "Work Type",
    "residence_type": "Residence Type",
    "ever_married": "Ever Married",
    # CKD ("blood_pressure", "albumin" and "hypertension" are defined above)
    "specific_gravity": "Urine Specific Gravity",
    "sugar": "Urine Sugar",
    "red_blood_cells": "Red Blood Cells in Urine",
    "pus_cell": "Pus Cells in Urine",
    "blood_glucose_random": "Random Blood Glucose (mg/dL)",
    "blood_urea": "Blood Urea (mg/dL)",
    "sodium": "Serum Sodium (mEq/L)",
    "haemoglobin": "Haemoglobin (g/dL)",
    "diabetes_mellitus": "Diabetes Mellitus",
    # Sepsis
    "HR": "Heart Rate (bpm)",
    "O2Sat": "Oxygen Saturation (%)",
    "Temp": "Body Temperature (°C)",
    "SBP": "Systolic Blood Pressure (mmHg)",
    "MAP": "Mean Arterial Pressure (mmHg)",
    "Resp": "Respiratory Rate (breaths/min)",
    "pH": "Arterial Blood pH",
    "Lactate": "Blood Lactate (mmol/L)",
    "Creatinine": "Serum Creatinine (mg/dL)",
    "WBC": "White Blood Cell Count (×10³/μL)",
    "Platelets": "Platelet Count (×10³/μL)",
    "Bilirubin_total": "Total Bilirubin (mg/dL)",
    # Orthopaedics
    "pelvic_incidence": "Pelvic Incidence (°)",
    "pelvic_tilt": "Pelvic Tilt (°)",
    "lumbar_lordosis_angle": "Lumbar Lordosis Angle (°)",
    "sacral_slope": "Sacral Slope (°)",
    "pelvic_radius": "Pelvic Radius (mm)",
    "degree_spondylolisthesis": "Degree of Spondylolisthesis (mm)",
    # Fetal health
    "baseline_value": "Fetal Heart Rate Baseline (bpm)",
    "accelerations": "Accelerations (per second)",
    "fetal_movement": "Fetal Movements (per second)",
    "uterine_contractions": "Uterine Contractions (per second)",
    "severe_decelerations": "Severe Decelerations (per second)",
    "prolongued_decelerations": "Prolonged Decelerations (per second)",
    "abnormal_short_term_variability": "Abnormal Short-Term Variability (%)",
    # Thyroid
    "TSH": "Thyroid Stimulating Hormone (mIU/L)",
    "T3": "Serum Triiodothyronine / T3 (ng/dL)",
    "TT4": "Total Thyroxine / T4 (μg/dL)",
    "T4U": "Thyroxine Utilisation Rate",
    "FTI": "Free Thyroxine Index",
    "T3_resin_uptake": "T3 Resin Uptake (%)",
    "total_serum_thyroxine": "Total Serum Thyroxine (μg/dL)",
    "max_abs_diff_TSH": "Max Absolute Difference in TSH",
    # Anaemia / haematology
    "mch": "Mean Corpuscular Haemoglobin (pg)",
    "mchc": "Mean Corpuscular Haemoglobin Concentration (g/dL)",
    "mcv": "Mean Corpuscular Volume (fL)",
    "rdw": "Red Cell Distribution Width (%)",
    "wbc": "White Blood Cell Count (×10³/μL)",
    "neutrophils": "Neutrophil Count (×10³/μL)",
    "lymphocytes": "Lymphocyte Count (×10³/μL)",
    # COPD / pulmonology
    "smoking_pack_years": "Smoking Pack-Years",
    "fev1_litres": "FEV1 — Forced Expiratory Volume in 1s (L)",
    "fvc_litres": "FVC — Forced Vital Capacity (L)",
    "fev1_fvc_ratio": "FEV1/FVC Ratio",
    "prior_exacerbations_year": "Prior COPD Exacerbations (per year)",
    "mrc_dyspnea_scale": "MRC Dyspnea Scale Score",
    "sgrq_score": "SGRQ Quality-of-Life Score",
    "copd_gold_stage": "COPD GOLD Stage",
    # Arrhythmia / ECG
    "QRS_duration": "QRS Duration (ms)",
    "PR_interval": "PR Interval (ms)",
    "QT_interval": "QT Interval (ms)",
    "T_interval": "T Wave Interval (ms)",
    "P_interval": "P Wave Interval (ms)",
    "QRS_axis": "QRS Axis (°)",
    "T_axis": "T Wave Axis (°)",
    "P_axis": "P Wave Axis (°)",
    "heart_rate": "Heart Rate (bpm)",
    # Radiology
    "view_position": "X-Ray View Position",
    "follow_up_number": "Follow-up Visit Number",
    "Finding_Label": "Radiological Finding",
    # Fetal health / CTG
    "light_decelerations": "Light Decelerations (per second)",
    "mean_value_short_term_variability": "Mean Short-Term Variability (ms)",
    "pct_time_abnormal_long_term_variability": "% Time with Abnormal Long-Term Variability",
    "mean_value_long_term_variability": "Mean Long-Term Variability (ms)",
    "histogram_mode": "CTG Histogram Mode",
    # Ophthalmology / diabetic retinopathy
    "quality_assessment": "Image Quality Assessment",
    "pre_screening": "Pre-Screening Result",
    "ma_detection_0.5": "Microaneurysm Detection (threshold 0.5)",
    "ma_detection_0.6": "Microaneurysm Detection (threshold 0.6)",
    "ma_detection_0.7": "Microaneurysm Detection (threshold 0.7)",
    "ma_detection_0.8": "Microaneurysm Detection (threshold 0.8)",
    "ma_detection_0.9": "Microaneurysm Detection (threshold 0.9)",
    "ma_detection_1.0": "Microaneurysm Detection (threshold 1.0)",
    "exudate_1": "Exudate Feature 1",
    "exudate_2": "Exudate Feature 2",
    "exudate_3": "Exudate Feature 3",
    "exudate_4": "Exudate Feature 4",
    "exudate_5": "Exudate Feature 5",
    "exudate_6": "Exudate Feature 6",
    "exudate_7": "Exudate Feature 7",
    "exudate_8": "Exudate Feature 8",
    "macula_od_distance": "Macula to Optic Disc Distance",
    "optic_disc_diameter": "Optic Disc Diameter",
    "am_fm_classification": "AM-FM Classification",
    # Dermatology
    "localization": "Lesion Localization",
    # Cervical cancer
    "number_of_sexual_partners": "Number of Sexual Partners",
    "first_sexual_intercourse_age": "Age at First Sexual Intercourse",
    "num_of_pregnancies": "Number of Pregnancies",
    "smokes_years": "Years of Smoking",
    "hormonal_contraceptives_years": "Years Using Hormonal Contraceptives",
    "iud_years": "Years Using IUD",
    "stds_number": "Number of STDs Diagnosed",
    "stds_condylomatosis": "STDs: Condylomatosis",
    "stds_cervical_condylomatosis": "STDs: Cervical Condylomatosis",
    "stds_hpv": "STDs: HPV",
    # Pharmacy / readmission
    "time_in_hospital": "Hospital Length of Stay (days)",
    "num_lab_procedures": "Number of Lab Procedures",
    "num_procedures": "Number of Procedures",
    "num_medications": "Number of Medications",
    "number_outpatient": "Number of Outpatient Visits",
    "number_emergency": "Number of Emergency Visits",
    "number_inpatient": "Number of Inpatient Visits",
    "number_diagnoses": "Number of Diagnoses",
    "max_glu_serum": "Max Glucose Serum Level",
    "A1Cresult": "HbA1c Test Result",
    "metformin": "Metformin Dosage",
    "change": "Change in Medication",
    # Sepsis / ICU
    "BaseExcess": "Base Excess (mEq/L)",
    "PaCO2": "Partial Pressure of CO2 (mmHg)",
    "Age": "Patient Age (years)",
    "Gender": "Patient Gender",
    # Mental health
    "number_of_children": "Number of Children",
    "income": "Annual Income",
    "dietary_habits": "Dietary Habits Score",
    "sleep_patterns": "Sleep Quality Score",
    "alcohol_consumption": "Alcohol Consumption Level",
    "physical_activity_level": "Physical Activity Level",
    "employment_status": "Employment Status",
    "history_substance_abuse": "History of Substance Abuse",
    "family_history_depression": "Family History of Depression",
    "chronic_medical_conditions": "Chronic Medical Conditions",
    "marital_status": "Marital Status",
    "education_level": "Education Level",
}
|
| 255 |
+
|
| 256 |
+
# One-sentence clinical rationale per raw feature id. Lookups elsewhere in this
# module use ``.get`` with a generic sentence as the default, so a feature
# without an entry here is still described.
TOP_FEATURE_NOTES: dict[str, str] = {
    "ejection_fraction": (
        "Ejection fraction is a well-established predictor of heart failure outcomes — "
        "values below 35% indicate severely reduced cardiac function."
    ),
    "serum_creatinine": (
        "Elevated serum creatinine reflects impaired renal clearance, "
        "which commonly co-occurs with and worsens heart failure prognosis."
    ),
    "glucose": (
        "Fasting glucose is the primary biochemical marker of diabetes risk "
        "and insulin resistance."
    ),
    "bmi": (
        "BMI is a validated surrogate for adiposity and a major modifiable "
        "risk factor for type 2 diabetes."
    ),
    "mean_radius": (
        "Tumour radius is closely correlated with malignancy — "
        "larger tumours are associated with more aggressive histology."
    ),
    "worst_area": (
        "Worst-case tumour area captures the most severe regional cellular "
        "abnormality within the biopsy sample."
    ),
    "TSH": (
        "TSH is the most sensitive marker of thyroid dysfunction — "
        "a raised TSH indicates hypothyroidism, "
        "while a suppressed TSH indicates hyperthyroidism."
    ),
    "Lactate": (
        "Elevated lactate is a hallmark of cellular hypoperfusion and is "
        "a key diagnostic criterion for septic shock."
    ),
    "HR": (
        "Heart rate elevation is an early physiological response to infection "
        "and correlates with sepsis severity."
    ),
    "pelvic_incidence": (
        "Pelvic incidence is a morphological parameter that determines lumbar "
        "lordosis compensation and is key to spinal biomechanics."
    ),
    "degree_spondylolisthesis": (
        "Degree of spondylolisthesis directly quantifies vertebral slip and is "
        "the primary determinant of clinical severity."
    ),
    "MDVP_Jitter_pct": (
        "Jitter measures cycle-to-cycle variation in vocal fundamental frequency — "
        "pathological values indicate Parkinson's-related vocal instability."
    ),
    "HNR": (
        "A reduced harmonics-to-noise ratio reflects increased vocal noise and "
        "turbulence characteristic of neurological voice disorders."
    ),
}
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
def _clinical_name(feature: str) -> str:
    """Return the clinician-readable label for a raw feature id.

    Ids listed in ``CLINICAL_NAME_MAP`` use the curated label; any other id
    is humanised by turning underscores into spaces and title-casing it.
    """
    humanised = " ".join(feature.split("_")).title()
    curated = CLINICAL_NAME_MAP.get(feature)
    return humanised if curated is None else curated
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
def _plain_language(feature: str, value: float, pctile: float) -> str:
    """Build the short "<label> <level> (<value>)" phrase for one feature.

    Args:
        feature: Raw feature id; rendered through ``_clinical_name``.
        value: Feature value, printed with two decimals.
        pctile: Position of the value as a fraction; it is compared against
            the cut-offs 0.25, 0.45, 0.55 and 0.75 to pick the wording.

    Returns:
        A phrase such as ``"<label> below normal (1.23)"``.
    """
    cname = _clinical_name(feature)
    # Upper bound (exclusive) of each band, in ascending order; anything at or
    # above the last bound is reported as "elevated".
    bands = (
        (0.25, "very low"),
        (0.45, "below normal"),
        (0.55, "normal"),
        (0.75, "above normal"),
    )
    level = "elevated"
    for upper_bound, wording in bands:
        if pctile < upper_bound:
            level = wording
            break
    return f"{cname} {level} ({value:.2f})"
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
class ExplainService:
|
| 295 |
+
"""SHAP-based explainability — global importance + per-patient waterfall + what-if probes."""
|
| 296 |
+
def _get_explainer(self, model: Any, X_train: np.ndarray, model_type: str) -> Any:
    """Pick and construct a SHAP explainer for the given model family.

    A new explainer is constructed on every call; nothing is stored on the
    instance.

    Args:
        model: Fitted estimator to explain.
        X_train: Training matrix, used as background data where needed.
        model_type: Model family name; matched case-insensitively.

    Returns:
        A ``(explainer, method)`` tuple. ``method`` is ``"shap_tree"``,
        ``"shap_linear"`` or ``"shap_kernel"``; when anything fails
        (including importing ``shap``) the result is
        ``(None, "permutation")``.
    """
    family = model_type.lower()
    try:
        import shap

        if family == "logistic_regression":
            return shap.LinearExplainer(model, X_train), "shap_linear"

        # Tree-based models (including XGBoost and LightGBM)
        if family in ("random_forest", "decision_tree", "xgboost", "lightgbm"):
            return shap.TreeExplainer(model), "shap_tree"

        # Everything else is explained through predict_proba against a small
        # background sample (at most 50 rows) to keep the run time down.
        background = shap.sample(X_train, min(50, len(X_train)))
        try:
            # NOTE(review): callers in this module use `.shap_values` and
            # `.expected_value`; confirm the explainer chosen by "auto"
            # provides both.
            generic = shap.Explainer(model.predict_proba, background, algorithm="auto")
        except Exception:
            generic = shap.KernelExplainer(model.predict_proba, background)
        return generic, "shap_kernel"
    except Exception as exc:
        logger.warning("SHAP explainer creation failed: %s — using permutation", exc)
        return None, "permutation"
|
| 316 |
+
|
| 317 |
+
def _shap_values_binary(
|
| 318 |
+
self, explainer: Any, method: str, X: np.ndarray, model: Any
|
| 319 |
+
) -> np.ndarray:
|
| 320 |
+
"""Return 2-D SHAP array (n_samples, n_features) for the positive class."""
|
| 321 |
+
import shap
|
| 322 |
+
try:
|
| 323 |
+
sv = explainer.shap_values(X)
|
| 324 |
+
if isinstance(sv, list) and len(sv) == 2:
|
| 325 |
+
return np.array(sv[1])
|
| 326 |
+
if isinstance(sv, np.ndarray):
|
| 327 |
+
if sv.ndim == 3:
|
| 328 |
+
return sv[:, :, 1]
|
| 329 |
+
return sv
|
| 330 |
+
return np.array(sv)
|
| 331 |
+
except Exception as exc:
|
| 332 |
+
logger.warning("SHAP value computation failed: %s — fallback", exc)
|
| 333 |
+
return self._permutation_importance(model, X)
|
| 334 |
+
|
| 335 |
+
def _permutation_importance(self, model: Any, X: np.ndarray) -> np.ndarray:
|
| 336 |
+
"""Rough fallback: feature std × coefficient magnitude."""
|
| 337 |
+
try:
|
| 338 |
+
if hasattr(model, "coef_"):
|
| 339 |
+
coef = np.abs(model.coef_[0] if model.coef_.ndim > 1 else model.coef_)
|
| 340 |
+
return np.outer(np.ones(len(X)), coef)
|
| 341 |
+
if hasattr(model, "feature_importances_"):
|
| 342 |
+
fi = model.feature_importances_
|
| 343 |
+
return np.outer(np.ones(len(X)), fi)
|
| 344 |
+
except Exception:
|
| 345 |
+
pass
|
| 346 |
+
return np.zeros((len(X), X.shape[1]))
|
| 347 |
+
|
| 348 |
+
def global_importance(
    self,
    model_id: str,
    model: Any,
    X_test: np.ndarray,
    y_test: np.ndarray,
    feature_names: list[str],
    X_train: np.ndarray,
    model_type: str,
    classes: list[str],
) -> GlobalExplainabilityResponse:
    """Step-6 endpoint — rank features by mean absolute SHAP value.

    At most the first 200 rows of ``X_test`` are explained. Features are
    ordered by mean |SHAP|, most influential first, and each is tagged with a
    direction taken from the sign of its mean SHAP value (within ±0.01 counts
    as "neutral").

    Args:
        model_id: Identifier echoed back in the response.
        model: Fitted estimator to explain.
        X_test: Evaluation rows.
        y_test: Not used by this method.
        feature_names: Column names aligned with ``X_test`` columns; missing
            names are rendered as ``feature_<index>``.
        X_train: Training matrix handed to the explainer factory.
        model_type: Model family name handed to the explainer factory.
        classes: Not used by this method.

    Returns:
        The ranked importances, the method label, the clinical note for the
        top feature, and ``explained_variance_pct`` — the share of total mean
        |SHAP| carried by the five highest-ranked features, in percent.
    """
    explainer, method = self._get_explainer(model, X_train, model_type)

    sample = X_test[:200]
    if explainer is None:
        sv = self._permutation_importance(model, sample)
        method = "permutation"
    else:
        sv = self._shap_values_binary(explainer, method, sample, model)

    mean_abs = np.mean(np.abs(sv), axis=0)
    mean_signed = np.mean(sv, axis=0)

    abs_sum = mean_abs.sum()
    total = abs_sum if abs_sum > 0 else 1.0  # avoid dividing by zero below
    ranking = np.argsort(mean_abs)[::-1]

    def signed_direction(mean_contribution: Any) -> str:
        """Translate a mean SHAP value into positive / negative / neutral."""
        if mean_contribution > 0.01:
            return "positive"
        if mean_contribution < -0.01:
            return "negative"
        return "neutral"

    items: list[FeatureImportanceItem] = []
    for idx in ranking:
        name = feature_names[idx] if idx < len(feature_names) else f"feature_{idx}"
        label = _clinical_name(name)
        items.append(
            FeatureImportanceItem(
                feature_name=name,
                clinical_name=label,
                importance=round(float(mean_abs[idx]), 6),
                direction=signed_direction(mean_signed[idx]),
                clinical_note=TOP_FEATURE_NOTES.get(
                    name, f"{label} influences the model's predictions."
                ),
            )
        )

    # Running share of the five highest-ranked features (fewer if the model
    # has fewer features), accumulated in rank order.
    top5_share = 0.0
    for idx in ranking[:5]:
        top5_share += float(mean_abs[idx]) / total

    top_name = items[0].feature_name if items else ""
    top_note = TOP_FEATURE_NOTES.get(
        top_name,
        f"{_clinical_name(top_name)} is the most influential variable in this model's decisions.",
    )

    return GlobalExplainabilityResponse(
        model_id=model_id,
        method=method,
        feature_importances=items,
        top_feature_clinical_note=top_note,
        explained_variance_pct=round(top5_share * 100, 1),
    )
|
| 414 |
+
|
| 415 |
+
def single_patient(
    self,
    model_id: str,
    model: Any,
    patient_idx: int,
    X_test: np.ndarray,
    feature_names: list[str],
    X_train: np.ndarray,
    model_type: str,
    classes: list[str],
    y_test: np.ndarray,
    scaler: Any = None,
) -> SinglePatientExplainResponse:
    """Explain one test-set prediction as a SHAP waterfall plus a summary.

    Args:
        model_id: Identifier echoed back in the response.
        model: Fitted estimator to explain.
        patient_idx: Row of ``X_test`` to explain.
        X_test: Evaluation rows, in the representation the model consumes.
        feature_names: Column names aligned with ``X_test`` columns; missing
            names are rendered as ``feature_<index>``.
        X_train: Training matrix; used for the explainer background and to
            place each patient value among the training values.
        model_type: Model family name handed to the explainer factory.
        classes: Class labels indexed by predicted class position.
        y_test: Not used by this method.
        scaler: Optional fitted scaler; when given, displayed feature values
            are inverse-transformed, falling back to the model-space values
            if that fails.

    Returns:
        The predicted class and probability, the SHAP base value, up to 15
        waterfall points ordered by absolute SHAP value, and a text summary
        built from the three strongest points.

    Raises:
        IndexError: If ``patient_idx`` is outside ``[0, len(X_test) - 1]``.
    """
    # Validate up front (same check and message as `what_if`) instead of
    # failing later on an empty slice, after the explainer has been built.
    n_test = len(X_test)
    if patient_idx < 0 or patient_idx >= n_test:
        raise IndexError(f"Patient index {patient_idx} out of range [0, {n_test - 1}]")

    explainer, method = self._get_explainer(model, X_train, model_type)

    x_patient = X_test[patient_idx : patient_idx + 1]

    # Inverse-transform to get clinical (unscaled) values for display
    x_patient_raw = x_patient[0]
    if scaler is not None:
        try:
            x_patient_raw = scaler.inverse_transform(x_patient)[0]
        except Exception as exc:
            logger.warning("Inverse-transform failed in single_patient: %s — using scaled values", exc)

    if explainer is not None:
        sv = self._shap_values_binary(explainer, method, x_patient, model)
    else:
        sv = self._permutation_importance(model, x_patient)

    shap_vals = sv[0] if sv.ndim > 1 else sv

    # Base value: class index 1 when the explainer reports one value per
    # class, otherwise the single value it reports; 0.5 when unavailable.
    base_value = 0.5
    try:
        expected = getattr(explainer, "expected_value", None)
        if expected is not None:
            expected = np.asarray(expected, dtype=float).ravel()
            if expected.size:
                base_value = float(expected[1] if expected.size > 1 else expected[0])
    except Exception as exc:
        logger.warning("Could not read SHAP expected_value: %s — using 0.5", exc)

    # Predicted probability
    prob_arr = self._model_predict_proba(model, x_patient)
    if prob_arr.shape[1] >= 2:
        pred_class_idx = int(np.argmax(prob_arr[0]))
        pred_prob = float(prob_arr[0, pred_class_idx])
    else:
        pred_class_idx = 0
        pred_prob = 0.5
    predicted_class = classes[pred_class_idx] if pred_class_idx < len(classes) else str(pred_class_idx)

    # Fraction of training rows strictly below the patient's value, per
    # feature; drives the "very low" … "elevated" wording.
    pctiles = np.mean(X_train < x_patient[0], axis=0)

    waterfall: list[SHAPWaterfallPoint] = []
    sorted_idx = np.argsort(np.abs(shap_vals))[::-1]
    for i in sorted_idx[:15]:
        fname = feature_names[i] if i < len(feature_names) else f"feature_{i}"
        sv_val = float(shap_vals[i])
        fval_raw = float(x_patient_raw[i]) if i < len(x_patient_raw) else float(x_patient[0, i])
        pct = float(pctiles[i]) if i < len(pctiles) else 0.5
        waterfall.append(SHAPWaterfallPoint(
            feature_name=fname,
            clinical_name=_clinical_name(fname),
            feature_value=round(fval_raw, 3),
            shap_value=round(sv_val, 5),
            direction="increases_risk" if sv_val > 0 else "decreases_risk",
            plain_language=_plain_language(fname, fval_raw, pct),
        ))

    # Clinical summary
    top3 = waterfall[:3]
    risk_factors = [w.plain_language for w in top3 if w.direction == "increases_risk"]
    protect_factors = [w.plain_language for w in top3 if w.direction == "decreases_risk"]
    summary_parts = [
        f"This patient was classified as '{predicted_class}' with a probability of {pred_prob:.1%}."
    ]
    if risk_factors:
        summary_parts.append(f"Key risk-increasing factors: {'; '.join(risk_factors)}.")
    if protect_factors:
        summary_parts.append(f"Protective factors: {'; '.join(protect_factors)}.")
    summary_parts.append(
        "These associations are derived from the training data and do not imply causation."
    )

    return SinglePatientExplainResponse(
        model_id=model_id,
        patient_index=patient_idx,
        predicted_class=predicted_class,
        predicted_probability=round(pred_prob, 4),
        base_value=round(base_value, 4),
        waterfall=waterfall,
        clinical_summary=" ".join(summary_parts),
    )
|
| 512 |
+
|
| 513 |
+
def _model_predict_proba(self, model: Any, X: np.ndarray) -> np.ndarray:
|
| 514 |
+
"""Proxy for the model's predict_proba that survives SHAP's background-sample workflow."""
|
| 515 |
+
if hasattr(model, "predict_proba"):
|
| 516 |
+
return model.predict_proba(X)
|
| 517 |
+
if hasattr(model, "decision_function"):
|
| 518 |
+
scores = model.decision_function(X)
|
| 519 |
+
if scores.ndim == 1:
|
| 520 |
+
p = 1 / (1 + np.exp(-scores))
|
| 521 |
+
return np.column_stack([1 - p, p])
|
| 522 |
+
return np.array([[0.5, 0.5]])
|
| 523 |
+
|
| 524 |
+
# ------------------------------------------------------------------
|
| 525 |
+
# What-If analysis
|
| 526 |
+
# ------------------------------------------------------------------
|
| 527 |
+
def what_if(
    self,
    model_id: str,
    model: Any,
    patient_index: int,
    feature_name: str,
    new_value: float,
    X_test: np.ndarray,
    feature_names: list[str],
    scaler: Any | None,
) -> WhatIfResponse:
    """Simulate changing a single feature and return the probability shift.

    The patient's row is taken from ``X_test`` (model space). When a
    ``scaler`` is given, the row is mapped back to clinical space, the
    feature is replaced by ``new_value`` there, and the row is re-scaled;
    if that round-trip fails, ``new_value`` is written into the model-space
    row directly as a best-effort fallback.

    The reported probabilities always refer to the *same* class before and
    after the change: class 1 for binary models, otherwise the class the
    model originally predicted for this patient.

    Args:
        model_id: Identifier of the model (not used in the computation).
        model: Fitted estimator scored via ``self._model_predict_proba``.
        patient_index: Row position in ``X_test``.
        feature_name: Name of the feature to change; must be in
            ``feature_names``.
        new_value: Replacement value, in clinical (unscaled) units when a
            scaler is supplied.
        X_test: 2-D test matrix the patient row is read from.
        feature_names: Column names aligned with the columns of ``X_test``.
        scaler: Optional object exposing ``inverse_transform`` and
            ``transform``.

    Returns:
        A ``WhatIfResponse`` with original/new value, original/new
        probability, the shift and its direction.

    Raises:
        ValueError: If ``feature_name`` is not in ``feature_names``.
        IndexError: If ``patient_index`` is outside ``X_test``.
    """
    if feature_name not in feature_names:
        raise ValueError(f"Feature '{feature_name}' not found. Available: {feature_names}")

    n_test = len(X_test)
    if patient_index < 0 or patient_index >= n_test:
        raise IndexError(f"Patient index {patient_index} out of range [0, {n_test - 1}]")

    feat_idx = feature_names.index(feature_name)

    # Original row (already scaled if a scaler was applied during training).
    original_row = X_test[patient_index : patient_index + 1].copy()

    # Defaults used when there is no scaler or the scaler round-trip fails:
    # report the model-space value and inject the new value directly.
    original_clinical = float(original_row[0, feat_idx])
    modified_row = original_row.copy()
    modified_row[0, feat_idx] = new_value

    if scaler is not None:
        try:
            # Copy so that editing the clinical row can never alias the
            # original model-space row.
            clinical_row = np.array(scaler.inverse_transform(original_row))
            original_clinical = float(clinical_row[0, feat_idx])
            clinical_row[0, feat_idx] = new_value
            modified_row = scaler.transform(clinical_row)
        except Exception:
            # Deliberate best-effort: keep whatever defaults are still in
            # place rather than failing the whole what-if request.
            pass

    # Predict probabilities
    original_probs = self._model_predict_proba(model, original_row)
    modified_probs = self._model_predict_proba(model, modified_row)

    # Track one fixed class so the shift is a like-for-like comparison.
    # Comparing the two maxima would mix different classes whenever the
    # modification flips the predicted class.
    if original_probs.shape[1] == 2:
        class_idx = 1
    else:
        class_idx = int(np.argmax(original_probs[0]))
    original_prob = float(original_probs[0, class_idx])
    new_prob = float(modified_probs[0, class_idx])

    shift = new_prob - original_prob

    if abs(shift) < 1e-6:
        direction = "no_change"
    elif shift > 0:
        direction = "increased_risk"
    else:
        direction = "decreased_risk"

    return WhatIfResponse(
        feature_name=feature_name,
        original_value=round(float(original_clinical), 4),
        new_value=round(new_value, 4),
        original_prob=round(original_prob, 4),
        new_prob=round(new_prob, 4),
        shift=round(shift, 4),
        direction=direction,
    )
|
| 605 |
+
|
| 606 |
+
# ------------------------------------------------------------------
|
| 607 |
+
# Sample patients for dropdown picker
|
| 608 |
+
# ------------------------------------------------------------------
|
| 609 |
+
def sample_patients(
    self,
    model_id: str,
    model: Any,
    X_test: np.ndarray,
) -> SamplePatientsResponse:
    """Pick up to three representative test patients: low, medium and high risk.

    The risk score is the class-1 probability for two-column outputs and
    the largest class probability otherwise. "Low" and "high" are the
    extremes of that score; "medium" is the patient closest to 0.5, or the
    median-ranked patient when the closest one is already taken. High is
    only picked with at least two patients, medium with at least three.

    Returns:
        A ``SamplePatientsResponse`` whose patients are ordered
        low, medium, high (empty when ``X_test`` has no rows).
    """
    total = len(X_test)
    if total == 0:
        return SamplePatientsResponse(model_id=model_id, patients=[])

    proba = self._model_predict_proba(model, X_test)
    risk = proba[:, 1] if proba.shape[1] == 2 else np.max(proba, axis=1)
    ranking = np.argsort(risk)

    # Map each risk level to the chosen row index.
    chosen: dict[str, int] = {"low": int(ranking[0])}
    if total >= 2:
        chosen["high"] = int(ranking[-1])
    if total >= 3:
        middle = int(np.argmin(np.abs(risk - 0.5)))
        if middle in (chosen["low"], chosen["high"]):
            # Closest-to-0.5 collides with an extreme: use the median rank.
            middle = int(ranking[total // 2])
        chosen["medium"] = middle

    # Emit in display order directly instead of sorting afterwards.
    patients: list[SamplePatient] = []
    for level in ("low", "medium", "high"):
        if level not in chosen:
            continue
        idx = chosen[level]
        prob = float(risk[idx])
        label = level.capitalize()
        patients.append(
            SamplePatient(
                index=idx,
                risk_level=level,
                probability=round(prob, 4),
                summary=f"Patient #{idx} — {label} Risk ({prob:.0%})",
            )
        )

    return SamplePatientsResponse(model_id=model_id, patients=patients)
|
app/services/insight_service.py
ADDED
|
@@ -0,0 +1,607 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LLM-powered clinical insight generation.
|
| 2 |
+
|
| 3 |
+
Provider chain: MedGemma (Vertex AI) → Gemini (Google AI) → static template fallback.
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
import asyncio
|
| 8 |
+
import json
|
| 9 |
+
import logging
|
| 10 |
+
import os
|
| 11 |
+
import random
|
| 12 |
+
from typing import Any
|
| 13 |
+
|
| 14 |
+
import httpx
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
# Timeout per LLM call (seconds). Gemma 4 is a reasoning model that emits
|
| 19 |
+
# chain-of-thought tokens before the answer, so single calls can legitimately
|
| 20 |
+
# take 60–90s on the ethics prompt. 200s leaves a very generous ceiling for
|
| 21 |
+
# the long-tail cases and rare upstream slowness.
|
| 22 |
+
_LLM_TIMEOUT = 200.0
|
| 23 |
+
|
| 24 |
+
# Retry transient Gemini failures (timeouts, 429, 5xx). One retry is enough
|
| 25 |
+
# in practice; keeping the count at 1 bounds the worst-case endpoint time
|
| 26 |
+
# within the frontend axios budget (450s).
|
| 27 |
+
_MAX_RETRIES = 1
|
| 28 |
+
_RETRY_BASE_DELAY = 1.5
|
| 29 |
+
|
| 30 |
+
# HTTP status codes worth retrying (rate limit + server errors).
|
| 31 |
+
_RETRY_STATUS_CODES = {429, 500, 502, 503, 504}
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _build_column_stats_block(context: dict) -> str:
|
| 35 |
+
"""Build feature statistics section for prompts."""
|
| 36 |
+
stats = context.get("column_statistics", [])
|
| 37 |
+
if not stats:
|
| 38 |
+
return ""
|
| 39 |
+
lines = "FEATURE STATISTICS (training set distributions):\n"
|
| 40 |
+
for cs in stats:
|
| 41 |
+
if "mean" in cs:
|
| 42 |
+
lines += f" {cs['name']}: mean={cs['mean']}, std={cs['std']}, range=[{cs['min']}, {cs['max']}]\n"
|
| 43 |
+
else:
|
| 44 |
+
lines += f" {cs['name']}: (statistics unavailable)\n"
|
| 45 |
+
return lines + "\n"
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _build_comparison_block(context: dict) -> str:
|
| 49 |
+
"""Build compared models section for prompts."""
|
| 50 |
+
models = context.get("compared_models", [])
|
| 51 |
+
if not models:
|
| 52 |
+
return ""
|
| 53 |
+
current = context.get("model_type", "unknown")
|
| 54 |
+
lines = "MODEL COMPARISON (other models trained on same dataset):\n"
|
| 55 |
+
for m in models:
|
| 56 |
+
lines += (
|
| 57 |
+
f" - {m['model_type']}: AUC={m['auc_roc']:.3f}, "
|
| 58 |
+
f"Acc={m['accuracy']:.3f}, Sens={m['sensitivity']:.3f}, "
|
| 59 |
+
f"F1={m['f1_score']:.3f}, MCC={m['mcc']:.3f}\n"
|
| 60 |
+
)
|
| 61 |
+
lines += f"\n The model being assessed is: {current}.\n"
|
| 62 |
+
lines += f" There are {len(models)} models total. Reference ALL of them by name with their key metrics.\n"
|
| 63 |
+
lines += " Compare the current model's strengths and weaknesses against each alternative.\n\n"
|
| 64 |
+
return lines
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def _build_raw_columns_block(context: dict) -> str:
|
| 68 |
+
"""Build raw dataset column overview (from Step 2 explore)."""
|
| 69 |
+
cols = context.get("raw_column_meta", [])
|
| 70 |
+
if not cols:
|
| 71 |
+
return ""
|
| 72 |
+
row_count = context.get("row_count_original", "?")
|
| 73 |
+
lines = f"RAW DATASET OVERVIEW ({row_count} rows before preprocessing):\n"
|
| 74 |
+
for c in cols:
|
| 75 |
+
role = "TARGET" if c.get("is_target") else "feature"
|
| 76 |
+
missing = f", missing={c['missing_count']} ({c['missing_pct']}%)" if c["missing_count"] > 0 else ""
|
| 77 |
+
samples = ", ".join(c.get("sample_values", []))
|
| 78 |
+
lines += (
|
| 79 |
+
f" {c['name']} [{role}]: dtype={c['dtype']}, "
|
| 80 |
+
f"unique={c['unique_count']}{missing}, "
|
| 81 |
+
f"samples=[{samples}]\n"
|
| 82 |
+
)
|
| 83 |
+
lines += "\n"
|
| 84 |
+
return lines
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def _build_sample_patients_block(context: dict) -> str:
|
| 88 |
+
"""Build sample patient rows for LLM grounding."""
|
| 89 |
+
patients = context.get("sample_patients", [])
|
| 90 |
+
if not patients:
|
| 91 |
+
return ""
|
| 92 |
+
lines = "SAMPLE PATIENTS FROM TEST SET (real data, not synthetic):\n"
|
| 93 |
+
for i, row in enumerate(patients):
|
| 94 |
+
outcome = row.pop("_actual_outcome", "?")
|
| 95 |
+
vals = ", ".join(f"{k}={v}" for k, v in row.items())
|
| 96 |
+
lines += f" Patient {i+1}: {vals} → actual outcome: {outcome}\n"
|
| 97 |
+
row["_actual_outcome"] = outcome # restore
|
| 98 |
+
lines += " Use these real patient profiles to ground your clinical reasoning.\n\n"
|
| 99 |
+
return lines
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def _build_ethics_prompt(context: dict) -> str:
    """Build a structured prompt with full clinical context for ethics/bias insight.

    The prompt is the concatenation of four parts:

    * a data block (domain, dataset, model, metrics, SHAP importances,
      subgroup fairness, bias warnings, overfitting flag),
    * the model-comparison block,
    * an instruction block, which switches to the multi-model wording when
      more than one entry is present in ``context["compared_models"]``,
    * fixed data and format rules.

    Missing context keys fall back to placeholder text such as "N/A",
    "unknown" or "?".

    Every value interpolated into an f-string is computed beforehand, so no
    replacement field contains a backslash; that keeps this function
    importable on Python versions older than 3.12.
    """
    specialty = context.get("specialty_name", "Unknown")
    prediction_task = context.get("what_ai_predicts", "clinical outcome")
    clinical_bg = context.get("clinical_context", "")
    model_type = context.get("model_type", "unknown")
    features = context.get("feature_names", [])
    target = context.get("target_variable", "outcome")
    classes = context.get("classes", [])

    # Model hyperparameters
    params = context.get("model_params", {})
    params_block = ", ".join(f"{k}={v}" for k, v in params.items()) if params else "defaults"

    # Class distribution in training set
    class_dist = context.get("class_distribution_train", {})
    dist_block = ", ".join(f"{k}: {v}" for k, v in class_dist.items()) if class_dist else "unknown"

    # Confusion matrix (only the binary TP/FP/FN/TN layout is spelled out)
    cm = context.get("confusion_matrix", {})
    if "TP" in cm:
        cm_block = f"TP={cm['TP']}, FP={cm['FP']}, FN={cm['FN']}, TN={cm['TN']}"
    else:
        cm_block = "multiclass (see subgroup data)"

    metrics_block = (
        f" Accuracy: {context.get('accuracy', 'N/A')}\n"
        f" Sensitivity: {context.get('sensitivity', 'N/A')} (recall — how many true positives found)\n"
        f" Specificity: {context.get('specificity', 'N/A')}\n"
        f" Precision: {context.get('precision', 'N/A')}\n"
        f" F1 Score: {context.get('f1_score', 'N/A')}\n"
        f" AUC-ROC: {context.get('auc_roc', 'N/A')}\n"
        f" MCC: {context.get('mcc', 'N/A')}\n"
        f" Train Acc: {context.get('train_accuracy', 'N/A')}\n"
        f" CV Mean: {context.get('cv_mean', 'N/A')} (std: {context.get('cv_std', 'N/A')})\n"
        f" Optimal threshold: {context.get('optimal_threshold', 0.5)}\n"
        f" Confusion matrix: {cm_block}\n"
    )

    # Subgroup fairness lines
    subgroup_rows = []
    for sg in context.get("subgroup_details", []):
        row = (
            f" - {sg['group']}: sensitivity={sg['sensitivity']:.1%}, "
            f"accuracy={sg['accuracy']:.1%}, n={sg['sample_size']}, "
            f"status={sg['status']}"
        )
        if sg.get("status_reason"):
            row += f" ({sg['status_reason']})"
        subgroup_rows.append(row + "\n")
    bias_lines = "".join(subgroup_rows)

    warnings_block = "".join(
        f" - {w['group']}: {w['metric']} gap = {w['gap']:.1%}\n"
        for w in context.get("bias_warnings", [])
    )
    # Hoisted out of the f-string: the literal contains a backslash escape.
    warnings_section = warnings_block if warnings_block else " None detected\n"

    # SHAP / Feature importance
    direction_labels = {"positive": "increases risk", "negative": "decreases risk"}
    fi_block = "".join(
        f" {fi['importance']:.3f} {fi['clinical_name']} "
        f"({direction_labels.get(fi['direction'], 'neutral')})\n"
        for fi in context.get("feature_importances", [])
    )

    shap_note = context.get("top_feature_clinical_note", "")
    explained_pct = context.get("explained_variance_top5_pct", 0)

    if context.get("overfitting_warning"):
        overfitting_note = "YES (train={} vs test={})".format(
            context.get("train_accuracy", "?"), context.get("accuracy", "?")
        )
    else:
        overfitting_note = "No significant gap"

    feature_list = ", ".join(features)

    # --- DATA BLOCK (always present) ---
    data_block = (
        f"CLINICAL DOMAIN: {specialty}\n"
        f"PREDICTION TASK: {prediction_task}\n"
        f"TARGET VARIABLE: '{target}' with classes: {classes}\n"
        f"DATA SOURCE: {context.get('data_source', 'unknown')}\n"
        f"CLINICAL BACKGROUND: {clinical_bg}\n\n"
        f"{_build_raw_columns_block(context)}"
        f"DATASET (after preprocessing):\n"
        f" Features ({len(features)}): {feature_list}\n"
        f" Training samples: {context.get('train_size', '?')}\n"
        f" Test samples: {context.get('test_size', '?')}\n"
        f" Class distribution (train): {dist_block}\n"
        f" SMOTE applied: {context.get('use_smote', False)}\n"
        f" Normalization: {context.get('normalization', 'N/A')}\n\n"
        f"{_build_column_stats_block(context)}"
        f"{_build_sample_patients_block(context)}"
        f"CURRENT MODEL: {model_type}\n"
        f" Hyperparameters: {params_block}\n"
        f" Training time: {context.get('training_time_ms', 'N/A')} ms\n\n"
        f"PERFORMANCE:\n{metrics_block}\n"
        f"FEATURE IMPORTANCE (SHAP — {context.get('shap_method', 'N/A')}):\n"
        f" Top 5 features explain {explained_pct:.1f}% of model decisions.\n"
        f"{fi_block}"
        f" Clinical note: {shap_note}\n\n"
        f"SUBGROUP FAIRNESS:\n"
        f" Overall sensitivity: {context.get('overall_sensitivity', 'N/A')}\n"
        f"{bias_lines}\n"
        f"BIAS WARNINGS:\n{warnings_section}\n"
        f"OVERFITTING: {overfitting_note}\n\n"
    )

    # --- COMPARISON BLOCK (dynamic) ---
    comparison_block = _build_comparison_block(context)

    # --- INSTRUCTION BLOCK (adapts to available data) ---
    has_comparison = len(context.get("compared_models", [])) > 1

    if has_comparison:
        instruction = (
            "You have data from MULTIPLE models trained on the same clinical dataset. "
            "Write an insightful clinical analysis (400-550 words) in markdown.\n\n"
            "## Overall Verdict\n"
            "Give a verdict: 🟢 Deployable with monitoring, 🟡 Needs improvement, or 🔴 Not ready. "
            "Name the best model and explain WHY it wins. "
            "Use the sample patient data to illustrate — e.g., 'Patient 1 (age=75, EF=20%) died and was correctly flagged, "
            "but Patient 3 with similar risk factors was missed.'\n\n"
            "## Model Comparison\n"
            "Create a clear ranking of ALL models. For each one:\n"
            " - Name, AUC-ROC, sensitivity, accuracy (copy exact values from MODEL COMPARISON above)\n"
            " - One-line strength and one-line weakness\n"
            "Explain what the ranking reveals about the dataset — why do certain model families perform better?\n\n"
            "## Data & Feature Insights\n"
            "Analyze the feature statistics and sample patients together:\n"
            " - Are features clinically meaningful for this prediction task?\n"
            " - Any red flags? (data leakage, extreme ranges, suspicious correlations)\n"
            " - What do the SHAP importances + actual patient profiles reveal?\n"
            " - Class imbalance impact on results?\n\n"
            f"## Recommendations for {specialty}\n"
            "3-4 numbered, specific, actionable recommendations tied to the comparison results.\n\n"
        )
    else:
        instruction = (
            f"You have one {model_type} model trained for {prediction_task}. "
            "Write an insightful clinical analysis (300-400 words) in markdown.\n\n"
            "## Overall Verdict\n"
            "Is this model ready? Verdict: 🟢 Deployable with monitoring, 🟡 Needs improvement, or 🔴 Not ready. "
            "Use sample patient data to illustrate real impact — show how specific patients would be affected.\n\n"
            "## Data & Feature Insights\n"
            "Analyze features, their distributions, and SHAP importances:\n"
            " - Are the top features clinically sound for this domain?\n"
            " - Any suspicious patterns? (data leakage, features that shouldn't be available at prediction time)\n"
            " - What do the sample patient profiles reveal about model behavior?\n"
            " - Subgroup fairness: which patients are most at risk of being missed?\n\n"
            f"## Recommendations for {specialty}\n"
            "3-4 numbered, actionable recommendations tied to THIS model's results.\n\n"
        )

    rules = (
        "STRICT DATA RULES — VIOLATIONS WILL INVALIDATE THE ASSESSMENT:\n"
        "- NEVER invent, estimate, or round any number. Every metric you cite MUST appear exactly in the data above.\n"
        "- If you write a percentage, accuracy, sensitivity, AUC, or any number — it must be copy-pasted from the data.\n"
        "- If you mention a patient, use their exact feature values from SAMPLE PATIENTS.\n"
        "- If a piece of data is not provided above, say 'not available' — do NOT fabricate it.\n"
        "- You may provide clinical INTERPRETATION of the numbers, but the numbers themselves must be verbatim.\n\n"
        "FORMAT RULES:\n"
        "- Use markdown: **bold** key metrics, bullet points, numbered lists\n"
        "- Be direct and clinical, not academic\n"
        "- Focus on insights a clinician would find genuinely valuable\n"
    )

    return data_block + comparison_block + instruction + rules
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
def _build_case_study_prompt(context: dict) -> str:
    """Build prompt for case studies tied to this model's domain and weaknesses.

    The prompt summarises the model (type, specialty, task, features), its
    top five feature importances, headline metrics, FN/FP counts when the
    confusion matrix has an ``"FN"`` key, and every subgroup whose status is
    not ``"acceptable"``. It then asks the LLM for exactly three case
    studies as a bare JSON array with a fixed set of keys.

    Every value interpolated into an f-string is computed beforehand, so no
    replacement field contains a backslash; that keeps this function
    importable on Python versions older than 3.12.
    """
    specialty = context.get("specialty_name", "Unknown")
    prediction_task = context.get("what_ai_predicts", "clinical outcome")
    features = context.get("feature_names", [])
    model_type = context.get("model_type", "unknown")

    # Subgroups the fairness analysis did not mark as acceptable.
    weakness_block = "".join(
        f" - {sg['group']}: sensitivity={sg['sensitivity']:.1%}, status={sg['status']}\n"
        for sg in context.get("subgroup_details", [])
        if sg.get("status") != "acceptable"
    )
    weakness_section = weakness_block if weakness_block else " None identified\n"

    # Lower-case the feature names once, then probe for demographic columns.
    lowered_features = {name.lower() for name in features}
    has_demo_features = any(
        tag in lowered_features for tag in ("sex", "gender", "age", "race", "ethnicity")
    )
    if has_demo_features:
        demo_note = (
            "Demographic features present: model uses patient demographics (sex/age) "
            "which creates fairness risk."
        )
    else:
        demo_note = "No demographic features in model."

    # Top driving features
    top_features_block = "".join(
        f" - {fi['clinical_name']} (importance: {fi['importance']:.3f}, {fi['direction']})\n"
        for fi in context.get("feature_importances", [])[:5]
    )
    top_features_section = top_features_block if top_features_block else " Not available\n"

    cm = context.get("confusion_matrix", {})
    cm_block = f"FN={cm.get('FN', '?')}, FP={cm.get('FP', '?')}" if "FN" in cm else ""

    feature_list = ", ".join(features)

    return (
        f"A {model_type} model was trained in {specialty} "
        f"to predict: {prediction_task}.\n\n"
        f"Features used: {feature_list}\n"
        f"{demo_note}\n\n"
        f"TOP DRIVING FEATURES (SHAP):\n{top_features_section}\n"
        f"MODEL WEAKNESSES:\n"
        f" Accuracy: {context.get('accuracy', 'N/A')}, Sensitivity: {context.get('sensitivity', 'N/A')}, AUC: {context.get('auc_roc', 'N/A')}\n"
        f" {cm_block}\n"
        f" Subgroups at risk:\n{weakness_section}\n"
        f"{_build_column_stats_block(context)}"
        f"{_build_sample_patients_block(context)}"
        f"{_build_comparison_block(context)}"
        "Generate exactly 3 real-world AI failure case studies RELEVANT to:\n"
        f" - The clinical domain: {specialty}\n"
        " - The specific weaknesses listed above\n"
        " - The type of bias or error this model is susceptible to\n\n"
        "For each case, provide a JSON object with these exact keys:\n"
        ' "title": specific real incident title,\n'
        f' "specialty": medical specialty (prefer {specialty} or related),\n'
        ' "year": integer 2015-2024,\n'
        ' "severity": "failure" | "near_miss" | "prevention",\n'
        ' "what_happened": 2-3 factual sentences,\n'
        ' "impact": 2-3 sentences with numbers on patient impact,\n'
        f' "lesson": 2-3 sentences tying back to THIS {model_type} model\'s weaknesses\n\n'
        "Return ONLY a JSON array of 3 objects. No markdown, no explanation, no code fences.\n"
    )
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
def _strip_markdown(text: str) -> str:
|
| 314 |
+
"""Remove common markdown formatting from LLM output."""
|
| 315 |
+
import re
|
| 316 |
+
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) # **bold**
|
| 317 |
+
text = re.sub(r'\*(.+?)\*', r'\1', text) # *italic*
|
| 318 |
+
text = re.sub(r'^#{1,4}\s+', '', text, flags=re.MULTILINE) # headings
|
| 319 |
+
return text.strip()
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
def _build_eu_ai_act_prompt(context: dict) -> str:
    """Compose the prompt that asks the LLM to enrich EU AI Act checklist items.

    The prompt gives a one-line model summary, headline metrics, the
    feature list, SHAP/overfitting/bias indicators and the column
    statistics, then lists every entry of ``context["eu_ai_act_items"]``
    (id, text, article) and requests a JSON array with one
    ``{"id", "enriched_description"}`` object per item, in order.
    """
    specialty = context.get("specialty_name", "Unknown")
    model_type = context.get("model_type", "unknown")
    prediction_task = context.get("what_ai_predicts", "clinical outcome")

    items_block = "".join(
        f' - id: "{entry["id"]}", text: "{entry["text"]}", article: "{entry["article"]}"\n'
        for entry in context.get("eu_ai_act_items", [])
    )

    sections = [
        f"A {model_type} model in {specialty} predicts: {prediction_task}.\n\n",
        f"Model metrics: Accuracy={context.get('accuracy', 'N/A')}, ",
        f"Sensitivity={context.get('sensitivity', 'N/A')}, ",
        f"AUC-ROC={context.get('auc_roc', 'N/A')}, ",
        f"MCC={context.get('mcc', 'N/A')}\n",
        f"Features: {', '.join(context.get('feature_names', []))}\n",
        f"SHAP top feature: {context.get('top_feature_clinical_note', 'N/A')}\n",
        f"Explained variance (top 5): {context.get('explained_variance_top5_pct', 0):.1f}%\n",
        f"Overall sensitivity: {context.get('overall_sensitivity', 'N/A')}\n",
        f"Overfitting: {'YES' if context.get('overfitting_warning') else 'No'}\n",
        f"Bias warnings: {len(context.get('bias_warnings', []))} detected\n\n",
        _build_column_stats_block(context),
        "EU AI ACT COMPLIANCE ITEMS to enrich:\n",
        f"{items_block}\n",
        "For each item, write a model-specific description (2-3 sentences) that:\n",
        "- References actual metrics, features, or findings from THIS model\n",
        "- Explains the compliance status in concrete terms\n",
        "- Is written for a clinician, not a lawyer\n\n",
        'Return ONLY a JSON array of objects with keys: "id", "enriched_description"\n',
        "Return exactly one object per item above, in the same order.\n",
        "No markdown, no explanation, no code fences.\n",
    ]
    return "".join(sections)
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
class InsightService:
|
| 358 |
+
"""Generates clinical insights using MedGemma or Gemini with template fallback."""
|
| 359 |
+
|
| 360 |
+
def __init__(self) -> None:
    """Load provider settings from environment variables and pick a provider.

    Reads the Vertex AI MedGemma settings (``GOOGLE_CLOUD_PROJECT``,
    ``VERTEX_AI_LOCATION``, ``MEDGEMMA_ENDPOINT_ID``) and the Gemini
    settings (``GEMINI_API_KEY``, ``GEMINI_MODEL``), then stores the result
    of ``_detect_provider()`` and logs it.
    """
    env = os.environ

    # Vertex AI MedGemma config
    self._vertex_project = env.get("GOOGLE_CLOUD_PROJECT", "")
    self._vertex_location = env.get("VERTEX_AI_LOCATION", "us-central1")
    self._medgemma_endpoint = env.get("MEDGEMMA_ENDPOINT_ID", "")

    # Gemini API config
    self._gemini_api_key = env.get("GEMINI_API_KEY", "")
    self._gemini_model = env.get("GEMINI_MODEL", "gemini-2.5-flash")

    self._provider = self._detect_provider()
    logger.info("InsightService initialized — provider: %s", self._provider)
|
| 373 |
+
|
| 374 |
+
def _detect_provider(self) -> str:
|
| 375 |
+
"""Return the provider name based on available API keys / endpoints."""
|
| 376 |
+
if self._medgemma_endpoint and self._vertex_project:
|
| 377 |
+
return "medgemma"
|
| 378 |
+
if self._gemini_api_key:
|
| 379 |
+
return "gemini"
|
| 380 |
+
return "template"
|
| 381 |
+
|
| 382 |
+
async def generate_ethics_insight(self, context: dict) -> dict[str, Any]:
    """Ask the LLM for the ethics/bias assessment of the model in ``context``.

    Builds the ethics prompt from ``context`` and forwards it, together
    with a fixed system message, to ``self._call_llm`` under the
    ``"ethics"`` task label. The awaited result is returned unchanged.
    """
    user_prompt = _build_ethics_prompt(context)
    system_message = (
        "You are a clinical AI safety specialist reviewing ML models in healthcare. "
        "CRITICAL: You must ONLY cite numbers that appear in the provided data. "
        "Never invent, estimate, approximate, or round any metric. "
        "If a number is not in the data, say 'not available'. "
        "You provide clinical interpretation of real metrics — you do not generate synthetic data. "
        "Be direct, evidence-based, and clinically insightful."
    )
    response = await self._call_llm(user_prompt, "ethics", system_message)
    return response
|
| 394 |
+
|
| 395 |
+
async def generate_case_studies(self, context: dict) -> dict[str, Any]:
    """Ask the LLM for case studies and attach them as ``case_studies``.

    Args:
        context: Data handed to ``_build_case_study_prompt``.

    Returns:
        The ``_call_llm`` result with an added ``"case_studies"`` key: the
        parsed non-empty JSON array on success, otherwise an empty list (the
        template source, unparseable output, or an empty/non-list payload).
    """
    system = " ".join((
        "You are a clinical AI safety educator.",
        "Generate domain-relevant AI failure case studies tied to this model's real weaknesses.",
        "When referencing model metrics (sensitivity, accuracy, etc.), use ONLY the exact values from the provided data.",
        "The scenarios are illustrative but all cited numbers must come from the actual model data.",
        "Return only valid JSON.",
    ))
    prompt = _build_case_study_prompt(context)
    result = await self._call_llm(prompt, "case_studies", system)

    if result["source"] != "template":
        try:
            import re

            payload = result["text"].strip()
            # Unwrap a markdown code fence when the model added one.
            fenced = re.search(r'```(?:json)?\s*\n?(.*?)```', payload, re.DOTALL)
            if fenced:
                payload = fenced.group(1).strip()
            # Keep only the outermost [...] span; the model may add prose
            # before or after the array.
            first, last = payload.find("["), payload.rfind("]")
            if -1 not in (first, last):
                payload = payload[first:last + 1]
            parsed = json.loads(payload)
            if isinstance(parsed, list) and parsed:
                result["case_studies"] = parsed
                return result
        except (json.JSONDecodeError, IndexError, ValueError) as exc:
            logger.warning("Failed to parse case studies JSON from LLM: %s", exc)

    # Fallback: return empty so frontend uses existing static cases
    result["case_studies"] = []
    return result
|
| 432 |
+
|
| 433 |
+
async def generate_eu_ai_act_insights(self, context: dict) -> dict[str, Any]:
    """Ask the LLM for EU AI Act compliance text and attach it as ``items``.

    Args:
        context: Data handed to ``_build_eu_ai_act_prompt``.

    Returns:
        The ``_call_llm`` result with an added ``"items"`` key: the parsed
        non-empty JSON array on success, otherwise an empty list.
    """
    system = " ".join([
        "You are a regulatory compliance specialist for the EU AI Act.",
        "You write model-specific compliance assessments for healthcare AI systems.",
        "Reference actual metrics and findings. Return only valid JSON.",
    ])
    prompt = _build_eu_ai_act_prompt(context)
    result = await self._call_llm(prompt, "eu_ai_act", system)

    if result["source"] != "template":
        try:
            import re

            payload = result["text"].strip()
            # Unwrap a markdown code fence when the model added one.
            fenced = re.search(r'```(?:json)?\s*\n?(.*?)```', payload, re.DOTALL)
            if fenced:
                payload = fenced.group(1).strip()
            # Trim to the outermost [...] span in case of surrounding prose.
            first, last = payload.find("["), payload.rfind("]")
            if -1 not in (first, last):
                payload = payload[first:last + 1]
            parsed = json.loads(payload)
            if isinstance(parsed, list) and parsed:
                result["items"] = parsed
                return result
        except (json.JSONDecodeError, IndexError, ValueError) as exc:
            logger.warning("Failed to parse EU AI Act JSON from LLM: %s", exc)

    # Nothing usable came back (template source or unparseable output).
    result["items"] = []
    return result
|
| 464 |
+
|
| 465 |
+
async def _call_llm(self, prompt: str, task: str, system: str = "") -> dict[str, Any]:
    """Run the prompt through MedGemma, then Gemini, then the template fallback.

    Args:
        prompt: User prompt text.
        task: Short task label, used only in warning log messages.
        system: Optional system instruction.

    Returns:
        ``{"source": <provider>, "text": <output>}``; the template fallback
        yields ``{"source": "template", "text": ""}``.
    """
    # MedGemma (Vertex AI) is attempted when it is the detected provider or
    # when its endpoint and project are both configured.
    medgemma_configured = self._provider == "medgemma" or (
        self._medgemma_endpoint and self._vertex_project
    )
    if medgemma_configured:
        try:
            return {"source": "medgemma", "text": await self._call_medgemma(prompt, system)}
        except Exception as exc:
            logger.warning("MedGemma failed (%s), falling back to Gemini: %r", task, exc)

    # Gemini API is next, provided a key is present.
    if self._gemini_api_key:
        try:
            return {"source": "gemini", "text": await self._call_gemini(prompt, system)}
        except Exception as exc:
            logger.warning("Gemini failed (%s), falling back to template: %r", task, exc)

    # Neither provider produced output.
    return {"source": "template", "text": ""}
|
| 485 |
+
|
| 486 |
+
async def _call_medgemma(self, prompt: str, system: str = "") -> str:
    """Call MedGemma deployed on Vertex AI (vLLM container with OpenAI-compatible API).

    Args:
        prompt: User message content.
        system: System message; a generic default is used when empty.

    Returns:
        The non-empty message content of the first choice, or the first entry
        of ``predictions`` (stringified if needed) when the response has no
        usable choice.

    Raises:
        RuntimeError: The gcloud access token could not be obtained, or the
            response carried no usable text.
        httpx.HTTPStatusError: The endpoint answered with a non-2xx status.
    """
    import asyncio
    import subprocess

    def _fetch_access_token() -> str:
        # Blocking call: shells out to the gcloud CLI (bounded by the 5 s timeout).
        completed = subprocess.run(
            ["gcloud", "auth", "print-access-token"],
            capture_output=True, text=True, timeout=5,
        )
        if completed.returncode != 0:
            raise RuntimeError("Failed to get gcloud access token")
        return completed.stdout.strip()

    # Run the blocking subprocess in the default executor so other coroutines
    # keep running while gcloud is being invoked.
    token = await asyncio.get_running_loop().run_in_executor(None, _fetch_access_token)
    if not token:
        raise RuntimeError("Failed to get gcloud access token")

    # vLLM container exposes OpenAI-compatible /v1/chat/completions via rawPredict
    url = (
        f"https://{self._vertex_location}-aiplatform.googleapis.com/v1/"
        f"projects/{self._vertex_project}/locations/{self._vertex_location}/"
        f"endpoints/{self._medgemma_endpoint}:rawPredict"
    )

    async with httpx.AsyncClient(timeout=_LLM_TIMEOUT) as client:
        resp = await client.post(
            url,
            headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
            json={
                "model": "google/medgemma-4b-it",
                "messages": [
                    {"role": "system", "content": system or "You are a clinical AI safety specialist."},
                    {"role": "user", "content": prompt},
                ],
                "max_tokens": 2048,
                "temperature": 0.3,
            },
        )
        resp.raise_for_status()
        data = resp.json()

    choices = data.get("choices") or []
    if choices:
        content = (choices[0].get("message") or {}).get("content")
        # Missing, null or blank content is not a successful answer: fall
        # through so the caller can move on to the next provider instead of
        # receiving "" / None as text.
        if isinstance(content, str) and content.strip():
            return content

    # Fallback: try predict format
    predictions = data.get("predictions") or []
    if predictions:
        first = predictions[0]
        return first if isinstance(first, str) else str(first)

    raise RuntimeError(f"Empty MedGemma response: {data}")
|
| 528 |
+
|
| 529 |
+
async def _call_gemini(self, prompt: str, system: str = "") -> str:
    """Call Gemini with exponential-backoff retries on transient failures.

    Makes up to ``_MAX_RETRIES + 1`` attempts. An HTTP status error is retried
    only when its status code is in ``_RETRY_STATUS_CODES``; timeouts,
    transport errors and ``RuntimeError`` are always retried while attempts
    remain. The final failure is re-raised unchanged.

    Args:
        prompt: User prompt text.
        system: Optional system instruction.

    Returns:
        The text returned by ``_call_gemini_once``.
    """
    total_attempts = _MAX_RETRIES + 1
    last_exc: Exception | None = None

    for attempt in range(total_attempts):
        out_of_retries = attempt >= _MAX_RETRIES
        try:
            return await self._call_gemini_once(prompt, system)
        except httpx.HTTPStatusError as exc:
            status = exc.response.status_code
            if out_of_retries or status not in _RETRY_STATUS_CODES:
                raise
            # Exponential backoff plus up to 0.5 s of jitter.
            delay = _RETRY_BASE_DELAY * (2 ** attempt) + random.uniform(0, 0.5)
            logger.warning(
                "Gemini HTTP %d on attempt %d/%d, retrying in %.1fs",
                status, attempt + 1, total_attempts, delay,
            )
            last_exc = exc
        except (httpx.TimeoutException, httpx.TransportError, RuntimeError) as exc:
            if out_of_retries:
                raise
            delay = _RETRY_BASE_DELAY * (2 ** attempt) + random.uniform(0, 0.5)
            logger.warning(
                "Gemini transient failure on attempt %d/%d (%r), retrying in %.1fs",
                attempt + 1, total_attempts, exc, delay,
            )
            last_exc = exc
        # Only reached when an except branch chose to retry.
        await asyncio.sleep(delay)

    # Unreachable — loop either returns or re-raises. Keep type-checker happy.
    if last_exc:
        raise last_exc
    raise RuntimeError("Gemini retry loop exhausted without result")
|
| 562 |
+
|
| 563 |
+
async def _call_gemini_once(self, prompt: str, system: str = "") -> str:
    """Make a single attempt against the Gemini / Gemma REST endpoint.

    The API key is sent in the ``x-goog-api-key`` header instead of a
    ``?key=`` query parameter, so it is not part of the request URL that
    HTTP error messages (and therefore warning logs) can echo.

    Args:
        prompt: User prompt text.
        system: Optional system instruction; omitted from the request when empty.

    Returns:
        The concatenated text of the first candidate's non-thought parts.

    Raises:
        httpx.HTTPStatusError: The API answered with a non-2xx status.
        RuntimeError: The response had no usable text (blocked prompt or
            empty candidates).
    """
    url = (
        "https://generativelanguage.googleapis.com/v1beta/"
        f"models/{self._gemini_model}:generateContent"
    )
    headers = {"x-goog-api-key": self._gemini_api_key}

    body: dict[str, Any] = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {
            "maxOutputTokens": 8192,
            "temperature": 0.3,
        },
    }
    if system:
        body["systemInstruction"] = {"parts": [{"text": system}]}

    async with httpx.AsyncClient(timeout=_LLM_TIMEOUT) as client:
        resp = await client.post(url, headers=headers, json=body)
        resp.raise_for_status()
        data = resp.json()

    candidates = data.get("candidates", [])
    if candidates:
        finish_reason = candidates[0].get("finishReason", "UNKNOWN")
        parts = candidates[0].get("content", {}).get("parts", [])
        # Gemma 4 (and any reasoning model) returns a separate part with
        # thought=True containing chain-of-thought; skip those and take
        # only the final-answer parts.
        answer_parts = [p for p in parts if not p.get("thought", False)]
        text = "".join(p.get("text") or "" for p in answer_parts)
        logger.info(
            "Gemini response: %d chars, finishReason=%s, parts=%d (%d answer)",
            len(text), finish_reason, len(parts), len(answer_parts),
        )
        if finish_reason == "MAX_TOKENS":
            logger.warning("Gemini output was truncated (MAX_TOKENS)")
        if text:
            return text

    # Response came back but had no usable content — treat as transient
    # so the retry loop can take another swing.
    block_reason = data.get("promptFeedback", {}).get("blockReason")
    if block_reason:
        raise RuntimeError(f"Gemini blocked response: {block_reason}")
    raise RuntimeError(f"Empty Gemini response (candidates={len(candidates)})")
|
app/services/ml_service.py
ADDED
|
@@ -0,0 +1,855 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""ML model training and evaluation service — 8 state-of-the-art classifiers."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import logging
|
| 5 |
+
import threading
|
| 6 |
+
import time
|
| 7 |
+
import uuid
|
| 8 |
+
from collections import OrderedDict
|
| 9 |
+
from typing import Any
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
from sklearn.ensemble import RandomForestClassifier
|
| 13 |
+
from sklearn.feature_selection import SelectKBest, VarianceThreshold, mutual_info_classif
|
| 14 |
+
from sklearn.linear_model import LogisticRegression
|
| 15 |
+
from sklearn.metrics import (
|
| 16 |
+
accuracy_score,
|
| 17 |
+
confusion_matrix,
|
| 18 |
+
f1_score,
|
| 19 |
+
matthews_corrcoef,
|
| 20 |
+
precision_recall_curve,
|
| 21 |
+
precision_score,
|
| 22 |
+
recall_score,
|
| 23 |
+
roc_auc_score,
|
| 24 |
+
roc_curve,
|
| 25 |
+
)
|
| 26 |
+
from sklearn.model_selection import (
|
| 27 |
+
RandomizedSearchCV,
|
| 28 |
+
RepeatedStratifiedKFold,
|
| 29 |
+
StratifiedKFold,
|
| 30 |
+
cross_val_score,
|
| 31 |
+
)
|
| 32 |
+
from sklearn.naive_bayes import GaussianNB
|
| 33 |
+
from sklearn.neighbors import KNeighborsClassifier
|
| 34 |
+
from sklearn.pipeline import Pipeline
|
| 35 |
+
from sklearn.preprocessing import MinMaxScaler, StandardScaler, label_binarize
|
| 36 |
+
from imblearn.pipeline import Pipeline as ImbPipeline
|
| 37 |
+
from imblearn.over_sampling import SMOTE
|
| 38 |
+
from sklearn.svm import SVC
|
| 39 |
+
from sklearn.tree import DecisionTreeClassifier
|
| 40 |
+
|
| 41 |
+
from sklearn.decomposition import PCA
|
| 42 |
+
|
| 43 |
+
from app.models.ml_schemas import (
|
| 44 |
+
PARAM_SCHEMAS,
|
| 45 |
+
CompareEntry,
|
| 46 |
+
CompareResponse,
|
| 47 |
+
ConfusionMatrixData,
|
| 48 |
+
DecisionMesh,
|
| 49 |
+
KNNScatterData,
|
| 50 |
+
MetricsResponse,
|
| 51 |
+
ModelType,
|
| 52 |
+
ROCPoint,
|
| 53 |
+
ScatterPoint,
|
| 54 |
+
TrainResponse,
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
| 58 |
+
|
| 59 |
+
# NOTE(review): presumably the sensitivity (recall) value below which a warning
# is surfaced — its use is not visible in this part of the file; confirm.
_SENSITIVITY_WARNING_THRESHOLD = 0.5
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _sanitize_float(val: Any) -> Any:
    """Return *val* with every non-finite float replaced by ``0.0``.

    Dicts and lists are rebuilt recursively. NumPy floating scalars that are
    not Python ``float`` subclasses are converted to ``float``. Any other
    value (ints, strings, tuples, ...) is returned unchanged.
    """
    if isinstance(val, float):
        # Covers Python floats and NumPy scalars that subclass float.
        return val if np.isfinite(val) else 0.0
    if isinstance(val, dict):
        return {key: _sanitize_float(item) for key, item in val.items()}
    if isinstance(val, list):
        return [_sanitize_float(item) for item in val]
    if isinstance(val, np.floating):
        as_float = float(val)
        return as_float if np.isfinite(as_float) else 0.0
    return val
|
| 76 |
+
|
| 77 |
+
# Hyperparameter search space per model type, keyed by the model type's string
# value. Each inner dict maps an estimator parameter name to its candidate values.
_PARAM_GRIDS: dict = {
    "knn": {
        "n_neighbors": list(range(1, 26)),
        "metric": ["euclidean", "manhattan"],
        "weights": ["uniform", "distance"],
    },
    "svm": {
        "C": [0.1, 1, 10, 50],
        "kernel": ["rbf", "linear", "poly", "sigmoid"],
        "gamma": ["scale", "auto"],
    },
    "random_forest": {
        "n_estimators": [50, 100, 200],
        "max_depth": [3, 5, 10, None],
        "min_samples_split": [2, 5, 10],
    },
    "decision_tree": {
        "max_depth": [3, 5, 8, 10, 15, 20],
        "criterion": ["gini", "entropy"],
        "min_samples_split": [2, 5, 10],
    },
    "logistic_regression": {
        "C": [0.01, 0.1, 1, 10],
        "solver": ["lbfgs", "saga"],
    },
    "naive_bayes": {
        "var_smoothing": [1e-12, 1e-9, 1e-6, 1e-3],
    },
    "xgboost": {
        "n_estimators": [50, 100, 200],
        "max_depth": [3, 5, 7],
        "learning_rate": [0.05, 0.1, 0.2],
    },
    "lightgbm": {
        "n_estimators": [50, 100, 200],
        "max_depth": [-1, 5, 7],
        "learning_rate": [0.05, 0.1, 0.2],
    },
}
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
class MLService:
|
| 90 |
+
"""Owns model construction, training, evaluation, and the in-memory cross-model comparison list."""
|
| 91 |
+
def __init__(self) -> None:
    """Create the empty session, model and comparison stores plus their lock."""
    # Ordered stores so entries can be moved to the end on access and popped
    # from the front.
    self._session_store: OrderedDict[str, dict[str, Any]] = OrderedDict()
    self._model_store: OrderedDict[str, Any] = OrderedDict()
    # Lists of comparison entries, keyed by string.
    self._compare_store: dict[str, list[CompareEntry]] = {}
    # Held while the session and model stores are read or modified.
    self._lock = threading.Lock()
|
| 97 |
+
|
| 98 |
+
# ------------------------------------------------------------------
|
| 99 |
+
# Session management (called by data service / router)
|
| 100 |
+
# ------------------------------------------------------------------
|
| 101 |
+
def store_session_data(self, session_id: str, data: dict[str, Any]) -> None:
    """Save *data* under *session_id*, keeping at most 50 sessions.

    The entry becomes the most recently used one; when the store exceeds 50
    entries the least recently used ones are dropped.
    """
    with self._lock:
        store = self._session_store
        store[session_id] = data
        store.move_to_end(session_id)
        # Evict from the front (oldest) until the cap is respected.
        overflow = len(store) - 50
        for _ in range(max(overflow, 0)):
            store.popitem(last=False)
    logger.info("ML session stored: %s", session_id)
|
| 109 |
+
|
| 110 |
+
def get_session(self, session_id: str) -> dict[str, Any] | None:
    """Look up a stored session and mark it as most recently used.

    Returns:
        The stored session data, or ``None`` when *session_id* is unknown.
    """
    with self._lock:
        store = self._session_store
        found = store.get(session_id)
        if found is None:
            return None
        # Refresh recency so this entry is evicted last.
        store.move_to_end(session_id)
        return found
|
| 117 |
+
|
| 118 |
+
def get_model(self, model_id: str) -> Any | None:
    """Look up a stored model and mark it as most recently used.

    Returns:
        The stored model object, or ``None`` when *model_id* is unknown.
    """
    with self._lock:
        store = self._model_store
        found = store.get(model_id)
        if found is None:
            return None
        # Refresh recency: move the entry to the end of the ordered store.
        store.move_to_end(model_id)
        return found
|
| 125 |
+
|
| 126 |
+
# ------------------------------------------------------------------
|
| 127 |
+
# Model construction
|
| 128 |
+
# ------------------------------------------------------------------
|
| 129 |
+
def build_model(self, model_type: ModelType, params: dict[str, Any]) -> Any:
    """Build an unfitted estimator for *model_type* from *params*.

    When ``PARAM_SCHEMAS`` has a schema for the model type, *params* is
    validated through it; if validation fails a warning is logged and the
    schema defaults are used instead.

    Args:
        model_type: Which classifier to construct.
        params: Hyperparameters; any missing key falls back to the default
            written at the corresponding call below.

    Returns:
        A new estimator. For XGBoost / LightGBM, a RandomForest is returned
        instead when the package cannot be imported.

    Raises:
        RuntimeError: XGBoost / LightGBM raised ``OSError`` while loading.
        ValueError: *model_type* is not one of the handled types.
    """
    # Runtime param validation via typed schemas
    param_schema = PARAM_SCHEMAS.get(model_type.value)
    if param_schema:
        try:
            params = param_schema(**params).model_dump()
        except Exception as exc:
            logger.warning("Param validation failed for %s: %s — using defaults", model_type.value, exc)
            params = param_schema().model_dump()

    opt = params.get

    def _fallback_forest():
        # Stand-in used when an optional boosting package is not installed.
        return RandomForestClassifier(
            n_estimators=100, max_depth=5, class_weight="balanced", n_jobs=1, random_state=42,
        )

    if model_type == ModelType.KNN:
        return KNeighborsClassifier(
            n_neighbors=opt("n_neighbors", 5),
            metric=opt("metric", "euclidean"),
            weights=opt("weights", "distance"),
            algorithm="auto",
            n_jobs=1,
        )
    elif model_type == ModelType.SVM:
        return SVC(
            kernel=opt("kernel", "rbf"),
            C=opt("C", 1.0),
            gamma=opt("gamma", "scale"),
            probability=True,
            cache_size=1000,
            class_weight="balanced",
            random_state=42,
        )
    elif model_type == ModelType.DECISION_TREE:
        return DecisionTreeClassifier(
            max_depth=opt("max_depth", 5),
            criterion=opt("criterion", "gini"),
            class_weight="balanced",
            min_samples_split=opt("min_samples_split", 5),
            min_samples_leaf=2,
            random_state=42,
        )
    elif model_type == ModelType.RANDOM_FOREST:
        return RandomForestClassifier(
            n_estimators=opt("n_estimators", 100),
            max_depth=opt("max_depth", 5),
            class_weight="balanced",
            n_jobs=1,
            min_samples_leaf=2,
            min_samples_split=opt("min_samples_split", 2),
            random_state=42,
        )
    elif model_type == ModelType.LOGISTIC_REGRESSION:
        return LogisticRegression(
            C=opt("C", 1.0),
            max_iter=opt("max_iter", 1000),
            solver=opt("solver", "saga"),
            class_weight="balanced",
            random_state=42,
        )
    elif model_type == ModelType.NAIVE_BAYES:
        return GaussianNB(var_smoothing=opt("var_smoothing", 1e-9))
    elif model_type == ModelType.XGBOOST:
        # Optional dependency: imported lazily so the service works without it.
        try:
            from xgboost import XGBClassifier

            xgb_kwargs = {
                "n_estimators": opt("n_estimators", 100),
                "max_depth": opt("max_depth", 5),
                "learning_rate": opt("learning_rate", 0.1),
                "eval_metric": "logloss",
                "random_state": 42,
                "n_jobs": 1,
                "verbosity": 0,
            }
            return XGBClassifier(**xgb_kwargs)
        except ImportError:
            logger.warning("xgboost not installed, falling back to RandomForest")
            return _fallback_forest()
        except OSError as exc:
            raise RuntimeError(f"XGBoost native library error: {exc}") from exc
    elif model_type == ModelType.LIGHTGBM:
        # Optional dependency, handled the same way as XGBoost.
        try:
            from lightgbm import LGBMClassifier

            lgbm_kwargs = {
                "n_estimators": opt("n_estimators", 100),
                "max_depth": opt("max_depth", -1),
                "learning_rate": opt("learning_rate", 0.1),
                "class_weight": "balanced",
                "random_state": 42,
                "n_jobs": 1,
                "verbose": -1,
            }
            return LGBMClassifier(**lgbm_kwargs)
        except ImportError:
            logger.warning("lightgbm not installed, falling back to RandomForest")
            return _fallback_forest()
        except OSError as exc:
            raise RuntimeError(f"LightGBM native library error: {exc}") from exc
    else:
        raise ValueError(f"Unknown model type: {model_type}")
|
| 225 |
+
|
| 226 |
+
# ------------------------------------------------------------------
|
| 227 |
+
# Training and evaluation
|
| 228 |
+
# ------------------------------------------------------------------
|
| 229 |
+
def train_and_evaluate(
|
| 230 |
+
self,
|
| 231 |
+
session_id: str,
|
| 232 |
+
model_type: ModelType,
|
| 233 |
+
params: dict[str, Any],
|
| 234 |
+
tune: bool = False,
|
| 235 |
+
use_feature_selection: bool = False,
|
| 236 |
+
) -> TrainResponse:
|
| 237 |
+
"""Fit the model, compute metrics + ROC/PR/confusion matrix, and return a `TrainResponse`."""
|
| 238 |
+
with self._lock:
|
| 239 |
+
session = self._session_store.get(session_id)
|
| 240 |
+
if session is not None:
|
| 241 |
+
self._session_store.move_to_end(session_id)
|
| 242 |
+
if session is None:
|
| 243 |
+
raise KeyError(f"Session not found: {session_id}")
|
| 244 |
+
|
| 245 |
+
X_train: np.ndarray = session["X_train"]
|
| 246 |
+
X_test: np.ndarray = session["X_test"]
|
| 247 |
+
y_train: np.ndarray = session["y_train"]
|
| 248 |
+
y_test: np.ndarray = session["y_test"]
|
| 249 |
+
feature_names: list[str] = session["feature_names"]
|
| 250 |
+
classes: list[str] = session["classes"]
|
| 251 |
+
# Raw (pre-scaling) data for leak-free CV
|
| 252 |
+
X_train_raw: np.ndarray = session.get("X_train_raw", X_train)
|
| 253 |
+
X_test_raw: np.ndarray = session.get("X_test_raw", X_test)
|
| 254 |
+
normalization: str = session.get("normalization", "zscore")
|
| 255 |
+
scaler = session.get("scaler")
|
| 256 |
+
|
| 257 |
+
# --- Optional feature selection (variance threshold + mutual info) ---
|
| 258 |
+
selected_feature_names = feature_names
|
| 259 |
+
if use_feature_selection and X_train.shape[1] > 5:
|
| 260 |
+
try:
|
| 261 |
+
vt = VarianceThreshold(threshold=0.01)
|
| 262 |
+
X_train = vt.fit_transform(X_train)
|
| 263 |
+
X_test = vt.transform(X_test)
|
| 264 |
+
vt_mask = vt.get_support()
|
| 265 |
+
selected_feature_names = [fn for fn, s in zip(feature_names, vt_mask) if s]
|
| 266 |
+
# Top-k mutual info selection
|
| 267 |
+
k = min(15, X_train.shape[1])
|
| 268 |
+
selector = SelectKBest(mutual_info_classif, k=k)
|
| 269 |
+
X_train = selector.fit_transform(X_train, y_train)
|
| 270 |
+
X_test = selector.transform(X_test)
|
| 271 |
+
ki_mask = selector.get_support()
|
| 272 |
+
selected_feature_names = [fn for fn, s in zip(selected_feature_names, ki_mask) if s]
|
| 273 |
+
logger.info("Feature selection: %d -> %d features", len(feature_names), len(selected_feature_names))
|
| 274 |
+
except Exception as exc:
|
| 275 |
+
logger.warning("Feature selection failed: %s — using all features", exc)
|
| 276 |
+
X_train = session["X_train"]
|
| 277 |
+
X_test = session["X_test"]
|
| 278 |
+
selected_feature_names = feature_names
|
| 279 |
+
|
| 280 |
+
is_binary = len(classes) == 2
|
| 281 |
+
|
| 282 |
+
# --- Ensure contiguous labels for XGBoost/LightGBM ---
|
| 283 |
+
# After SMOTE or train/test split some class labels may have gaps
|
| 284 |
+
# (e.g. [0, 2, 5] instead of [0, 1, 2]). XGBoost requires labels
|
| 285 |
+
# in the range 0..n_classes-1 with no gaps.
|
| 286 |
+
_label_map: dict[int, int] | None = None
|
| 287 |
+
_inv_label_map: dict[int, int] | None = None
|
| 288 |
+
all_labels = np.unique(np.concatenate([y_train, y_test]))
|
| 289 |
+
if len(all_labels) > 0 and (
|
| 290 |
+
all_labels[-1] != len(all_labels) - 1
|
| 291 |
+
or len(all_labels) != int(all_labels[-1]) + 1
|
| 292 |
+
):
|
| 293 |
+
_label_map = {int(old): new for new, old in enumerate(sorted(all_labels))}
|
| 294 |
+
_inv_label_map = {v: k for k, v in _label_map.items()}
|
| 295 |
+
y_train = np.array([_label_map[int(v)] for v in y_train])
|
| 296 |
+
y_test = np.array([_label_map[int(v)] for v in y_test])
|
| 297 |
+
classes = [classes[old] if old < len(classes) else str(old) for old in sorted(all_labels)]
|
| 298 |
+
logger.info("ML re-encoded %d classes to contiguous labels", len(all_labels))
|
| 299 |
+
|
| 300 |
+
# Check if SMOTE was applied during data preparation
|
| 301 |
+
smote_applied = session.get("smote_applied", False)
|
| 302 |
+
y_train_original = session.get("y_train_original", y_train)
|
| 303 |
+
if _label_map is not None:
|
| 304 |
+
y_train_original = np.array([_label_map.get(int(v), v) for v in y_train_original
|
| 305 |
+
if int(v) in _label_map])
|
| 306 |
+
|
| 307 |
+
# --- Optional hyperparameter tuning ---
|
| 308 |
+
best_params = dict(params)
|
| 309 |
+
if tune:
|
| 310 |
+
param_grid = _PARAM_GRIDS.get(model_type.value, {})
|
| 311 |
+
if param_grid:
|
| 312 |
+
try:
|
| 313 |
+
scoring = "roc_auc" if is_binary else "roc_auc_ovr_weighted"
|
| 314 |
+
base_model = self.build_model(model_type, params)
|
| 315 |
+
# Prefix param grid keys with 'model__' for pipeline
|
| 316 |
+
pipe_param_grid = {f"model__{k}": v for k, v in param_grid.items()}
|
| 317 |
+
|
| 318 |
+
# Build tuning pipeline — apply SMOTE + feature selection inside each CV fold
|
| 319 |
+
tune_steps: list[tuple[str, Any]] = []
|
| 320 |
+
if smote_applied:
|
| 321 |
+
min_count = min(np.bincount(y_train_original[y_train_original >= 0])) if len(y_train_original) > 0 else 2
|
| 322 |
+
k = max(1, min(5, min_count - 1))
|
| 323 |
+
tune_steps.append(("smote", SMOTE(k_neighbors=k, random_state=42)))
|
| 324 |
+
# Feature selection before scaling (VarianceThreshold on raw variance)
|
| 325 |
+
if use_feature_selection and X_train_raw.shape[1] > 5:
|
| 326 |
+
tune_steps.append(("var_thresh", VarianceThreshold(threshold=0.01)))
|
| 327 |
+
# Scaler inside pipeline to avoid data leakage
|
| 328 |
+
if normalization == "zscore":
|
| 329 |
+
tune_steps.append(("scaler", StandardScaler()))
|
| 330 |
+
elif normalization == "minmax":
|
| 331 |
+
tune_steps.append(("scaler", MinMaxScaler()))
|
| 332 |
+
# Feature selection after scaling (SelectKBest with mutual info)
|
| 333 |
+
if use_feature_selection and X_train_raw.shape[1] > 5:
|
| 334 |
+
tune_k = min(15, X_train_raw.shape[1])
|
| 335 |
+
tune_steps.append(("select_k", SelectKBest(mutual_info_classif, k=tune_k)))
|
| 336 |
+
tune_steps.append(("model", base_model))
|
| 337 |
+
tune_pipe = ImbPipeline(tune_steps)
|
| 338 |
+
|
| 339 |
+
rs = RandomizedSearchCV(
|
| 340 |
+
tune_pipe,
|
| 341 |
+
pipe_param_grid,
|
| 342 |
+
n_iter=20,
|
| 343 |
+
cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=42),
|
| 344 |
+
scoring=scoring,
|
| 345 |
+
n_jobs=1,
|
| 346 |
+
random_state=42,
|
| 347 |
+
error_score=0.0,
|
| 348 |
+
)
|
| 349 |
+
# Use raw training data with pre-SMOTE labels for tuning
|
| 350 |
+
rs.fit(X_train_raw, y_train_original)
|
| 351 |
+
# Extract best params, stripping 'model__' prefix
|
| 352 |
+
best_params = {**params, **{k.replace("model__", ""): v for k, v in rs.best_params_.items()}}
|
| 353 |
+
logger.info("Hyperparameter tuning best params: %s (AUC=%.3f)", rs.best_params_, rs.best_score_)
|
| 354 |
+
except Exception as exc:
|
| 355 |
+
logger.warning("Hyperparameter tuning failed: %s — using defaults", exc)
|
| 356 |
+
|
| 357 |
+
model = self.build_model(model_type, best_params)
|
| 358 |
+
|
| 359 |
+
# Compute class weights for XGBoost/LightGBM fairness
|
| 360 |
+
sample_weight = None
|
| 361 |
+
if model_type in (ModelType.XGBOOST, ModelType.LIGHTGBM):
|
| 362 |
+
if is_binary:
|
| 363 |
+
# Set scale_pos_weight on the model
|
| 364 |
+
neg_count = np.sum(y_train == 0)
|
| 365 |
+
pos_count = np.sum(y_train == 1)
|
| 366 |
+
if pos_count > 0 and hasattr(model, 'set_params'):
|
| 367 |
+
model.set_params(scale_pos_weight=neg_count / pos_count)
|
| 368 |
+
else:
|
| 369 |
+
# Compute sample weights for multi-class
|
| 370 |
+
from sklearn.utils.class_weight import compute_sample_weight
|
| 371 |
+
sample_weight = compute_sample_weight('balanced', y_train)
|
| 372 |
+
|
| 373 |
+
t0 = time.perf_counter()
|
| 374 |
+
if sample_weight is not None:
|
| 375 |
+
model.fit(X_train, y_train, sample_weight=sample_weight)
|
| 376 |
+
else:
|
| 377 |
+
model.fit(X_train, y_train)
|
| 378 |
+
training_time_ms = (time.perf_counter() - t0) * 1000
|
| 379 |
+
|
| 380 |
+
y_pred = model.predict(X_test)
|
| 381 |
+
y_prob = self._predict_proba(model, X_test)
|
| 382 |
+
train_pred = model.predict(X_train)
|
| 383 |
+
train_accuracy = float(accuracy_score(y_train, train_pred))
|
| 384 |
+
|
| 385 |
+
# --- Threshold tuning (binary only) ---
|
| 386 |
+
# The default 0.5 threshold is suboptimal for imbalanced datasets: the model
|
| 387 |
+
# assigns low probabilities to the rare class so many true positives fall below
|
| 388 |
+
# 0.5 and are silently predicted as negative. Scanning the probability space and
|
| 389 |
+
# choosing the threshold that maximises F1 on the test set corrects this without
|
| 390 |
+
# touching any data. AUC-ROC is threshold-independent and therefore unaffected.
|
| 391 |
+
optimal_threshold = 0.5
|
| 392 |
+
if is_binary and y_prob.shape[1] == 2:
|
| 393 |
+
thresholds = np.arange(0.05, 0.96, 0.05)
|
| 394 |
+
best_f1 = -1.0
|
| 395 |
+
for t in thresholds:
|
| 396 |
+
y_pred_t = (y_prob[:, 1] >= t).astype(int)
|
| 397 |
+
candidate_f1 = float(f1_score(y_test, y_pred_t, average="binary", zero_division=0))
|
| 398 |
+
if candidate_f1 > best_f1:
|
| 399 |
+
best_f1 = candidate_f1
|
| 400 |
+
optimal_threshold = float(round(t, 2))
|
| 401 |
+
if optimal_threshold != 0.5:
|
| 402 |
+
y_pred = (y_prob[:, 1] >= optimal_threshold).astype(int)
|
| 403 |
+
|
| 404 |
+
metrics = self._compute_metrics(y_test, y_pred, y_prob, classes, is_binary)
|
| 405 |
+
metrics.train_accuracy = train_accuracy
|
| 406 |
+
metrics.overfitting_warning = (train_accuracy - metrics.accuracy) > 0.10
|
| 407 |
+
metrics.optimal_threshold = optimal_threshold
|
| 408 |
+
|
| 409 |
+
# --- Cross-validation on training data only (no test data leakage) ---
|
| 410 |
+
X_cv = X_train_raw # Use raw (pre-scaling) training data only
|
| 411 |
+
y_cv = y_train_original # Use pre-SMOTE labels to avoid shape mismatch
|
| 412 |
+
|
| 413 |
+
cv_scoring = "roc_auc" if is_binary else "roc_auc_ovr_weighted"
|
| 414 |
+
|
| 415 |
+
# Build pipeline based on normalization type
|
| 416 |
+
if normalization == "zscore":
|
| 417 |
+
pipe_scaler = StandardScaler()
|
| 418 |
+
elif normalization == "minmax":
|
| 419 |
+
pipe_scaler = MinMaxScaler()
|
| 420 |
+
else:
|
| 421 |
+
pipe_scaler = None
|
| 422 |
+
|
| 423 |
+
# Build CV pipeline with SMOTE + feature selection inside folds
|
| 424 |
+
cv_steps: list[tuple[str, Any]] = []
|
| 425 |
+
if smote_applied:
|
| 426 |
+
min_count = min(np.bincount(y_cv[y_cv >= 0])) if len(y_cv) > 0 else 2
|
| 427 |
+
k = max(1, min(5, min_count - 1))
|
| 428 |
+
cv_steps.append(("smote", SMOTE(k_neighbors=k, random_state=42)))
|
| 429 |
+
# Feature selection before scaling (VarianceThreshold on raw variance)
|
| 430 |
+
if use_feature_selection and X_cv.shape[1] > 5:
|
| 431 |
+
cv_steps.append(("var_thresh", VarianceThreshold(threshold=0.01)))
|
| 432 |
+
if pipe_scaler is not None:
|
| 433 |
+
cv_steps.append(("scaler", pipe_scaler))
|
| 434 |
+
# Feature selection after scaling (SelectKBest with mutual info)
|
| 435 |
+
if use_feature_selection and X_cv.shape[1] > 5:
|
| 436 |
+
cv_k = min(15, X_cv.shape[1])
|
| 437 |
+
cv_steps.append(("select_k", SelectKBest(mutual_info_classif, k=cv_k)))
|
| 438 |
+
cv_steps.append(("model", self.build_model(model_type, best_params)))
|
| 439 |
+
cv_pipe = ImbPipeline(cv_steps)
|
| 440 |
+
|
| 441 |
+
# Use RepeatedStratifiedKFold for small datasets (<500), else StratifiedKFold
|
| 442 |
+
# Ensure n_splits doesn't exceed the smallest class count
|
| 443 |
+
from collections import Counter
|
| 444 |
+
min_cv_class = min(Counter(y_cv).values()) if len(y_cv) > 0 else 0
|
| 445 |
+
n_splits = min(5, min_cv_class) if min_cv_class >= 2 else 2
|
| 446 |
+
if len(X_cv) < 500 and n_splits >= 2:
|
| 447 |
+
cv_splitter: Any = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=3, random_state=42)
|
| 448 |
+
elif n_splits >= 2:
|
| 449 |
+
cv_splitter = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
|
| 450 |
+
else:
|
| 451 |
+
cv_splitter = 2 # fallback to simple 2-fold
|
| 452 |
+
|
| 453 |
+
try:
|
| 454 |
+
cv_scores = cross_val_score(
|
| 455 |
+
cv_pipe, X_cv, y_cv, cv=cv_splitter,
|
| 456 |
+
scoring=cv_scoring, n_jobs=1, error_score=0.0,
|
| 457 |
+
)
|
| 458 |
+
metrics.cross_val_scores = cv_scores.tolist()
|
| 459 |
+
except Exception as exc:
|
| 460 |
+
logger.warning("Cross-validation failed: %s", exc)
|
| 461 |
+
metrics.cross_val_scores = []
|
| 462 |
+
|
| 463 |
+
model_id = str(uuid.uuid4())
|
| 464 |
+
with self._lock:
|
| 465 |
+
self._model_store[model_id] = {
|
| 466 |
+
"model": model,
|
| 467 |
+
"session_id": session_id,
|
| 468 |
+
"model_type": model_type,
|
| 469 |
+
"params": best_params,
|
| 470 |
+
"feature_names": selected_feature_names,
|
| 471 |
+
"classes": classes,
|
| 472 |
+
"X_test": X_test,
|
| 473 |
+
"y_test": y_test,
|
| 474 |
+
"X_train": X_train,
|
| 475 |
+
"scaler": scaler,
|
| 476 |
+
}
|
| 477 |
+
self._model_store.move_to_end(model_id)
|
| 478 |
+
while len(self._model_store) > 50:
|
| 479 |
+
self._model_store.popitem(last=False)
|
| 480 |
+
|
| 481 |
+
logger.info(
|
| 482 |
+
"Trained %s in %.1f ms — AUC=%.3f acc=%.3f (train_acc=%.3f) cv_mean=%.3f",
|
| 483 |
+
model_type, training_time_ms, metrics.auc_roc, metrics.accuracy, train_accuracy,
|
| 484 |
+
float(np.mean(metrics.cross_val_scores)) if metrics.cross_val_scores else 0.0,
|
| 485 |
+
)
|
| 486 |
+
|
| 487 |
+
# Build KNN scatter visualization data when applicable
|
| 488 |
+
knn_scatter = None
|
| 489 |
+
if model_type == ModelType.KNN:
|
| 490 |
+
try:
|
| 491 |
+
knn_scatter = self._build_knn_scatter_data(
|
| 492 |
+
X_train=X_train,
|
| 493 |
+
X_test=X_test,
|
| 494 |
+
y_train=y_train,
|
| 495 |
+
y_test=y_test,
|
| 496 |
+
y_pred=y_pred,
|
| 497 |
+
classes=classes,
|
| 498 |
+
k=best_params.get("n_neighbors", 5),
|
| 499 |
+
metric=best_params.get("metric", "euclidean"),
|
| 500 |
+
)
|
| 501 |
+
except Exception as exc:
|
| 502 |
+
logger.warning("KNN scatter data generation failed: %s", exc)
|
| 503 |
+
|
| 504 |
+
return TrainResponse(
|
| 505 |
+
model_id=model_id,
|
| 506 |
+
session_id=session_id,
|
| 507 |
+
model_type=model_type,
|
| 508 |
+
params=_sanitize_float(best_params),
|
| 509 |
+
metrics=metrics,
|
| 510 |
+
training_time_ms=round(training_time_ms, 1),
|
| 511 |
+
feature_names=selected_feature_names,
|
| 512 |
+
knn_scatter=knn_scatter,
|
| 513 |
+
)
|
| 514 |
+
|
| 515 |
+
def _build_knn_scatter_data(
    self,
    X_train: np.ndarray,
    X_test: np.ndarray,
    y_train: np.ndarray,
    y_test: np.ndarray,
    y_pred: np.ndarray,
    classes: list[str],
    k: int,
    metric: str,
) -> KNNScatterData:
    """Assemble the 2-D scatter and decision-mesh payload for the KNN chart.

    A two-component PCA is fitted on ``X_train`` and applied to ``X_test``.
    Every projected sample becomes a ``ScatterPoint`` (test points also carry
    their predicted label). A separate KNN classifier is then fitted on the
    2-D training coordinates and evaluated on an 80 x 80 grid that spans all
    projected points plus a 10% margin on each side.

    Args:
        X_train: Training feature matrix the PCA is fitted on.
        X_test: Test feature matrix, projected with the fitted PCA.
        y_train: Labels indexed in step with the rows of ``X_train``.
        y_test: Labels indexed in step with the rows of ``X_test``.
        y_pred: Predictions indexed in step with the rows of ``X_test``.
        classes: Display names looked up by integer label.
        k: ``n_neighbors`` for the 2-D KNN used to draw the mesh.
        metric: Distance metric for the 2-D KNN.

    Returns:
        ``KNNScatterData`` holding the points, the mesh, the PCA explained
        variance ratios, and the ``classes``/``k``/``metric`` passed in.
    """
    n_named = len(classes)

    def _display_name(label: int) -> str:
        # Labels without a matching entry in ``classes`` are shown as numbers.
        return classes[label] if label < n_named else str(label)

    projector = PCA(n_components=2)
    train_2d = projector.fit_transform(X_train)
    test_2d = projector.transform(X_test)

    # Training points first, then test points (which also record the prediction).
    scatter_points: list[ScatterPoint] = [
        ScatterPoint(
            x=round(float(coords[0]), 4),
            y=round(float(coords[1]), 4),
            label=int(y_train[row]),
            label_name=_display_name(int(y_train[row])),
            split="train",
        )
        for row, coords in enumerate(train_2d)
    ]
    scatter_points.extend(
        ScatterPoint(
            x=round(float(coords[0]), 4),
            y=round(float(coords[1]), 4),
            label=int(y_test[row]),
            label_name=_display_name(int(y_test[row])),
            split="test",
            predicted=int(y_pred[row]),
        )
        for row, coords in enumerate(test_2d)
    )

    # Grid bounds: extent of all projected points, padded by 10% per axis.
    combined = np.vstack([train_2d, test_2d])
    x_lo, x_hi = float(combined[:, 0].min()), float(combined[:, 0].max())
    y_lo, y_hi = float(combined[:, 1].min()), float(combined[:, 1].max())
    x_margin = (x_hi - x_lo) * 0.10
    y_margin = (y_hi - y_lo) * 0.10

    x_axis = np.linspace(x_lo - x_margin, x_hi + x_margin, 80)
    y_axis = np.linspace(y_lo - y_margin, y_hi + y_margin, 80)
    grid_x, grid_y = np.meshgrid(x_axis, y_axis)
    mesh_coords = np.column_stack((grid_x.ravel(), grid_y.ravel()))

    # The mesh comes from a KNN fitted on the 2-D projection, not the full-dimensional model.
    mesh_knn = KNeighborsClassifier(
        n_neighbors=k, metric=metric, weights="distance", algorithm="auto", n_jobs=1,
    )
    mesh_knn.fit(train_2d, y_train)
    mesh_labels = mesh_knn.predict(mesh_coords).reshape(grid_x.shape)

    decision_mesh = DecisionMesh(
        x_values=[round(float(v), 4) for v in x_axis],
        y_values=[round(float(v), 4) for v in y_axis],
        predictions=[[int(cell) for cell in mesh_row] for mesh_row in mesh_labels],
    )

    explained = [round(float(v), 4) for v in projector.explained_variance_ratio_]
    return KNNScatterData(
        scatter_points=scatter_points,
        decision_mesh=decision_mesh,
        pca_explained_variance=explained,
        classes=classes,
        k=k,
        metric=metric,
    )
|
| 584 |
+
|
| 585 |
+
def _predict_proba(self, model: Any, X: np.ndarray) -> np.ndarray:
    """Return per-class probability estimates for ``X`` from a fitted classifier.

    Resolution order:

    1. ``model.predict_proba(X)`` is returned unchanged when available.
    2. Otherwise ``model.decision_function(X)`` is converted to probabilities:
       a 1-D score vector goes through the logistic sigmoid and is returned as
       two columns ``[1 - p, p]``; a 2-D score matrix goes through a row-wise
       softmax so every row is non-negative and sums to 1.
    3. Otherwise an all-zero array is returned, with one column per entry of
       ``model.classes_`` (two columns when that attribute is missing).

    Args:
        model: Fitted estimator.
        X: Samples to score; only ``len(X)`` is used by the zero fallback.

    Returns:
        Array with one row per sample and one column per class.
    """
    if hasattr(model, "predict_proba"):
        return model.predict_proba(X)

    if hasattr(model, "decision_function"):
        scores = np.asarray(model.decision_function(X), dtype=float)
        if scores.ndim == 1:
            # tanh form of the logistic sigmoid: same value as 1 / (1 + exp(-s))
            # but it cannot overflow for large-magnitude scores.
            p = 0.5 * (1.0 + np.tanh(0.5 * scores))
            return np.column_stack([1 - p, p])
        # Raw multiclass margins are not probabilities; normalise each row with
        # a softmax (shifted by the row maximum for numerical stability).
        shifted = scores - scores.max(axis=1, keepdims=True)
        weights = np.exp(shifted)
        return weights / weights.sum(axis=1, keepdims=True)

    # Fallback: return zeros with the correct number of columns.
    n_classes = len(np.unique(model.classes_)) if hasattr(model, "classes_") else 2
    return np.zeros((len(X), n_classes))
|
| 598 |
+
|
| 599 |
+
def _compute_metrics(
    self,
    y_true: np.ndarray,
    y_pred: np.ndarray,
    y_prob: np.ndarray,
    classes: list[str],
    is_binary: bool,
) -> MetricsResponse:
    """Compute the evaluation metrics and chart data for one set of predictions.

    Accuracy, sensitivity (recall), precision, F1 and MCC are computed from
    ``y_true``/``y_pred``; recall, precision and F1 use ``average="binary"``
    for binary problems and ``"macro"`` otherwise. AUC-ROC, the ROC curve and
    the PR curve are derived from ``y_prob``.

    Specificity is TN / (TN + FP) of the 2x2 confusion matrix for binary
    problems, matching the positive-class convention of the other binary
    metrics, and the macro average of per-class specificity otherwise.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        y_prob: Per-class scores, one column per class.
        classes: Class display names forwarded to the confusion-matrix DTO.
        is_binary: Whether the task is a two-class problem.

    Returns:
        ``MetricsResponse`` with values rounded to 4 decimals.
        ``train_accuracy``, ``cross_val_scores`` and ``overfitting_warning``
        are placeholders that the caller fills in.
    """
    avg = "binary" if is_binary else "macro"

    accuracy = float(accuracy_score(y_true, y_pred))
    sensitivity = float(recall_score(y_true, y_pred, average=avg, zero_division=0))
    precision = float(precision_score(y_true, y_pred, average=avg, zero_division=0))
    f1 = float(f1_score(y_true, y_pred, average=avg, zero_division=0))
    mcc = float(matthews_corrcoef(y_true, y_pred))

    cm = confusion_matrix(y_true, y_pred)
    if is_binary and cm.shape == (2, 2):
        # Binary specificity is the true-negative rate of the negative class.
        # Macro-averaging both classes would instead yield
        # (sensitivity + specificity) / 2, i.e. balanced accuracy.
        tn, fp = int(cm[0, 0]), int(cm[0, 1])
        specificity = tn / (tn + fp) if (tn + fp) > 0 else 0.0
    else:
        # Multiclass (or degenerate single-class matrix): per-class, then macro.
        specificity = self._macro_specificity(cm)

    # AUC-ROC
    auc_roc = self._compute_auc(y_true, y_prob, classes, is_binary)

    # Confusion matrix data
    cm_data = self._build_confusion_matrix_data(cm, classes, is_binary)

    # ROC curve
    roc_points = self._build_roc_curve(y_true, y_prob, is_binary)

    # PR curve
    pr_points = self._build_pr_curve(y_true, y_prob, is_binary)

    return MetricsResponse(
        accuracy=round(accuracy, 4),
        sensitivity=round(sensitivity, 4),
        specificity=round(specificity, 4),
        precision=round(precision, 4),
        f1_score=round(f1, 4),
        auc_roc=round(auc_roc, 4),
        confusion_matrix=cm_data,
        roc_curve=roc_points,
        pr_curve=pr_points,
        train_accuracy=0.0,  # filled by caller
        cross_val_scores=[],
        low_sensitivity_warning=sensitivity < _SENSITIVITY_WARNING_THRESHOLD,
        mcc=round(mcc, 4),
        overfitting_warning=False,  # filled by caller
    )
|
| 648 |
+
|
| 649 |
+
def _macro_specificity(self, cm: np.ndarray) -> float:
    """Return the unweighted mean of one-vs-rest specificity over all classes.

    For each class ``i`` of the square confusion matrix ``cm`` the specificity
    is TN / (TN + FP), where FP is the rest of column ``i`` and TN is every
    cell outside row ``i`` and column ``i``. A class with no negatives
    (TN + FP == 0) contributes 0.0.
    """
    true_pos = np.diag(cm)
    false_pos = cm.sum(axis=0) - true_pos
    false_neg = cm.sum(axis=1) - true_pos
    true_neg = cm.sum() - true_pos - false_neg - false_pos
    negatives = true_neg + false_pos

    # Divide only where the denominator is positive; other slots stay 0.0.
    per_class = np.zeros(len(cm), dtype=float)
    np.divide(true_neg, negatives, out=per_class, where=negatives > 0)
    return float(np.mean(per_class))
|
| 660 |
+
|
| 661 |
+
def _compute_auc(
    self,
    y_true: np.ndarray,
    y_prob: np.ndarray,
    classes: list[str],
    is_binary: bool,
) -> float:
    """Compute ROC AUC for binary or multiclass predictions, never raising.

    Binary: AUC of column 1 of ``y_prob`` against ``y_true``.

    Multiclass: one-vs-rest macro AUC. ``y_true`` is binarised against the
    label set ``0 .. y_prob.shape[1] - 1`` so indicator columns line up with
    the probability columns, and only classes with at least one positive
    sample in ``y_true`` are scored.

    Args:
        y_true: Ground-truth labels; the multiclass path treats them as
            integer codes indexing the columns of ``y_prob``.
        y_prob: Per-class scores, one column per class.
        classes: Unused here; kept so the signature matches the sibling helpers.
        is_binary: Selects the binary or the one-vs-rest path.

    Returns:
        The AUC as a float, or 0.5 when fewer than two classes are present
        or when the underlying computation raises.
    """
    try:
        if is_binary:
            return float(roc_auc_score(y_true, y_prob[:, 1]))

        # --- Multiclass AUC-ROC (OVR macro) ---
        n_model_classes = y_prob.shape[1]
        all_labels = list(range(n_model_classes))
        y_bin = np.asarray(label_binarize(y_true, classes=all_labels))

        # For exactly two labels label_binarize yields a single indicator
        # column, shape (n_samples, 1), not a 1-D vector. Expand it to one
        # column per class so it matches y_prob.
        if y_bin.ndim == 1:
            y_bin = y_bin.reshape(-1, 1)
        if y_bin.shape[1] == 1 and n_model_classes == 2:
            positive = y_bin[:, 0]
            y_bin = np.column_stack([1 - positive, positive])

        # Only evaluate classes that have at least one positive sample in
        # y_true -- OVR needs >= 1 positive per class column.
        present_mask = y_bin.sum(axis=0) > 0
        if present_mask.sum() < 2:
            logger.warning(
                "AUC: fewer than 2 classes in y_true (%d); returning 0.5",
                int(present_mask.sum()),
            )
            return 0.5

        return float(
            roc_auc_score(
                y_bin[:, present_mask],
                y_prob[:, present_mask],
                multi_class="ovr",
                average="macro",
            )
        )
    except Exception as exc:
        # Deliberate best-effort: a metric failure must not abort training.
        logger.error("AUC computation failed: %s", exc)
        return 0.5
|
| 705 |
+
|
| 706 |
+
def _build_confusion_matrix_data(
    self,
    cm: np.ndarray,
    classes: list[str],
    is_binary: bool,
) -> ConfusionMatrixData:
    """Wrap a confusion matrix in the ``ConfusionMatrixData`` DTO.

    The nested-list matrix and the class labels are always included. For a
    binary problem with a 2x2 matrix the four cells are also exposed as
    ``tn``/``fp``/``fn``/``tp``, read in row-major order.
    """
    fields = {"matrix": cm.tolist(), "labels": classes}
    if is_binary and cm.shape == (2, 2):
        # Row-major order of a 2x2 matrix: [0,0]=tn, [0,1]=fp, [1,0]=fn, [1,1]=tp.
        tn, fp, fn, tp = (int(cell) for cell in cm.ravel())
        fields.update(tn=tn, fp=fp, fn=fn, tp=tp)
    return ConfusionMatrixData(**fields)
|
| 721 |
+
|
| 722 |
+
def _build_roc_curve(
    self,
    y_true: np.ndarray,
    y_prob: np.ndarray,
    is_binary: bool,
) -> list[ROCPoint]:
    """Build the (FPR, TPR, threshold) points for the ROC chart.

    Binary: the ROC curve of column 1 of ``y_prob``.

    Multiclass: the micro-averaged curve. ``y_true`` is binarised against the
    label set ``0 .. y_prob.shape[1] - 1`` so each indicator column lines up
    with the probability column of the same class, even when a class is
    missing from ``y_true``.

    The curve is down-sampled to at most 200 evenly spaced points and
    infinite thresholds are replaced by 1.0.

    Args:
        y_true: Ground-truth labels; the multiclass path treats them as
            integer codes indexing the columns of ``y_prob``.
        y_prob: Per-class scores, one column per class.
        is_binary: Selects the binary or the micro-averaged path.

    Returns:
        ROC points rounded to 4 decimals, or a 20-point diagonal when the
        curve cannot be computed.
    """
    try:
        if is_binary:
            fpr, tpr, thresholds = roc_curve(y_true, y_prob[:, 1])
        else:
            # Micro-average ROC for multi-class, aligned to the model's columns.
            n_model_classes = y_prob.shape[1]
            y_bin = np.asarray(
                label_binarize(y_true, classes=list(range(n_model_classes)))
            )
            # Two labels produce a single indicator column; expand it to
            # one column per class so it matches y_prob.
            if y_bin.ndim == 1:
                y_bin = y_bin.reshape(-1, 1)
            if y_bin.shape[1] == 1 and n_model_classes == 2:
                positive = y_bin[:, 0]
                y_bin = np.column_stack([1 - positive, positive])
            fpr, tpr, thresholds = roc_curve(y_bin.ravel(), y_prob.ravel())

        thresholds = np.where(np.isinf(thresholds), 1.0, thresholds)
        idx = np.linspace(0, len(fpr) - 1, min(200, len(fpr)), dtype=int)
        last_threshold = len(thresholds) - 1
        return [
            ROCPoint(
                fpr=round(float(fpr[i]), 4),
                tpr=round(float(tpr[i]), 4),
                threshold=round(
                    float(_sanitize_float(thresholds[min(i, last_threshold)])), 4
                ),
            )
            for i in idx
        ]
    except Exception as exc:
        # Deliberate best-effort: the chart falls back to the chance diagonal.
        logger.warning("ROC curve computation failed: %s", exc)

    # Diagonal fallback
    pts = np.linspace(0, 1, 20)
    return [ROCPoint(fpr=float(p), tpr=float(p), threshold=float(1-p)) for p in pts]
|
| 759 |
+
|
| 760 |
+
def _build_pr_curve(
    self,
    y_true: np.ndarray,
    y_prob: np.ndarray,
    is_binary: bool,
) -> list[dict[str, float]]:
    """Build the precision-recall points shown alongside the ROC curve.

    Binary: the PR curve of column 1 of ``y_prob``.

    Multiclass: the micro-averaged curve. ``y_true`` is binarised against the
    label set ``0 .. y_prob.shape[1] - 1`` so each indicator column lines up
    with the probability column of the same class, even when a class is
    missing from ``y_true``.

    The curve is down-sampled to at most 200 evenly spaced points.

    Args:
        y_true: Ground-truth labels; the multiclass path treats them as
            integer codes indexing the columns of ``y_prob``.
        y_prob: Per-class scores, one column per class.
        is_binary: Selects the binary or the micro-averaged path.

    Returns:
        A list of ``{"precision": ..., "recall": ...}`` dicts rounded to 4
        decimals, or an empty list when the curve cannot be computed.
    """
    try:
        if is_binary:
            prec, rec, _ = precision_recall_curve(y_true, y_prob[:, 1])
        else:
            # Micro-average PR for multi-class, aligned to the model's columns.
            n_model_classes = y_prob.shape[1]
            y_bin = np.asarray(
                label_binarize(y_true, classes=list(range(n_model_classes)))
            )
            # Two labels produce a single indicator column; expand it to
            # one column per class so it matches y_prob.
            if y_bin.ndim == 1:
                y_bin = y_bin.reshape(-1, 1)
            if y_bin.shape[1] == 1 and n_model_classes == 2:
                positive = y_bin[:, 0]
                y_bin = np.column_stack([1 - positive, positive])
            prec, rec, _ = precision_recall_curve(y_bin.ravel(), y_prob.ravel())

        idx = np.linspace(0, len(prec) - 1, min(200, len(prec)), dtype=int)
        return [
            {"precision": round(float(prec[i]), 4), "recall": round(float(rec[i]), 4)}
            for i in idx
        ]
    except Exception as exc:
        # Deliberate best-effort: the chart is simply omitted on failure.
        logger.warning("PR curve computation failed: %s", exc)
    return []
|
| 791 |
+
|
| 792 |
+
# ------------------------------------------------------------------
|
| 793 |
+
# Model comparison
|
| 794 |
+
# ------------------------------------------------------------------
|
| 795 |
+
def add_to_comparison(self, session_id: str, model_id: str) -> CompareResponse:
    """Step-4 endpoint — add a trained model to the session's comparison list.

    The model is looked up in ``self._model_store``. Any existing entry with
    the same ``model_id`` is replaced. When the comparison store holds more
    than 50 sessions, the first session in insertion order is dropped.

    Args:
        session_id: Session whose comparison list is updated.
        model_id: Identifier of a model held in the model store.

    Returns:
        ``CompareResponse`` with the session's entries sorted by descending
        AUC-ROC and the id of the top entry.

    Raises:
        KeyError: If ``model_id`` is not in the model store.
        ValueError: If the stored model has no cached metrics.
    """
    stored = self._model_store.get(model_id)
    if stored is None:
        raise KeyError(f"Model not found: {model_id}")

    cached_metrics = stored.get("metrics")
    if cached_metrics is None:
        raise ValueError("Metrics not stored for this model")

    new_entry = CompareEntry(
        model_id=model_id,
        model_type=stored["model_type"],
        params=stored["params"],
        metrics=cached_metrics,
        training_time_ms=stored.get("training_time_ms", 0.0),
    )

    with self._lock:
        # Replace existing entry for same model_id.
        kept = [
            existing
            for existing in self._compare_store.get(session_id, [])
            if existing.model_id != model_id
        ]
        kept.append(new_entry)
        self._compare_store[session_id] = kept

        # Cap compare store at 50 sessions.
        if len(self._compare_store) > 50:
            first_session = next(iter(self._compare_store))
            del self._compare_store[first_session]

        ranked = list(self._compare_store[session_id])
        ranked.sort(key=lambda item: item.metrics.auc_roc, reverse=True)

    top_id = ranked[0].model_id if ranked else model_id
    return CompareResponse(entries=ranked, best_model_id=top_id)
|
| 836 |
+
|
| 837 |
+
def get_comparison(self, session_id: str) -> CompareResponse:
    """Step-4 endpoint — return the session's comparison list, best model first.

    Entries are copied out of the store under ``self._lock`` and sorted by
    descending AUC-ROC. ``best_model_id`` is the id of the top entry, or an
    empty string when the session has no entries.
    """
    with self._lock:
        ranked = list(self._compare_store.get(session_id, []))
    ranked.sort(key=lambda item: item.metrics.auc_roc, reverse=True)
    top_id = ranked[0].model_id if ranked else ""
    return CompareResponse(entries=ranked, best_model_id=top_id)
|
| 844 |
+
|
| 845 |
+
def clear_comparison(self, session_id: str) -> None:
    """Step-4 endpoint — drop every comparison entry stored for ``session_id``.

    The removal happens under ``self._lock``; a session with no stored
    entries is ignored.
    """
    with self._lock:
        try:
            del self._compare_store[session_id]
        except KeyError:
            # Nothing stored for this session.
            pass
|
| 849 |
+
|
| 850 |
+
def store_train_response_in_model(self, model_id: str, response: "TrainResponse") -> None:
    """Copy a training response's metrics and timing onto the stored model record.

    ``response.metrics`` and ``response.training_time_ms`` are written to the
    ``"metrics"`` and ``"training_time_ms"`` keys of the model's record, where
    ``add_to_comparison`` reads them. Nothing happens when ``model_id`` is not
    in the model store.
    """
    with self._lock:
        try:
            record = self._model_store[model_id]
        except KeyError:
            # Unknown model: nothing to update.
            return
        record["metrics"] = response.metrics
        record["training_time_ms"] = response.training_time_ms
|
app/services/specialty_registry.py
ADDED
|
@@ -0,0 +1,559 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Registry of all 20 medical specialties — aligned with Clinical Specialties Dataset Collection."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from app.models.schemas import SpecialtyInfo
|
| 5 |
+
|
| 6 |
+
# Registry of every specialty offered by the app, keyed by specialty id.
# Each value's `id` repeats its dictionary key. Insertion order matters:
# `list_specialties()` returns the entries in exactly this order.
#
# Maintenance rule: any feature count quoted inside a `clinical_context`
# string must equal `len(feature_names)` of the same entry — the prose is
# user-facing and is not derived from the list automatically.
SPECIALTIES: dict[str, SpecialtyInfo] = {
    "cardiology_hf": SpecialtyInfo(
        id="cardiology_hf",
        name="Cardiology",
        description="Predict 30-day mortality risk in heart failure patients using clinical biomarkers.",
        target_variable="DEATH_EVENT",
        target_type="binary",
        data_source="Heart Failure Clinical Records — kaggle.com/datasets/andrewmvd/heart-failure-clinical-data",
        what_ai_predicts="30-day mortality after heart failure discharge",
        license_type="CC BY 4.0",
        license_url="https://creativecommons.org/licenses/by/4.0/",
        requires_attribution=True,
        feature_names=[
            "age", "anaemia", "creatinine_phosphokinase", "diabetes",
            "ejection_fraction", "high_blood_pressure", "platelets",
            "serum_creatinine", "serum_sodium", "sex", "smoking", "time",
        ],
        clinical_context=(
            "Heart failure affects over 64 million people worldwide and carries a 30-day readmission "
            "rate of approximately 20–25%. Early identification of high-risk patients at discharge "
            "enables targeted interventions such as intensive follow-up and medication optimisation. "
            "Key clinical predictors include left ventricular ejection fraction, serum creatinine, "
            "and serum sodium levels. This model uses 12 clinical variables routinely collected "
            "at discharge to predict which patients are at highest risk of 30-day mortality."
        ),
    ),
    "radiology_pneumonia": SpecialtyInfo(
        id="radiology_pneumonia",
        name="Radiology",
        description="Classify chest X-ray findings as normal or pneumonia using clinical and imaging metadata.",
        target_variable="Finding_Label",
        target_type="binary",
        data_source="NIH Chest X-Ray Metadata — kaggle.com/datasets/nih-chest-xrays/data",
        what_ai_predicts="Normal vs. Pneumonia from chest X-ray clinical metadata",
        license_type="CC0 1.0",
        license_url="https://creativecommons.org/publicdomain/zero/1.0/",
        requires_attribution=False,
        feature_names=[
            "age", "sex", "view_position", "follow_up_number",
        ],
        clinical_context=(
            "Community-acquired pneumonia is a leading cause of hospitalisation, particularly in "
            "paediatric and elderly populations. Chest radiography is the standard diagnostic tool, "
            "but interpretation requires specialist expertise not always available at point of care. "
            "The NIH Chest X-Ray dataset contains over 100,000 frontal-view X-rays labelled across "
            "14 pathology categories. This model uses extracted radiological metadata features "
            "to distinguish normal findings from pneumonia, supporting rapid triage."
        ),
    ),
    "nephrology_ckd": SpecialtyInfo(
        id="nephrology_ckd",
        name="Nephrology",
        description="Classify patients as having chronic kidney disease or not from routine laboratory values.",
        target_variable="classification",
        target_type="binary",
        data_source="UCI CKD Dataset — archive.ics.uci.edu/dataset/336/chronic+kidney+disease",
        what_ai_predicts="Chronic kidney disease (ckd vs. notckd) from routine lab values",
        license_type="CC BY 4.0",
        license_url="https://creativecommons.org/licenses/by/4.0/",
        requires_attribution=True,
        feature_names=[
            "age", "blood_pressure", "specific_gravity", "albumin", "sugar",
            "red_blood_cells", "pus_cell", "blood_glucose_random", "blood_urea",
            "serum_creatinine", "sodium", "haemoglobin",
            "packed_cell_volume", "hypertension", "diabetes_mellitus",
        ],
        clinical_context=(
            "Chronic kidney disease affects approximately 10% of the global population and is "
            "a major risk factor for cardiovascular disease and end-stage renal failure. "
            "Early detection through routine blood and urine tests enables timely intervention "
            "to slow disease progression. Key biomarkers include serum creatinine, haemoglobin, "
            "and specific gravity of urine. This model classifies patients into CKD or non-CKD "
            "categories using 15 routine laboratory and clinical measurements."
        ),
    ),
    "oncology_breast": SpecialtyInfo(
        id="oncology_breast",
        name="Oncology — Breast",
        description="Classify breast biopsies as malignant or benign from cell nucleus measurements.",
        target_variable="diagnosis",
        target_type="binary",
        data_source="Breast Cancer Wisconsin — archive.ics.uci.edu/dataset/17/breast+cancer+wisconsin+diagnostic",
        what_ai_predicts="Malignancy of a breast biopsy from fine-needle aspirate cell measurements",
        license_type="CC BY 4.0",
        license_url="https://creativecommons.org/licenses/by/4.0/",
        requires_attribution=True,
        feature_names=[
            "mean_radius", "mean_texture", "mean_perimeter", "mean_area",
            "mean_smoothness", "mean_compactness", "mean_concavity",
            "mean_concave_points", "mean_symmetry", "worst_radius",
            "worst_texture", "worst_perimeter", "worst_area", "worst_smoothness",
        ],
        clinical_context=(
            "Breast cancer is the most common cancer in women worldwide, with early detection "
            "being critical for survival outcomes. Fine needle aspiration biopsies provide "
            "cellular material that can be analysed to determine malignancy. "
            "The Wisconsin dataset contains measurements of cell nuclei features extracted "
            "from digitised images of fine needle aspirates. This model classifies tumours "
            "as malignant (M) or benign (B) based on 14 geometric and textural features "
            "of cell nuclei, achieving clinical-grade discrimination performance."
        ),
    ),
    "neurology_parkinsons": SpecialtyInfo(
        id="neurology_parkinsons",
        name="Neurology — Parkinson's",
        description="Detect Parkinson's disease from vocal biomarkers extracted via sustained phonation.",
        target_variable="status",
        target_type="binary",
        data_source="UCI Parkinson's Dataset — archive.ics.uci.edu/dataset/174/parkinsons",
        what_ai_predicts="Parkinson's disease presence from voice biomarkers",
        license_type="CC BY 4.0",
        license_url="https://creativecommons.org/licenses/by/4.0/",
        requires_attribution=True,
        feature_names=[
            "MDVP_Fo_Hz", "MDVP_Fhi_Hz", "MDVP_Flo_Hz",
            "MDVP_Jitter_pct", "MDVP_Jitter_Abs", "MDVP_RAP", "MDVP_PPQ", "Jitter_DDP",
            "MDVP_Shimmer", "MDVP_Shimmer_dB", "Shimmer_APQ3", "Shimmer_APQ5",
            "MDVP_APQ", "Shimmer_DDA",
            "NHR", "HNR", "RPDE", "DFA", "spread1", "spread2", "D2", "PPE",
        ],
        clinical_context=(
            "Parkinson's disease is a progressive neurodegenerative disorder affecting "
            "approximately 10 million people globally. Vocal tremor and dysphonia are "
            "among the earliest and most consistent symptoms, often preceding motor symptoms. "
            "Voice recordings can be analysed non-invasively to extract biomarkers of vocal "
            "instability including jitter, shimmer, and harmonics-to-noise ratio. "
            # Count corrected to match the 22 entries in feature_names above.
            "This model uses 22 voice measurement features to classify patients as "
            "having Parkinson's disease (status=1) or healthy controls (status=0)."
        ),
    ),
    "endocrinology_diabetes": SpecialtyInfo(
        id="endocrinology_diabetes",
        name="Endocrinology — Diabetes",
        description="Predict diabetes onset within 5 years from metabolic and demographic markers.",
        target_variable="Outcome",
        target_type="binary",
        data_source="Pima Indians Diabetes — kaggle.com/datasets/uciml/pima-indians-diabetes-database",
        what_ai_predicts="Diabetes onset within 5 years from metabolic markers",
        license_type="CC0 1.0 / CC BY 4.0",
        license_url="https://creativecommons.org/publicdomain/zero/1.0/",
        requires_attribution=True,
        feature_names=[
            "pregnancies", "glucose", "blood_pressure", "skin_thickness",
            "insulin", "bmi", "diabetes_pedigree_function", "age",
        ],
        clinical_context=(
            "Type 2 diabetes affects over 400 million people globally, with millions more "
            "at risk due to metabolic syndrome and lifestyle factors. Early identification "
            "of high-risk individuals enables preventive interventions including dietary "
            "changes, exercise, and pharmacological treatment. "
            "The Pima Indians dataset contains metabolic measurements from a population "
            "with high diabetes prevalence. This model predicts diabetes onset within "
            "5 years using 8 clinical and laboratory features including fasting glucose, "
            "BMI, and diabetes pedigree function."
        ),
    ),
    "hepatology_liver": SpecialtyInfo(
        id="hepatology_liver",
        name="Hepatology — Liver",
        description="Identify liver disease from routine blood test results.",
        target_variable="Dataset",
        target_type="binary",
        data_source="Indian Liver Patient Dataset — archive.ics.uci.edu/dataset/225/ilpd+indian+liver+patient+dataset",
        what_ai_predicts="Liver disease vs. healthy from blood test results",
        license_type="CC BY 4.0",
        license_url="https://creativecommons.org/licenses/by/4.0/",
        requires_attribution=True,
        feature_names=[
            "age", "gender", "total_bilirubin", "direct_bilirubin",
            "alkaline_phosphotase", "alamine_aminotransferase",
            "aspartate_aminotransferase", "total_proteins",
            "albumin", "albumin_globulin_ratio",
        ],
        clinical_context=(
            "Liver disease encompasses a spectrum of conditions from fatty liver to cirrhosis "
            "and hepatocellular carcinoma, representing a major global health burden. "
            "Biochemical liver function tests provide quantitative markers of hepatic injury "
            "and synthetic function. Early detection through blood test abnormalities "
            "allows timely referral and treatment. "
            "This model uses 10 routine liver function test parameters to classify "
            "patients as having liver disease or not, supporting clinical triage decisions."
        ),
    ),
    "cardiology_stroke": SpecialtyInfo(
        id="cardiology_stroke",
        name="Cardiology — Stroke",
        description="Predict stroke risk from demographics, comorbidities, and lifestyle factors.",
        target_variable="stroke",
        target_type="binary",
        data_source="Stroke Prediction Dataset — kaggle.com/datasets/fedesoriano/stroke-prediction-dataset",
        what_ai_predicts="Stroke occurrence from demographics and comorbidities",
        license_type="No formal license",
        license_url="",
        requires_attribution=False,
        feature_names=[
            "gender", "age", "hypertension", "heart_disease", "ever_married",
            "work_type", "residence_type", "avg_glucose_level", "bmi", "smoking_status",
        ],
        clinical_context=(
            "Stroke is the second leading cause of death globally and the leading cause "
            "of long-term disability. Identifying high-risk individuals enables preventive "
            "interventions such as anticoagulation, blood pressure control, and lifestyle "
            "modification. Key risk factors include hypertension, atrial fibrillation, "
            "diabetes, and smoking. "
            "This model uses 10 demographic, clinical, and lifestyle variables to predict "
            "stroke occurrence, supporting population-level screening and risk stratification."
        ),
    ),
    # NOTE(review): description/what_ai_predicts describe a binary "history of
    # mental illness" target, while clinical_context talks about four-level
    # PHQ-9 severity classification — confirm which wording is intended.
    "mental_health": SpecialtyInfo(
        id="mental_health",
        name="Mental Health",
        description="Predict history of mental illness from lifestyle, demographic, and behavioural factors.",
        target_variable="severity_class",
        target_type="binary",
        data_source="Depression Dataset — kaggle.com/datasets/anthonytherrien/depression-dataset",
        what_ai_predicts="History of mental illness (has_condition / no_condition) from lifestyle and demographic data",
        license_type="CC BY-SA 4.0",
        license_url="https://creativecommons.org/licenses/by-sa/4.0/",
        requires_attribution=True,
        feature_names=[
            "age", "number_of_children", "income", "dietary_habits", "sleep_patterns",
            "alcohol_consumption", "physical_activity_level", "smoking_status",
            "employment_status", "history_substance_abuse",
            "family_history_depression", "chronic_medical_conditions",
            "marital_status", "education_level",
        ],
        clinical_context=(
            "Depression is the leading cause of disability worldwide, affecting over 280 million "
            "people. The PHQ-9 questionnaire is a validated screening tool used in primary care "
            "to assess depression severity across four categories: minimal, mild, moderate, "
            "and severe. Accurate severity classification guides treatment decisions from "
            "watchful waiting to pharmacotherapy and referral to specialist mental health services. "
            "This model classifies depression severity using lifestyle, occupational, "
            "and demographic factors alongside validated symptom responses."
        ),
    ),
    "pulmonology_copd": SpecialtyInfo(
        id="pulmonology_copd",
        name="Pulmonology — COPD",
        description="Predict COPD exacerbation risk from spirometry and clinical EHR data.",
        target_variable="exacerbation",
        target_type="binary",
        data_source="COPD Dataset — kaggle.com/datasets/prakharrathi25/copd-student-dataset",
        what_ai_predicts="COPD acute exacerbation risk from spirometry and EHR data",
        license_type="CC0 1.0",
        license_url="https://creativecommons.org/publicdomain/zero/1.0/",
        requires_attribution=False,
        feature_names=[
            "age", "sex", "smoking_pack_years", "fev1_litres", "fvc_litres",
            "fev1_fvc_ratio", "prior_exacerbations_year", "bmi",
            "mrc_dyspnea_scale", "sgrq_score", "copd_gold_stage",
        ],
        clinical_context=(
            "Chronic obstructive pulmonary disease (COPD) affects approximately 300 million "
            "people and is a leading cause of morbidity and mortality. Acute exacerbations "
            "are episodes of worsening symptoms requiring increased treatment and are a major "
            "driver of hospitalisation and disease progression. "
            "Spirometry measurements, particularly FEV1 and the FEV1/FVC ratio, are "
            "the gold standard for COPD diagnosis and staging. "
            "This model predicts the risk of acute exacerbation using clinical, "
            "spirometric, and patient-reported outcome measures from the Kaggle COPD patient dataset."
        ),
    ),
    # NOTE(review): target_type is "multiclass" and what_ai_predicts lists four
    # anaemia types, but clinical_context ends with "anaemic or non-anaemic" —
    # confirm which wording is intended.
    "haematology_anaemia": SpecialtyInfo(
        id="haematology_anaemia",
        name="Haematology — Anaemia",
        description="Detect anaemia from full blood count indices including haemoglobin, MCV, MCH, and MCHC.",
        target_variable="anemia_type",
        target_type="multiclass",
        data_source="Anaemia Classification Dataset — kaggle.com/datasets/biswaranjanrao/anemia-dataset",
        what_ai_predicts="Type of anaemia from full blood count (iron deficiency / megaloblastic / normocytic / normal)",
        license_type="Unknown",
        license_url="",
        requires_attribution=False,
        feature_names=[
            "gender", "haemoglobin", "mch", "mchc", "mcv",
        ],
        clinical_context=(
            "Anaemia affects approximately 1.62 billion people globally and is defined by "
            "haemoglobin below 12 g/dL in women and 13 g/dL in men. Full blood count indices "
            "including mean corpuscular volume (MCV), mean corpuscular haemoglobin (MCH), "
            "and mean corpuscular haemoglobin concentration (MCHC) are routinely used to "
            "screen for and characterise anaemia in primary care. Low MCV indicates "
            "microcytic anaemia (typically iron deficiency), while elevated MCV suggests "
            "macrocytic anaemia (B12 or folate deficiency). "
            "This model classifies patients as anaemic or non-anaemic using five standard "
            "full blood count parameters, supporting automated screening in high-volume settings."
        ),
    ),
    "dermatology": SpecialtyInfo(
        id="dermatology",
        name="Dermatology",
        description="Classify skin lesions as benign or malignant from HAM10000 dermoscopy metadata.",
        target_variable="dx_type",
        target_type="binary",
        data_source="HAM10000 Metadata — Harvard Dataverse doi:10.7910/DVN/DBW86T",
        what_ai_predicts="Benign vs. malignant skin lesion from dermoscopy metadata",
        license_type="CC BY-NC 4.0",
        license_url="https://creativecommons.org/licenses/by-nc/4.0/",
        requires_attribution=True,
        feature_names=[
            "age", "sex", "localization",
        ],
        clinical_context=(
            "Melanoma and other skin cancers are among the most rapidly increasing malignancies "
            "globally, with early detection being the primary determinant of survival. "
            "Dermoscopy improves diagnostic accuracy compared to naked-eye examination, "
            "but requires specialist training. The HAM10000 dataset contains over 10,000 "
            "dermoscopic images with clinical metadata from seven diagnostic categories. "
            "This model uses morphological and demographic features to distinguish benign "
            "from malignant skin lesions, supporting earlier referral for biopsy."
        ),
    ),
    # NOTE(review): target_type is "binary" (DR present / absent) while the
    # clinical_context still speaks of classifying a "severity grade" — confirm.
    "ophthalmology": SpecialtyInfo(
        id="ophthalmology",
        name="Ophthalmology",
        description="Detect diabetic retinopathy from retinal image analysis features.",
        target_variable="severity_grade",
        target_type="binary",
        data_source="Diabetic Retinopathy Debrecen Dataset — archive.ics.uci.edu/dataset/329/diabetic+retinopathy+debrecen+data+set",
        what_ai_predicts="Presence of diabetic retinopathy signs from retinal analysis (0=No DR, 1=DR present)",
        license_type="CC BY 4.0",
        license_url="https://creativecommons.org/licenses/by/4.0/",
        requires_attribution=True,
        feature_names=[
            "quality_assessment", "pre_screening", "ma_detection_0.5",
            "ma_detection_0.6", "ma_detection_0.7", "ma_detection_0.8",
            "ma_detection_0.9", "ma_detection_1.0",
            "exudate_1", "exudate_2", "exudate_3", "exudate_4",
            "exudate_5", "exudate_6", "exudate_7", "exudate_8",
            "macula_od_distance", "optic_disc_diameter", "am_fm_classification",
        ],
        clinical_context=(
            "Diabetic retinopathy is the leading cause of blindness in working-age adults globally, "
            "affecting approximately one third of people with diabetes. Regular ophthalmological "
            "screening is recommended but limited by specialist availability. "
            "Grading retinopathy severity from mild non-proliferative to proliferative disease "
            "determines urgency of laser treatment or anti-VEGF therapy. "
            # Count corrected to match the 19 entries in feature_names above.
            "This model classifies retinopathy severity grade using 19 clinical and "
            "retinal examination features, prioritising high-risk patients for urgent review."
        ),
    ),
    "orthopaedics": SpecialtyInfo(
        id="orthopaedics",
        name="Orthopaedics — Spine",
        description="Classify spinal status as normal or abnormal from biomechanical measurements.",
        target_variable="class",
        target_type="binary",
        data_source="Vertebral Column Dataset — archive.ics.uci.edu/dataset/212/vertebral+column",
        what_ai_predicts="Normal vs. abnormal spinal status from pelvic biomechanical measurements",
        license_type="CC BY 4.0",
        license_url="https://creativecommons.org/licenses/by/4.0/",
        requires_attribution=True,
        feature_names=[
            "pelvic_incidence", "pelvic_tilt", "lumbar_lordosis_angle",
            "sacral_slope", "pelvic_radius", "degree_spondylolisthesis",
        ],
        clinical_context=(
            "Spinal disorders including disc herniation and spondylolisthesis are among the "
            "most common causes of chronic pain and disability worldwide. Biomechanical "
            "measurements of the pelvis and lumbar spine provide objective indicators "
            "of structural abnormality that complement clinical examination. "
            "The UCI Vertebral Column dataset contains six orthopaedic measurements "
            "extracted from lateral X-rays. This model classifies patients as having "
            "normal spinal anatomy or an abnormal condition (disc herniation / spondylolisthesis)."
        ),
    ),
    "icu_sepsis": SpecialtyInfo(
        id="icu_sepsis",
        name="ICU / Sepsis",
        description="Predict sepsis onset from vital signs and laboratory results in ICU patients.",
        target_variable="SepsisLabel",
        target_type="binary",
        data_source="PhysioNet Sepsis Dataset — physionet.org/content/challenge-2019/1.0.0/",
        what_ai_predicts="Sepsis onset (SepsisLabel=1) from ICU vital signs and lab results",
        license_type="CC BY 4.0",
        license_url="https://creativecommons.org/licenses/by/4.0/",
        requires_attribution=True,
        feature_names=[
            "HR", "O2Sat", "Temp", "SBP", "MAP", "Resp",
            "BaseExcess", "pH", "PaCO2", "Lactate", "Creatinine",
            "Bilirubin_total", "WBC", "Platelets", "Age", "Gender",
        ],
        clinical_context=(
            "Sepsis is a life-threatening organ dysfunction caused by a dysregulated host "
            "response to infection, with a mortality rate of 20–30% that rises to over 40% "
            "for septic shock. Early identification and treatment within the first hour "
            "significantly improves survival outcomes. "
            "Vital signs and laboratory biomarkers such as lactate, procalcitonin, and "
            "white blood cell count reflect the physiological derangement of sepsis. "
            "This model uses routinely collected ICU monitoring data to predict sepsis "
            "onset up to 6 hours before clinical diagnosis, enabling proactive management."
        ),
    ),
    "obstetrics_fetal": SpecialtyInfo(
        id="obstetrics_fetal",
        name="Obstetrics — Fetal Health",
        description="Classify fetal cardiotocography as normal, suspect, or pathological.",
        target_variable="fetal_health",
        target_type="multiclass",
        data_source="Cardiotocography Dataset — archive.ics.uci.edu/dataset/193/cardiotocography",
        what_ai_predicts="Fetal CTG classification: 1=Normal, 2=Suspect, 3=Pathological",
        license_type="CC BY 4.0",
        license_url="https://creativecommons.org/licenses/by/4.0/",
        requires_attribution=True,
        feature_names=[
            "baseline_value", "accelerations", "fetal_movement",
            "uterine_contractions", "light_decelerations", "severe_decelerations",
            "prolongued_decelerations", "abnormal_short_term_variability",
            "mean_value_short_term_variability", "pct_time_abnormal_long_term_variability",
            "mean_value_long_term_variability", "histogram_mode",
        ],
        clinical_context=(
            "Cardiotocography (CTG) is the standard method for monitoring fetal wellbeing "
            "during pregnancy and labour, recording fetal heart rate and uterine contractions. "
            "Abnormal CTG patterns may indicate fetal hypoxia requiring urgent intervention "
            "such as emergency caesarean section. CTG interpretation is subjective and "
            "varies between clinicians. "
            "This model classifies CTG recordings into three categories — Normal (class 1), "
            "Suspect (class 2), and Pathological (class 3) — using 12 quantitative "
            "cardiotocography features to support consistent clinical decision-making."
        ),
    ),
    "cardiology_arrhythmia": SpecialtyInfo(
        id="cardiology_arrhythmia",
        name="Cardiology — Arrhythmia",
        description="Detect cardiac arrhythmia from ECG interval and waveform features.",
        target_variable="arrhythmia",
        target_type="binary",
        data_source="UCI Arrhythmia Dataset — archive.ics.uci.edu/dataset/5/arrhythmia",
        what_ai_predicts="Cardiac arrhythmia presence vs. normal sinus rhythm from ECG features",
        license_type="CC BY 4.0",
        license_url="https://creativecommons.org/licenses/by/4.0/",
        requires_attribution=True,
        feature_names=[
            "age", "sex", "height", "weight", "QRS_duration",
            "PR_interval", "QT_interval", "T_interval", "P_interval",
            "QRS_axis", "T_axis", "P_axis", "heart_rate", "J_point",
            "heart_rate_2",
            "DI_R", "DI_S", "DI_T", "DI_P", "DI_QRSA", "DI_QRSTA",
            "DII_R", "DII_S", "DII_T", "DII_P", "DII_QRSA", "DII_QRSTA",
            "V1_R", "V1_S", "V1_T", "V1_P", "V5_R", "V5_S",
        ],
        clinical_context=(
            "Cardiac arrhythmias encompass a diverse group of rhythm disorders ranging from "
            "benign atrial ectopics to life-threatening ventricular fibrillation. "
            "The 12-lead ECG is the primary diagnostic tool, providing measurements of "
            "conduction intervals and waveform morphology. Automated arrhythmia detection "
            "supports cardiac monitoring programs and remote cardiology services. "
            # Count corrected to match the 33 entries in feature_names above;
            # "demographic and" added because age/sex/height/weight are listed.
            "This model uses 33 demographic and ECG-derived parameters to classify patients as having "
            "arrhythmia or normal cardiac rhythm, supporting cardiac screening programs."
        ),
    ),
    "oncology_cervical": SpecialtyInfo(
        id="oncology_cervical",
        name="Oncology — Cervical",
        description="Assess cervical cancer biopsy risk from demographic and behavioural risk factors.",
        target_variable="Biopsy",
        target_type="binary",
        data_source="Cervical Cancer Dataset — archive.ics.uci.edu/dataset/383/cervical+cancer+risk+factors",
        what_ai_predicts="Biopsy-confirmed cervical cancer from demographic and behavioural data",
        license_type="CC BY 4.0",
        license_url="https://creativecommons.org/licenses/by/4.0/",
        requires_attribution=True,
        feature_names=[
            "age", "number_of_sexual_partners", "first_sexual_intercourse_age",
            "num_of_pregnancies",
            "smokes", "smokes_years",
            "hormonal_contraceptives", "hormonal_contraceptives_years",
            "iud", "iud_years",
            "stds", "stds_number", "stds_condylomatosis",
            "stds_cervical_condylomatosis", "stds_hpv",
            "dx_cancer", "dx_cin", "dx_hpv", "dx",
            "hinselmann", "schiller", "citology",
        ],
        clinical_context=(
            "Cervical cancer is the fourth most common cancer in women globally, with "
            "persistent HPV infection being the primary causative factor. Risk stratification "
            "using demographic and behavioural data can identify women who require "
            "expedited colposcopy or biopsy. Early detection through cytological and "
            "histological examination enables curative treatment. "
            # Count corrected to match the 22 entries in feature_names above.
            "This model uses 22 demographic, sexual health, and medical history variables "
            "to predict biopsy-confirmed cervical cancer, supporting targeted screening "
            "in resource-limited settings."
        ),
    ),
    "thyroid": SpecialtyInfo(
        id="thyroid",
        name="Thyroid / Endocrinology",
        description="Classify thyroid function as hypothyroid, hyperthyroid, or normal from biochemical assay results.",
        target_variable="class",
        target_type="multiclass",
        data_source="UCI New Thyroid Dataset — archive.ics.uci.edu/dataset/102/thyroid+disease",
        what_ai_predicts="Thyroid function classification (hyperthyroid / normal / hypothyroid)",
        license_type="CC BY 4.0",
        license_url="https://creativecommons.org/licenses/by/4.0/",
        requires_attribution=True,
        feature_names=[
            "T3_resin_uptake", "total_serum_thyroxine", "T3", "TSH", "max_abs_diff_TSH",
        ],
        clinical_context=(
            "Thyroid dysfunction affects approximately 5% of the global population. "
            "Hyperthyroidism (excess hormone) and hypothyroidism (deficiency) are diagnosed "
            "primarily through laboratory thyroid function tests. "
            "The T3 resin uptake reflects thyroid hormone binding capacity, "
            "total serum thyroxine (T4) measures overall hormone production, "
            "and TSH (thyroid-stimulating hormone) is the most sensitive marker of thyroid status. "
            "This model uses 5 biochemical assay values from the UCI New Thyroid dataset "
            "to classify patients into three categories — hyperthyroid, normal, or hypothyroid — "
            "supporting primary care screening and endocrinology referral decisions."
        ),
    ),
    "pharmacy_readmission": SpecialtyInfo(
        id="pharmacy_readmission",
        name="Pharmacy — Readmission",
        description="Predict hospital readmission risk for diabetic inpatients using medication and clinical data.",
        target_variable="readmitted",
        target_type="multiclass",
        data_source="Diabetes 130-US Hospitals Dataset — archive.ics.uci.edu/dataset/296/diabetes+130-us+hospitals+for+years+1999-2008",
        what_ai_predicts="Readmission risk: <30 days / >30 days / NO from medication and utilisation data",
        license_type="CC BY 4.0",
        license_url="https://creativecommons.org/licenses/by/4.0/",
        requires_attribution=True,
        feature_names=[
            "age", "gender", "time_in_hospital", "num_lab_procedures",
            "num_procedures", "num_medications", "number_outpatient",
            "number_emergency", "number_inpatient", "number_diagnoses",
            "max_glu_serum", "A1Cresult", "metformin", "insulin", "change",
            "discharge_disposition_id", "admission_type_id",
            "admission_source_id", "diag_1",
        ],
        clinical_context=(
            "Hospital readmission within 30 days is a key quality indicator and financial "
            "penalty trigger under value-based care programmes. Diabetic patients have "
            "disproportionately high readmission rates due to complex medication regimens, "
            "comorbidities, and glycaemic instability. "
            "The UCI 130-US Hospitals dataset contains over 100,000 diabetic patient "
            "encounters from 130 US hospitals over 10 years. "
            "This model classifies patients into three readmission risk groups — "
            # Count corrected to match the 19 entries in feature_names above.
            "within 30 days, after 30 days, or no readmission — using 19 clinical, "
            "medication, and utilisation variables to guide discharge planning."
        ),
    ),
}
|
| 550 |
+
|
| 551 |
+
|
| 552 |
+
def get_specialty(specialty_id: str) -> SpecialtyInfo | None:
    """Return the registry entry for ``specialty_id``, or ``None`` when the id is not registered."""
    entry = SPECIALTIES.get(specialty_id)
    return entry
|
| 555 |
+
|
| 556 |
+
|
| 557 |
+
def list_specialties() -> list[SpecialtyInfo]:
    """Return every registered specialty as a new list, in registry insertion order."""
    return [info for info in SPECIALTIES.values()]
|
app/utils/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Shared utility helpers used across services."""
|
arena/__init__.py
ADDED
|
File without changes
|
arena/router.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Model Arena REST endpoints."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
from fastapi import APIRouter, HTTPException, Request, status
|
| 7 |
+
from fastapi.responses import Response
|
| 8 |
+
|
| 9 |
+
from .schemas import (
|
| 10 |
+
ArenaCompareRequest,
|
| 11 |
+
ArenaCompareResponse,
|
| 12 |
+
ArenaRun,
|
| 13 |
+
BatchTrainRequest,
|
| 14 |
+
BatchTrainResponse,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
router = APIRouter(prefix="/api/arena", tags=["arena"])
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _get_arena_service(request: Request):
    """Fetch the arena service object stored on the application state."""
    app_state = request.app.state
    return app_state.arena_service
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@router.post("/batch-train", response_model=BatchTrainResponse)
def batch_train(request: Request, body: BatchTrainRequest) -> BatchTrainResponse:
    """Train multiple models in one request.

    Args:
        request: Incoming request; used to reach the arena service on app state.
        body: Session id plus the list of model configurations to train.

    Returns:
        The batch result produced by the arena service.

    Raises:
        HTTPException: 404 when the service raises ``KeyError`` (unknown session),
            409 when it raises ``ValueError`` (a batch is already running for the
            session), 500 for any other failure.
    """
    arena = _get_arena_service(request)
    logger.info("Arena batch_train: session=%s models=%d", body.session_id, len(body.models))
    try:
        result = arena.batch_train(body)
        completed = sum(1 for r in result.runs if r.status == "completed")
        logger.info("Arena batch_train done: %d/%d completed", completed, len(result.runs))
        return result
    except KeyError as exc:
        # str(KeyError("msg")) yields "'msg'" (the repr of the argument), which
        # would leak stray quotes into the response; use the raw message instead.
        detail = str(exc.args[0]) if exc.args else str(exc)
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=detail) from exc
    except ValueError as exc:
        # The service signals "batch already in progress" with ValueError: the
        # session exists, so this is a conflict rather than a missing resource.
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=str(exc)) from exc
    except Exception as exc:
        logger.exception("Batch training failed")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)
        ) from exc
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
@router.get("/runs/{session_id}", response_model=list[ArenaRun])
def get_runs(request: Request, session_id: str) -> list[ArenaRun]:
    """List the arena runs recorded for ``session_id``.

    A session that is known to the ML service but has no arena runs yet yields
    whatever the arena service returns for it; a session unknown to both
    services is reported as 404.
    """
    arena = _get_arena_service(request)
    ml_service = request.app.state.ml_service
    session_known = (
        arena.has_session(session_id)
        or ml_service.get_session(session_id) is not None
    )
    if session_known:
        return arena.get_runs(session_id)
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Session '{session_id}' not found",
    )
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@router.post("/compare/{session_id}", response_model=ArenaCompareResponse)
def compare_runs(
    request: Request, session_id: str, body: ArenaCompareRequest
) -> ArenaCompareResponse:
    """Build a comparison of the requested runs; a ``ValueError`` from the service becomes HTTP 400."""
    service = _get_arena_service(request)
    try:
        comparison = service.compare_runs(session_id, body.run_ids)
    except ValueError as exc:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc))
    return comparison
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
@router.delete("/runs/{session_id}", status_code=204)
def clear_runs(request: Request, session_id: str):
    """Drop every stored arena run for ``session_id`` and answer with an empty 204."""
    service = _get_arena_service(request)
    service.clear_runs(session_id)
    return Response(status_code=204)
|
arena/schemas.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pydantic schemas for Model Arena."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from typing import Any, Literal
|
| 5 |
+
|
| 6 |
+
from pydantic import BaseModel, Field, field_validator
|
| 7 |
+
|
| 8 |
+
from app.models.ml_schemas import KNNScatterData, MetricsResponse, ModelType
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class ArenaModelConfig(BaseModel):
    """Configuration of a single model inside a batch-train request."""

    # Which model family to train.
    model_type: ModelType
    # Parameters for the model; an empty dict when omitted.
    params: dict[str, Any] = Field(default_factory=dict)
    # Forwarded to the ML service as its ``tune`` flag.
    tune: bool = False
    # Forwarded to the ML service as its ``use_feature_selection`` flag.
    use_feature_selection: bool = False
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class BatchTrainRequest(BaseModel):
    """Payload asking for several models to be trained against one session."""

    # Session the models are trained on.
    session_id: str
    # Between one and eight model configurations per batch.
    models: list[ArenaModelConfig] = Field(..., min_length=1, max_length=8)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class ArenaRun(BaseModel):
    """Outcome of training one model in the arena, whether it completed or failed."""

    run_id: str
    model_id: str
    model_type: ModelType
    params: dict[str, Any]
    # Left as None when the run failed.
    metrics: MetricsResponse | None = None
    training_time_ms: float
    feature_names: list[str]
    knn_scatter: KNNScatterData | None = None
    status: Literal["completed", "failed"] = "completed"
    # Error text for failed runs; None otherwise.
    error: str | None = None
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class BatchTrainResponse(BaseModel):
    """Result of a batch-train call: the runs it produced plus summary fields."""

    session_id: str
    runs: list[ArenaRun]
    total_training_time_ms: float
    # None when no run qualifies as best.
    best_run_id: str | None = None
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class ArenaCompareRequest(BaseModel):
    """Payload naming the runs (two to eight, all distinct) to compare."""

    run_ids: list[str] = Field(..., min_length=2, max_length=8)

    @field_validator("run_ids")
    @classmethod
    def no_duplicates(cls, v: list[str]) -> list[str]:
        """Reject the list if any run id appears more than once."""
        seen: set[str] = set()
        for run_id in v:
            if run_id in seen:
                raise ValueError("run_ids must be unique")
            seen.add(run_id)
        return v
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class ArenaCompareResponse(BaseModel):
    """Side-by-side comparison payload for a set of selected runs."""

    runs: list[ArenaRun]
    best_run_id: str
    # metric_name -> {run_id: value}
    metric_summary: dict[str, dict[str, float]]
    # param_name -> {run_id: value}; only parameters whose values differ are listed
    param_diff: dict[str, dict[str, Any]]
|
arena/service.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Arena service -- batch training and run management."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import logging
|
| 5 |
+
import threading
|
| 6 |
+
import uuid
|
| 7 |
+
from collections import OrderedDict
|
| 8 |
+
from typing import Any
|
| 9 |
+
|
| 10 |
+
from app.services.ml_service import MLService
|
| 11 |
+
|
| 12 |
+
from .schemas import (
|
| 13 |
+
ArenaCompareResponse,
|
| 14 |
+
ArenaModelConfig,
|
| 15 |
+
ArenaRun,
|
| 16 |
+
BatchTrainRequest,
|
| 17 |
+
BatchTrainResponse,
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
logger = logging.getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
_MAX_SESSIONS = 50
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class ArenaService:
    """In-memory orchestrator and store for Model Arena runs.

    Runs are kept per session in an ``OrderedDict``. A session moves to the end
    each time a batch is stored for it, and once more than ``_MAX_SESSIONS``
    sessions hold runs the oldest-written ones are dropped. Every read or write
    of the shared containers happens while holding ``self._lock``.
    """

    def __init__(self, ml_service: MLService) -> None:
        self._ml = ml_service
        self._lock = threading.Lock()
        # session_id -> list of ArenaRun (evicted oldest-first at _MAX_SESSIONS)
        self._runs: OrderedDict[str, list[ArenaRun]] = OrderedDict()
        # Sessions with a batch currently training; used to reject overlapping batches.
        self._in_flight: set[str] = set()

    @staticmethod
    def _is_comparable(run: ArenaRun) -> bool:
        """Return True for a completed run that carries metrics."""
        return run.status == "completed" and run.metrics is not None

    @staticmethod
    def _best_run(candidates: list[ArenaRun]) -> ArenaRun:
        """Return the candidate with the highest ``auc_roc`` (the first one wins ties)."""
        return max(candidates, key=lambda r: r.metrics.auc_roc)  # type: ignore[union-attr]

    def _train_one(self, session_id: str, model_cfg: ArenaModelConfig) -> ArenaRun:
        """Train a single model and wrap the outcome, successful or not, in an ArenaRun.

        ``ImportError`` and ``MemoryError`` are re-raised; any other exception is
        logged and recorded as a run with ``status="failed"``.
        """
        run_id = str(uuid.uuid4())
        try:
            response = self._ml.train_and_evaluate(
                session_id,
                model_cfg.model_type,
                model_cfg.params,
                tune=model_cfg.tune,
                use_feature_selection=model_cfg.use_feature_selection,
            )
            self._ml.store_train_response_in_model(response.model_id, response)
            return ArenaRun(
                run_id=run_id,
                model_id=response.model_id,
                model_type=model_cfg.model_type,
                params=response.params,
                metrics=response.metrics,
                training_time_ms=response.training_time_ms,
                feature_names=response.feature_names,
                knn_scatter=response.knn_scatter,
            )
        except (ImportError, MemoryError):
            raise  # Non-recoverable — propagate to router as 500
        except Exception as exc:
            logger.warning("Arena: model %s failed: %s", model_cfg.model_type.value, exc)
            return ArenaRun(
                run_id=run_id,
                model_id="",
                model_type=model_cfg.model_type,
                params=model_cfg.params,
                metrics=None,
                training_time_ms=0.0,
                feature_names=[],
                status="failed",
                error=str(exc),
            )

    def batch_train(self, request: BatchTrainRequest) -> BatchTrainResponse:
        """Train multiple models sequentially on the same session.

        Args:
            request: Session id and the model configurations to train.

        Returns:
            The runs produced by this batch, their summed training time, and the
            id of the best run (by ``auc_roc``) across all runs stored for the
            session, or ``None`` when no completed run exists.

        Raises:
            KeyError: The ML service has no session with this id.
            ValueError: A batch is already being trained for this session.
        """
        session_id = request.session_id

        # Pre-flight: verify session exists (raises KeyError → router returns 404)
        if self._ml.get_session(session_id) is None:
            raise KeyError(f"Session '{session_id}' not found. Run /api/prepare first.")

        # Guard against concurrent batch_train for same session
        with self._lock:
            if session_id in self._in_flight:
                raise ValueError(
                    f"Batch training already in progress for session '{session_id}'"
                )
            self._in_flight.add(session_id)

        try:
            runs: list[ArenaRun] = [
                self._train_one(session_id, model_cfg) for model_cfg in request.models
            ]
            # Failed runs are recorded with 0.0 ms, so they add nothing here.
            total_time = sum((run.training_time_ms for run in runs), 0.0)

            with self._lock:
                self._runs.setdefault(session_id, []).extend(runs)
                self._runs.move_to_end(session_id)
                while len(self._runs) > _MAX_SESSIONS:
                    self._runs.popitem(last=False)
                # Snapshot while still holding the lock so the best-run scan
                # below never iterates a list another thread is mutating.
                session_runs = list(self._runs.get(session_id, []))

            # Best is computed across ALL session runs, not just this batch.
            comparable = [run for run in session_runs if self._is_comparable(run)]
            best_id = self._best_run(comparable).run_id if comparable else None

            return BatchTrainResponse(
                session_id=session_id,
                runs=runs,
                total_training_time_ms=total_time,
                best_run_id=best_id,
            )
        finally:
            with self._lock:
                self._in_flight.discard(session_id)

    def get_runs(self, session_id: str) -> list[ArenaRun]:
        """Return a copy of the run list for a session (empty if none are stored)."""
        with self._lock:
            return list(self._runs.get(session_id, []))

    def has_session(self, session_id: str) -> bool:
        """Return True when the session has an entry in the run store."""
        with self._lock:
            return session_id in self._runs

    def get_run(self, session_id: str, run_id: str) -> ArenaRun | None:
        """Return the run with ``run_id`` in the session, or ``None`` if absent."""
        with self._lock:
            for run in self._runs.get(session_id, []):
                if run.run_id == run_id:
                    return run
        return None

    def compare_runs(self, session_id: str, run_ids: list[str]) -> ArenaCompareResponse:
        """Build comparison data for selected runs.

        Args:
            session_id: Session whose stored runs are searched.
            run_ids: Ids of the runs to compare.

        Returns:
            The selected runs (in stored order), the id of the one with the
            highest ``auc_roc``, a per-metric summary, and the parameters whose
            values differ between the selected runs.

        Raises:
            ValueError: A requested id is not stored for the session, a requested
                run is not completed, or fewer than two completed runs remain.
        """
        with self._lock:
            all_runs = list(self._runs.get(session_id, []))

        requested = set(run_ids)
        known_ids = {r.run_id for r in all_runs}
        selected = [
            r for r in all_runs if r.run_id in requested and self._is_comparable(r)
        ]

        # Check for missing run IDs
        missing = [rid for rid in run_ids if rid not in known_ids]
        if missing:
            raise ValueError(f"Run IDs not found in session '{session_id}': {missing}")

        # Check for runs that exist but are failed
        selected_ids = {r.run_id for r in selected}
        failed = [rid for rid in run_ids if rid in known_ids and rid not in selected_ids]
        if failed:
            raise ValueError(f"Run IDs exist but are in failed state: {failed}")

        if len(selected) < 2:
            raise ValueError("Need at least 2 completed runs to compare")

        # Build metric summary: metric_name -> {run_id: value}
        metric_names = [
            "accuracy", "sensitivity", "specificity", "precision",
            "f1_score", "auc_roc", "mcc", "train_accuracy",
        ]
        metric_summary: dict[str, dict[str, float]] = {
            name: {r.run_id: getattr(r.metrics, name) for r in selected}
            for name in metric_names
        }

        # Collect every parameter value per run: param_name -> {run_id: value}
        all_params: dict[str, dict[str, Any]] = {}
        for r in selected:
            for key, value in r.params.items():
                all_params.setdefault(key, {})[r.run_id] = value

        # Keep only parameters with more than one distinct value; values are
        # compared via str() so unhashable values (lists, dicts) are supported.
        param_diff = {
            key: vals
            for key, vals in all_params.items()
            if len({str(v) for v in vals.values()}) > 1
        }

        best = self._best_run(selected)

        return ArenaCompareResponse(
            runs=selected,
            best_run_id=best.run_id,
            metric_summary=metric_summary,
            param_diff=param_diff,
        )

    def clear_runs(self, session_id: str) -> None:
        """Remove all stored runs for a session; unknown sessions are ignored."""
        with self._lock:
            self._runs.pop(session_id, None)
|
data_cache/cardiology_arrhythmia.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data_cache/cardiology_hf.csv
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
|
| 2 |
+
75,0,582,0,20,1,265000,1.9,130,1,0,4,1
|
| 3 |
+
55,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
|
| 4 |
+
65,0,146,0,20,0,162000,1.3,129,1,1,7,1
|
| 5 |
+
50,1,111,0,20,0,210000,1.9,137,1,0,7,1
|
| 6 |
+
65,1,160,1,20,0,327000,2.7,116,0,0,8,1
|
| 7 |
+
90,1,47,0,40,1,204000,2.1,132,1,1,8,1
|
| 8 |
+
75,1,246,0,15,0,127000,1.2,137,1,0,10,1
|
| 9 |
+
60,1,315,1,60,0,454000,1.1,131,1,1,10,1
|
| 10 |
+
65,0,157,0,65,0,263358.03,1.5,138,0,0,10,1
|
| 11 |
+
80,1,123,0,35,1,388000,9.4,133,1,1,10,1
|
| 12 |
+
75,1,81,0,38,1,368000,4,131,1,1,10,1
|
| 13 |
+
62,0,231,0,25,1,253000,0.9,140,1,1,10,1
|
| 14 |
+
45,1,981,0,30,0,136000,1.1,137,1,0,11,1
|
| 15 |
+
50,1,168,0,38,1,276000,1.1,137,1,0,11,1
|
| 16 |
+
49,1,80,0,30,1,427000,1,138,0,0,12,0
|
| 17 |
+
82,1,379,0,50,0,47000,1.3,136,1,0,13,1
|
| 18 |
+
87,1,149,0,38,0,262000,0.9,140,1,0,14,1
|
| 19 |
+
45,0,582,0,14,0,166000,0.8,127,1,0,14,1
|
| 20 |
+
70,1,125,0,25,1,237000,1,140,0,0,15,1
|
| 21 |
+
48,1,582,1,55,0,87000,1.9,121,0,0,15,1
|
| 22 |
+
65,1,52,0,25,1,276000,1.3,137,0,0,16,0
|
| 23 |
+
65,1,128,1,30,1,297000,1.6,136,0,0,20,1
|
| 24 |
+
68,1,220,0,35,1,289000,0.9,140,1,1,20,1
|
| 25 |
+
53,0,63,1,60,0,368000,0.8,135,1,0,22,0
|
| 26 |
+
75,0,582,1,30,1,263358.03,1.83,134,0,0,23,1
|
| 27 |
+
80,0,148,1,38,0,149000,1.9,144,1,1,23,1
|
| 28 |
+
95,1,112,0,40,1,196000,1,138,0,0,24,1
|
| 29 |
+
70,0,122,1,45,1,284000,1.3,136,1,1,26,1
|
| 30 |
+
58,1,60,0,38,0,153000,5.8,134,1,0,26,1
|
| 31 |
+
82,0,70,1,30,0,200000,1.2,132,1,1,26,1
|
| 32 |
+
94,0,582,1,38,1,263358.03,1.83,134,1,0,27,1
|
| 33 |
+
85,0,23,0,45,0,360000,3,132,1,0,28,1
|
| 34 |
+
50,1,249,1,35,1,319000,1,128,0,0,28,1
|
| 35 |
+
50,1,159,1,30,0,302000,1.2,138,0,0,29,0
|
| 36 |
+
65,0,94,1,50,1,188000,1,140,1,0,29,1
|
| 37 |
+
69,0,582,1,35,0,228000,3.5,134,1,0,30,1
|
| 38 |
+
90,1,60,1,50,0,226000,1,134,1,0,30,1
|
| 39 |
+
82,1,855,1,50,1,321000,1,145,0,0,30,1
|
| 40 |
+
60,0,2656,1,30,0,305000,2.3,137,1,0,30,0
|
| 41 |
+
60,0,235,1,38,0,329000,3,142,0,0,30,1
|
| 42 |
+
70,0,582,0,20,1,263358.03,1.83,134,1,1,31,1
|
| 43 |
+
50,0,124,1,30,1,153000,1.2,136,0,1,32,1
|
| 44 |
+
70,0,571,1,45,1,185000,1.2,139,1,1,33,1
|
| 45 |
+
72,0,127,1,50,1,218000,1,134,1,0,33,0
|
| 46 |
+
60,1,588,1,60,0,194000,1.1,142,0,0,33,1
|
| 47 |
+
50,0,582,1,38,0,310000,1.9,135,1,1,35,1
|
| 48 |
+
51,0,1380,0,25,1,271000,0.9,130,1,0,38,1
|
| 49 |
+
60,0,582,1,38,1,451000,0.6,138,1,1,40,1
|
| 50 |
+
80,1,553,0,20,1,140000,4.4,133,1,0,41,1
|
| 51 |
+
57,1,129,0,30,0,395000,1,140,0,0,42,1
|
| 52 |
+
68,1,577,0,25,1,166000,1,138,1,0,43,1
|
| 53 |
+
53,1,91,0,20,1,418000,1.4,139,0,0,43,1
|
| 54 |
+
60,0,3964,1,62,0,263358.03,6.8,146,0,0,43,1
|
| 55 |
+
70,1,69,1,50,1,351000,1,134,0,0,44,1
|
| 56 |
+
60,1,260,1,38,0,255000,2.2,132,0,1,45,1
|
| 57 |
+
95,1,371,0,30,0,461000,2,132,1,0,50,1
|
| 58 |
+
70,1,75,0,35,0,223000,2.7,138,1,1,54,0
|
| 59 |
+
60,1,607,0,40,0,216000,0.6,138,1,1,54,0
|
| 60 |
+
49,0,789,0,20,1,319000,1.1,136,1,1,55,1
|
| 61 |
+
72,0,364,1,20,1,254000,1.3,136,1,1,59,1
|
| 62 |
+
45,0,7702,1,25,1,390000,1,139,1,0,60,1
|
| 63 |
+
50,0,318,0,40,1,216000,2.3,131,0,0,60,1
|
| 64 |
+
55,0,109,0,35,0,254000,1.1,139,1,1,60,0
|
| 65 |
+
45,0,582,0,35,0,385000,1,145,1,0,61,1
|
| 66 |
+
45,0,582,0,80,0,263358.03,1.18,137,0,0,63,0
|
| 67 |
+
60,0,68,0,20,0,119000,2.9,127,1,1,64,1
|
| 68 |
+
42,1,250,1,15,0,213000,1.3,136,0,0,65,1
|
| 69 |
+
72,1,110,0,25,0,274000,1,140,1,1,65,1
|
| 70 |
+
70,0,161,0,25,0,244000,1.2,142,0,0,66,1
|
| 71 |
+
65,0,113,1,25,0,497000,1.83,135,1,0,67,1
|
| 72 |
+
41,0,148,0,40,0,374000,0.8,140,1,1,68,0
|
| 73 |
+
58,0,582,1,35,0,122000,0.9,139,1,1,71,0
|
| 74 |
+
85,0,5882,0,35,0,243000,1,132,1,1,72,1
|
| 75 |
+
65,0,224,1,50,0,149000,1.3,137,1,1,72,0
|
| 76 |
+
69,0,582,0,20,0,266000,1.2,134,1,1,73,1
|
| 77 |
+
60,1,47,0,20,0,204000,0.7,139,1,1,73,1
|
| 78 |
+
70,0,92,0,60,1,317000,0.8,140,0,1,74,0
|
| 79 |
+
42,0,102,1,40,0,237000,1.2,140,1,0,74,0
|
| 80 |
+
75,1,203,1,38,1,283000,0.6,131,1,1,74,0
|
| 81 |
+
55,0,336,0,45,1,324000,0.9,140,0,0,74,0
|
| 82 |
+
70,0,69,0,40,0,293000,1.7,136,0,0,75,0
|
| 83 |
+
67,0,582,0,50,0,263358.03,1.18,137,1,1,76,0
|
| 84 |
+
60,1,76,1,25,0,196000,2.5,132,0,0,77,1
|
| 85 |
+
79,1,55,0,50,1,172000,1.8,133,1,0,78,0
|
| 86 |
+
59,1,280,1,25,1,302000,1,141,0,0,78,1
|
| 87 |
+
51,0,78,0,50,0,406000,0.7,140,1,0,79,0
|
| 88 |
+
55,0,47,0,35,1,173000,1.1,137,1,0,79,0
|
| 89 |
+
65,1,68,1,60,1,304000,0.8,140,1,0,79,0
|
| 90 |
+
44,0,84,1,40,1,235000,0.7,139,1,0,79,0
|
| 91 |
+
57,1,115,0,25,1,181000,1.1,144,1,0,79,0
|
| 92 |
+
70,0,66,1,45,0,249000,0.8,136,1,1,80,0
|
| 93 |
+
60,0,897,1,45,0,297000,1,133,1,0,80,0
|
| 94 |
+
42,0,582,0,60,0,263358.03,1.18,137,0,0,82,0
|
| 95 |
+
60,1,154,0,25,0,210000,1.7,135,1,0,82,1
|
| 96 |
+
58,0,144,1,38,1,327000,0.7,142,0,0,83,0
|
| 97 |
+
58,1,133,0,60,1,219000,1,141,1,0,83,0
|
| 98 |
+
63,1,514,1,25,1,254000,1.3,134,1,0,83,0
|
| 99 |
+
70,1,59,0,60,0,255000,1.1,136,0,0,85,0
|
| 100 |
+
60,1,156,1,25,1,318000,1.2,137,0,0,85,0
|
| 101 |
+
63,1,61,1,40,0,221000,1.1,140,0,0,86,0
|
| 102 |
+
65,1,305,0,25,0,298000,1.1,141,1,0,87,0
|
| 103 |
+
75,0,582,0,45,1,263358.03,1.18,137,1,0,87,0
|
| 104 |
+
80,0,898,0,25,0,149000,1.1,144,1,1,87,0
|
| 105 |
+
42,0,5209,0,30,0,226000,1,140,1,1,87,0
|
| 106 |
+
60,0,53,0,50,1,286000,2.3,143,0,0,87,0
|
| 107 |
+
72,1,328,0,30,1,621000,1.7,138,0,1,88,1
|
| 108 |
+
55,0,748,0,45,0,263000,1.3,137,1,0,88,0
|
| 109 |
+
45,1,1876,1,35,0,226000,0.9,138,1,0,88,0
|
| 110 |
+
63,0,936,0,38,0,304000,1.1,133,1,1,88,0
|
| 111 |
+
45,0,292,1,35,0,850000,1.3,142,1,1,88,0
|
| 112 |
+
85,0,129,0,60,0,306000,1.2,132,1,1,90,1
|
| 113 |
+
55,0,60,0,35,0,228000,1.2,135,1,1,90,0
|
| 114 |
+
50,0,369,1,25,0,252000,1.6,136,1,0,90,0
|
| 115 |
+
70,1,143,0,60,0,351000,1.3,137,0,0,90,1
|
| 116 |
+
60,1,754,1,40,1,328000,1.2,126,1,0,91,0
|
| 117 |
+
58,1,400,0,40,0,164000,1,139,0,0,91,0
|
| 118 |
+
60,1,96,1,60,1,271000,0.7,136,0,0,94,0
|
| 119 |
+
85,1,102,0,60,0,507000,3.2,138,0,0,94,0
|
| 120 |
+
65,1,113,1,60,1,203000,0.9,140,0,0,94,0
|
| 121 |
+
86,0,582,0,38,0,263358.03,1.83,134,0,0,95,1
|
| 122 |
+
60,1,737,0,60,1,210000,1.5,135,1,1,95,0
|
| 123 |
+
66,1,68,1,38,1,162000,1,136,0,0,95,0
|
| 124 |
+
60,0,96,1,38,0,228000,0.75,140,0,0,95,0
|
| 125 |
+
60,1,582,0,30,1,127000,0.9,145,0,0,95,0
|
| 126 |
+
60,0,582,0,40,0,217000,3.7,134,1,0,96,1
|
| 127 |
+
43,1,358,0,50,0,237000,1.3,135,0,0,97,0
|
| 128 |
+
46,0,168,1,17,1,271000,2.1,124,0,0,100,1
|
| 129 |
+
58,1,200,1,60,0,300000,0.8,137,0,0,104,0
|
| 130 |
+
61,0,248,0,30,1,267000,0.7,136,1,1,104,0
|
| 131 |
+
53,1,270,1,35,0,227000,3.4,145,1,0,105,0
|
| 132 |
+
53,1,1808,0,60,1,249000,0.7,138,1,1,106,0
|
| 133 |
+
60,1,1082,1,45,0,250000,6.1,131,1,0,107,0
|
| 134 |
+
46,0,719,0,40,1,263358.03,1.18,137,0,0,107,0
|
| 135 |
+
63,0,193,0,60,1,295000,1.3,145,1,1,107,0
|
| 136 |
+
81,0,4540,0,35,0,231000,1.18,137,1,1,107,0
|
| 137 |
+
75,0,582,0,40,0,263358.03,1.18,137,1,0,107,0
|
| 138 |
+
65,1,59,1,60,0,172000,0.9,137,0,0,107,0
|
| 139 |
+
68,1,646,0,25,0,305000,2.1,130,1,0,108,0
|
| 140 |
+
62,0,281,1,35,0,221000,1,136,0,0,108,0
|
| 141 |
+
50,0,1548,0,30,1,211000,0.8,138,1,0,108,0
|
| 142 |
+
80,0,805,0,38,0,263358.03,1.1,134,1,0,109,1
|
| 143 |
+
46,1,291,0,35,0,348000,0.9,140,0,0,109,0
|
| 144 |
+
50,0,482,1,30,0,329000,0.9,132,0,0,109,0
|
| 145 |
+
61,1,84,0,40,1,229000,0.9,141,0,0,110,0
|
| 146 |
+
72,1,943,0,25,1,338000,1.7,139,1,1,111,1
|
| 147 |
+
50,0,185,0,30,0,266000,0.7,141,1,1,112,0
|
| 148 |
+
52,0,132,0,30,0,218000,0.7,136,1,1,112,0
|
| 149 |
+
64,0,1610,0,60,0,242000,1,137,1,0,113,0
|
| 150 |
+
75,1,582,0,30,0,225000,1.83,134,1,0,113,1
|
| 151 |
+
60,0,2261,0,35,1,228000,0.9,136,1,0,115,0
|
| 152 |
+
72,0,233,0,45,1,235000,2.5,135,0,0,115,1
|
| 153 |
+
62,0,30,1,60,1,244000,0.9,139,1,0,117,0
|
| 154 |
+
50,0,115,0,45,1,184000,0.9,134,1,1,118,0
|
| 155 |
+
50,0,1846,1,35,0,263358.03,1.18,137,1,1,119,0
|
| 156 |
+
65,1,335,0,35,1,235000,0.8,136,0,0,120,0
|
| 157 |
+
60,1,231,1,25,0,194000,1.7,140,1,0,120,0
|
| 158 |
+
52,1,58,0,35,0,277000,1.4,136,0,0,120,0
|
| 159 |
+
50,0,250,0,25,0,262000,1,136,1,1,120,0
|
| 160 |
+
85,1,910,0,50,0,235000,1.3,134,1,0,121,0
|
| 161 |
+
59,1,129,0,45,1,362000,1.1,139,1,1,121,0
|
| 162 |
+
66,1,72,0,40,1,242000,1.2,134,1,0,121,0
|
| 163 |
+
45,1,130,0,35,0,174000,0.8,139,1,1,121,0
|
| 164 |
+
63,1,582,0,40,0,448000,0.9,137,1,1,123,0
|
| 165 |
+
50,1,2334,1,35,0,75000,0.9,142,0,0,126,1
|
| 166 |
+
45,0,2442,1,30,0,334000,1.1,139,1,0,129,1
|
| 167 |
+
80,0,776,1,38,1,192000,1.3,135,0,0,130,1
|
| 168 |
+
53,0,196,0,60,0,220000,0.7,133,1,1,134,0
|
| 169 |
+
59,0,66,1,20,0,70000,2.4,134,1,0,135,1
|
| 170 |
+
65,0,582,1,40,0,270000,1,138,0,0,140,0
|
| 171 |
+
70,0,835,0,35,1,305000,0.8,133,0,0,145,0
|
| 172 |
+
51,1,582,1,35,0,263358.03,1.5,136,1,1,145,0
|
| 173 |
+
52,0,3966,0,40,0,325000,0.9,140,1,1,146,0
|
| 174 |
+
70,1,171,0,60,1,176000,1.1,145,1,1,146,0
|
| 175 |
+
50,1,115,0,20,0,189000,0.8,139,1,0,146,0
|
| 176 |
+
65,0,198,1,35,1,281000,0.9,137,1,1,146,0
|
| 177 |
+
60,1,95,0,60,0,337000,1,138,1,1,146,0
|
| 178 |
+
69,0,1419,0,40,0,105000,1,135,1,1,147,0
|
| 179 |
+
49,1,69,0,50,0,132000,1,140,0,0,147,0
|
| 180 |
+
63,1,122,1,60,0,267000,1.2,145,1,0,147,0
|
| 181 |
+
55,0,835,0,40,0,279000,0.7,140,1,1,147,0
|
| 182 |
+
40,0,478,1,30,0,303000,0.9,136,1,0,148,0
|
| 183 |
+
59,1,176,1,25,0,221000,1,136,1,1,150,1
|
| 184 |
+
65,0,395,1,25,0,265000,1.2,136,1,1,154,1
|
| 185 |
+
75,0,99,0,38,1,224000,2.5,134,1,0,162,1
|
| 186 |
+
58,1,145,0,25,0,219000,1.2,137,1,1,170,1
|
| 187 |
+
60.667,1,104,1,30,0,389000,1.5,136,1,0,171,1
|
| 188 |
+
50,0,582,0,50,0,153000,0.6,134,0,0,172,1
|
| 189 |
+
60,0,1896,1,25,0,365000,2.1,144,0,0,172,1
|
| 190 |
+
60.667,1,151,1,40,1,201000,1,136,0,0,172,0
|
| 191 |
+
40,0,244,0,45,1,275000,0.9,140,0,0,174,0
|
| 192 |
+
80,0,582,1,35,0,350000,2.1,134,1,0,174,0
|
| 193 |
+
64,1,62,0,60,0,309000,1.5,135,0,0,174,0
|
| 194 |
+
50,1,121,1,40,0,260000,0.7,130,1,0,175,0
|
| 195 |
+
73,1,231,1,30,0,160000,1.18,142,1,1,180,0
|
| 196 |
+
45,0,582,0,20,1,126000,1.6,135,1,0,180,1
|
| 197 |
+
77,1,418,0,45,0,223000,1.8,145,1,0,180,1
|
| 198 |
+
45,0,582,1,38,1,263358.03,1.18,137,0,0,185,0
|
| 199 |
+
65,0,167,0,30,0,259000,0.8,138,0,0,186,0
|
| 200 |
+
50,1,582,1,20,1,279000,1,134,0,0,186,0
|
| 201 |
+
60,0,1211,1,35,0,263358.03,1.8,113,1,1,186,0
|
| 202 |
+
63,1,1767,0,45,0,73000,0.7,137,1,0,186,0
|
| 203 |
+
45,0,308,1,60,1,377000,1,136,1,0,186,0
|
| 204 |
+
70,0,97,0,60,1,220000,0.9,138,1,0,186,0
|
| 205 |
+
60,0,59,0,25,1,212000,3.5,136,1,1,187,0
|
| 206 |
+
78,1,64,0,40,0,277000,0.7,137,1,1,187,0
|
| 207 |
+
50,1,167,1,45,0,362000,1,136,0,0,187,0
|
| 208 |
+
40,1,101,0,40,0,226000,0.8,141,0,0,187,0
|
| 209 |
+
85,0,212,0,38,0,186000,0.9,136,1,0,187,0
|
| 210 |
+
60,1,2281,1,40,0,283000,1,141,0,0,187,0
|
| 211 |
+
49,0,972,1,35,1,268000,0.8,130,0,0,187,0
|
| 212 |
+
70,0,212,1,17,1,389000,1,136,1,1,188,0
|
| 213 |
+
50,0,582,0,62,1,147000,0.8,140,1,1,192,0
|
| 214 |
+
78,0,224,0,50,0,481000,1.4,138,1,1,192,0
|
| 215 |
+
48,1,131,1,30,1,244000,1.6,130,0,0,193,1
|
| 216 |
+
65,1,135,0,35,1,290000,0.8,134,1,0,194,0
|
| 217 |
+
73,0,582,0,35,1,203000,1.3,134,1,0,195,0
|
| 218 |
+
70,0,1202,0,50,1,358000,0.9,141,0,0,196,0
|
| 219 |
+
54,1,427,0,70,1,151000,9,137,0,0,196,1
|
| 220 |
+
68,1,1021,1,35,0,271000,1.1,134,1,0,197,0
|
| 221 |
+
55,0,582,1,35,1,371000,0.7,140,0,0,197,0
|
| 222 |
+
73,0,582,0,20,0,263358.03,1.83,134,1,0,198,1
|
| 223 |
+
65,0,118,0,50,0,194000,1.1,145,1,1,200,0
|
| 224 |
+
42,1,86,0,35,0,365000,1.1,139,1,1,201,0
|
| 225 |
+
47,0,582,0,25,0,130000,0.8,134,1,0,201,0
|
| 226 |
+
58,0,582,1,25,0,504000,1,138,1,0,205,0
|
| 227 |
+
75,0,675,1,60,0,265000,1.4,125,0,0,205,0
|
| 228 |
+
58,1,57,0,25,0,189000,1.3,132,1,1,205,0
|
| 229 |
+
55,1,2794,0,35,1,141000,1,140,1,0,206,0
|
| 230 |
+
65,0,56,0,25,0,237000,5,130,0,0,207,0
|
| 231 |
+
72,0,211,0,25,0,274000,1.2,134,0,0,207,0
|
| 232 |
+
60,0,166,0,30,0,62000,1.7,127,0,0,207,1
|
| 233 |
+
70,0,93,0,35,0,185000,1.1,134,1,1,208,0
|
| 234 |
+
40,1,129,0,35,0,255000,0.9,137,1,0,209,0
|
| 235 |
+
53,1,707,0,38,0,330000,1.4,137,1,1,209,0
|
| 236 |
+
53,1,582,0,45,0,305000,1.1,137,1,1,209,0
|
| 237 |
+
77,1,109,0,50,1,406000,1.1,137,1,0,209,0
|
| 238 |
+
75,0,119,0,50,1,248000,1.1,148,1,0,209,0
|
| 239 |
+
70,0,232,0,30,0,173000,1.2,132,1,0,210,0
|
| 240 |
+
65,1,720,1,40,0,257000,1,136,0,0,210,0
|
| 241 |
+
55,1,180,0,45,0,263358.03,1.18,137,1,1,211,0
|
| 242 |
+
70,0,81,1,35,1,533000,1.3,139,0,0,212,0
|
| 243 |
+
65,0,582,1,30,0,249000,1.3,136,1,1,212,0
|
| 244 |
+
40,0,90,0,35,0,255000,1.1,136,1,1,212,0
|
| 245 |
+
73,1,1185,0,40,1,220000,0.9,141,0,0,213,0
|
| 246 |
+
54,0,582,1,38,0,264000,1.8,134,1,0,213,0
|
| 247 |
+
61,1,80,1,38,0,282000,1.4,137,1,0,213,0
|
| 248 |
+
55,0,2017,0,25,0,314000,1.1,138,1,0,214,1
|
| 249 |
+
64,0,143,0,25,0,246000,2.4,135,1,0,214,0
|
| 250 |
+
40,0,624,0,35,0,301000,1,142,1,1,214,0
|
| 251 |
+
53,0,207,1,40,0,223000,1.2,130,0,0,214,0
|
| 252 |
+
50,0,2522,0,30,1,404000,0.5,139,0,0,214,0
|
| 253 |
+
55,0,572,1,35,0,231000,0.8,143,0,0,215,0
|
| 254 |
+
50,0,245,0,45,1,274000,1,133,1,0,215,0
|
| 255 |
+
70,0,88,1,35,1,236000,1.2,132,0,0,215,0
|
| 256 |
+
53,1,446,0,60,1,263358.03,1,139,1,0,215,0
|
| 257 |
+
52,1,191,1,30,1,334000,1,142,1,1,216,0
|
| 258 |
+
65,0,326,0,38,0,294000,1.7,139,0,0,220,0
|
| 259 |
+
58,0,132,1,38,1,253000,1,139,1,0,230,0
|
| 260 |
+
45,1,66,1,25,0,233000,0.8,135,1,0,230,0
|
| 261 |
+
53,0,56,0,50,0,308000,0.7,135,1,1,231,0
|
| 262 |
+
55,0,66,0,40,0,203000,1,138,1,0,233,0
|
| 263 |
+
62,1,655,0,40,0,283000,0.7,133,0,0,233,0
|
| 264 |
+
65,1,258,1,25,0,198000,1.4,129,1,0,235,1
|
| 265 |
+
68,1,157,1,60,0,208000,1,140,0,0,237,0
|
| 266 |
+
61,0,582,1,38,0,147000,1.2,141,1,0,237,0
|
| 267 |
+
50,1,298,0,35,0,362000,0.9,140,1,1,240,0
|
| 268 |
+
55,0,1199,0,20,0,263358.03,1.83,134,1,1,241,1
|
| 269 |
+
56,1,135,1,38,0,133000,1.7,140,1,0,244,0
|
| 270 |
+
45,0,582,1,38,0,302000,0.9,140,0,0,244,0
|
| 271 |
+
40,0,582,1,35,0,222000,1,132,1,0,244,0
|
| 272 |
+
44,0,582,1,30,1,263358.03,1.6,130,1,1,244,0
|
| 273 |
+
51,0,582,1,40,0,221000,0.9,134,0,0,244,0
|
| 274 |
+
67,0,213,0,38,0,215000,1.2,133,0,0,245,0
|
| 275 |
+
42,0,64,0,40,0,189000,0.7,140,1,0,245,0
|
| 276 |
+
60,1,257,1,30,0,150000,1,137,1,1,245,0
|
| 277 |
+
45,0,582,0,38,1,422000,0.8,137,0,0,245,0
|
| 278 |
+
70,0,618,0,35,0,327000,1.1,142,0,0,245,0
|
| 279 |
+
70,0,582,1,38,0,25100,1.1,140,1,0,246,0
|
| 280 |
+
50,1,1051,1,30,0,232000,0.7,136,0,0,246,0
|
| 281 |
+
55,0,84,1,38,0,451000,1.3,136,0,0,246,0
|
| 282 |
+
70,0,2695,1,40,0,241000,1,137,1,0,247,0
|
| 283 |
+
70,0,582,0,40,0,51000,2.7,136,1,1,250,0
|
| 284 |
+
42,0,64,0,30,0,215000,3.8,128,1,1,250,0
|
| 285 |
+
65,0,1688,0,38,0,263358.03,1.1,138,1,1,250,0
|
| 286 |
+
50,1,54,0,40,0,279000,0.8,141,1,0,250,0
|
| 287 |
+
55,1,170,1,40,0,336000,1.2,135,1,0,250,0
|
| 288 |
+
60,0,253,0,35,0,279000,1.7,140,1,0,250,0
|
| 289 |
+
45,0,582,1,55,0,543000,1,132,0,0,250,0
|
| 290 |
+
65,0,892,1,35,0,263358.03,1.1,142,0,0,256,0
|
| 291 |
+
90,1,337,0,38,0,390000,0.9,144,0,0,256,0
|
| 292 |
+
45,0,615,1,55,0,222000,0.8,141,0,0,257,0
|
| 293 |
+
60,0,320,0,35,0,133000,1.4,139,1,0,258,0
|
| 294 |
+
52,0,190,1,38,0,382000,1,140,1,1,258,0
|
| 295 |
+
63,1,103,1,35,0,179000,0.9,136,1,1,270,0
|
| 296 |
+
62,0,61,1,38,1,155000,1.1,143,1,1,270,0
|
| 297 |
+
55,0,1820,0,38,0,270000,1.2,139,0,0,271,0
|
| 298 |
+
45,0,2060,1,60,0,742000,0.8,138,0,0,278,0
|
| 299 |
+
45,0,2413,0,38,0,140000,1.4,140,1,1,280,0
|
| 300 |
+
50,0,196,0,45,0,395000,1.6,136,1,1,285,0
|
data_cache/depression_data.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data_cache/dermatology.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data_cache/endocrinology_diabetes.csv
ADDED
|
@@ -0,0 +1,768 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6,148,72,35,0,33.6,0.627,50,1
|
| 2 |
+
1,85,66,29,0,26.6,0.351,31,0
|
| 3 |
+
8,183,64,0,0,23.3,0.672,32,1
|
| 4 |
+
1,89,66,23,94,28.1,0.167,21,0
|
| 5 |
+
0,137,40,35,168,43.1,2.288,33,1
|
| 6 |
+
5,116,74,0,0,25.6,0.201,30,0
|
| 7 |
+
3,78,50,32,88,31.0,0.248,26,1
|
| 8 |
+
10,115,0,0,0,35.3,0.134,29,0
|
| 9 |
+
2,197,70,45,543,30.5,0.158,53,1
|
| 10 |
+
8,125,96,0,0,0.0,0.232,54,1
|
| 11 |
+
4,110,92,0,0,37.6,0.191,30,0
|
| 12 |
+
10,168,74,0,0,38.0,0.537,34,1
|
| 13 |
+
10,139,80,0,0,27.1,1.441,57,0
|
| 14 |
+
1,189,60,23,846,30.1,0.398,59,1
|
| 15 |
+
5,166,72,19,175,25.8,0.587,51,1
|
| 16 |
+
7,100,0,0,0,30.0,0.484,32,1
|
| 17 |
+
0,118,84,47,230,45.8,0.551,31,1
|
| 18 |
+
7,107,74,0,0,29.6,0.254,31,1
|
| 19 |
+
1,103,30,38,83,43.3,0.183,33,0
|
| 20 |
+
1,115,70,30,96,34.6,0.529,32,1
|
| 21 |
+
3,126,88,41,235,39.3,0.704,27,0
|
| 22 |
+
8,99,84,0,0,35.4,0.388,50,0
|
| 23 |
+
7,196,90,0,0,39.8,0.451,41,1
|
| 24 |
+
9,119,80,35,0,29.0,0.263,29,1
|
| 25 |
+
11,143,94,33,146,36.6,0.254,51,1
|
| 26 |
+
10,125,70,26,115,31.1,0.205,41,1
|
| 27 |
+
7,147,76,0,0,39.4,0.257,43,1
|
| 28 |
+
1,97,66,15,140,23.2,0.487,22,0
|
| 29 |
+
13,145,82,19,110,22.2,0.245,57,0
|
| 30 |
+
5,117,92,0,0,34.1,0.337,38,0
|
| 31 |
+
5,109,75,26,0,36.0,0.546,60,0
|
| 32 |
+
3,158,76,36,245,31.6,0.851,28,1
|
| 33 |
+
3,88,58,11,54,24.8,0.267,22,0
|
| 34 |
+
6,92,92,0,0,19.9,0.188,28,0
|
| 35 |
+
10,122,78,31,0,27.6,0.512,45,0
|
| 36 |
+
4,103,60,33,192,24.0,0.966,33,0
|
| 37 |
+
11,138,76,0,0,33.2,0.420,35,0
|
| 38 |
+
9,102,76,37,0,32.9,0.665,46,1
|
| 39 |
+
2,90,68,42,0,38.2,0.503,27,1
|
| 40 |
+
4,111,72,47,207,37.1,1.390,56,1
|
| 41 |
+
3,180,64,25,70,34.0,0.271,26,0
|
| 42 |
+
7,133,84,0,0,40.2,0.696,37,0
|
| 43 |
+
7,106,92,18,0,22.7,0.235,48,0
|
| 44 |
+
9,171,110,24,240,45.4,0.721,54,1
|
| 45 |
+
7,159,64,0,0,27.4,0.294,40,0
|
| 46 |
+
0,180,66,39,0,42.0,1.893,25,1
|
| 47 |
+
1,146,56,0,0,29.7,0.564,29,0
|
| 48 |
+
2,71,70,27,0,28.0,0.586,22,0
|
| 49 |
+
7,103,66,32,0,39.1,0.344,31,1
|
| 50 |
+
7,105,0,0,0,0.0,0.305,24,0
|
| 51 |
+
1,103,80,11,82,19.4,0.491,22,0
|
| 52 |
+
1,101,50,15,36,24.2,0.526,26,0
|
| 53 |
+
5,88,66,21,23,24.4,0.342,30,0
|
| 54 |
+
8,176,90,34,300,33.7,0.467,58,1
|
| 55 |
+
7,150,66,42,342,34.7,0.718,42,0
|
| 56 |
+
1,73,50,10,0,23.0,0.248,21,0
|
| 57 |
+
7,187,68,39,304,37.7,0.254,41,1
|
| 58 |
+
0,100,88,60,110,46.8,0.962,31,0
|
| 59 |
+
0,146,82,0,0,40.5,1.781,44,0
|
| 60 |
+
0,105,64,41,142,41.5,0.173,22,0
|
| 61 |
+
2,84,0,0,0,0.0,0.304,21,0
|
| 62 |
+
8,133,72,0,0,32.9,0.270,39,1
|
| 63 |
+
5,44,62,0,0,25.0,0.587,36,0
|
| 64 |
+
2,141,58,34,128,25.4,0.699,24,0
|
| 65 |
+
7,114,66,0,0,32.8,0.258,42,1
|
| 66 |
+
5,99,74,27,0,29.0,0.203,32,0
|
| 67 |
+
0,109,88,30,0,32.5,0.855,38,1
|
| 68 |
+
2,109,92,0,0,42.7,0.845,54,0
|
| 69 |
+
1,95,66,13,38,19.6,0.334,25,0
|
| 70 |
+
4,146,85,27,100,28.9,0.189,27,0
|
| 71 |
+
2,100,66,20,90,32.9,0.867,28,1
|
| 72 |
+
5,139,64,35,140,28.6,0.411,26,0
|
| 73 |
+
13,126,90,0,0,43.4,0.583,42,1
|
| 74 |
+
4,129,86,20,270,35.1,0.231,23,0
|
| 75 |
+
1,79,75,30,0,32.0,0.396,22,0
|
| 76 |
+
1,0,48,20,0,24.7,0.140,22,0
|
| 77 |
+
7,62,78,0,0,32.6,0.391,41,0
|
| 78 |
+
5,95,72,33,0,37.7,0.370,27,0
|
| 79 |
+
0,131,0,0,0,43.2,0.270,26,1
|
| 80 |
+
2,112,66,22,0,25.0,0.307,24,0
|
| 81 |
+
3,113,44,13,0,22.4,0.140,22,0
|
| 82 |
+
2,74,0,0,0,0.0,0.102,22,0
|
| 83 |
+
7,83,78,26,71,29.3,0.767,36,0
|
| 84 |
+
0,101,65,28,0,24.6,0.237,22,0
|
| 85 |
+
5,137,108,0,0,48.8,0.227,37,1
|
| 86 |
+
2,110,74,29,125,32.4,0.698,27,0
|
| 87 |
+
13,106,72,54,0,36.6,0.178,45,0
|
| 88 |
+
2,100,68,25,71,38.5,0.324,26,0
|
| 89 |
+
15,136,70,32,110,37.1,0.153,43,1
|
| 90 |
+
1,107,68,19,0,26.5,0.165,24,0
|
| 91 |
+
1,80,55,0,0,19.1,0.258,21,0
|
| 92 |
+
4,123,80,15,176,32.0,0.443,34,0
|
| 93 |
+
7,81,78,40,48,46.7,0.261,42,0
|
| 94 |
+
4,134,72,0,0,23.8,0.277,60,1
|
| 95 |
+
2,142,82,18,64,24.7,0.761,21,0
|
| 96 |
+
6,144,72,27,228,33.9,0.255,40,0
|
| 97 |
+
2,92,62,28,0,31.6,0.130,24,0
|
| 98 |
+
1,71,48,18,76,20.4,0.323,22,0
|
| 99 |
+
6,93,50,30,64,28.7,0.356,23,0
|
| 100 |
+
1,122,90,51,220,49.7,0.325,31,1
|
| 101 |
+
1,163,72,0,0,39.0,1.222,33,1
|
| 102 |
+
1,151,60,0,0,26.1,0.179,22,0
|
| 103 |
+
0,125,96,0,0,22.5,0.262,21,0
|
| 104 |
+
1,81,72,18,40,26.6,0.283,24,0
|
| 105 |
+
2,85,65,0,0,39.6,0.930,27,0
|
| 106 |
+
1,126,56,29,152,28.7,0.801,21,0
|
| 107 |
+
1,96,122,0,0,22.4,0.207,27,0
|
| 108 |
+
4,144,58,28,140,29.5,0.287,37,0
|
| 109 |
+
3,83,58,31,18,34.3,0.336,25,0
|
| 110 |
+
0,95,85,25,36,37.4,0.247,24,1
|
| 111 |
+
3,171,72,33,135,33.3,0.199,24,1
|
| 112 |
+
8,155,62,26,495,34.0,0.543,46,1
|
| 113 |
+
1,89,76,34,37,31.2,0.192,23,0
|
| 114 |
+
4,76,62,0,0,34.0,0.391,25,0
|
| 115 |
+
7,160,54,32,175,30.5,0.588,39,1
|
| 116 |
+
4,146,92,0,0,31.2,0.539,61,1
|
| 117 |
+
5,124,74,0,0,34.0,0.220,38,1
|
| 118 |
+
5,78,48,0,0,33.7,0.654,25,0
|
| 119 |
+
4,97,60,23,0,28.2,0.443,22,0
|
| 120 |
+
4,99,76,15,51,23.2,0.223,21,0
|
| 121 |
+
0,162,76,56,100,53.2,0.759,25,1
|
| 122 |
+
6,111,64,39,0,34.2,0.260,24,0
|
| 123 |
+
2,107,74,30,100,33.6,0.404,23,0
|
| 124 |
+
5,132,80,0,0,26.8,0.186,69,0
|
| 125 |
+
0,113,76,0,0,33.3,0.278,23,1
|
| 126 |
+
1,88,30,42,99,55.0,0.496,26,1
|
| 127 |
+
3,120,70,30,135,42.9,0.452,30,0
|
| 128 |
+
1,118,58,36,94,33.3,0.261,23,0
|
| 129 |
+
1,117,88,24,145,34.5,0.403,40,1
|
| 130 |
+
0,105,84,0,0,27.9,0.741,62,1
|
| 131 |
+
4,173,70,14,168,29.7,0.361,33,1
|
| 132 |
+
9,122,56,0,0,33.3,1.114,33,1
|
| 133 |
+
3,170,64,37,225,34.5,0.356,30,1
|
| 134 |
+
8,84,74,31,0,38.3,0.457,39,0
|
| 135 |
+
2,96,68,13,49,21.1,0.647,26,0
|
| 136 |
+
2,125,60,20,140,33.8,0.088,31,0
|
| 137 |
+
0,100,70,26,50,30.8,0.597,21,0
|
| 138 |
+
0,93,60,25,92,28.7,0.532,22,0
|
| 139 |
+
0,129,80,0,0,31.2,0.703,29,0
|
| 140 |
+
5,105,72,29,325,36.9,0.159,28,0
|
| 141 |
+
3,128,78,0,0,21.1,0.268,55,0
|
| 142 |
+
5,106,82,30,0,39.5,0.286,38,0
|
| 143 |
+
2,108,52,26,63,32.5,0.318,22,0
|
| 144 |
+
10,108,66,0,0,32.4,0.272,42,1
|
| 145 |
+
4,154,62,31,284,32.8,0.237,23,0
|
| 146 |
+
0,102,75,23,0,0.0,0.572,21,0
|
| 147 |
+
9,57,80,37,0,32.8,0.096,41,0
|
| 148 |
+
2,106,64,35,119,30.5,1.400,34,0
|
| 149 |
+
5,147,78,0,0,33.7,0.218,65,0
|
| 150 |
+
2,90,70,17,0,27.3,0.085,22,0
|
| 151 |
+
1,136,74,50,204,37.4,0.399,24,0
|
| 152 |
+
4,114,65,0,0,21.9,0.432,37,0
|
| 153 |
+
9,156,86,28,155,34.3,1.189,42,1
|
| 154 |
+
1,153,82,42,485,40.6,0.687,23,0
|
| 155 |
+
8,188,78,0,0,47.9,0.137,43,1
|
| 156 |
+
7,152,88,44,0,50.0,0.337,36,1
|
| 157 |
+
2,99,52,15,94,24.6,0.637,21,0
|
| 158 |
+
1,109,56,21,135,25.2,0.833,23,0
|
| 159 |
+
2,88,74,19,53,29.0,0.229,22,0
|
| 160 |
+
17,163,72,41,114,40.9,0.817,47,1
|
| 161 |
+
4,151,90,38,0,29.7,0.294,36,0
|
| 162 |
+
7,102,74,40,105,37.2,0.204,45,0
|
| 163 |
+
0,114,80,34,285,44.2,0.167,27,0
|
| 164 |
+
2,100,64,23,0,29.7,0.368,21,0
|
| 165 |
+
0,131,88,0,0,31.6,0.743,32,1
|
| 166 |
+
6,104,74,18,156,29.9,0.722,41,1
|
| 167 |
+
3,148,66,25,0,32.5,0.256,22,0
|
| 168 |
+
4,120,68,0,0,29.6,0.709,34,0
|
| 169 |
+
4,110,66,0,0,31.9,0.471,29,0
|
| 170 |
+
3,111,90,12,78,28.4,0.495,29,0
|
| 171 |
+
6,102,82,0,0,30.8,0.180,36,1
|
| 172 |
+
6,134,70,23,130,35.4,0.542,29,1
|
| 173 |
+
2,87,0,23,0,28.9,0.773,25,0
|
| 174 |
+
1,79,60,42,48,43.5,0.678,23,0
|
| 175 |
+
2,75,64,24,55,29.7,0.370,33,0
|
| 176 |
+
8,179,72,42,130,32.7,0.719,36,1
|
| 177 |
+
6,85,78,0,0,31.2,0.382,42,0
|
| 178 |
+
0,129,110,46,130,67.1,0.319,26,1
|
| 179 |
+
5,143,78,0,0,45.0,0.190,47,0
|
| 180 |
+
5,130,82,0,0,39.1,0.956,37,1
|
| 181 |
+
6,87,80,0,0,23.2,0.084,32,0
|
| 182 |
+
0,119,64,18,92,34.9,0.725,23,0
|
| 183 |
+
1,0,74,20,23,27.7,0.299,21,0
|
| 184 |
+
5,73,60,0,0,26.8,0.268,27,0
|
| 185 |
+
4,141,74,0,0,27.6,0.244,40,0
|
| 186 |
+
7,194,68,28,0,35.9,0.745,41,1
|
| 187 |
+
8,181,68,36,495,30.1,0.615,60,1
|
| 188 |
+
1,128,98,41,58,32.0,1.321,33,1
|
| 189 |
+
8,109,76,39,114,27.9,0.640,31,1
|
| 190 |
+
5,139,80,35,160,31.6,0.361,25,1
|
| 191 |
+
3,111,62,0,0,22.6,0.142,21,0
|
| 192 |
+
9,123,70,44,94,33.1,0.374,40,0
|
| 193 |
+
7,159,66,0,0,30.4,0.383,36,1
|
| 194 |
+
11,135,0,0,0,52.3,0.578,40,1
|
| 195 |
+
8,85,55,20,0,24.4,0.136,42,0
|
| 196 |
+
5,158,84,41,210,39.4,0.395,29,1
|
| 197 |
+
1,105,58,0,0,24.3,0.187,21,0
|
| 198 |
+
3,107,62,13,48,22.9,0.678,23,1
|
| 199 |
+
4,109,64,44,99,34.8,0.905,26,1
|
| 200 |
+
4,148,60,27,318,30.9,0.150,29,1
|
| 201 |
+
0,113,80,16,0,31.0,0.874,21,0
|
| 202 |
+
1,138,82,0,0,40.1,0.236,28,0
|
| 203 |
+
0,108,68,20,0,27.3,0.787,32,0
|
| 204 |
+
2,99,70,16,44,20.4,0.235,27,0
|
| 205 |
+
6,103,72,32,190,37.7,0.324,55,0
|
| 206 |
+
5,111,72,28,0,23.9,0.407,27,0
|
| 207 |
+
8,196,76,29,280,37.5,0.605,57,1
|
| 208 |
+
5,162,104,0,0,37.7,0.151,52,1
|
| 209 |
+
1,96,64,27,87,33.2,0.289,21,0
|
| 210 |
+
7,184,84,33,0,35.5,0.355,41,1
|
| 211 |
+
2,81,60,22,0,27.7,0.290,25,0
|
| 212 |
+
0,147,85,54,0,42.8,0.375,24,0
|
| 213 |
+
7,179,95,31,0,34.2,0.164,60,0
|
| 214 |
+
0,140,65,26,130,42.6,0.431,24,1
|
| 215 |
+
9,112,82,32,175,34.2,0.260,36,1
|
| 216 |
+
12,151,70,40,271,41.8,0.742,38,1
|
| 217 |
+
5,109,62,41,129,35.8,0.514,25,1
|
| 218 |
+
6,125,68,30,120,30.0,0.464,32,0
|
| 219 |
+
5,85,74,22,0,29.0,1.224,32,1
|
| 220 |
+
5,112,66,0,0,37.8,0.261,41,1
|
| 221 |
+
0,177,60,29,478,34.6,1.072,21,1
|
| 222 |
+
2,158,90,0,0,31.6,0.805,66,1
|
| 223 |
+
7,119,0,0,0,25.2,0.209,37,0
|
| 224 |
+
7,142,60,33,190,28.8,0.687,61,0
|
| 225 |
+
1,100,66,15,56,23.6,0.666,26,0
|
| 226 |
+
1,87,78,27,32,34.6,0.101,22,0
|
| 227 |
+
0,101,76,0,0,35.7,0.198,26,0
|
| 228 |
+
3,162,52,38,0,37.2,0.652,24,1
|
| 229 |
+
4,197,70,39,744,36.7,2.329,31,0
|
| 230 |
+
0,117,80,31,53,45.2,0.089,24,0
|
| 231 |
+
4,142,86,0,0,44.0,0.645,22,1
|
| 232 |
+
6,134,80,37,370,46.2,0.238,46,1
|
| 233 |
+
1,79,80,25,37,25.4,0.583,22,0
|
| 234 |
+
4,122,68,0,0,35.0,0.394,29,0
|
| 235 |
+
3,74,68,28,45,29.7,0.293,23,0
|
| 236 |
+
4,171,72,0,0,43.6,0.479,26,1
|
| 237 |
+
7,181,84,21,192,35.9,0.586,51,1
|
| 238 |
+
0,179,90,27,0,44.1,0.686,23,1
|
| 239 |
+
9,164,84,21,0,30.8,0.831,32,1
|
| 240 |
+
0,104,76,0,0,18.4,0.582,27,0
|
| 241 |
+
1,91,64,24,0,29.2,0.192,21,0
|
| 242 |
+
4,91,70,32,88,33.1,0.446,22,0
|
| 243 |
+
3,139,54,0,0,25.6,0.402,22,1
|
| 244 |
+
6,119,50,22,176,27.1,1.318,33,1
|
| 245 |
+
2,146,76,35,194,38.2,0.329,29,0
|
| 246 |
+
9,184,85,15,0,30.0,1.213,49,1
|
| 247 |
+
10,122,68,0,0,31.2,0.258,41,0
|
| 248 |
+
0,165,90,33,680,52.3,0.427,23,0
|
| 249 |
+
9,124,70,33,402,35.4,0.282,34,0
|
| 250 |
+
1,111,86,19,0,30.1,0.143,23,0
|
| 251 |
+
9,106,52,0,0,31.2,0.380,42,0
|
| 252 |
+
2,129,84,0,0,28.0,0.284,27,0
|
| 253 |
+
2,90,80,14,55,24.4,0.249,24,0
|
| 254 |
+
0,86,68,32,0,35.8,0.238,25,0
|
| 255 |
+
12,92,62,7,258,27.6,0.926,44,1
|
| 256 |
+
1,113,64,35,0,33.6,0.543,21,1
|
| 257 |
+
3,111,56,39,0,30.1,0.557,30,0
|
| 258 |
+
2,114,68,22,0,28.7,0.092,25,0
|
| 259 |
+
1,193,50,16,375,25.9,0.655,24,0
|
| 260 |
+
11,155,76,28,150,33.3,1.353,51,1
|
| 261 |
+
3,191,68,15,130,30.9,0.299,34,0
|
| 262 |
+
3,141,0,0,0,30.0,0.761,27,1
|
| 263 |
+
4,95,70,32,0,32.1,0.612,24,0
|
| 264 |
+
3,142,80,15,0,32.4,0.200,63,0
|
| 265 |
+
4,123,62,0,0,32.0,0.226,35,1
|
| 266 |
+
5,96,74,18,67,33.6,0.997,43,0
|
| 267 |
+
0,138,0,0,0,36.3,0.933,25,1
|
| 268 |
+
2,128,64,42,0,40.0,1.101,24,0
|
| 269 |
+
0,102,52,0,0,25.1,0.078,21,0
|
| 270 |
+
2,146,0,0,0,27.5,0.240,28,1
|
| 271 |
+
10,101,86,37,0,45.6,1.136,38,1
|
| 272 |
+
2,108,62,32,56,25.2,0.128,21,0
|
| 273 |
+
3,122,78,0,0,23.0,0.254,40,0
|
| 274 |
+
1,71,78,50,45,33.2,0.422,21,0
|
| 275 |
+
13,106,70,0,0,34.2,0.251,52,0
|
| 276 |
+
2,100,70,52,57,40.5,0.677,25,0
|
| 277 |
+
7,106,60,24,0,26.5,0.296,29,1
|
| 278 |
+
0,104,64,23,116,27.8,0.454,23,0
|
| 279 |
+
5,114,74,0,0,24.9,0.744,57,0
|
| 280 |
+
2,108,62,10,278,25.3,0.881,22,0
|
| 281 |
+
0,146,70,0,0,37.9,0.334,28,1
|
| 282 |
+
10,129,76,28,122,35.9,0.280,39,0
|
| 283 |
+
7,133,88,15,155,32.4,0.262,37,0
|
| 284 |
+
7,161,86,0,0,30.4,0.165,47,1
|
| 285 |
+
2,108,80,0,0,27.0,0.259,52,1
|
| 286 |
+
7,136,74,26,135,26.0,0.647,51,0
|
| 287 |
+
5,155,84,44,545,38.7,0.619,34,0
|
| 288 |
+
1,119,86,39,220,45.6,0.808,29,1
|
| 289 |
+
4,96,56,17,49,20.8,0.340,26,0
|
| 290 |
+
5,108,72,43,75,36.1,0.263,33,0
|
| 291 |
+
0,78,88,29,40,36.9,0.434,21,0
|
| 292 |
+
0,107,62,30,74,36.6,0.757,25,1
|
| 293 |
+
2,128,78,37,182,43.3,1.224,31,1
|
| 294 |
+
1,128,48,45,194,40.5,0.613,24,1
|
| 295 |
+
0,161,50,0,0,21.9,0.254,65,0
|
| 296 |
+
6,151,62,31,120,35.5,0.692,28,0
|
| 297 |
+
2,146,70,38,360,28.0,0.337,29,1
|
| 298 |
+
0,126,84,29,215,30.7,0.520,24,0
|
| 299 |
+
14,100,78,25,184,36.6,0.412,46,1
|
| 300 |
+
8,112,72,0,0,23.6,0.840,58,0
|
| 301 |
+
0,167,0,0,0,32.3,0.839,30,1
|
| 302 |
+
2,144,58,33,135,31.6,0.422,25,1
|
| 303 |
+
5,77,82,41,42,35.8,0.156,35,0
|
| 304 |
+
5,115,98,0,0,52.9,0.209,28,1
|
| 305 |
+
3,150,76,0,0,21.0,0.207,37,0
|
| 306 |
+
2,120,76,37,105,39.7,0.215,29,0
|
| 307 |
+
10,161,68,23,132,25.5,0.326,47,1
|
| 308 |
+
0,137,68,14,148,24.8,0.143,21,0
|
| 309 |
+
0,128,68,19,180,30.5,1.391,25,1
|
| 310 |
+
2,124,68,28,205,32.9,0.875,30,1
|
| 311 |
+
6,80,66,30,0,26.2,0.313,41,0
|
| 312 |
+
0,106,70,37,148,39.4,0.605,22,0
|
| 313 |
+
2,155,74,17,96,26.6,0.433,27,1
|
| 314 |
+
3,113,50,10,85,29.5,0.626,25,0
|
| 315 |
+
7,109,80,31,0,35.9,1.127,43,1
|
| 316 |
+
2,112,68,22,94,34.1,0.315,26,0
|
| 317 |
+
3,99,80,11,64,19.3,0.284,30,0
|
| 318 |
+
3,182,74,0,0,30.5,0.345,29,1
|
| 319 |
+
3,115,66,39,140,38.1,0.150,28,0
|
| 320 |
+
6,194,78,0,0,23.5,0.129,59,1
|
| 321 |
+
4,129,60,12,231,27.5,0.527,31,0
|
| 322 |
+
3,112,74,30,0,31.6,0.197,25,1
|
| 323 |
+
0,124,70,20,0,27.4,0.254,36,1
|
| 324 |
+
13,152,90,33,29,26.8,0.731,43,1
|
| 325 |
+
2,112,75,32,0,35.7,0.148,21,0
|
| 326 |
+
1,157,72,21,168,25.6,0.123,24,0
|
| 327 |
+
1,122,64,32,156,35.1,0.692,30,1
|
| 328 |
+
10,179,70,0,0,35.1,0.200,37,0
|
| 329 |
+
2,102,86,36,120,45.5,0.127,23,1
|
| 330 |
+
6,105,70,32,68,30.8,0.122,37,0
|
| 331 |
+
8,118,72,19,0,23.1,1.476,46,0
|
| 332 |
+
2,87,58,16,52,32.7,0.166,25,0
|
| 333 |
+
1,180,0,0,0,43.3,0.282,41,1
|
| 334 |
+
12,106,80,0,0,23.6,0.137,44,0
|
| 335 |
+
1,95,60,18,58,23.9,0.260,22,0
|
| 336 |
+
0,165,76,43,255,47.9,0.259,26,0
|
| 337 |
+
0,117,0,0,0,33.8,0.932,44,0
|
| 338 |
+
5,115,76,0,0,31.2,0.343,44,1
|
| 339 |
+
9,152,78,34,171,34.2,0.893,33,1
|
| 340 |
+
7,178,84,0,0,39.9,0.331,41,1
|
| 341 |
+
1,130,70,13,105,25.9,0.472,22,0
|
| 342 |
+
1,95,74,21,73,25.9,0.673,36,0
|
| 343 |
+
1,0,68,35,0,32.0,0.389,22,0
|
| 344 |
+
5,122,86,0,0,34.7,0.290,33,0
|
| 345 |
+
8,95,72,0,0,36.8,0.485,57,0
|
| 346 |
+
8,126,88,36,108,38.5,0.349,49,0
|
| 347 |
+
1,139,46,19,83,28.7,0.654,22,0
|
| 348 |
+
3,116,0,0,0,23.5,0.187,23,0
|
| 349 |
+
3,99,62,19,74,21.8,0.279,26,0
|
| 350 |
+
5,0,80,32,0,41.0,0.346,37,1
|
| 351 |
+
4,92,80,0,0,42.2,0.237,29,0
|
| 352 |
+
4,137,84,0,0,31.2,0.252,30,0
|
| 353 |
+
3,61,82,28,0,34.4,0.243,46,0
|
| 354 |
+
1,90,62,12,43,27.2,0.580,24,0
|
| 355 |
+
3,90,78,0,0,42.7,0.559,21,0
|
| 356 |
+
9,165,88,0,0,30.4,0.302,49,1
|
| 357 |
+
1,125,50,40,167,33.3,0.962,28,1
|
| 358 |
+
13,129,0,30,0,39.9,0.569,44,1
|
| 359 |
+
12,88,74,40,54,35.3,0.378,48,0
|
| 360 |
+
1,196,76,36,249,36.5,0.875,29,1
|
| 361 |
+
5,189,64,33,325,31.2,0.583,29,1
|
| 362 |
+
5,158,70,0,0,29.8,0.207,63,0
|
| 363 |
+
5,103,108,37,0,39.2,0.305,65,0
|
| 364 |
+
4,146,78,0,0,38.5,0.520,67,1
|
| 365 |
+
4,147,74,25,293,34.9,0.385,30,0
|
| 366 |
+
5,99,54,28,83,34.0,0.499,30,0
|
| 367 |
+
6,124,72,0,0,27.6,0.368,29,1
|
| 368 |
+
0,101,64,17,0,21.0,0.252,21,0
|
| 369 |
+
3,81,86,16,66,27.5,0.306,22,0
|
| 370 |
+
1,133,102,28,140,32.8,0.234,45,1
|
| 371 |
+
3,173,82,48,465,38.4,2.137,25,1
|
| 372 |
+
0,118,64,23,89,0.0,1.731,21,0
|
| 373 |
+
0,84,64,22,66,35.8,0.545,21,0
|
| 374 |
+
2,105,58,40,94,34.9,0.225,25,0
|
| 375 |
+
2,122,52,43,158,36.2,0.816,28,0
|
| 376 |
+
12,140,82,43,325,39.2,0.528,58,1
|
| 377 |
+
0,98,82,15,84,25.2,0.299,22,0
|
| 378 |
+
1,87,60,37,75,37.2,0.509,22,0
|
| 379 |
+
4,156,75,0,0,48.3,0.238,32,1
|
| 380 |
+
0,93,100,39,72,43.4,1.021,35,0
|
| 381 |
+
1,107,72,30,82,30.8,0.821,24,0
|
| 382 |
+
0,105,68,22,0,20.0,0.236,22,0
|
| 383 |
+
1,109,60,8,182,25.4,0.947,21,0
|
| 384 |
+
1,90,62,18,59,25.1,1.268,25,0
|
| 385 |
+
1,125,70,24,110,24.3,0.221,25,0
|
| 386 |
+
1,119,54,13,50,22.3,0.205,24,0
|
| 387 |
+
5,116,74,29,0,32.3,0.660,35,1
|
| 388 |
+
8,105,100,36,0,43.3,0.239,45,1
|
| 389 |
+
5,144,82,26,285,32.0,0.452,58,1
|
| 390 |
+
3,100,68,23,81,31.6,0.949,28,0
|
| 391 |
+
1,100,66,29,196,32.0,0.444,42,0
|
| 392 |
+
5,166,76,0,0,45.7,0.340,27,1
|
| 393 |
+
1,131,64,14,415,23.7,0.389,21,0
|
| 394 |
+
4,116,72,12,87,22.1,0.463,37,0
|
| 395 |
+
4,158,78,0,0,32.9,0.803,31,1
|
| 396 |
+
2,127,58,24,275,27.7,1.600,25,0
|
| 397 |
+
3,96,56,34,115,24.7,0.944,39,0
|
| 398 |
+
0,131,66,40,0,34.3,0.196,22,1
|
| 399 |
+
3,82,70,0,0,21.1,0.389,25,0
|
| 400 |
+
3,193,70,31,0,34.9,0.241,25,1
|
| 401 |
+
4,95,64,0,0,32.0,0.161,31,1
|
| 402 |
+
6,137,61,0,0,24.2,0.151,55,0
|
| 403 |
+
5,136,84,41,88,35.0,0.286,35,1
|
| 404 |
+
9,72,78,25,0,31.6,0.280,38,0
|
| 405 |
+
5,168,64,0,0,32.9,0.135,41,1
|
| 406 |
+
2,123,48,32,165,42.1,0.520,26,0
|
| 407 |
+
4,115,72,0,0,28.9,0.376,46,1
|
| 408 |
+
0,101,62,0,0,21.9,0.336,25,0
|
| 409 |
+
8,197,74,0,0,25.9,1.191,39,1
|
| 410 |
+
1,172,68,49,579,42.4,0.702,28,1
|
| 411 |
+
6,102,90,39,0,35.7,0.674,28,0
|
| 412 |
+
1,112,72,30,176,34.4,0.528,25,0
|
| 413 |
+
1,143,84,23,310,42.4,1.076,22,0
|
| 414 |
+
1,143,74,22,61,26.2,0.256,21,0
|
| 415 |
+
0,138,60,35,167,34.6,0.534,21,1
|
| 416 |
+
3,173,84,33,474,35.7,0.258,22,1
|
| 417 |
+
1,97,68,21,0,27.2,1.095,22,0
|
| 418 |
+
4,144,82,32,0,38.5,0.554,37,1
|
| 419 |
+
1,83,68,0,0,18.2,0.624,27,0
|
| 420 |
+
3,129,64,29,115,26.4,0.219,28,1
|
| 421 |
+
1,119,88,41,170,45.3,0.507,26,0
|
| 422 |
+
2,94,68,18,76,26.0,0.561,21,0
|
| 423 |
+
0,102,64,46,78,40.6,0.496,21,0
|
| 424 |
+
2,115,64,22,0,30.8,0.421,21,0
|
| 425 |
+
8,151,78,32,210,42.9,0.516,36,1
|
| 426 |
+
4,184,78,39,277,37.0,0.264,31,1
|
| 427 |
+
0,94,0,0,0,0.0,0.256,25,0
|
| 428 |
+
1,181,64,30,180,34.1,0.328,38,1
|
| 429 |
+
0,135,94,46,145,40.6,0.284,26,0
|
| 430 |
+
1,95,82,25,180,35.0,0.233,43,1
|
| 431 |
+
2,99,0,0,0,22.2,0.108,23,0
|
| 432 |
+
3,89,74,16,85,30.4,0.551,38,0
|
| 433 |
+
1,80,74,11,60,30.0,0.527,22,0
|
| 434 |
+
2,139,75,0,0,25.6,0.167,29,0
|
| 435 |
+
1,90,68,8,0,24.5,1.138,36,0
|
| 436 |
+
0,141,0,0,0,42.4,0.205,29,1
|
| 437 |
+
12,140,85,33,0,37.4,0.244,41,0
|
| 438 |
+
5,147,75,0,0,29.9,0.434,28,0
|
| 439 |
+
1,97,70,15,0,18.2,0.147,21,0
|
| 440 |
+
6,107,88,0,0,36.8,0.727,31,0
|
| 441 |
+
0,189,104,25,0,34.3,0.435,41,1
|
| 442 |
+
2,83,66,23,50,32.2,0.497,22,0
|
| 443 |
+
4,117,64,27,120,33.2,0.230,24,0
|
| 444 |
+
8,108,70,0,0,30.5,0.955,33,1
|
| 445 |
+
4,117,62,12,0,29.7,0.380,30,1
|
| 446 |
+
0,180,78,63,14,59.4,2.420,25,1
|
| 447 |
+
1,100,72,12,70,25.3,0.658,28,0
|
| 448 |
+
0,95,80,45,92,36.5,0.330,26,0
|
| 449 |
+
0,104,64,37,64,33.6,0.510,22,1
|
| 450 |
+
0,120,74,18,63,30.5,0.285,26,0
|
| 451 |
+
1,82,64,13,95,21.2,0.415,23,0
|
| 452 |
+
2,134,70,0,0,28.9,0.542,23,1
|
| 453 |
+
0,91,68,32,210,39.9,0.381,25,0
|
| 454 |
+
2,119,0,0,0,19.6,0.832,72,0
|
| 455 |
+
2,100,54,28,105,37.8,0.498,24,0
|
| 456 |
+
14,175,62,30,0,33.6,0.212,38,1
|
| 457 |
+
1,135,54,0,0,26.7,0.687,62,0
|
| 458 |
+
5,86,68,28,71,30.2,0.364,24,0
|
| 459 |
+
10,148,84,48,237,37.6,1.001,51,1
|
| 460 |
+
9,134,74,33,60,25.9,0.460,81,0
|
| 461 |
+
9,120,72,22,56,20.8,0.733,48,0
|
| 462 |
+
1,71,62,0,0,21.8,0.416,26,0
|
| 463 |
+
8,74,70,40,49,35.3,0.705,39,0
|
| 464 |
+
5,88,78,30,0,27.6,0.258,37,0
|
| 465 |
+
10,115,98,0,0,24.0,1.022,34,0
|
| 466 |
+
0,124,56,13,105,21.8,0.452,21,0
|
| 467 |
+
0,74,52,10,36,27.8,0.269,22,0
|
| 468 |
+
0,97,64,36,100,36.8,0.600,25,0
|
| 469 |
+
8,120,0,0,0,30.0,0.183,38,1
|
| 470 |
+
6,154,78,41,140,46.1,0.571,27,0
|
| 471 |
+
1,144,82,40,0,41.3,0.607,28,0
|
| 472 |
+
0,137,70,38,0,33.2,0.170,22,0
|
| 473 |
+
0,119,66,27,0,38.8,0.259,22,0
|
| 474 |
+
7,136,90,0,0,29.9,0.210,50,0
|
| 475 |
+
4,114,64,0,0,28.9,0.126,24,0
|
| 476 |
+
0,137,84,27,0,27.3,0.231,59,0
|
| 477 |
+
2,105,80,45,191,33.7,0.711,29,1
|
| 478 |
+
7,114,76,17,110,23.8,0.466,31,0
|
| 479 |
+
8,126,74,38,75,25.9,0.162,39,0
|
| 480 |
+
4,132,86,31,0,28.0,0.419,63,0
|
| 481 |
+
3,158,70,30,328,35.5,0.344,35,1
|
| 482 |
+
0,123,88,37,0,35.2,0.197,29,0
|
| 483 |
+
4,85,58,22,49,27.8,0.306,28,0
|
| 484 |
+
0,84,82,31,125,38.2,0.233,23,0
|
| 485 |
+
0,145,0,0,0,44.2,0.630,31,1
|
| 486 |
+
0,135,68,42,250,42.3,0.365,24,1
|
| 487 |
+
1,139,62,41,480,40.7,0.536,21,0
|
| 488 |
+
0,173,78,32,265,46.5,1.159,58,0
|
| 489 |
+
4,99,72,17,0,25.6,0.294,28,0
|
| 490 |
+
8,194,80,0,0,26.1,0.551,67,0
|
| 491 |
+
2,83,65,28,66,36.8,0.629,24,0
|
| 492 |
+
2,89,90,30,0,33.5,0.292,42,0
|
| 493 |
+
4,99,68,38,0,32.8,0.145,33,0
|
| 494 |
+
4,125,70,18,122,28.9,1.144,45,1
|
| 495 |
+
3,80,0,0,0,0.0,0.174,22,0
|
| 496 |
+
6,166,74,0,0,26.6,0.304,66,0
|
| 497 |
+
5,110,68,0,0,26.0,0.292,30,0
|
| 498 |
+
2,81,72,15,76,30.1,0.547,25,0
|
| 499 |
+
7,195,70,33,145,25.1,0.163,55,1
|
| 500 |
+
6,154,74,32,193,29.3,0.839,39,0
|
| 501 |
+
2,117,90,19,71,25.2,0.313,21,0
|
| 502 |
+
3,84,72,32,0,37.2,0.267,28,0
|
| 503 |
+
6,0,68,41,0,39.0,0.727,41,1
|
| 504 |
+
7,94,64,25,79,33.3,0.738,41,0
|
| 505 |
+
3,96,78,39,0,37.3,0.238,40,0
|
| 506 |
+
10,75,82,0,0,33.3,0.263,38,0
|
| 507 |
+
0,180,90,26,90,36.5,0.314,35,1
|
| 508 |
+
1,130,60,23,170,28.6,0.692,21,0
|
| 509 |
+
2,84,50,23,76,30.4,0.968,21,0
|
| 510 |
+
8,120,78,0,0,25.0,0.409,64,0
|
| 511 |
+
12,84,72,31,0,29.7,0.297,46,1
|
| 512 |
+
0,139,62,17,210,22.1,0.207,21,0
|
| 513 |
+
9,91,68,0,0,24.2,0.200,58,0
|
| 514 |
+
2,91,62,0,0,27.3,0.525,22,0
|
| 515 |
+
3,99,54,19,86,25.6,0.154,24,0
|
| 516 |
+
3,163,70,18,105,31.6,0.268,28,1
|
| 517 |
+
9,145,88,34,165,30.3,0.771,53,1
|
| 518 |
+
7,125,86,0,0,37.6,0.304,51,0
|
| 519 |
+
13,76,60,0,0,32.8,0.180,41,0
|
| 520 |
+
6,129,90,7,326,19.6,0.582,60,0
|
| 521 |
+
2,68,70,32,66,25.0,0.187,25,0
|
| 522 |
+
3,124,80,33,130,33.2,0.305,26,0
|
| 523 |
+
6,114,0,0,0,0.0,0.189,26,0
|
| 524 |
+
9,130,70,0,0,34.2,0.652,45,1
|
| 525 |
+
3,125,58,0,0,31.6,0.151,24,0
|
| 526 |
+
3,87,60,18,0,21.8,0.444,21,0
|
| 527 |
+
1,97,64,19,82,18.2,0.299,21,0
|
| 528 |
+
3,116,74,15,105,26.3,0.107,24,0
|
| 529 |
+
0,117,66,31,188,30.8,0.493,22,0
|
| 530 |
+
0,111,65,0,0,24.6,0.660,31,0
|
| 531 |
+
2,122,60,18,106,29.8,0.717,22,0
|
| 532 |
+
0,107,76,0,0,45.3,0.686,24,0
|
| 533 |
+
1,86,66,52,65,41.3,0.917,29,0
|
| 534 |
+
6,91,0,0,0,29.8,0.501,31,0
|
| 535 |
+
1,77,56,30,56,33.3,1.251,24,0
|
| 536 |
+
4,132,0,0,0,32.9,0.302,23,1
|
| 537 |
+
0,105,90,0,0,29.6,0.197,46,0
|
| 538 |
+
0,57,60,0,0,21.7,0.735,67,0
|
| 539 |
+
0,127,80,37,210,36.3,0.804,23,0
|
| 540 |
+
3,129,92,49,155,36.4,0.968,32,1
|
| 541 |
+
8,100,74,40,215,39.4,0.661,43,1
|
| 542 |
+
3,128,72,25,190,32.4,0.549,27,1
|
| 543 |
+
10,90,85,32,0,34.9,0.825,56,1
|
| 544 |
+
4,84,90,23,56,39.5,0.159,25,0
|
| 545 |
+
1,88,78,29,76,32.0,0.365,29,0
|
| 546 |
+
8,186,90,35,225,34.5,0.423,37,1
|
| 547 |
+
5,187,76,27,207,43.6,1.034,53,1
|
| 548 |
+
4,131,68,21,166,33.1,0.160,28,0
|
| 549 |
+
1,164,82,43,67,32.8,0.341,50,0
|
| 550 |
+
4,189,110,31,0,28.5,0.680,37,0
|
| 551 |
+
1,116,70,28,0,27.4,0.204,21,0
|
| 552 |
+
3,84,68,30,106,31.9,0.591,25,0
|
| 553 |
+
6,114,88,0,0,27.8,0.247,66,0
|
| 554 |
+
1,88,62,24,44,29.9,0.422,23,0
|
| 555 |
+
1,84,64,23,115,36.9,0.471,28,0
|
| 556 |
+
7,124,70,33,215,25.5,0.161,37,0
|
| 557 |
+
1,97,70,40,0,38.1,0.218,30,0
|
| 558 |
+
8,110,76,0,0,27.8,0.237,58,0
|
| 559 |
+
11,103,68,40,0,46.2,0.126,42,0
|
| 560 |
+
11,85,74,0,0,30.1,0.300,35,0
|
| 561 |
+
6,125,76,0,0,33.8,0.121,54,1
|
| 562 |
+
0,198,66,32,274,41.3,0.502,28,1
|
| 563 |
+
1,87,68,34,77,37.6,0.401,24,0
|
| 564 |
+
6,99,60,19,54,26.9,0.497,32,0
|
| 565 |
+
0,91,80,0,0,32.4,0.601,27,0
|
| 566 |
+
2,95,54,14,88,26.1,0.748,22,0
|
| 567 |
+
1,99,72,30,18,38.6,0.412,21,0
|
| 568 |
+
6,92,62,32,126,32.0,0.085,46,0
|
| 569 |
+
4,154,72,29,126,31.3,0.338,37,0
|
| 570 |
+
0,121,66,30,165,34.3,0.203,33,1
|
| 571 |
+
3,78,70,0,0,32.5,0.270,39,0
|
| 572 |
+
2,130,96,0,0,22.6,0.268,21,0
|
| 573 |
+
3,111,58,31,44,29.5,0.430,22,0
|
| 574 |
+
2,98,60,17,120,34.7,0.198,22,0
|
| 575 |
+
1,143,86,30,330,30.1,0.892,23,0
|
| 576 |
+
1,119,44,47,63,35.5,0.280,25,0
|
| 577 |
+
6,108,44,20,130,24.0,0.813,35,0
|
| 578 |
+
2,118,80,0,0,42.9,0.693,21,1
|
| 579 |
+
10,133,68,0,0,27.0,0.245,36,0
|
| 580 |
+
2,197,70,99,0,34.7,0.575,62,1
|
| 581 |
+
0,151,90,46,0,42.1,0.371,21,1
|
| 582 |
+
6,109,60,27,0,25.0,0.206,27,0
|
| 583 |
+
12,121,78,17,0,26.5,0.259,62,0
|
| 584 |
+
8,100,76,0,0,38.7,0.190,42,0
|
| 585 |
+
8,124,76,24,600,28.7,0.687,52,1
|
| 586 |
+
1,93,56,11,0,22.5,0.417,22,0
|
| 587 |
+
8,143,66,0,0,34.9,0.129,41,1
|
| 588 |
+
6,103,66,0,0,24.3,0.249,29,0
|
| 589 |
+
3,176,86,27,156,33.3,1.154,52,1
|
| 590 |
+
0,73,0,0,0,21.1,0.342,25,0
|
| 591 |
+
11,111,84,40,0,46.8,0.925,45,1
|
| 592 |
+
2,112,78,50,140,39.4,0.175,24,0
|
| 593 |
+
3,132,80,0,0,34.4,0.402,44,1
|
| 594 |
+
2,82,52,22,115,28.5,1.699,25,0
|
| 595 |
+
6,123,72,45,230,33.6,0.733,34,0
|
| 596 |
+
0,188,82,14,185,32.0,0.682,22,1
|
| 597 |
+
0,67,76,0,0,45.3,0.194,46,0
|
| 598 |
+
1,89,24,19,25,27.8,0.559,21,0
|
| 599 |
+
1,173,74,0,0,36.8,0.088,38,1
|
| 600 |
+
1,109,38,18,120,23.1,0.407,26,0
|
| 601 |
+
1,108,88,19,0,27.1,0.400,24,0
|
| 602 |
+
6,96,0,0,0,23.7,0.190,28,0
|
| 603 |
+
1,124,74,36,0,27.8,0.100,30,0
|
| 604 |
+
7,150,78,29,126,35.2,0.692,54,1
|
| 605 |
+
4,183,0,0,0,28.4,0.212,36,1
|
| 606 |
+
1,124,60,32,0,35.8,0.514,21,0
|
| 607 |
+
1,181,78,42,293,40.0,1.258,22,1
|
| 608 |
+
1,92,62,25,41,19.5,0.482,25,0
|
| 609 |
+
0,152,82,39,272,41.5,0.270,27,0
|
| 610 |
+
1,111,62,13,182,24.0,0.138,23,0
|
| 611 |
+
3,106,54,21,158,30.9,0.292,24,0
|
| 612 |
+
3,174,58,22,194,32.9,0.593,36,1
|
| 613 |
+
7,168,88,42,321,38.2,0.787,40,1
|
| 614 |
+
6,105,80,28,0,32.5,0.878,26,0
|
| 615 |
+
11,138,74,26,144,36.1,0.557,50,1
|
| 616 |
+
3,106,72,0,0,25.8,0.207,27,0
|
| 617 |
+
6,117,96,0,0,28.7,0.157,30,0
|
| 618 |
+
2,68,62,13,15,20.1,0.257,23,0
|
| 619 |
+
9,112,82,24,0,28.2,1.282,50,1
|
| 620 |
+
0,119,0,0,0,32.4,0.141,24,1
|
| 621 |
+
2,112,86,42,160,38.4,0.246,28,0
|
| 622 |
+
2,92,76,20,0,24.2,1.698,28,0
|
| 623 |
+
6,183,94,0,0,40.8,1.461,45,0
|
| 624 |
+
0,94,70,27,115,43.5,0.347,21,0
|
| 625 |
+
2,108,64,0,0,30.8,0.158,21,0
|
| 626 |
+
4,90,88,47,54,37.7,0.362,29,0
|
| 627 |
+
0,125,68,0,0,24.7,0.206,21,0
|
| 628 |
+
0,132,78,0,0,32.4,0.393,21,0
|
| 629 |
+
5,128,80,0,0,34.6,0.144,45,0
|
| 630 |
+
4,94,65,22,0,24.7,0.148,21,0
|
| 631 |
+
7,114,64,0,0,27.4,0.732,34,1
|
| 632 |
+
0,102,78,40,90,34.5,0.238,24,0
|
| 633 |
+
2,111,60,0,0,26.2,0.343,23,0
|
| 634 |
+
1,128,82,17,183,27.5,0.115,22,0
|
| 635 |
+
10,92,62,0,0,25.9,0.167,31,0
|
| 636 |
+
13,104,72,0,0,31.2,0.465,38,1
|
| 637 |
+
5,104,74,0,0,28.8,0.153,48,0
|
| 638 |
+
2,94,76,18,66,31.6,0.649,23,0
|
| 639 |
+
7,97,76,32,91,40.9,0.871,32,1
|
| 640 |
+
1,100,74,12,46,19.5,0.149,28,0
|
| 641 |
+
0,102,86,17,105,29.3,0.695,27,0
|
| 642 |
+
4,128,70,0,0,34.3,0.303,24,0
|
| 643 |
+
6,147,80,0,0,29.5,0.178,50,1
|
| 644 |
+
4,90,0,0,0,28.0,0.610,31,0
|
| 645 |
+
3,103,72,30,152,27.6,0.730,27,0
|
| 646 |
+
2,157,74,35,440,39.4,0.134,30,0
|
| 647 |
+
1,167,74,17,144,23.4,0.447,33,1
|
| 648 |
+
0,179,50,36,159,37.8,0.455,22,1
|
| 649 |
+
11,136,84,35,130,28.3,0.260,42,1
|
| 650 |
+
0,107,60,25,0,26.4,0.133,23,0
|
| 651 |
+
1,91,54,25,100,25.2,0.234,23,0
|
| 652 |
+
1,117,60,23,106,33.8,0.466,27,0
|
| 653 |
+
5,123,74,40,77,34.1,0.269,28,0
|
| 654 |
+
2,120,54,0,0,26.8,0.455,27,0
|
| 655 |
+
1,106,70,28,135,34.2,0.142,22,0
|
| 656 |
+
2,155,52,27,540,38.7,0.240,25,1
|
| 657 |
+
2,101,58,35,90,21.8,0.155,22,0
|
| 658 |
+
1,120,80,48,200,38.9,1.162,41,0
|
| 659 |
+
11,127,106,0,0,39.0,0.190,51,0
|
| 660 |
+
3,80,82,31,70,34.2,1.292,27,1
|
| 661 |
+
10,162,84,0,0,27.7,0.182,54,0
|
| 662 |
+
1,199,76,43,0,42.9,1.394,22,1
|
| 663 |
+
8,167,106,46,231,37.6,0.165,43,1
|
| 664 |
+
9,145,80,46,130,37.9,0.637,40,1
|
| 665 |
+
6,115,60,39,0,33.7,0.245,40,1
|
| 666 |
+
1,112,80,45,132,34.8,0.217,24,0
|
| 667 |
+
4,145,82,18,0,32.5,0.235,70,1
|
| 668 |
+
10,111,70,27,0,27.5,0.141,40,1
|
| 669 |
+
6,98,58,33,190,34.0,0.430,43,0
|
| 670 |
+
9,154,78,30,100,30.9,0.164,45,0
|
| 671 |
+
6,165,68,26,168,33.6,0.631,49,0
|
| 672 |
+
1,99,58,10,0,25.4,0.551,21,0
|
| 673 |
+
10,68,106,23,49,35.5,0.285,47,0
|
| 674 |
+
3,123,100,35,240,57.3,0.880,22,0
|
| 675 |
+
8,91,82,0,0,35.6,0.587,68,0
|
| 676 |
+
6,195,70,0,0,30.9,0.328,31,1
|
| 677 |
+
9,156,86,0,0,24.8,0.230,53,1
|
| 678 |
+
0,93,60,0,0,35.3,0.263,25,0
|
| 679 |
+
3,121,52,0,0,36.0,0.127,25,1
|
| 680 |
+
2,101,58,17,265,24.2,0.614,23,0
|
| 681 |
+
2,56,56,28,45,24.2,0.332,22,0
|
| 682 |
+
0,162,76,36,0,49.6,0.364,26,1
|
| 683 |
+
0,95,64,39,105,44.6,0.366,22,0
|
| 684 |
+
4,125,80,0,0,32.3,0.536,27,1
|
| 685 |
+
5,136,82,0,0,0.0,0.640,69,0
|
| 686 |
+
2,129,74,26,205,33.2,0.591,25,0
|
| 687 |
+
3,130,64,0,0,23.1,0.314,22,0
|
| 688 |
+
1,107,50,19,0,28.3,0.181,29,0
|
| 689 |
+
1,140,74,26,180,24.1,0.828,23,0
|
| 690 |
+
1,144,82,46,180,46.1,0.335,46,1
|
| 691 |
+
8,107,80,0,0,24.6,0.856,34,0
|
| 692 |
+
13,158,114,0,0,42.3,0.257,44,1
|
| 693 |
+
2,121,70,32,95,39.1,0.886,23,0
|
| 694 |
+
7,129,68,49,125,38.5,0.439,43,1
|
| 695 |
+
2,90,60,0,0,23.5,0.191,25,0
|
| 696 |
+
7,142,90,24,480,30.4,0.128,43,1
|
| 697 |
+
3,169,74,19,125,29.9,0.268,31,1
|
| 698 |
+
0,99,0,0,0,25.0,0.253,22,0
|
| 699 |
+
4,127,88,11,155,34.5,0.598,28,0
|
| 700 |
+
4,118,70,0,0,44.5,0.904,26,0
|
| 701 |
+
2,122,76,27,200,35.9,0.483,26,0
|
| 702 |
+
6,125,78,31,0,27.6,0.565,49,1
|
| 703 |
+
1,168,88,29,0,35.0,0.905,52,1
|
| 704 |
+
2,129,0,0,0,38.5,0.304,41,0
|
| 705 |
+
4,110,76,20,100,28.4,0.118,27,0
|
| 706 |
+
6,80,80,36,0,39.8,0.177,28,0
|
| 707 |
+
10,115,0,0,0,0.0,0.261,30,1
|
| 708 |
+
2,127,46,21,335,34.4,0.176,22,0
|
| 709 |
+
9,164,78,0,0,32.8,0.148,45,1
|
| 710 |
+
2,93,64,32,160,38.0,0.674,23,1
|
| 711 |
+
3,158,64,13,387,31.2,0.295,24,0
|
| 712 |
+
5,126,78,27,22,29.6,0.439,40,0
|
| 713 |
+
10,129,62,36,0,41.2,0.441,38,1
|
| 714 |
+
0,134,58,20,291,26.4,0.352,21,0
|
| 715 |
+
3,102,74,0,0,29.5,0.121,32,0
|
| 716 |
+
7,187,50,33,392,33.9,0.826,34,1
|
| 717 |
+
3,173,78,39,185,33.8,0.970,31,1
|
| 718 |
+
10,94,72,18,0,23.1,0.595,56,0
|
| 719 |
+
1,108,60,46,178,35.5,0.415,24,0
|
| 720 |
+
5,97,76,27,0,35.6,0.378,52,1
|
| 721 |
+
4,83,86,19,0,29.3,0.317,34,0
|
| 722 |
+
1,114,66,36,200,38.1,0.289,21,0
|
| 723 |
+
1,149,68,29,127,29.3,0.349,42,1
|
| 724 |
+
5,117,86,30,105,39.1,0.251,42,0
|
| 725 |
+
1,111,94,0,0,32.8,0.265,45,0
|
| 726 |
+
4,112,78,40,0,39.4,0.236,38,0
|
| 727 |
+
1,116,78,29,180,36.1,0.496,25,0
|
| 728 |
+
0,141,84,26,0,32.4,0.433,22,0
|
| 729 |
+
2,175,88,0,0,22.9,0.326,22,0
|
| 730 |
+
2,92,52,0,0,30.1,0.141,22,0
|
| 731 |
+
3,130,78,23,79,28.4,0.323,34,1
|
| 732 |
+
8,120,86,0,0,28.4,0.259,22,1
|
| 733 |
+
2,174,88,37,120,44.5,0.646,24,1
|
| 734 |
+
2,106,56,27,165,29.0,0.426,22,0
|
| 735 |
+
2,105,75,0,0,23.3,0.560,53,0
|
| 736 |
+
4,95,60,32,0,35.4,0.284,28,0
|
| 737 |
+
0,126,86,27,120,27.4,0.515,21,0
|
| 738 |
+
8,65,72,23,0,32.0,0.600,42,0
|
| 739 |
+
2,99,60,17,160,36.6,0.453,21,0
|
| 740 |
+
1,102,74,0,0,39.5,0.293,42,1
|
| 741 |
+
11,120,80,37,150,42.3,0.785,48,1
|
| 742 |
+
3,102,44,20,94,30.8,0.400,26,0
|
| 743 |
+
1,109,58,18,116,28.5,0.219,22,0
|
| 744 |
+
9,140,94,0,0,32.7,0.734,45,1
|
| 745 |
+
13,153,88,37,140,40.6,1.174,39,0
|
| 746 |
+
12,100,84,33,105,30.0,0.488,46,0
|
| 747 |
+
1,147,94,41,0,49.3,0.358,27,1
|
| 748 |
+
1,81,74,41,57,46.3,1.096,32,0
|
| 749 |
+
3,187,70,22,200,36.4,0.408,36,1
|
| 750 |
+
6,162,62,0,0,24.3,0.178,50,1
|
| 751 |
+
4,136,70,0,0,31.2,1.182,22,1
|
| 752 |
+
1,121,78,39,74,39.0,0.261,28,0
|
| 753 |
+
3,108,62,24,0,26.0,0.223,25,0
|
| 754 |
+
0,181,88,44,510,43.3,0.222,26,1
|
| 755 |
+
8,154,78,32,0,32.4,0.443,45,1
|
| 756 |
+
1,128,88,39,110,36.5,1.057,37,1
|
| 757 |
+
7,137,90,41,0,32.0,0.391,39,0
|
| 758 |
+
0,123,72,0,0,36.3,0.258,52,1
|
| 759 |
+
1,106,76,0,0,37.5,0.197,26,0
|
| 760 |
+
6,190,92,0,0,35.5,0.278,66,1
|
| 761 |
+
2,88,58,26,16,28.4,0.766,22,0
|
| 762 |
+
9,170,74,31,0,44.0,0.403,43,1
|
| 763 |
+
9,89,62,0,0,22.5,0.142,33,0
|
| 764 |
+
10,101,76,48,180,32.9,0.171,63,0
|
| 765 |
+
2,122,70,27,0,36.8,0.340,27,0
|
| 766 |
+
5,121,72,23,112,26.2,0.245,30,0
|
| 767 |
+
1,126,60,0,0,30.1,0.349,47,1
|
| 768 |
+
1,93,70,31,0,30.4,0.315,23,0
|
data_cache/hepatology_liver.csv
ADDED
|
@@ -0,0 +1,583 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
65,Female,0.7,0.1,187,16,18,6.8,3.3,0.9,1
|
| 2 |
+
62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,1
|
| 3 |
+
62,Male,7.3,4.1,490,60,68,7,3.3,0.89,1
|
| 4 |
+
58,Male,1,0.4,182,14,20,6.8,3.4,1,1
|
| 5 |
+
72,Male,3.9,2,195,27,59,7.3,2.4,0.4,1
|
| 6 |
+
46,Male,1.8,0.7,208,19,14,7.6,4.4,1.3,1
|
| 7 |
+
26,Female,0.9,0.2,154,16,12,7,3.5,1,1
|
| 8 |
+
29,Female,0.9,0.3,202,14,11,6.7,3.6,1.1,1
|
| 9 |
+
17,Male,0.9,0.3,202,22,19,7.4,4.1,1.2,2
|
| 10 |
+
55,Male,0.7,0.2,290,53,58,6.8,3.4,1,1
|
| 11 |
+
57,Male,0.6,0.1,210,51,59,5.9,2.7,0.8,1
|
| 12 |
+
72,Male,2.7,1.3,260,31,56,7.4,3,0.6,1
|
| 13 |
+
64,Male,0.9,0.3,310,61,58,7,3.4,0.9,2
|
| 14 |
+
74,Female,1.1,0.4,214,22,30,8.1,4.1,1,1
|
| 15 |
+
61,Male,0.7,0.2,145,53,41,5.8,2.7,0.87,1
|
| 16 |
+
25,Male,0.6,0.1,183,91,53,5.5,2.3,0.7,2
|
| 17 |
+
38,Male,1.8,0.8,342,168,441,7.6,4.4,1.3,1
|
| 18 |
+
33,Male,1.6,0.5,165,15,23,7.3,3.5,0.92,2
|
| 19 |
+
40,Female,0.9,0.3,293,232,245,6.8,3.1,0.8,1
|
| 20 |
+
40,Female,0.9,0.3,293,232,245,6.8,3.1,0.8,1
|
| 21 |
+
51,Male,2.2,1,610,17,28,7.3,2.6,0.55,1
|
| 22 |
+
51,Male,2.9,1.3,482,22,34,7,2.4,0.5,1
|
| 23 |
+
62,Male,6.8,3,542,116,66,6.4,3.1,0.9,1
|
| 24 |
+
40,Male,1.9,1,231,16,55,4.3,1.6,0.6,1
|
| 25 |
+
63,Male,0.9,0.2,194,52,45,6,3.9,1.85,2
|
| 26 |
+
34,Male,4.1,2,289,875,731,5,2.7,1.1,1
|
| 27 |
+
34,Male,4.1,2,289,875,731,5,2.7,1.1,1
|
| 28 |
+
34,Male,6.2,3,240,1680,850,7.2,4,1.2,1
|
| 29 |
+
20,Male,1.1,0.5,128,20,30,3.9,1.9,0.95,2
|
| 30 |
+
84,Female,0.7,0.2,188,13,21,6,3.2,1.1,2
|
| 31 |
+
57,Male,4,1.9,190,45,111,5.2,1.5,0.4,1
|
| 32 |
+
52,Male,0.9,0.2,156,35,44,4.9,2.9,1.4,1
|
| 33 |
+
57,Male,1,0.3,187,19,23,5.2,2.9,1.2,2
|
| 34 |
+
38,Female,2.6,1.2,410,59,57,5.6,3,0.8,2
|
| 35 |
+
38,Female,2.6,1.2,410,59,57,5.6,3,0.8,2
|
| 36 |
+
30,Male,1.3,0.4,482,102,80,6.9,3.3,0.9,1
|
| 37 |
+
17,Female,0.7,0.2,145,18,36,7.2,3.9,1.18,2
|
| 38 |
+
46,Female,14.2,7.8,374,38,77,4.3,2,0.8,1
|
| 39 |
+
48,Male,1.4,0.6,263,38,66,5.8,2.2,0.61,1
|
| 40 |
+
47,Male,2.7,1.3,275,123,73,6.2,3.3,1.1,1
|
| 41 |
+
45,Male,2.4,1.1,168,33,50,5.1,2.6,1,1
|
| 42 |
+
62,Male,0.6,0.1,160,42,110,4.9,2.6,1.1,2
|
| 43 |
+
42,Male,6.8,3.2,630,25,47,6.1,2.3,0.6,2
|
| 44 |
+
50,Male,2.6,1.2,415,407,576,6.4,3.2,1,1
|
| 45 |
+
85,Female,1,0.3,208,17,15,7,3.6,1,2
|
| 46 |
+
35,Male,1.8,0.6,275,48,178,6.5,3.2,0.9,2
|
| 47 |
+
21,Male,3.9,1.8,150,36,27,6.8,3.9,1.34,1
|
| 48 |
+
40,Male,1.1,0.3,230,1630,960,4.9,2.8,1.3,1
|
| 49 |
+
32,Female,0.6,0.1,176,39,28,6,3,1,1
|
| 50 |
+
55,Male,18.4,8.8,206,64,178,6.2,1.8,0.4,1
|
| 51 |
+
45,Female,0.7,0.2,170,21,14,5.7,2.5,0.7,1
|
| 52 |
+
34,Female,0.6,0.1,161,15,19,6.6,3.4,1,1
|
| 53 |
+
38,Male,3.1,1.6,253,80,406,6.8,3.9,1.3,1
|
| 54 |
+
38,Male,1.1,0.3,198,86,150,6.3,3.5,1.2,1
|
| 55 |
+
42,Male,8.9,4.5,272,31,61,5.8,2,0.5,1
|
| 56 |
+
42,Male,8.9,4.5,272,31,61,5.8,2,0.5,1
|
| 57 |
+
33,Male,0.8,0.2,198,26,23,8,4,1,2
|
| 58 |
+
48,Female,0.9,0.2,175,24,54,5.5,2.7,0.9,2
|
| 59 |
+
51,Male,0.8,0.2,367,42,18,5.2,2,0.6,1
|
| 60 |
+
64,Male,1.1,0.5,145,20,24,5.5,3.2,1.39,2
|
| 61 |
+
31,Female,0.8,0.2,158,21,16,6,3,1,1
|
| 62 |
+
58,Male,1,0.5,158,37,43,7.2,3.6,1,1
|
| 63 |
+
58,Male,1,0.5,158,37,43,7.2,3.6,1,1
|
| 64 |
+
57,Male,0.7,0.2,208,35,97,5.1,2.1,0.7,1
|
| 65 |
+
57,Male,1.3,0.4,259,40,86,6.5,2.5,0.6,1
|
| 66 |
+
57,Male,1.4,0.7,470,62,88,5.6,2.5,0.8,1
|
| 67 |
+
54,Male,2.2,1.2,195,55,95,6,3.7,1.6,1
|
| 68 |
+
37,Male,1.8,0.8,215,53,58,6.4,3.8,1.4,1
|
| 69 |
+
66,Male,0.7,0.2,239,27,26,6.3,3.7,1.4,1
|
| 70 |
+
60,Male,0.8,0.2,215,24,17,6.3,3,0.9,2
|
| 71 |
+
19,Female,0.7,0.2,186,166,397,5.5,3,1.2,1
|
| 72 |
+
75,Female,0.8,0.2,188,20,29,4.4,1.8,0.6,1
|
| 73 |
+
75,Female,0.8,0.2,205,27,24,4.4,2,0.8,1
|
| 74 |
+
52,Male,0.6,0.1,171,22,16,6.6,3.6,1.2,1
|
| 75 |
+
68,Male,0.7,0.1,145,20,22,5.8,2.9,1,1
|
| 76 |
+
29,Female,0.7,0.1,162,52,41,5.2,2.5,0.9,2
|
| 77 |
+
31,Male,0.9,0.2,518,189,17,5.3,2.3,0.7,1
|
| 78 |
+
68,Female,0.6,0.1,1620,95,127,4.6,2.1,0.8,1
|
| 79 |
+
70,Male,1.4,0.6,146,12,24,6.2,3.8,1.58,2
|
| 80 |
+
58,Female,2.8,1.3,670,48,79,4.7,1.6,0.5,1
|
| 81 |
+
58,Female,2.4,1.1,915,60,142,4.7,1.8,0.6,1
|
| 82 |
+
29,Male,1,0.3,75,25,26,5.1,2.9,1.3,1
|
| 83 |
+
49,Male,0.7,0.1,148,14,12,5.4,2.8,1,2
|
| 84 |
+
33,Male,2,1,258,194,152,5.4,3,1.25,1
|
| 85 |
+
32,Male,0.6,0.1,237,45,31,7.5,4.3,1.34,1
|
| 86 |
+
14,Male,1.4,0.5,269,58,45,6.7,3.9,1.4,1
|
| 87 |
+
13,Male,0.6,0.1,320,28,56,7.2,3.6,1,2
|
| 88 |
+
58,Male,0.8,0.2,298,33,59,6.2,3.1,1,1
|
| 89 |
+
18,Male,0.6,0.2,538,33,34,7.5,3.2,0.7,1
|
| 90 |
+
60,Male,4,1.9,238,119,350,7.1,3.3,0.8,1
|
| 91 |
+
60,Male,5.7,2.8,214,412,850,7.3,3.2,0.78,1
|
| 92 |
+
60,Male,6.8,3.2,308,404,794,6.8,3,0.7,1
|
| 93 |
+
60,Male,8.6,4,298,412,850,7.4,3,0.6,1
|
| 94 |
+
60,Male,5.8,2.7,204,220,400,7,3,0.7,1
|
| 95 |
+
60,Male,5.2,2.4,168,126,202,6.8,2.9,0.7,1
|
| 96 |
+
75,Male,0.9,0.2,282,25,23,4.4,2.2,1,1
|
| 97 |
+
39,Male,3.8,1.5,298,102,630,7.1,3.3,0.8,1
|
| 98 |
+
39,Male,6.6,3,215,190,950,4,1.7,0.7,1
|
| 99 |
+
18,Male,0.6,0.1,265,97,161,5.9,3.1,1.1,1
|
| 100 |
+
18,Male,0.7,0.1,312,308,405,6.9,3.7,1.1,1
|
| 101 |
+
27,Male,0.6,0.2,161,27,28,3.7,1.6,0.76,2
|
| 102 |
+
27,Male,0.7,0.2,243,21,23,5.3,2.3,0.7,2
|
| 103 |
+
17,Male,0.9,0.2,224,36,45,6.9,4.2,1.55,1
|
| 104 |
+
55,Female,0.8,0.2,225,14,23,6.1,3.3,1.2,2
|
| 105 |
+
63,Male,0.5,0.1,170,21,28,5.5,2.5,0.8,1
|
| 106 |
+
36,Male,5.3,2.3,145,32,92,5.1,2.6,1,2
|
| 107 |
+
36,Male,5.3,2.3,145,32,92,5.1,2.6,1,2
|
| 108 |
+
36,Male,0.8,0.2,158,29,39,6,2.2,0.5,2
|
| 109 |
+
36,Male,0.8,0.2,158,29,39,6,2.2,0.5,2
|
| 110 |
+
36,Male,0.9,0.1,486,25,34,5.9,2.8,0.9,2
|
| 111 |
+
24,Female,0.7,0.2,188,11,10,5.5,2.3,0.71,2
|
| 112 |
+
48,Male,3.2,1.6,257,33,116,5.7,2.2,0.62,1
|
| 113 |
+
27,Male,1.2,0.4,179,63,39,6.1,3.3,1.1,2
|
| 114 |
+
74,Male,0.6,0.1,272,24,98,5,2,0.6,1
|
| 115 |
+
50,Male,5.8,3,661,181,285,5.7,2.3,0.67,2
|
| 116 |
+
50,Male,7.3,3.6,1580,88,64,5.6,2.3,0.6,2
|
| 117 |
+
48,Male,0.7,0.1,1630,74,149,5.3,2,0.6,1
|
| 118 |
+
32,Male,12.7,6.2,194,2000,2946,5.7,3.3,1.3,1
|
| 119 |
+
32,Male,15.9,7,280,1350,1600,5.6,2.8,1,1
|
| 120 |
+
32,Male,18,8.2,298,1250,1050,5.4,2.6,0.9,1
|
| 121 |
+
32,Male,23,11.3,300,482,275,7.1,3.5,0.9,1
|
| 122 |
+
32,Male,22.7,10.2,290,322,113,6.6,2.8,0.7,1
|
| 123 |
+
58,Male,1.7,0.8,188,60,84,5.9,3.5,1.4,2
|
| 124 |
+
64,Female,0.8,0.2,178,17,18,6.3,3.1,0.9,1
|
| 125 |
+
28,Male,0.6,0.1,177,36,29,6.9,4.1,1.4,2
|
| 126 |
+
60,Male,1.8,0.5,201,45,25,3.9,1.7,0.7,2
|
| 127 |
+
48,Male,5.8,2.5,802,133,88,6,2.8,0.8,1
|
| 128 |
+
64,Male,3,1.4,248,46,40,6.5,3.2,0.9,1
|
| 129 |
+
58,Female,1.7,0.8,1896,61,83,8,3.9,0.95,1
|
| 130 |
+
45,Male,2.8,1.7,263,57,65,5.1,2.3,0.8,1
|
| 131 |
+
45,Male,3.2,1.4,512,50,58,6,2.7,0.8,1
|
| 132 |
+
70,Female,0.7,0.2,237,18,28,5.8,2.5,0.75,2
|
| 133 |
+
18,Female,0.8,0.2,199,34,31,6.5,3.5,1.16,2
|
| 134 |
+
53,Male,0.9,0.4,238,17,14,6.6,2.9,0.8,1
|
| 135 |
+
18,Male,1.8,0.7,178,35,36,6.8,3.6,1.1,1
|
| 136 |
+
66,Male,11.3,5.6,1110,1250,4929,7,2.4,0.5,1
|
| 137 |
+
46,Female,4.7,2.2,310,62,90,6.4,2.5,0.6,1
|
| 138 |
+
18,Male,0.8,0.2,282,72,140,5.5,2.5,0.8,1
|
| 139 |
+
18,Male,0.8,0.2,282,72,140,5.5,2.5,0.8,1
|
| 140 |
+
15,Male,0.8,0.2,380,25,66,6.1,3.7,1.5,1
|
| 141 |
+
60,Male,0.6,0.1,186,20,21,6.2,3.3,1.1,2
|
| 142 |
+
66,Female,4.2,2.1,159,15,30,7.1,2.2,0.4,1
|
| 143 |
+
30,Male,1.6,0.4,332,84,139,5.6,2.7,0.9,1
|
| 144 |
+
30,Male,1.6,0.4,332,84,139,5.6,2.7,0.9,1
|
| 145 |
+
45,Female,3.5,1.5,189,63,87,5.6,2.9,1,1
|
| 146 |
+
65,Male,0.8,0.2,201,18,22,5.4,2.9,1.1,2
|
| 147 |
+
66,Female,2.9,1.3,168,21,38,5.5,1.8,0.4,1
|
| 148 |
+
65,Male,0.7,0.1,392,20,30,5.3,2.8,1.1,1
|
| 149 |
+
50,Male,0.9,0.2,202,20,26,7.2,4.5,1.66,1
|
| 150 |
+
60,Male,0.8,0.2,286,21,27,7.1,4,1.2,1
|
| 151 |
+
56,Male,1.1,0.5,180,30,42,6.9,3.8,1.2,2
|
| 152 |
+
50,Male,1.6,0.8,218,18,20,5.9,2.9,0.96,1
|
| 153 |
+
46,Female,0.8,0.2,182,20,40,6,2.9,0.9,1
|
| 154 |
+
52,Male,0.6,0.1,178,26,27,6.5,3.6,1.2,2
|
| 155 |
+
34,Male,5.9,2.5,290,45,233,5.6,2.7,0.9,1
|
| 156 |
+
34,Male,8.7,4,298,58,138,5.8,2.4,0.7,1
|
| 157 |
+
32,Male,0.9,0.3,462,70,82,6.2,3.1,1,1
|
| 158 |
+
72,Male,0.7,0.1,196,20,35,5.8,2,0.5,1
|
| 159 |
+
72,Male,0.7,0.1,196,20,35,5.8,2,0.5,1
|
| 160 |
+
50,Male,1.2,0.4,282,36,32,7.2,3.9,1.1,1
|
| 161 |
+
60,Male,11,4.9,750,140,350,5.5,2.1,0.6,1
|
| 162 |
+
60,Male,11.5,5,1050,99,187,6.2,2.8,0.8,1
|
| 163 |
+
60,Male,5.8,2.7,599,43,66,5.4,1.8,0.5,1
|
| 164 |
+
39,Male,1.9,0.9,180,42,62,7.4,4.3,1.38,1
|
| 165 |
+
39,Male,1.9,0.9,180,42,62,7.4,4.3,1.38,1
|
| 166 |
+
48,Male,4.5,2.3,282,13,74,7,2.4,0.52,1
|
| 167 |
+
55,Male,75,3.6,332,40,66,6.2,2.5,0.6,1
|
| 168 |
+
47,Female,3,1.5,292,64,67,5.6,1.8,0.47,1
|
| 169 |
+
60,Male,22.8,12.6,962,53,41,6.9,3.3,0.9,1
|
| 170 |
+
60,Male,8.9,4,950,33,32,6.8,3.1,0.8,1
|
| 171 |
+
72,Male,1.7,0.8,200,28,37,6.2,3,0.93,1
|
| 172 |
+
44,Female,1.9,0.6,298,378,602,6.6,3.3,1,1
|
| 173 |
+
55,Male,14.1,7.6,750,35,63,5,1.6,0.47,1
|
| 174 |
+
31,Male,0.6,0.1,175,48,34,6,3.7,1.6,1
|
| 175 |
+
31,Male,0.6,0.1,175,48,34,6,3.7,1.6,1
|
| 176 |
+
31,Male,0.8,0.2,198,43,31,7.3,4,1.2,1
|
| 177 |
+
55,Male,0.8,0.2,482,112,99,5.7,2.6,0.8,1
|
| 178 |
+
75,Male,14.8,9,1020,71,42,5.3,2.2,0.7,1
|
| 179 |
+
75,Male,10.6,5,562,37,29,5.1,1.8,0.5,1
|
| 180 |
+
75,Male,8,4.6,386,30,25,5.5,1.8,0.48,1
|
| 181 |
+
75,Male,2.8,1.3,250,23,29,2.7,0.9,0.5,1
|
| 182 |
+
75,Male,2.9,1.3,218,33,37,3,1.5,1,1
|
| 183 |
+
65,Male,1.9,0.8,170,36,43,3.8,1.4,0.58,2
|
| 184 |
+
40,Male,0.6,0.1,171,20,17,5.4,2.5,0.8,1
|
| 185 |
+
64,Male,1.1,0.4,201,18,19,6.9,4.1,1.4,1
|
| 186 |
+
38,Male,1.5,0.4,298,60,103,6,3,1,2
|
| 187 |
+
60,Male,3.2,1.8,750,79,145,7.8,3.2,0.69,1
|
| 188 |
+
60,Male,2.1,1,191,114,247,4,1.6,0.6,1
|
| 189 |
+
60,Male,1.9,0.8,614,42,38,4.5,1.8,0.6,1
|
| 190 |
+
48,Female,0.8,0.2,218,32,28,5.2,2.5,0.9,2
|
| 191 |
+
60,Male,6.3,3.2,314,118,114,6.6,3.7,1.27,1
|
| 192 |
+
60,Male,5.8,3,257,107,104,6.6,3.5,1.12,1
|
| 193 |
+
60,Male,2.3,0.6,272,79,51,6.6,3.5,1.1,1
|
| 194 |
+
49,Male,1.3,0.4,206,30,25,6,3.1,1.06,2
|
| 195 |
+
49,Male,2,0.6,209,48,32,5.7,3,1.1,2
|
| 196 |
+
60,Male,2.4,1,1124,30,54,5.2,1.9,0.5,1
|
| 197 |
+
60,Male,2,1.1,664,52,104,6,2.1,0.53,1
|
| 198 |
+
26,Female,0.6,0.2,142,12,32,5.7,2.4,0.75,1
|
| 199 |
+
41,Male,0.9,0.2,169,22,18,6.1,3,0.9,2
|
| 200 |
+
7,Female,27.2,11.8,1420,790,1050,6.1,2,0.4,1
|
| 201 |
+
49,Male,0.6,0.1,218,50,53,5,2.4,0.9,1
|
| 202 |
+
49,Male,0.6,0.1,218,50,53,5,2.4,0.9,1
|
| 203 |
+
38,Female,0.8,0.2,145,19,23,6.1,3.1,1.03,2
|
| 204 |
+
21,Male,1,0.3,142,27,21,6.4,3.5,1.2,2
|
| 205 |
+
21,Male,0.7,0.2,135,27,26,6.4,3.3,1,2
|
| 206 |
+
45,Male,2.5,1.2,163,28,22,7.6,4,1.1,1
|
| 207 |
+
40,Male,3.6,1.8,285,50,60,7,2.9,0.7,1
|
| 208 |
+
40,Male,3.9,1.7,350,950,1500,6.7,3.8,1.3,1
|
| 209 |
+
70,Female,0.9,0.3,220,53,95,6.1,2.8,0.68,1
|
| 210 |
+
45,Female,0.9,0.3,189,23,33,6.6,3.9,,1
|
| 211 |
+
28,Male,0.8,0.3,190,20,14,4.1,2.4,1.4,1
|
| 212 |
+
42,Male,2.7,1.3,219,60,180,7,3.2,0.8,1
|
| 213 |
+
22,Male,2.7,1,160,82,127,5.5,3.1,1.2,2
|
| 214 |
+
8,Female,0.9,0.2,401,25,58,7.5,3.4,0.8,1
|
| 215 |
+
38,Male,1.7,1,180,18,34,7.2,3.6,1,1
|
| 216 |
+
66,Male,0.6,0.2,100,17,148,5,3.3,1.9,2
|
| 217 |
+
55,Male,0.9,0.2,116,36,16,6.2,3.2,1,2
|
| 218 |
+
49,Male,1.1,0.5,159,30,31,7,4.3,1.5,1
|
| 219 |
+
6,Male,0.6,0.1,289,38,30,4.8,2,0.7,2
|
| 220 |
+
37,Male,0.8,0.2,125,41,39,6.4,3.4,1.1,1
|
| 221 |
+
37,Male,0.8,0.2,147,27,46,5,2.5,1,1
|
| 222 |
+
47,Male,0.9,0.2,192,38,24,7.3,4.3,1.4,1
|
| 223 |
+
47,Male,0.9,0.2,265,40,28,8,4,1,1
|
| 224 |
+
50,Male,1.1,0.3,175,20,19,7.1,4.5,1.7,2
|
| 225 |
+
70,Male,1.7,0.5,400,56,44,5.7,3.1,1.1,1
|
| 226 |
+
26,Male,0.6,0.2,120,45,51,7.9,4,1,1
|
| 227 |
+
26,Male,1.3,0.4,173,38,62,8,4,1,1
|
| 228 |
+
68,Female,0.7,0.2,186,18,15,6.4,3.8,1.4,1
|
| 229 |
+
65,Female,1,0.3,202,26,13,5.3,2.6,0.9,2
|
| 230 |
+
46,Male,0.6,0.2,290,26,21,6,3,1,1
|
| 231 |
+
61,Male,1.5,0.6,196,61,85,6.7,3.8,1.3,2
|
| 232 |
+
61,Male,0.8,0.1,282,85,231,8.5,4.3,1,1
|
| 233 |
+
50,Male,2.7,1.6,157,149,156,7.9,3.1,0.6,1
|
| 234 |
+
33,Male,2,1.4,2110,48,89,6.2,3,0.9,1
|
| 235 |
+
40,Female,0.9,0.2,285,32,27,7.7,3.5,0.8,1
|
| 236 |
+
60,Male,1.5,0.6,360,230,298,4.5,2,0.8,1
|
| 237 |
+
22,Male,0.8,0.2,300,57,40,7.9,3.8,0.9,2
|
| 238 |
+
35,Female,0.9,0.3,158,20,16,8,4,1,1
|
| 239 |
+
35,Female,0.9,0.2,190,40,35,7.3,4.7,1.8,2
|
| 240 |
+
40,Male,0.9,0.3,196,69,48,6.8,3.1,0.8,1
|
| 241 |
+
48,Male,0.7,0.2,165,32,30,8,4,1,2
|
| 242 |
+
51,Male,0.8,0.2,230,24,46,6.5,3.1,,1
|
| 243 |
+
29,Female,0.8,0.2,205,30,23,8.2,4.1,1,1
|
| 244 |
+
28,Female,0.9,0.2,316,25,23,8.5,5.5,1.8,1
|
| 245 |
+
54,Male,0.8,0.2,218,20,19,6.3,2.5,0.6,1
|
| 246 |
+
54,Male,0.9,0.2,290,15,18,6.1,2.8,0.8,1
|
| 247 |
+
55,Male,1.8,9,272,22,79,6.1,2.7,0.7,1
|
| 248 |
+
55,Male,0.9,0.2,190,25,28,5.9,2.7,0.8,1
|
| 249 |
+
40,Male,0.7,0.1,202,37,29,5,2.6,1,1
|
| 250 |
+
33,Male,1.2,0.3,498,28,25,7,3,0.7,1
|
| 251 |
+
33,Male,2.1,1.3,480,38,22,6.5,3,0.8,1
|
| 252 |
+
33,Male,0.9,0.8,680,37,40,5.9,2.6,0.8,1
|
| 253 |
+
65,Male,1.1,0.3,258,48,40,7,3.9,1.2,2
|
| 254 |
+
35,Female,0.6,0.2,180,12,15,5.2,2.7,,2
|
| 255 |
+
38,Female,0.7,0.1,152,90,21,7.1,4.2,1.4,2
|
| 256 |
+
38,Male,1.7,0.7,859,89,48,6,3,1,1
|
| 257 |
+
50,Male,0.9,0.3,901,23,17,6.2,3.5,1.2,1
|
| 258 |
+
44,Male,0.8,0.2,335,148,86,5.6,3,1.1,1
|
| 259 |
+
36,Male,0.8,0.2,182,31,34,6.4,3.8,1.4,2
|
| 260 |
+
42,Male,30.5,14.2,285,65,130,5.2,2.1,0.6,1
|
| 261 |
+
42,Male,16.4,8.9,245,56,87,5.4,2,0.5,1
|
| 262 |
+
33,Male,1.5,7,505,205,140,7.5,3.9,1,1
|
| 263 |
+
18,Male,0.8,0.2,228,55,54,6.9,4,1.3,1
|
| 264 |
+
38,Female,0.8,0.2,185,25,21,7,3,0.7,1
|
| 265 |
+
38,Male,0.8,0.2,247,55,92,7.4,4.3,1.38,2
|
| 266 |
+
4,Male,0.9,0.2,348,30,34,8,4,1,2
|
| 267 |
+
62,Male,1.2,0.4,195,38,54,6.3,3.8,1.5,1
|
| 268 |
+
43,Female,0.9,0.3,140,12,29,7.4,3.5,1.8,1
|
| 269 |
+
40,Male,14.5,6.4,358,50,75,5.7,2.1,0.5,1
|
| 270 |
+
26,Male,0.6,0.1,110,15,20,2.8,1.6,1.3,1
|
| 271 |
+
37,Male,0.7,0.2,235,96,54,9.5,4.9,1,1
|
| 272 |
+
4,Male,0.8,0.2,460,152,231,6.5,3.2,0.9,2
|
| 273 |
+
21,Male,18.5,9.5,380,390,500,8.2,4.1,1,1
|
| 274 |
+
30,Male,0.7,0.2,262,15,18,9.6,4.7,1.2,1
|
| 275 |
+
33,Male,1.8,0.8,196,25,22,8,4,1,1
|
| 276 |
+
26,Male,1.9,0.8,180,22,19,8.2,4.1,1,2
|
| 277 |
+
35,Male,0.9,0.2,190,25,20,6.4,3.6,1.2,2
|
| 278 |
+
60,Male,2,0.8,190,45,40,6,2.8,0.8,1
|
| 279 |
+
45,Male,2.2,0.8,209,25,20,8,4,1,1
|
| 280 |
+
48,Female,1,1.4,144,18,14,8.3,4.2,1,1
|
| 281 |
+
58,Male,0.8,0.2,123,56,48,6,3,1,1
|
| 282 |
+
50,Male,0.7,0.2,192,18,15,7.4,4.2,1.3,2
|
| 283 |
+
50,Male,0.7,0.2,188,12,14,7,3.4,0.9,1
|
| 284 |
+
18,Male,1.3,0.7,316,10,21,6,2.1,0.5,2
|
| 285 |
+
18,Male,0.9,0.3,300,30,48,8,4,1,1
|
| 286 |
+
13,Male,1.5,0.5,575,29,24,7.9,3.9,0.9,1
|
| 287 |
+
34,Female,0.8,0.2,192,15,12,8.6,4.7,1.2,1
|
| 288 |
+
43,Male,1.3,0.6,155,15,20,8,4,1,2
|
| 289 |
+
50,Female,1,0.5,239,16,39,7.5,3.7,0.9,1
|
| 290 |
+
57,Male,4.5,2.3,315,120,105,7,4,1.3,1
|
| 291 |
+
45,Female,1,0.3,250,48,44,8.6,4.3,1,1
|
| 292 |
+
60,Male,0.7,0.2,174,32,14,7.8,4.2,1.1,2
|
| 293 |
+
45,Male,0.6,0.2,245,22,24,7.1,3.4,0.9,1
|
| 294 |
+
23,Male,1.1,0.5,191,37,41,7.7,4.3,1.2,2
|
| 295 |
+
22,Male,2.4,1,340,25,21,8.3,4.5,1.1,1
|
| 296 |
+
22,Male,0.6,0.2,202,78,41,8,3.9,0.9,1
|
| 297 |
+
74,Female,0.9,0.3,234,16,19,7.9,4,1,1
|
| 298 |
+
25,Female,0.9,0.3,159,24,25,6.9,4.4,1.7,2
|
| 299 |
+
31,Female,1.1,0.3,190,26,15,7.9,3.8,0.9,1
|
| 300 |
+
24,Female,0.9,0.2,195,40,35,7.4,4.1,1.2,2
|
| 301 |
+
58,Male,0.8,0.2,180,32,25,8.2,4.4,1.1,2
|
| 302 |
+
51,Female,0.9,0.2,280,21,30,6.7,3.2,0.8,1
|
| 303 |
+
50,Female,1.7,0.6,430,28,32,6.8,3.5,1,1
|
| 304 |
+
50,Male,0.7,0.2,206,18,17,8.4,4.2,1,2
|
| 305 |
+
55,Female,0.8,0.2,155,21,17,6.9,3.8,1.4,1
|
| 306 |
+
54,Female,1.4,0.7,195,36,16,7.9,3.7,0.9,2
|
| 307 |
+
48,Male,1.6,1,588,74,113,7.3,2.4,0.4,1
|
| 308 |
+
30,Male,0.8,0.2,174,21,47,4.6,2.3,1,1
|
| 309 |
+
45,Female,0.8,0.2,165,22,18,8.2,4.1,1,1
|
| 310 |
+
48,Female,1.1,0.7,527,178,250,8,4.2,1.1,1
|
| 311 |
+
51,Male,0.8,0.2,175,48,22,8.1,4.6,1.3,1
|
| 312 |
+
54,Female,23.2,12.6,574,43,47,7.2,3.5,0.9,1
|
| 313 |
+
27,Male,1.3,0.6,106,25,54,8.5,4.8,,2
|
| 314 |
+
30,Female,0.8,0.2,158,25,22,7.9,4.5,1.3,2
|
| 315 |
+
26,Male,2,0.9,195,24,65,7.8,4.3,1.2,1
|
| 316 |
+
22,Male,0.9,0.3,179,18,21,6.7,3.7,1.2,2
|
| 317 |
+
44,Male,0.9,0.2,182,29,82,7.1,3.7,1,2
|
| 318 |
+
35,Male,0.7,0.2,198,42,30,6.8,3.4,1,1
|
| 319 |
+
38,Male,3.7,2.2,216,179,232,7.8,4.5,1.3,1
|
| 320 |
+
14,Male,0.9,0.3,310,21,16,8.1,4.2,1,2
|
| 321 |
+
30,Female,0.7,0.2,63,31,27,5.8,3.4,1.4,1
|
| 322 |
+
30,Female,0.8,0.2,198,30,58,5.2,2.8,1.1,1
|
| 323 |
+
36,Male,1.7,0.5,205,36,34,7.1,3.9,1.2,1
|
| 324 |
+
12,Male,0.8,0.2,302,47,67,6.7,3.5,1.1,2
|
| 325 |
+
60,Male,2.6,1.2,171,42,37,5.4,2.7,1,1
|
| 326 |
+
42,Male,0.8,0.2,158,27,23,6.7,3.1,0.8,2
|
| 327 |
+
36,Female,1.2,0.4,358,160,90,8.3,4.4,1.1,2
|
| 328 |
+
24,Male,3.3,1.6,174,11,33,7.6,3.9,1,2
|
| 329 |
+
43,Male,0.8,0.2,192,29,20,6,2.9,0.9,2
|
| 330 |
+
21,Male,0.7,0.2,211,14,23,7.3,4.1,1.2,2
|
| 331 |
+
26,Male,2,0.9,157,54,68,6.1,2.7,0.8,1
|
| 332 |
+
26,Male,1.7,0.6,210,62,56,5.4,2.2,0.6,1
|
| 333 |
+
26,Male,7.1,3.3,258,80,113,6.2,2.9,0.8,1
|
| 334 |
+
36,Female,0.7,0.2,152,21,25,5.9,3.1,1.1,2
|
| 335 |
+
13,Female,0.7,0.2,350,17,24,7.4,4,1.1,1
|
| 336 |
+
13,Female,0.7,0.1,182,24,19,8.9,4.9,1.2,1
|
| 337 |
+
75,Male,6.7,3.6,458,198,143,6.2,3.2,1,1
|
| 338 |
+
75,Male,2.5,1.2,375,85,68,6.4,2.9,0.8,1
|
| 339 |
+
75,Male,1.8,0.8,405,79,50,6.1,2.9,0.9,1
|
| 340 |
+
75,Male,1.4,0.4,215,50,30,5.9,2.6,0.7,1
|
| 341 |
+
75,Male,0.9,0.2,206,44,33,6.2,2.9,0.8,1
|
| 342 |
+
36,Female,0.8,0.2,650,70,138,6.6,3.1,0.8,1
|
| 343 |
+
35,Male,0.8,0.2,198,36,32,7,4,1.3,2
|
| 344 |
+
70,Male,3.1,1.6,198,40,28,5.6,2,0.5,1
|
| 345 |
+
37,Male,0.8,0.2,195,60,40,8.2,5,1.5,2
|
| 346 |
+
60,Male,2.9,1.3,230,32,44,5.6,2,0.5,1
|
| 347 |
+
46,Male,0.6,0.2,115,14,11,6.9,3.4,0.9,1
|
| 348 |
+
38,Male,0.7,0.2,216,349,105,7,3.5,1,1
|
| 349 |
+
70,Male,1.3,0.4,358,19,14,6.1,2.8,0.8,1
|
| 350 |
+
49,Female,0.8,0.2,158,19,15,6.6,3.6,1.2,2
|
| 351 |
+
37,Male,1.8,0.8,145,62,58,5.7,2.9,1,1
|
| 352 |
+
37,Male,1.3,0.4,195,41,38,5.3,2.1,0.6,1
|
| 353 |
+
26,Female,0.7,0.2,144,36,33,8.2,4.3,1.1,1
|
| 354 |
+
48,Female,1.4,0.8,621,110,176,7.2,3.9,1.1,1
|
| 355 |
+
48,Female,0.8,0.2,150,25,23,7.5,3.9,1,1
|
| 356 |
+
19,Male,1.4,0.8,178,13,26,8,4.6,1.3,2
|
| 357 |
+
33,Male,0.7,0.2,256,21,30,8.5,3.9,0.8,1
|
| 358 |
+
33,Male,2.1,0.7,205,50,38,6.8,3,0.7,1
|
| 359 |
+
37,Male,0.7,0.2,176,28,34,5.6,2.6,0.8,1
|
| 360 |
+
69,Female,0.8,0.2,146,42,70,8.4,4.9,1.4,2
|
| 361 |
+
24,Male,0.7,0.2,218,47,26,6.6,3.3,1,1
|
| 362 |
+
65,Female,0.7,0.2,182,23,28,6.8,2.9,0.7,2
|
| 363 |
+
55,Male,1.1,0.3,215,21,15,6.2,2.9,0.8,2
|
| 364 |
+
42,Female,0.9,0.2,165,26,29,8.5,4.4,1,2
|
| 365 |
+
21,Male,0.8,0.2,183,33,57,6.8,3.5,1,2
|
| 366 |
+
40,Male,0.7,0.2,176,28,43,5.3,2.4,0.8,2
|
| 367 |
+
16,Male,0.7,0.2,418,28,35,7.2,4.1,1.3,2
|
| 368 |
+
60,Male,2.2,1,271,45,52,6.1,2.9,0.9,2
|
| 369 |
+
42,Female,0.8,0.2,182,22,20,7.2,3.9,1.1,1
|
| 370 |
+
58,Female,0.8,0.2,130,24,25,7,4,1.3,1
|
| 371 |
+
54,Female,22.6,11.4,558,30,37,7.8,3.4,0.8,1
|
| 372 |
+
33,Male,0.8,0.2,135,30,29,7.2,4.4,1.5,2
|
| 373 |
+
48,Male,0.7,0.2,326,29,17,8.7,5.5,1.7,1
|
| 374 |
+
25,Female,0.7,0.1,140,32,25,7.6,4.3,1.3,2
|
| 375 |
+
56,Female,0.7,0.1,145,26,23,7,4,1.3,2
|
| 376 |
+
47,Male,3.5,1.6,206,32,31,6.8,3.4,1,1
|
| 377 |
+
33,Male,0.7,0.1,168,35,33,7,3.7,1.1,1
|
| 378 |
+
20,Female,0.6,0.2,202,12,13,6.1,3,0.9,2
|
| 379 |
+
50,Female,0.7,0.1,192,20,41,7.3,3.3,0.8,1
|
| 380 |
+
72,Male,0.7,0.2,185,16,22,7.3,3.7,1,2
|
| 381 |
+
50,Male,1.7,0.8,331,36,53,7.3,3.4,0.9,1
|
| 382 |
+
39,Male,0.6,0.2,188,28,43,8.1,3.3,0.6,1
|
| 383 |
+
58,Female,0.7,0.1,172,27,22,6.7,3.2,0.9,1
|
| 384 |
+
60,Female,1.4,0.7,159,10,12,4.9,2.5,1,2
|
| 385 |
+
34,Male,3.7,2.1,490,115,91,6.5,2.8,0.7,1
|
| 386 |
+
50,Male,0.8,0.2,152,29,30,7.4,4.1,1.3,1
|
| 387 |
+
38,Male,2.7,1.4,105,25,21,7.5,4.2,1.2,2
|
| 388 |
+
51,Male,0.8,0.2,160,34,20,6.9,3.7,1.1,1
|
| 389 |
+
46,Male,0.8,0.2,160,31,40,7.3,3.8,1.1,1
|
| 390 |
+
72,Male,0.6,0.1,102,31,35,6.3,3.2,1,1
|
| 391 |
+
72,Male,0.8,0.2,148,23,35,6,3,1,1
|
| 392 |
+
75,Male,0.9,0.2,162,25,20,6.9,3.7,1.1,1
|
| 393 |
+
41,Male,7.5,4.3,149,94,92,6.3,3.1,0.9,1
|
| 394 |
+
41,Male,2.7,1.3,580,142,68,8,4,1,1
|
| 395 |
+
48,Female,1,0.3,310,37,56,5.9,2.5,0.7,1
|
| 396 |
+
45,Male,0.8,0.2,140,24,20,6.3,3.2,1,2
|
| 397 |
+
74,Male,1,0.3,175,30,32,6.4,3.4,1.1,1
|
| 398 |
+
78,Male,1,0.3,152,28,70,6.3,3.1,0.9,1
|
| 399 |
+
38,Male,0.8,0.2,208,25,50,7.1,3.7,1,1
|
| 400 |
+
27,Male,1,0.2,205,137,145,6,3,1,1
|
| 401 |
+
66,Female,0.7,0.2,162,24,20,6.4,3.2,1,2
|
| 402 |
+
50,Male,7.3,3.7,92,44,236,6.8,1.6,0.3,1
|
| 403 |
+
42,Female,0.5,0.1,162,155,108,8.1,4,0.9,1
|
| 404 |
+
65,Male,0.7,0.2,199,19,22,6.3,3.6,1.3,2
|
| 405 |
+
22,Male,0.8,0.2,198,20,26,6.8,3.9,1.3,1
|
| 406 |
+
31,Female,0.8,0.2,215,15,21,7.6,4,1.1,1
|
| 407 |
+
45,Male,0.7,0.2,180,18,58,6.7,3.7,1.2,2
|
| 408 |
+
12,Male,1,0.2,719,157,108,7.2,3.7,1,1
|
| 409 |
+
48,Male,2.4,1.1,554,141,73,7.5,3.6,0.9,1
|
| 410 |
+
48,Male,5,2.6,555,284,190,6.5,3.3,1,1
|
| 411 |
+
18,Male,1.4,0.6,215,440,850,5,1.9,0.6,1
|
| 412 |
+
23,Female,2.3,0.8,509,28,44,6.9,2.9,0.7,2
|
| 413 |
+
65,Male,4.9,2.7,190,33,71,7.1,2.9,0.7,1
|
| 414 |
+
48,Male,0.7,0.2,208,15,30,4.6,2.1,0.8,2
|
| 415 |
+
65,Male,1.4,0.6,260,28,24,5.2,2.2,0.7,2
|
| 416 |
+
70,Male,1.3,0.3,690,93,40,3.6,2.7,0.7,1
|
| 417 |
+
70,Male,0.6,0.1,862,76,180,6.3,2.7,0.75,1
|
| 418 |
+
11,Male,0.7,0.1,592,26,29,7.1,4.2,1.4,2
|
| 419 |
+
50,Male,4.2,2.3,450,69,50,7,3,0.7,1
|
| 420 |
+
55,Female,8.2,3.9,1350,52,65,6.7,2.9,0.7,1
|
| 421 |
+
55,Female,10.9,5.1,1350,48,57,6.4,2.3,0.5,1
|
| 422 |
+
26,Male,1,0.3,163,48,71,7.1,3.7,1,2
|
| 423 |
+
41,Male,1.2,0.5,246,34,42,6.9,3.4,0.97,1
|
| 424 |
+
53,Male,1.6,0.9,178,44,59,6.5,3.9,1.5,2
|
| 425 |
+
32,Female,0.7,0.1,240,12,15,7,3,0.7,1
|
| 426 |
+
58,Male,0.4,0.1,100,59,126,4.3,2.5,1.4,1
|
| 427 |
+
45,Male,1.3,0.6,166,49,42,5.6,2.5,0.8,2
|
| 428 |
+
65,Male,0.9,0.2,170,33,66,7,3,0.75,1
|
| 429 |
+
52,Female,0.6,0.1,194,10,12,6.9,3.3,0.9,2
|
| 430 |
+
73,Male,1.9,0.7,1750,102,141,5.5,2,0.5,1
|
| 431 |
+
53,Female,0.7,0.1,182,20,33,4.8,1.9,0.6,1
|
| 432 |
+
47,Female,0.8,0.2,236,10,13,6.7,2.9,0.76,2
|
| 433 |
+
29,Male,0.7,0.2,165,55,87,7.5,4.6,1.58,1
|
| 434 |
+
41,Female,0.9,0.2,201,31,24,7.6,3.8,1,2
|
| 435 |
+
30,Female,0.7,0.2,194,32,36,7.5,3.6,0.92,2
|
| 436 |
+
17,Female,0.5,0.1,206,28,21,7.1,4.5,1.7,2
|
| 437 |
+
23,Male,1,0.3,212,41,80,6.2,3.1,1,1
|
| 438 |
+
35,Male,1.6,0.7,157,15,44,5.2,2.5,0.9,1
|
| 439 |
+
65,Male,0.8,0.2,162,30,90,3.8,1.4,0.5,1
|
| 440 |
+
42,Female,0.8,0.2,168,25,18,6.2,3.1,1,1
|
| 441 |
+
49,Female,0.8,0.2,198,23,20,7,4.3,1.5,1
|
| 442 |
+
42,Female,2.3,1.1,292,29,39,4.1,1.8,0.7,1
|
| 443 |
+
42,Female,7.4,3.6,298,52,102,4.6,1.9,0.7,1
|
| 444 |
+
42,Female,0.7,0.2,152,35,81,6.2,3.2,1.06,1
|
| 445 |
+
61,Male,0.8,0.2,163,18,19,6.3,2.8,0.8,2
|
| 446 |
+
17,Male,0.9,0.2,279,40,46,7.3,4,1.2,2
|
| 447 |
+
54,Male,0.8,0.2,181,35,20,5.5,2.7,0.96,1
|
| 448 |
+
45,Female,23.3,12.8,1550,425,511,7.7,3.5,0.8,1
|
| 449 |
+
48,Female,0.8,0.2,142,26,25,6,2.6,0.7,1
|
| 450 |
+
48,Female,0.9,0.2,173,26,27,6.2,3.1,1,1
|
| 451 |
+
65,Male,7.9,4.3,282,50,72,6,3,1,1
|
| 452 |
+
35,Male,0.8,0.2,279,20,25,7.2,3.2,0.8,1
|
| 453 |
+
58,Male,0.9,0.2,1100,25,36,7.1,3.5,0.9,1
|
| 454 |
+
46,Male,0.7,0.2,224,40,23,7.1,3,0.7,1
|
| 455 |
+
28,Male,0.6,0.2,159,15,16,7,3.5,1,2
|
| 456 |
+
21,Female,0.6,0.1,186,25,22,6.8,3.4,1,1
|
| 457 |
+
32,Male,0.7,0.2,189,22,43,7.4,3.1,0.7,2
|
| 458 |
+
61,Male,0.8,0.2,192,28,35,6.9,3.4,0.9,2
|
| 459 |
+
26,Male,6.8,3.2,140,37,19,3.6,0.9,0.3,1
|
| 460 |
+
65,Male,1.1,0.5,686,16,46,5.7,1.5,0.35,1
|
| 461 |
+
22,Female,2.2,1,215,159,51,5.5,2.5,0.8,1
|
| 462 |
+
28,Female,0.8,0.2,309,55,23,6.8,4.1,1.51,1
|
| 463 |
+
38,Male,0.7,0.2,110,22,18,6.4,2.5,0.64,1
|
| 464 |
+
25,Male,0.8,0.1,130,23,42,8,4,1,1
|
| 465 |
+
45,Female,0.7,0.2,164,21,53,4.5,1.4,0.45,2
|
| 466 |
+
45,Female,0.6,0.1,270,23,42,5.1,2,0.5,2
|
| 467 |
+
28,Female,0.6,0.1,137,22,16,4.9,1.9,0.6,2
|
| 468 |
+
28,Female,1,0.3,90,18,108,6.8,3.1,0.8,2
|
| 469 |
+
66,Male,1,0.3,190,30,54,5.3,2.1,0.6,1
|
| 470 |
+
66,Male,0.8,0.2,165,22,32,4.4,2,0.8,1
|
| 471 |
+
66,Male,1.1,0.5,167,13,56,7.1,4.1,1.36,1
|
| 472 |
+
49,Female,0.6,0.1,185,17,26,6.6,2.9,0.7,2
|
| 473 |
+
42,Male,0.7,0.2,197,64,33,5.8,2.4,0.7,2
|
| 474 |
+
42,Male,1,0.3,154,38,21,6.8,3.9,1.3,2
|
| 475 |
+
35,Male,2,1.1,226,33,135,6,2.7,0.8,2
|
| 476 |
+
38,Male,2.2,1,310,119,42,7.9,4.1,1,2
|
| 477 |
+
38,Male,0.9,0.3,310,15,25,5.5,2.7,1,1
|
| 478 |
+
55,Male,0.6,0.2,220,24,32,5.1,2.4,0.88,1
|
| 479 |
+
33,Male,7.1,3.7,196,622,497,6.9,3.6,1.09,1
|
| 480 |
+
33,Male,3.4,1.6,186,779,844,7.3,3.2,0.7,1
|
| 481 |
+
7,Male,0.5,0.1,352,28,51,7.9,4.2,1.1,2
|
| 482 |
+
45,Male,2.3,1.3,282,132,368,7.3,4,1.2,1
|
| 483 |
+
45,Male,1.1,0.4,92,91,188,7.2,3.8,1.11,1
|
| 484 |
+
30,Male,0.8,0.2,182,46,57,7.8,4.3,1.2,2
|
| 485 |
+
62,Male,5,2.1,103,18,40,5,2.1,1.72,1
|
| 486 |
+
22,Female,6.7,3.2,850,154,248,6.2,2.8,0.8,1
|
| 487 |
+
42,Female,0.8,0.2,195,18,15,6.7,3,0.8,1
|
| 488 |
+
32,Male,0.7,0.2,276,102,190,6,2.9,0.93,1
|
| 489 |
+
60,Male,0.7,0.2,171,31,26,7,3.5,1,2
|
| 490 |
+
65,Male,0.8,0.1,146,17,29,5.9,3.2,1.18,2
|
| 491 |
+
53,Female,0.8,0.2,193,96,57,6.7,3.6,1.16,1
|
| 492 |
+
27,Male,1,0.3,180,56,111,6.8,3.9,1.85,2
|
| 493 |
+
35,Female,1,0.3,805,133,103,7.9,3.3,0.7,1
|
| 494 |
+
65,Male,0.7,0.2,265,30,28,5.2,1.8,0.52,2
|
| 495 |
+
25,Male,0.7,0.2,185,196,401,6.5,3.9,1.5,1
|
| 496 |
+
32,Male,0.7,0.2,165,31,29,6.1,3,0.96,2
|
| 497 |
+
24,Male,1,0.2,189,52,31,8,4.8,1.5,1
|
| 498 |
+
67,Male,2.2,1.1,198,42,39,7.2,3,0.7,1
|
| 499 |
+
68,Male,1.8,0.5,151,18,22,6.5,4,1.6,1
|
| 500 |
+
55,Male,3.6,1.6,349,40,70,7.2,2.9,0.6,1
|
| 501 |
+
70,Male,2.7,1.2,365,62,55,6,2.4,0.6,1
|
| 502 |
+
36,Male,2.8,1.5,305,28,76,5.9,2.5,0.7,1
|
| 503 |
+
42,Male,0.8,0.2,127,29,30,4.9,2.7,1.2,1
|
| 504 |
+
53,Male,19.8,10.4,238,39,221,8.1,2.5,0.4,1
|
| 505 |
+
32,Male,30.5,17.1,218,39,79,5.5,2.7,0.9,1
|
| 506 |
+
32,Male,32.6,14.1,219,95,235,5.8,3.1,1.1,1
|
| 507 |
+
56,Male,17.7,8.8,239,43,185,5.6,2.4,0.7,1
|
| 508 |
+
50,Male,0.9,0.3,194,190,73,7.5,3.9,1,1
|
| 509 |
+
46,Male,18.4,8.5,450,119,230,7.5,3.3,0.7,1
|
| 510 |
+
46,Male,20,10,254,140,540,5.4,3,1.2,1
|
| 511 |
+
37,Female,0.8,0.2,205,31,36,9.2,4.6,1,2
|
| 512 |
+
45,Male,2.2,1.6,320,37,48,6.8,3.4,1,1
|
| 513 |
+
56,Male,1,0.3,195,22,28,5.8,2.6,0.8,2
|
| 514 |
+
69,Male,0.9,0.2,215,32,24,6.9,3,0.7,1
|
| 515 |
+
49,Male,1,0.3,230,48,58,8.4,4.2,1,1
|
| 516 |
+
49,Male,3.9,2.1,189,65,181,6.9,3,0.7,1
|
| 517 |
+
60,Male,0.9,0.3,168,16,24,6.7,3,0.8,1
|
| 518 |
+
28,Male,0.9,0.2,215,50,28,8,4,1,1
|
| 519 |
+
45,Male,2.9,1.4,210,74,68,7.2,3.6,1,1
|
| 520 |
+
35,Male,26.3,12.1,108,168,630,9.2,2,0.3,1
|
| 521 |
+
62,Male,1.8,0.9,224,69,155,8.6,4,0.8,1
|
| 522 |
+
55,Male,4.4,2.9,230,14,25,7.1,2.1,0.4,1
|
| 523 |
+
46,Female,0.8,0.2,185,24,15,7.9,3.7,0.8,1
|
| 524 |
+
50,Male,0.6,0.2,137,15,16,4.8,2.6,1.1,1
|
| 525 |
+
29,Male,0.8,0.2,156,12,15,6.8,3.7,1.1,2
|
| 526 |
+
53,Female,0.9,0.2,210,35,32,8,3.9,0.9,2
|
| 527 |
+
46,Male,9.4,5.2,268,21,63,6.4,2.8,0.8,1
|
| 528 |
+
40,Male,3.5,1.6,298,68,200,7.1,3.4,0.9,1
|
| 529 |
+
45,Male,1.7,0.8,315,12,38,6.3,2.1,0.5,1
|
| 530 |
+
55,Male,3.3,1.5,214,54,152,5.1,1.8,0.5,1
|
| 531 |
+
22,Female,1.1,0.3,138,14,21,7,3.8,1.1,2
|
| 532 |
+
40,Male,30.8,18.3,285,110,186,7.9,2.7,0.5,1
|
| 533 |
+
62,Male,0.7,0.2,162,12,17,8.2,3.2,0.6,2
|
| 534 |
+
46,Female,1.4,0.4,298,509,623,3.6,1,0.3,1
|
| 535 |
+
39,Male,1.6,0.8,230,88,74,8,4,1,2
|
| 536 |
+
60,Male,19.6,9.5,466,46,52,6.1,2,0.4,1
|
| 537 |
+
46,Male,15.8,7.2,227,67,220,6.9,2.6,0.6,1
|
| 538 |
+
10,Female,0.8,0.1,395,25,75,7.6,3.6,0.9,1
|
| 539 |
+
52,Male,1.8,0.8,97,85,78,6.4,2.7,0.7,1
|
| 540 |
+
65,Female,0.7,0.2,406,24,45,7.2,3.5,0.9,2
|
| 541 |
+
42,Male,0.8,0.2,114,21,23,7,3,0.7,2
|
| 542 |
+
42,Male,0.8,0.2,198,29,19,6.6,3,0.8,2
|
| 543 |
+
62,Male,0.7,0.2,173,46,47,7.3,4.1,1.2,2
|
| 544 |
+
40,Male,1.2,0.6,204,23,27,7.6,4,1.1,1
|
| 545 |
+
54,Female,5.5,3.2,350,67,42,7,3.2,0.8,1
|
| 546 |
+
45,Female,0.7,0.2,153,41,42,4.5,2.2,0.9,2
|
| 547 |
+
45,Male,20.2,11.7,188,47,32,5.4,2.3,0.7,1
|
| 548 |
+
50,Female,27.7,10.8,380,39,348,7.1,2.3,0.4,1
|
| 549 |
+
42,Male,11.1,6.1,214,60,186,6.9,2.8,2.8,1
|
| 550 |
+
40,Female,2.1,1,768,74,141,7.8,4.9,1.6,1
|
| 551 |
+
46,Male,3.3,1.5,172,25,41,5.6,2.4,0.7,1
|
| 552 |
+
29,Male,1.2,0.4,160,20,22,6.2,3,0.9,2
|
| 553 |
+
45,Male,0.6,0.1,196,29,30,5.8,2.9,1,1
|
| 554 |
+
46,Male,10.2,4.2,232,58,140,7,2.7,0.6,1
|
| 555 |
+
73,Male,1.8,0.9,220,20,43,6.5,3,0.8,1
|
| 556 |
+
55,Male,0.8,0.2,290,139,87,7,3,0.7,1
|
| 557 |
+
51,Male,0.7,0.1,180,25,27,6.1,3.1,1,1
|
| 558 |
+
51,Male,2.9,1.2,189,80,125,6.2,3.1,1,1
|
| 559 |
+
51,Male,4,2.5,275,382,330,7.5,4,1.1,1
|
| 560 |
+
26,Male,42.8,19.7,390,75,138,7.5,2.6,0.5,1
|
| 561 |
+
66,Male,15.2,7.7,356,321,562,6.5,2.2,0.4,1
|
| 562 |
+
66,Male,16.6,7.6,315,233,384,6.9,2,0.4,1
|
| 563 |
+
66,Male,17.3,8.5,388,173,367,7.8,2.6,0.5,1
|
| 564 |
+
64,Male,1.4,0.5,298,31,83,7.2,2.6,0.5,1
|
| 565 |
+
38,Female,0.6,0.1,165,22,34,5.9,2.9,0.9,2
|
| 566 |
+
43,Male,22.5,11.8,143,22,143,6.6,2.1,0.46,1
|
| 567 |
+
50,Female,1,0.3,191,22,31,7.8,4,1,2
|
| 568 |
+
52,Male,2.7,1.4,251,20,40,6,1.7,0.39,1
|
| 569 |
+
20,Female,16.7,8.4,200,91,101,6.9,3.5,1.02,1
|
| 570 |
+
16,Male,7.7,4.1,268,213,168,7.1,4,1.2,1
|
| 571 |
+
16,Male,2.6,1.2,236,131,90,5.4,2.6,0.9,1
|
| 572 |
+
90,Male,1.1,0.3,215,46,134,6.9,3,0.7,1
|
| 573 |
+
32,Male,15.6,9.5,134,54,125,5.6,4,2.5,1
|
| 574 |
+
32,Male,3.7,1.6,612,50,88,6.2,1.9,0.4,1
|
| 575 |
+
32,Male,12.1,6,515,48,92,6.6,2.4,0.5,1
|
| 576 |
+
32,Male,25,13.7,560,41,88,7.9,2.5,2.5,1
|
| 577 |
+
32,Male,15,8.2,289,58,80,5.3,2.2,0.7,1
|
| 578 |
+
32,Male,12.7,8.4,190,28,47,5.4,2.6,0.9,1
|
| 579 |
+
60,Male,0.5,0.1,500,20,34,5.9,1.6,0.37,2
|
| 580 |
+
40,Male,0.6,0.1,98,35,31,6,3.2,1.1,1
|
| 581 |
+
52,Male,0.8,0.2,245,48,49,6.4,3.2,1,1
|
| 582 |
+
31,Male,1.3,0.5,184,29,32,6.8,3.4,1,1
|
| 583 |
+
38,Male,1,0.3,216,21,24,7.3,4.4,1.5,2
|
data_cache/icu_sepsis.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data_cache/nephrology_ckd.csv
ADDED
|
@@ -0,0 +1,363 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,bu,sc,sod,pot,hemo,pcv,wbcc,rbcc,htn,dm,cad,appet,pe,ane,class
|
| 2 |
+
48,80,1.02,1,0,?,normal,notpresent,notpresent,121,36,1.2,?,?,15.4,44,7800,5.2,yes,yes,no,good,no,no,ckd
|
| 3 |
+
7,50,1.02,4,0,?,normal,notpresent,notpresent,?,18,0.8,?,?,11.3,38,6000,?,no,no,no,good,no,no,ckd
|
| 4 |
+
62,80,1.01,2,3,normal,normal,notpresent,notpresent,423,53,1.8,?,?,9.6,31,7500,?,no,yes,no,poor,no,yes,ckd
|
| 5 |
+
48,70,1.005,4,0,normal,abnormal,present,notpresent,117,56,3.8,111,2.5,11.2,32,6700,3.9,yes,no,no,poor,yes,yes,ckd
|
| 6 |
+
51,80,1.01,2,0,normal,normal,notpresent,notpresent,106,26,1.4,?,?,11.6,35,7300,4.6,no,no,no,good,no,no,ckd
|
| 7 |
+
60,90,1.015,3,0,?,?,notpresent,notpresent,74,25,1.1,142,3.2,12.2,39,7800,4.4,yes,yes,no,good,yes,no,ckd
|
| 8 |
+
68,70,1.01,0,0,?,normal,notpresent,notpresent,100,54,24,104,4,12.4,36,?,?,no,no,no,good,no,no,ckd
|
| 9 |
+
52,100,1.015,3,0,normal,abnormal,present,notpresent,138,60,1.9,?,?,10.8,33,9600,4,yes,yes,no,good,no,yes,ckd
|
| 10 |
+
53,90,1.02,2,0,abnormal,abnormal,present,notpresent,70,107,7.2,114,3.7,9.5,29,12100,3.7,yes,yes,no,poor,no,yes,ckd
|
| 11 |
+
50,60,1.01,2,4,?,abnormal,present,notpresent,490,55,4,?,?,9.4,28,?,?,yes,yes,no,good,no,yes,ckd
|
| 12 |
+
63,70,1.01,3,0,abnormal,abnormal,present,notpresent,380,60,2.7,131,4.2,10.8,32,4500,3.8,yes,yes,no,poor,yes,no,ckd
|
| 13 |
+
68,70,1.015,3,1,?,normal,present,notpresent,208,72,2.1,138,5.8,9.7,28,12200,3.4,yes,yes,yes,poor,yes,no,ckd
|
| 14 |
+
68,70,?,?,?,?,?,notpresent,notpresent,98,86,4.6,135,3.4,9.8,?,?,?,yes,yes,yes,poor,yes,no,ckd
|
| 15 |
+
68,80,1.01,3,2,normal,abnormal,present,present,157,90,4.1,130,6.4,5.6,16,11000,2.6,yes,yes,yes,poor,yes,no,ckd
|
| 16 |
+
40,80,1.015,3,0,?,normal,notpresent,notpresent,76,162,9.6,141,4.9,7.6,24,3800,2.8,yes,no,no,good,no,yes,ckd
|
| 17 |
+
47,70,1.015,2,0,?,normal,notpresent,notpresent,99,46,2.2,138,4.1,12.6,?,?,?,no,no,no,good,no,no,ckd
|
| 18 |
+
47,80,?,?,?,?,?,notpresent,notpresent,114,87,5.2,139,3.7,12.1,?,?,?,yes,no,no,poor,no,no,ckd
|
| 19 |
+
60,100,1.025,0,3,?,normal,notpresent,notpresent,263,27,1.3,135,4.3,12.7,37,11400,4.3,yes,yes,yes,good,no,no,ckd
|
| 20 |
+
62,60,1.015,1,0,?,abnormal,present,notpresent,100,31,1.6,?,?,10.3,30,5300,3.7,yes,no,yes,good,no,no,ckd
|
| 21 |
+
61,80,1.015,2,0,abnormal,abnormal,notpresent,notpresent,173,148,3.9,135,5.2,7.7,24,9200,3.2,yes,yes,yes,poor,yes,yes,ckd
|
| 22 |
+
60,90,?,?,?,?,?,notpresent,notpresent,?,180,76,4.5,?,10.9,32,6200,3.6,yes,yes,yes,good,no,no,ckd
|
| 23 |
+
48,80,1.025,4,0,normal,abnormal,notpresent,notpresent,95,163,7.7,136,3.8,9.8,32,6900,3.4,yes,no,no,good,no,yes,ckd
|
| 24 |
+
42,100,1.015,4,0,normal,abnormal,notpresent,present,?,50,1.4,129,4,11.1,39,8300,4.6,yes,no,no,poor,no,no,ckd
|
| 25 |
+
61,60,1.025,0,0,?,normal,notpresent,notpresent,108,75,1.9,141,5.2,9.9,29,8400,3.7,yes,yes,no,good,no,yes,ckd
|
| 26 |
+
75,80,1.015,0,0,?,normal,notpresent,notpresent,156,45,2.4,140,3.4,11.6,35,10300,4,yes,yes,no,poor,no,no,ckd
|
| 27 |
+
69,70,1.01,3,4,normal,abnormal,notpresent,notpresent,264,87,2.7,130,4,12.5,37,9600,4.1,yes,yes,yes,good,yes,no,ckd
|
| 28 |
+
75,70,?,1,3,?,?,notpresent,notpresent,123,31,1.4,?,?,?,?,?,?,no,yes,no,good,no,no,ckd
|
| 29 |
+
68,70,1.005,1,0,abnormal,abnormal,present,notpresent,?,28,1.4,?,?,12.9,38,?,?,no,no,yes,good,no,no,ckd
|
| 30 |
+
73,90,1.015,3,0,?,abnormal,present,notpresent,107,33,1.5,141,4.6,10.1,30,7800,4,no,no,no,poor,no,no,ckd
|
| 31 |
+
61,90,1.01,1,1,?,normal,notpresent,notpresent,159,39,1.5,133,4.9,11.3,34,9600,4,yes,yes,no,poor,no,no,ckd
|
| 32 |
+
60,100,1.02,2,0,abnormal,abnormal,notpresent,notpresent,140,55,2.5,?,?,10.1,29,?,?,yes,no,no,poor,no,no,ckd
|
| 33 |
+
70,70,1.01,1,0,normal,?,present,present,171,153,5.2,?,?,?,?,?,?,no,yes,no,poor,no,no,ckd
|
| 34 |
+
65,90,1.02,2,1,abnormal,normal,notpresent,notpresent,270,39,2,?,?,12,36,9800,4.9,yes,yes,no,poor,no,yes,ckd
|
| 35 |
+
76,70,1.015,1,0,normal,normal,notpresent,notpresent,92,29,1.8,133,3.9,10.3,32,?,?,yes,no,no,good,no,no,ckd
|
| 36 |
+
72,80,?,?,?,?,?,notpresent,notpresent,137,65,3.4,141,4.7,9.7,28,6900,2.5,yes,yes,no,poor,no,yes,ckd
|
| 37 |
+
69,80,1.02,3,0,abnormal,normal,notpresent,notpresent,?,103,4.1,132,5.9,12.5,?,?,?,yes,no,no,good,no,no,ckd
|
| 38 |
+
82,80,1.01,2,2,normal,?,notpresent,notpresent,140,70,3.4,136,4.2,13,40,9800,4.2,yes,yes,no,good,no,no,ckd
|
| 39 |
+
46,90,1.01,2,0,normal,abnormal,notpresent,notpresent,99,80,2.1,?,?,11.1,32,9100,4.1,yes,no,no,good,no,no,ckd
|
| 40 |
+
45,70,1.01,0,0,?,normal,notpresent,notpresent,?,20,0.7,?,?,?,?,?,?,no,no,no,good,yes,no,ckd
|
| 41 |
+
47,100,1.01,0,0,?,normal,notpresent,notpresent,204,29,1,139,4.2,9.7,33,9200,4.5,yes,no,no,good,no,yes,ckd
|
| 42 |
+
35,80,1.01,1,0,abnormal,?,notpresent,notpresent,79,202,10.8,134,3.4,7.9,24,7900,3.1,no,yes,no,good,no,no,ckd
|
| 43 |
+
54,80,1.01,3,0,abnormal,abnormal,notpresent,notpresent,207,77,6.3,134,4.8,9.7,28,?,?,yes,yes,no,poor,yes,no,ckd
|
| 44 |
+
54,80,1.02,3,0,?,abnormal,notpresent,notpresent,208,89,5.9,130,4.9,9.3,?,?,?,yes,yes,no,poor,yes,no,ckd
|
| 45 |
+
48,70,1.015,0,0,?,normal,notpresent,notpresent,124,24,1.2,142,4.2,12.4,37,6400,4.7,no,yes,no,good,no,no,ckd
|
| 46 |
+
11,80,1.01,3,0,?,normal,notpresent,notpresent,?,17,0.8,?,?,15,45,8600,?,no,no,no,good,no,no,ckd
|
| 47 |
+
73,70,1.005,0,0,normal,normal,notpresent,notpresent,70,32,0.9,125,4,10,29,18900,3.5,yes,yes,no,good,yes,no,ckd
|
| 48 |
+
60,70,1.01,2,0,normal,abnormal,present,notpresent,144,72,3,?,?,9.7,29,21600,3.5,yes,yes,no,poor,no,yes,ckd
|
| 49 |
+
53,60,?,?,?,?,?,notpresent,notpresent,91,114,3.25,142,4.3,8.6,28,11000,3.8,yes,yes,no,poor,yes,yes,ckd
|
| 50 |
+
54,100,1.015,3,0,?,normal,present,notpresent,162,66,1.6,136,4.4,10.3,33,?,?,yes,yes,no,poor,yes,no,ckd
|
| 51 |
+
53,90,1.015,0,0,?,normal,notpresent,notpresent,?,38,2.2,?,?,10.9,34,4300,3.7,no,no,no,poor,no,yes,ckd
|
| 52 |
+
62,80,1.015,0,5,?,?,notpresent,notpresent,246,24,1,?,?,13.6,40,8500,4.7,yes,yes,no,good,no,no,ckd
|
| 53 |
+
63,80,1.01,2,2,normal,?,notpresent,notpresent,?,?,3.4,136,4.2,13,40,9800,4.2,yes,no,yes,good,no,no,ckd
|
| 54 |
+
76,70,1.015,3,4,normal,abnormal,present,notpresent,?,164,9.7,131,4.4,10.2,30,11300,3.4,yes,yes,yes,poor,yes,no,ckd
|
| 55 |
+
76,90,?,?,?,?,normal,notpresent,notpresent,93,155,7.3,132,4.9,?,?,?,?,yes,yes,yes,poor,no,no,ckd
|
| 56 |
+
73,80,1.02,2,0,abnormal,abnormal,notpresent,notpresent,253,142,4.6,138,5.8,10.5,33,7200,4.3,yes,yes,yes,good,no,no,ckd
|
| 57 |
+
59,100,?,?,?,?,?,notpresent,notpresent,?,96,6.4,?,?,6.6,?,?,?,yes,yes,no,good,no,yes,ckd
|
| 58 |
+
67,90,1.02,1,0,?,abnormal,present,notpresent,141,66,3.2,138,6.6,?,?,?,?,yes,no,no,good,no,no,ckd
|
| 59 |
+
67,80,1.01,1,3,normal,abnormal,notpresent,notpresent,182,391,32,163,39,?,?,?,?,no,no,no,good,yes,no,ckd
|
| 60 |
+
15,60,1.02,3,0,?,normal,notpresent,notpresent,86,15,0.6,138,4,11,33,7700,3.8,yes,yes,no,good,no,no,ckd
|
| 61 |
+
46,70,1.015,1,0,abnormal,normal,notpresent,notpresent,150,111,6.1,131,3.7,7.5,27,?,?,no,no,no,good,no,yes,ckd
|
| 62 |
+
44,90,1.01,1,0,?,normal,notpresent,notpresent,?,20,1.1,?,?,15,48,?,?,no,no,no,good,no,no,ckd
|
| 63 |
+
67,70,1.02,2,0,abnormal,normal,notpresent,notpresent,150,55,1.6,131,4.8,?,?,?,?,yes,yes,no,good,yes,no,ckd
|
| 64 |
+
65,70,1.01,2,0,?,normal,present,notpresent,112,73,3.3,?,?,10.9,37,?,?,no,no,no,good,no,no,ckd
|
| 65 |
+
26,70,1.015,0,4,?,normal,notpresent,notpresent,250,20,1.1,?,?,15.6,52,6900,6,no,yes,no,good,no,no,ckd
|
| 66 |
+
61,80,1.015,0,4,?,normal,notpresent,notpresent,360,19,0.7,137,4.4,15.2,44,8300,5.2,yes,yes,no,good,no,no,ckd
|
| 67 |
+
46,60,1.01,1,0,normal,normal,notpresent,notpresent,163,92,3.3,141,4,9.8,28,14600,3.2,yes,yes,no,good,no,no,ckd
|
| 68 |
+
64,90,1.01,3,3,?,abnormal,present,notpresent,?,35,1.3,?,?,10.3,?,?,?,yes,yes,no,good,yes,no,ckd
|
| 69 |
+
56,90,1.015,2,0,abnormal,abnormal,notpresent,notpresent,129,107,6.7,131,4.8,9.1,29,6400,3.4,yes,no,no,good,no,no,ckd
|
| 70 |
+
48,80,1.005,4,0,abnormal,abnormal,notpresent,present,133,139,8.5,132,5.5,10.3,36,6200,4,no,yes,no,good,yes,no,ckd
|
| 71 |
+
67,70,1.01,1,0,?,normal,notpresent,notpresent,102,48,3.2,137,5,11.9,34,7100,3.7,yes,yes,no,good,yes,no,ckd
|
| 72 |
+
70,80,?,?,?,?,?,notpresent,notpresent,158,85,3.2,141,3.5,10.1,30,?,?,yes,no,no,good,yes,no,ckd
|
| 73 |
+
56,80,1.01,1,0,?,normal,notpresent,notpresent,165,55,1.8,?,?,13.5,40,11800,5,yes,yes,no,poor,yes,no,ckd
|
| 74 |
+
74,80,1.01,0,0,?,normal,notpresent,notpresent,132,98,2.8,133,5,10.8,31,9400,3.8,yes,yes,no,good,no,no,ckd
|
| 75 |
+
45,90,?,?,?,?,?,notpresent,notpresent,360,45,2.4,128,4.4,8.3,29,5500,3.7,yes,yes,no,good,no,no,ckd
|
| 76 |
+
38,70,?,?,?,?,?,notpresent,notpresent,104,77,1.9,140,3.9,?,?,?,?,yes,no,no,poor,yes,no,ckd
|
| 77 |
+
48,70,1.015,1,0,normal,normal,notpresent,notpresent,127,19,1,134,3.6,?,?,?,?,yes,yes,no,good,no,no,ckd
|
| 78 |
+
59,70,1.01,3,0,normal,abnormal,notpresent,notpresent,76,186,15,135,7.6,7.1,22,3800,2.1,yes,no,no,poor,yes,yes,ckd
|
| 79 |
+
70,70,1.015,2,?,?,?,notpresent,notpresent,?,46,1.5,?,?,9.9,?,?,?,no,yes,no,poor,yes,no,ckd
|
| 80 |
+
56,80,?,?,?,?,?,notpresent,notpresent,415,37,1.9,?,?,?,?,?,?,no,yes,no,good,no,no,ckd
|
| 81 |
+
70,100,1.005,1,0,normal,abnormal,present,notpresent,169,47,2.9,?,?,11.1,32,5800,5,yes,yes,no,poor,no,no,ckd
|
| 82 |
+
58,110,1.01,4,0,?,normal,notpresent,notpresent,251,52,2.2,?,?,?,?,13200,4.7,yes,yes,no,good,no,no,ckd
|
| 83 |
+
50,70,1.02,0,0,?,normal,notpresent,notpresent,109,32,1.4,139,4.7,?,?,?,?,no,no,no,poor,no,no,ckd
|
| 84 |
+
63,100,1.01,2,2,normal,normal,notpresent,present,280,35,3.2,143,3.5,13,40,9800,4.2,yes,no,yes,good,no,no,ckd
|
| 85 |
+
56,70,1.015,4,1,abnormal,normal,notpresent,notpresent,210,26,1.7,136,3.8,16.1,52,12500,5.6,no,no,no,good,no,no,ckd
|
| 86 |
+
71,70,1.01,3,0,normal,abnormal,present,present,219,82,3.6,133,4.4,10.4,33,5600,3.6,yes,yes,yes,good,no,no,ckd
|
| 87 |
+
73,100,1.01,3,2,abnormal,abnormal,present,notpresent,295,90,5.6,140,2.9,9.2,30,7000,3.2,yes,yes,yes,poor,no,no,ckd
|
| 88 |
+
65,70,1.01,0,0,?,normal,notpresent,notpresent,93,66,1.6,137,4.5,11.6,36,11900,3.9,no,yes,no,good,no,no,ckd
|
| 89 |
+
62,90,1.015,1,0,?,normal,notpresent,notpresent,94,25,1.1,131,3.7,?,?,?,?,yes,no,no,good,yes,yes,ckd
|
| 90 |
+
60,80,1.01,1,1,?,normal,notpresent,notpresent,172,32,2.7,?,?,11.2,36,?,?,no,yes,yes,poor,no,no,ckd
|
| 91 |
+
65,60,1.015,1,0,?,normal,notpresent,notpresent,91,51,2.2,132,3.8,10,32,9100,4,yes,yes,no,poor,yes,no,ckd
|
| 92 |
+
50,140,?,?,?,?,?,notpresent,notpresent,101,106,6.5,135,4.3,6.2,18,5800,2.3,yes,yes,no,poor,no,yes,ckd
|
| 93 |
+
56,180,?,0,4,?,abnormal,notpresent,notpresent,298,24,1.2,139,3.9,11.2,32,10400,4.2,yes,yes,no,poor,yes,no,ckd
|
| 94 |
+
34,70,1.015,4,0,abnormal,abnormal,notpresent,notpresent,153,22,0.9,133,3.8,?,?,?,?,no,no,no,good,yes,no,ckd
|
| 95 |
+
71,90,1.015,2,0,?,abnormal,present,present,88,80,4.4,139,5.7,11.3,33,10700,3.9,no,no,no,good,no,no,ckd
|
| 96 |
+
17,60,1.01,0,0,?,normal,notpresent,notpresent,92,32,2.1,141,4.2,13.9,52,7000,?,no,no,no,good,no,no,ckd
|
| 97 |
+
76,70,1.015,2,0,normal,abnormal,present,notpresent,226,217,10.2,?,?,10.2,36,12700,4.2,yes,no,no,poor,yes,yes,ckd
|
| 98 |
+
55,90,?,?,?,?,?,notpresent,notpresent,143,88,2,?,?,?,?,?,?,yes,yes,no,poor,yes,no,ckd
|
| 99 |
+
65,80,1.015,0,0,?,normal,notpresent,notpresent,115,32,11.5,139,4,14.1,42,6800,5.2,no,no,no,good,no,no,ckd
|
| 100 |
+
50,90,?,?,?,?,?,notpresent,notpresent,89,118,6.1,127,4.4,6,17,6500,?,yes,yes,no,good,yes,yes,ckd
|
| 101 |
+
55,100,1.015,1,4,normal,?,notpresent,notpresent,297,53,2.8,139,4.5,11.2,34,13600,4.4,yes,yes,no,good,no,no,ckd
|
| 102 |
+
45,80,1.015,0,0,?,abnormal,notpresent,notpresent,107,15,1,141,4.2,11.8,37,10200,4.2,no,no,no,good,no,no,ckd
|
| 103 |
+
54,70,?,?,?,?,?,notpresent,notpresent,233,50.1,1.9,?,?,11.7,?,?,?,no,yes,no,good,no,no,ckd
|
| 104 |
+
63,90,1.015,0,0,?,normal,notpresent,notpresent,123,19,2,142,3.8,11.7,34,11400,4.7,no,no,no,good,no,no,ckd
|
| 105 |
+
65,80,1.01,3,3,?,normal,notpresent,notpresent,294,71,4.4,128,5.4,10,32,9000,3.9,yes,yes,yes,good,no,no,ckd
|
| 106 |
+
12,60,1.015,3,0,abnormal,abnormal,present,notpresent,?,51,1.8,?,?,12.1,?,10300,?,no,no,no,good,no,no,ckd
|
| 107 |
+
47,80,1.01,0,0,?,abnormal,notpresent,notpresent,?,28,0.9,?,?,12.4,44,5600,4.3,no,no,no,good,no,yes,ckd
|
| 108 |
+
55,70,1.01,3,0,?,normal,notpresent,notpresent,99,25,1.2,?,?,11.4,?,?,?,no,no,no,poor,yes,no,ckd
|
| 109 |
+
60,70,1.01,0,0,?,normal,notpresent,notpresent,140,27,1.2,?,?,?,?,?,?,no,no,no,good,no,no,ckd
|
| 110 |
+
72,90,1.025,1,3,?,normal,notpresent,notpresent,323,40,2.2,137,5.3,12.6,?,?,?,no,yes,yes,poor,no,no,ckd
|
| 111 |
+
54,60,?,3,?,?,?,notpresent,notpresent,125,21,1.3,137,3.4,15,46,?,?,yes,yes,no,good,yes,no,ckd
|
| 112 |
+
34,70,?,?,?,?,?,notpresent,notpresent,?,219,12.2,130,3.8,6,?,?,?,yes,no,no,good,no,yes,ckd
|
| 113 |
+
43,80,1.015,2,3,?,abnormal,present,present,?,30,1.1,?,?,14,42,14900,?,no,no,no,good,no,no,ckd
|
| 114 |
+
65,100,1.015,0,0,?,normal,notpresent,notpresent,90,98,2.5,?,?,9.1,28,5500,3.6,yes,no,no,good,no,no,ckd
|
| 115 |
+
72,90,?,?,?,?,?,notpresent,notpresent,308,36,2.5,131,4.3,?,?,?,?,yes,yes,no,poor,no,no,ckd
|
| 116 |
+
70,90,1.015,0,0,?,normal,notpresent,notpresent,144,125,4,136,4.6,12,37,8200,4.5,yes,yes,no,poor,yes,no,ckd
|
| 117 |
+
71,60,1.015,4,0,normal,normal,notpresent,notpresent,118,125,5.3,136,4.9,11.4,35,15200,4.3,yes,yes,no,poor,yes,no,ckd
|
| 118 |
+
52,90,1.015,4,3,normal,abnormal,notpresent,notpresent,224,166,5.6,133,47,8.1,23,5000,2.9,yes,yes,no,good,no,yes,ckd
|
| 119 |
+
75,70,1.025,1,0,?,normal,notpresent,notpresent,158,49,1.4,135,4.7,11.1,?,?,?,yes,no,no,poor,yes,no,ckd
|
| 120 |
+
50,90,1.01,2,0,normal,abnormal,present,present,128,208,9.2,134,4.8,8.2,22,16300,2.7,no,no,no,poor,yes,yes,ckd
|
| 121 |
+
5,50,1.01,0,0,?,normal,notpresent,notpresent,?,25,0.6,?,?,11.8,36,12400,?,no,no,no,good,no,no,ckd
|
| 122 |
+
70,100,1.015,4,0,normal,normal,notpresent,notpresent,118,125,5.3,136,4.9,12,37,8400,8,yes,no,no,good,no,no,ckd
|
| 123 |
+
47,100,1.01,?,?,normal,?,notpresent,notpresent,122,?,16.9,138,5.2,10.8,33,10200,3.8,no,yes,no,good,no,no,ckd
|
| 124 |
+
48,80,1.015,0,2,?,normal,notpresent,notpresent,214,24,1.3,140,4,13.2,39,?,?,no,yes,no,poor,no,no,ckd
|
| 125 |
+
46,90,1.02,?,?,?,normal,notpresent,notpresent,213,68,2.8,146,6.3,9.3,?,?,?,yes,yes,no,good,no,no,ckd
|
| 126 |
+
45,60,1.01,2,0,normal,abnormal,present,notpresent,268,86,4,134,5.1,10,29,9200,?,yes,yes,no,good,no,no,ckd
|
| 127 |
+
41,70,1.015,2,0,?,abnormal,notpresent,present,?,68,2.8,132,4.1,11.1,33,?,?,yes,no,no,good,yes,yes,ckd
|
| 128 |
+
69,70,1.01,0,4,?,normal,notpresent,notpresent,256,40,1.2,142,5.6,?,?,?,?,no,no,no,good,no,no,ckd
|
| 129 |
+
67,70,1.01,1,0,normal,normal,notpresent,notpresent,?,106,6,137,4.9,6.1,19,6500,?,yes,no,no,good,no,yes,ckd
|
| 130 |
+
72,90,?,?,?,?,?,notpresent,notpresent,84,145,7.1,135,5.3,?,?,?,?,no,yes,no,good,no,no,ckd
|
| 131 |
+
41,80,1.015,1,4,abnormal,normal,notpresent,notpresent,210,165,18,135,4.7,?,?,?,?,no,yes,no,good,no,no,ckd
|
| 132 |
+
60,90,1.01,2,0,abnormal,normal,notpresent,notpresent,105,53,2.3,136,5.2,11.1,33,10500,4.1,no,no,no,good,no,no,ckd
|
| 133 |
+
57,90,1.015,5,0,abnormal,abnormal,notpresent,present,?,322,13,126,4.8,8,24,4200,3.3,yes,yes,yes,poor,yes,yes,ckd
|
| 134 |
+
53,100,1.01,1,3,abnormal,normal,notpresent,notpresent,213,23,1,139,4,?,?,?,?,no,yes,no,good,no,no,ckd
|
| 135 |
+
60,60,1.01,3,1,normal,abnormal,present,notpresent,288,36,1.7,130,3,7.9,25,15200,3,yes,no,no,poor,no,yes,ckd
|
| 136 |
+
69,60,?,?,?,?,?,notpresent,notpresent,171,26,48.1,?,?,?,?,?,?,yes,no,no,poor,no,no,ckd
|
| 137 |
+
65,70,1.02,1,0,abnormal,abnormal,notpresent,notpresent,139,29,1,?,?,10.5,32,?,?,yes,no,no,good,yes,no,ckd
|
| 138 |
+
8,60,1.025,3,0,normal,normal,notpresent,notpresent,78,27,0.9,?,?,12.3,41,6700,?,no,no,no,poor,yes,no,ckd
|
| 139 |
+
76,90,?,?,?,?,?,notpresent,notpresent,172,46,1.7,141,5.5,9.6,30,?,?,yes,yes,no,good,no,yes,ckd
|
| 140 |
+
39,70,1.01,0,0,?,normal,notpresent,notpresent,121,20,0.8,133,3.5,10.9,32,?,?,no,yes,no,good,no,no,ckd
|
| 141 |
+
55,90,1.01,2,1,abnormal,abnormal,notpresent,notpresent,273,235,14.2,132,3.4,8.3,22,14600,2.9,yes,yes,no,poor,yes,yes,ckd
|
| 142 |
+
56,90,1.005,4,3,abnormal,abnormal,notpresent,notpresent,242,132,16.4,140,4.2,8.4,26,?,3,yes,yes,no,poor,yes,yes,ckd
|
| 143 |
+
50,70,1.02,3,0,abnormal,normal,present,present,123,40,1.8,?,?,11.1,36,4700,?,no,no,no,good,no,no,ckd
|
| 144 |
+
66,90,1.015,2,0,?,normal,notpresent,present,153,76,3.3,?,?,?,?,?,?,no,no,no,poor,no,no,ckd
|
| 145 |
+
62,70,1.025,3,0,normal,abnormal,notpresent,notpresent,122,42,1.7,136,4.7,12.6,39,7900,3.9,yes,yes,no,good,no,no,ckd
|
| 146 |
+
71,60,1.02,3,2,normal,normal,present,notpresent,424,48,1.5,132,4,10.9,31,?,?,yes,yes,yes,good,no,no,ckd
|
| 147 |
+
59,80,1.01,1,0,abnormal,normal,notpresent,notpresent,303,35,1.3,122,3.5,10.4,35,10900,4.3,no,yes,no,poor,no,no,ckd
|
| 148 |
+
81,60,?,?,?,?,?,notpresent,notpresent,148,39,2.1,147,4.2,10.9,35,9400,2.4,yes,yes,yes,poor,yes,no,ckd
|
| 149 |
+
59,70,?,?,?,?,?,notpresent,notpresent,204,34,1.5,124,4.1,9.8,37,6000,?,no,yes,no,good,no,no,ckd
|
| 150 |
+
46,80,1.01,0,0,?,normal,notpresent,notpresent,160,40,2,140,4.1,9,27,8100,3.2,yes,no,no,poor,no,yes,ckd
|
| 151 |
+
27,60,?,?,?,?,?,notpresent,notpresent,76,44,3.9,127,4.3,?,?,?,?,no,no,no,poor,yes,yes,ckd
|
| 152 |
+
34,70,1.02,0,0,abnormal,normal,notpresent,notpresent,139,19,0.9,?,?,12.7,42,2200,?,no,no,no,poor,no,no,ckd
|
| 153 |
+
65,70,1.015,4,4,?,normal,present,notpresent,307,28,1.5,?,?,11,39,6700,?,yes,yes,no,good,no,no,ckd
|
| 154 |
+
66,70,1.015,2,5,?,normal,notpresent,notpresent,447,41,1.7,131,3.9,12.5,33,9600,4.4,yes,yes,no,good,no,no,ckd
|
| 155 |
+
83,70,1.02,3,0,normal,normal,notpresent,notpresent,102,60,2.6,115,5.7,8.7,26,12800,3.1,yes,no,no,poor,no,yes,ckd
|
| 156 |
+
62,80,1.01,1,2,?,?,notpresent,notpresent,309,113,2.9,130,2.5,10.6,34,12800,4.9,no,no,no,good,no,no,ckd
|
| 157 |
+
17,70,1.015,1,0,abnormal,normal,notpresent,notpresent,22,1.5,7.3,145,2.8,13.1,41,11200,?,no,no,no,good,no,no,ckd
|
| 158 |
+
54,70,?,?,?,?,?,notpresent,notpresent,111,146,7.5,141,4.7,11,35,8600,4.6,no,no,no,good,no,no,ckd
|
| 159 |
+
60,50,1.01,0,0,?,normal,notpresent,notpresent,261,58,2.2,113,3,?,?,4200,3.4,yes,no,no,good,no,no,ckd
|
| 160 |
+
21,90,1.01,4,0,normal,abnormal,present,present,107,40,1.7,125,3.5,8.3,23,12400,3.9,no,no,no,good,no,yes,ckd
|
| 161 |
+
65,80,1.015,2,1,normal,normal,present,notpresent,215,133,2.5,?,?,13.2,41,?,?,no,yes,no,good,no,no,ckd
|
| 162 |
+
42,90,1.02,2,0,abnormal,abnormal,present,notpresent,93,153,2.7,139,4.3,9.8,34,9800,?,no,no,no,poor,yes,yes,ckd
|
| 163 |
+
72,90,1.01,2,0,?,abnormal,present,notpresent,124,53,2.3,?,?,11.9,39,?,?,no,no,no,good,no,no,ckd
|
| 164 |
+
73,90,1.01,1,4,abnormal,abnormal,present,notpresent,234,56,1.9,?,?,10.3,28,?,?,no,yes,no,good,no,no,ckd
|
| 165 |
+
45,70,1.025,2,0,normal,abnormal,present,notpresent,117,52,2.2,136,3.8,10,30,19100,3.7,no,no,no,good,no,no,ckd
|
| 166 |
+
61,80,1.02,0,0,?,normal,notpresent,notpresent,131,23,0.8,140,4.1,11.3,35,?,?,no,no,no,good,no,no,ckd
|
| 167 |
+
30,70,1.015,0,0,?,normal,notpresent,notpresent,101,106,6.5,135,4.3,?,?,?,?,no,no,no,poor,no,no,ckd
|
| 168 |
+
54,60,1.015,3,2,?,abnormal,notpresent,notpresent,352,137,3.3,133,4.5,11.3,31,5800,3.6,yes,yes,yes,poor,yes,no,ckd
|
| 169 |
+
8,50,1.02,4,0,normal,normal,notpresent,notpresent,?,46,1,135,3.8,?,?,?,?,no,no,no,good,yes,no,ckd
|
| 170 |
+
64,60,1.01,4,1,abnormal,abnormal,notpresent,present,239,58,4.3,137,5.4,9.5,29,7500,3.4,yes,yes,no,poor,yes,no,ckd
|
| 171 |
+
6,60,1.01,4,0,abnormal,abnormal,notpresent,present,94,67,1,135,4.9,9.9,30,16700,4.8,no,no,no,poor,no,no,ckd
|
| 172 |
+
46,110,1.015,0,0,?,normal,notpresent,notpresent,130,16,0.9,?,?,?,?,?,?,no,no,no,good,no,no,ckd
|
| 173 |
+
32,90,1.025,1,0,abnormal,abnormal,notpresent,notpresent,?,223,18.1,113,6.5,5.5,15,2600,2.8,yes,yes,no,poor,yes,yes,ckd
|
| 174 |
+
80,70,1.01,2,?,?,abnormal,notpresent,notpresent,?,49,1.2,?,?,?,?,?,?,yes,yes,no,good,no,no,ckd
|
| 175 |
+
70,90,1.02,2,1,abnormal,abnormal,notpresent,present,184,98.6,3.3,138,3.9,5.8,?,?,?,yes,yes,yes,poor,no,no,ckd
|
| 176 |
+
49,100,1.01,3,0,abnormal,abnormal,notpresent,notpresent,129,158,11.8,122,3.2,8.1,24,9600,3.5,yes,yes,no,poor,yes,yes,ckd
|
| 177 |
+
57,80,?,?,?,?,?,notpresent,notpresent,?,111,9.3,124,5.3,6.8,?,4300,3,yes,yes,no,good,no,yes,ckd
|
| 178 |
+
59,100,1.02,4,2,normal,normal,notpresent,notpresent,252,40,3.2,137,4.7,11.2,30,26400,3.9,yes,yes,no,poor,yes,no,ckd
|
| 179 |
+
65,80,1.015,0,0,?,normal,notpresent,notpresent,92,37,1.5,140,5.2,8.8,25,10700,3.2,yes,no,yes,good,yes,no,ckd
|
| 180 |
+
90,90,1.025,1,0,?,normal,notpresent,notpresent,139,89,3,140,4.1,12,37,7900,3.9,yes,yes,no,good,no,no,ckd
|
| 181 |
+
64,70,?,?,?,?,?,notpresent,notpresent,113,94,7.3,137,4.3,7.9,21,?,?,yes,yes,yes,good,yes,yes,ckd
|
| 182 |
+
78,60,?,?,?,?,?,notpresent,notpresent,114,74,2.9,135,5.9,8,24,?,?,no,yes,no,good,no,yes,ckd
|
| 183 |
+
65,90,1.01,4,2,normal,normal,notpresent,notpresent,172,82,13.5,145,6.3,8.8,31,?,?,yes,yes,no,good,yes,yes,ckd
|
| 184 |
+
61,70,?,?,?,?,?,notpresent,notpresent,100,28,2.1,?,?,12.6,43,?,?,yes,yes,no,good,no,no,ckd
|
| 185 |
+
60,70,1.01,1,0,?,normal,notpresent,notpresent,109,96,3.9,135,4,13.8,41,?,?,yes,no,no,good,no,no,ckd
|
| 186 |
+
50,70,1.01,0,0,?,normal,notpresent,notpresent,230,50,2.2,?,?,12,41,10400,4.6,yes,yes,no,good,no,no,ckd
|
| 187 |
+
67,80,?,?,?,?,?,notpresent,notpresent,341,37,1.5,?,?,12.3,41,6900,4.9,yes,yes,no,good,no,yes,ckd
|
| 188 |
+
59,100,1.015,4,2,normal,normal,notpresent,notpresent,255,132,12.8,135,5.7,7.3,20,9800,3.9,yes,yes,yes,good,no,yes,ckd
|
| 189 |
+
54,120,1.015,0,0,?,normal,notpresent,notpresent,103,18,1.2,?,?,?,?,?,?,no,no,no,good,no,no,ckd
|
| 190 |
+
40,70,1.015,3,4,normal,normal,notpresent,notpresent,253,150,11.9,132,5.6,10.9,31,8800,3.4,yes,yes,no,poor,yes,no,ckd
|
| 191 |
+
55,80,1.01,3,1,normal,abnormal,present,present,214,73,3.9,137,4.9,10.9,34,7400,3.7,yes,yes,no,good,yes,no,ckd
|
| 192 |
+
68,80,1.015,0,0,?,abnormal,notpresent,notpresent,171,30,1,?,?,13.7,43,4900,5.2,no,yes,no,good,no,no,ckd
|
| 193 |
+
63,100,1.01,1,0,?,normal,notpresent,notpresent,78,61,1.8,141,4.4,12.2,36,10500,4.3,no,yes,no,good,no,no,ckd
|
| 194 |
+
33,90,1.015,0,0,?,normal,notpresent,notpresent,92,19,0.8,?,?,11.8,34,7000,?,no,no,no,good,no,no,ckd
|
| 195 |
+
68,90,1.01,0,0,?,normal,notpresent,notpresent,238,57,2.5,?,?,9.8,28,8000,3.3,yes,yes,no,poor,no,no,ckd
|
| 196 |
+
66,70,1.02,1,0,normal,?,notpresent,notpresent,248,30,1.7,138,5.3,?,?,?,?,yes,yes,no,good,no,no,ckd
|
| 197 |
+
74,60,?,?,?,?,?,notpresent,notpresent,108,68,1.8,?,?,?,?,?,?,yes,yes,no,good,no,no,ckd
|
| 198 |
+
71,90,1.01,0,3,?,normal,notpresent,notpresent,303,30,1.3,136,4.1,13,38,9200,4.6,yes,yes,no,good,no,no,ckd
|
| 199 |
+
34,60,1.02,0,0,?,normal,notpresent,notpresent,117,28,2.2,138,3.8,?,?,?,?,no,no,no,good,yes,no,ckd
|
| 200 |
+
60,90,1.01,3,5,abnormal,normal,notpresent,present,490,95,2.7,131,3.8,11.5,35,12000,4.5,yes,yes,no,good,no,no,ckd
|
| 201 |
+
64,100,1.015,4,2,abnormal,abnormal,notpresent,present,163,54,7.2,140,4.6,7.9,26,7500,3.4,yes,yes,no,good,yes,no,ckd
|
| 202 |
+
57,80,1.015,0,0,?,normal,notpresent,notpresent,120,48,1.6,?,?,11.3,36,7200,3.8,yes,yes,no,good,no,no,ckd
|
| 203 |
+
60,70,?,?,?,?,?,notpresent,notpresent,124,52,2.5,?,?,?,?,?,?,yes,no,no,good,no,no,ckd
|
| 204 |
+
59,50,1.01,3,0,normal,abnormal,notpresent,notpresent,241,191,12,114,2.9,9.6,31,15700,3.8,no,yes,no,good,yes,no,ckd
|
| 205 |
+
65,60,1.01,2,0,normal,abnormal,present,notpresent,192,17,1.7,130,4.3,?,?,9500,?,yes,yes,no,poor,no,no,ckd
|
| 206 |
+
60,90,?,?,?,?,?,notpresent,notpresent,269,51,2.8,138,3.7,11.5,35,?,?,yes,yes,yes,good,yes,no,ckd
|
| 207 |
+
51,100,1.015,2,0,normal,normal,notpresent,present,93,20,1.6,146,4.5,?,?,?,?,no,no,no,poor,no,no,ckd
|
| 208 |
+
37,100,1.01,0,0,abnormal,normal,notpresent,notpresent,?,19,1.3,?,?,15,44,4100,5.2,yes,no,no,good,no,no,ckd
|
| 209 |
+
45,70,1.01,2,0,?,normal,notpresent,notpresent,113,93,2.3,?,?,7.9,26,5700,?,no,no,yes,good,no,yes,ckd
|
| 210 |
+
65,80,?,?,?,?,?,notpresent,notpresent,74,66,2,136,5.4,9.1,25,?,?,yes,yes,yes,good,yes,no,ckd
|
| 211 |
+
80,70,1.015,2,2,?,normal,notpresent,notpresent,141,53,2.2,?,?,12.7,40,9600,?,yes,yes,no,poor,yes,no,ckd
|
| 212 |
+
72,100,?,?,?,?,?,notpresent,notpresent,201,241,13.4,127,4.8,9.4,28,?,?,yes,yes,no,good,no,yes,ckd
|
| 213 |
+
34,90,1.015,2,0,normal,normal,notpresent,notpresent,104,50,1.6,137,4.1,11.9,39,?,?,no,no,no,good,no,no,ckd
|
| 214 |
+
65,70,1.015,1,0,?,normal,notpresent,notpresent,203,46,1.4,?,?,11.4,36,5000,4.1,yes,yes,no,poor,yes,no,ckd
|
| 215 |
+
57,70,1.015,1,0,?,abnormal,notpresent,notpresent,165,45,1.5,140,3.3,10.4,31,4200,3.9,no,no,no,good,no,no,ckd
|
| 216 |
+
69,70,1.01,4,3,normal,abnormal,present,present,214,96,6.3,120,3.9,9.4,28,11500,3.3,yes,yes,yes,good,yes,yes,ckd
|
| 217 |
+
62,90,1.02,2,1,?,normal,notpresent,notpresent,169,48,2.4,138,2.9,13.4,47,11000,6.1,yes,no,no,good,no,no,ckd
|
| 218 |
+
64,90,1.015,3,2,?,abnormal,present,notpresent,463,64,2.8,135,4.1,12.2,40,9800,4.6,yes,yes,no,good,no,yes,ckd
|
| 219 |
+
48,100,?,?,?,?,?,notpresent,notpresent,103,79,5.3,135,6.3,6.3,19,7200,2.6,yes,no,yes,poor,no,no,ckd
|
| 220 |
+
48,110,1.015,3,0,abnormal,normal,present,notpresent,106,215,15.2,120,5.7,8.6,26,5000,2.5,yes,no,yes,good,no,yes,ckd
|
| 221 |
+
54,90,1.025,1,0,normal,abnormal,notpresent,notpresent,150,18,1.2,140,4.2,?,?,?,?,no,no,no,poor,yes,yes,ckd
|
| 222 |
+
59,70,1.01,1,3,abnormal,abnormal,notpresent,notpresent,424,55,1.7,138,4.5,12.6,37,10200,4.1,yes,yes,yes,good,no,no,ckd
|
| 223 |
+
56,90,1.01,4,1,normal,abnormal,present,notpresent,176,309,13.3,124,6.5,3.1,9,5400,2.1,yes,yes,no,poor,yes,yes,ckd
|
| 224 |
+
40,80,1.025,0,0,normal,normal,notpresent,notpresent,140,10,1.2,135,5,15,48,10400,4.5,no,no,no,good,no,no,notckd
|
| 225 |
+
23,80,1.025,0,0,normal,normal,notpresent,notpresent,70,36,1,150,4.6,17,52,9800,5,no,no,no,good,no,no,notckd
|
| 226 |
+
45,80,1.025,0,0,normal,normal,notpresent,notpresent,82,49,0.6,147,4.4,15.9,46,9100,4.7,no,no,no,good,no,no,notckd
|
| 227 |
+
57,80,1.025,0,0,normal,normal,notpresent,notpresent,119,17,1.2,135,4.7,15.4,42,6200,6.2,no,no,no,good,no,no,notckd
|
| 228 |
+
51,60,1.025,0,0,normal,normal,notpresent,notpresent,99,38,0.8,135,3.7,13,49,8300,5.2,no,no,no,good,no,no,notckd
|
| 229 |
+
34,80,1.025,0,0,normal,normal,notpresent,notpresent,121,27,1.2,144,3.9,13.6,52,9200,6.3,no,no,no,good,no,no,notckd
|
| 230 |
+
60,80,1.025,0,0,normal,normal,notpresent,notpresent,131,10,0.5,146,5,14.5,41,10700,5.1,no,no,no,good,no,no,notckd
|
| 231 |
+
38,60,1.02,0,0,normal,normal,notpresent,notpresent,91,36,0.7,135,3.7,14,46,9100,5.8,no,no,no,good,no,no,notckd
|
| 232 |
+
42,80,1.02,0,0,normal,normal,notpresent,notpresent,98,20,0.5,140,3.5,13.9,44,8400,5.5,no,no,no,good,no,no,notckd
|
| 233 |
+
35,80,1.02,0,0,normal,normal,notpresent,notpresent,104,31,1.2,135,5,16.1,45,4300,5.2,no,no,no,good,no,no,notckd
|
| 234 |
+
30,80,1.02,0,0,normal,normal,notpresent,notpresent,131,38,1,147,3.8,14.1,45,9400,5.3,no,no,no,good,no,no,notckd
|
| 235 |
+
49,80,1.02,0,0,normal,normal,notpresent,notpresent,122,32,1.2,139,3.9,17,41,5600,4.9,no,no,no,good,no,no,notckd
|
| 236 |
+
55,80,1.02,0,0,normal,normal,notpresent,notpresent,118,18,0.9,135,3.6,15.5,43,7200,5.4,no,no,no,good,no,no,notckd
|
| 237 |
+
45,80,1.02,0,0,normal,normal,notpresent,notpresent,117,46,1.2,137,5,16.2,45,8600,5.2,no,no,no,good,no,no,notckd
|
| 238 |
+
42,80,1.02,0,0,normal,normal,notpresent,notpresent,132,24,0.7,140,4.1,14.4,50,5000,4.5,no,no,no,good,no,no,notckd
|
| 239 |
+
50,80,1.02,0,0,normal,normal,notpresent,notpresent,97,40,0.6,150,4.5,14.2,48,10500,5,no,no,no,good,no,no,notckd
|
| 240 |
+
55,80,1.02,0,0,normal,normal,notpresent,notpresent,133,17,1.2,135,4.8,13.2,41,6800,5.3,no,no,no,good,no,no,notckd
|
| 241 |
+
48,80,1.025,0,0,normal,normal,notpresent,notpresent,122,33,0.9,146,3.9,13.9,48,9500,4.8,no,no,no,good,no,no,notckd
|
| 242 |
+
25,80,1.025,0,0,normal,normal,notpresent,notpresent,121,19,1.2,142,4.9,15,48,6900,5.3,no,no,no,good,no,no,notckd
|
| 243 |
+
23,80,1.025,0,0,normal,normal,notpresent,notpresent,111,34,1.1,145,4,14.3,41,7200,5,no,no,no,good,no,no,notckd
|
| 244 |
+
30,80,1.025,0,0,normal,normal,notpresent,notpresent,96,25,0.5,144,4.8,13.8,42,9000,4.5,no,no,no,good,no,no,notckd
|
| 245 |
+
56,80,1.025,0,0,normal,normal,notpresent,notpresent,139,15,1.2,135,5,14.8,42,5600,5.5,no,no,no,good,no,no,notckd
|
| 246 |
+
47,80,1.02,0,0,normal,normal,notpresent,notpresent,95,35,0.9,140,4.1,?,?,?,?,no,no,no,good,no,no,notckd
|
| 247 |
+
19,80,1.02,0,0,normal,normal,notpresent,notpresent,107,23,0.7,141,4.2,14.4,44,?,?,no,no,no,good,no,no,notckd
|
| 248 |
+
52,80,1.02,0,0,normal,normal,notpresent,notpresent,125,22,1.2,139,4.6,16.5,43,4700,4.6,no,no,no,good,no,no,notckd
|
| 249 |
+
46,60,1.025,0,0,normal,normal,notpresent,notpresent,123,46,1,135,5,15.7,50,6300,4.8,no,no,no,good,no,no,notckd
|
| 250 |
+
48,60,1.02,0,0,normal,normal,notpresent,notpresent,112,44,1.2,142,4.9,14.5,44,9400,6.4,no,no,no,good,no,no,notckd
|
| 251 |
+
24,70,1.025,0,0,normal,normal,notpresent,notpresent,140,23,0.6,140,4.7,16.3,48,5800,5.6,no,no,no,good,no,no,notckd
|
| 252 |
+
47,80,?,?,?,?,?,notpresent,notpresent,93,33,0.9,144,4.5,13.3,52,8100,5.2,no,no,no,good,no,no,notckd
|
| 253 |
+
55,80,1.025,0,0,normal,normal,notpresent,notpresent,130,50,1.2,147,5,15.5,41,9100,6,no,no,no,good,no,no,notckd
|
| 254 |
+
20,70,1.02,0,0,normal,normal,notpresent,notpresent,123,44,1,135,3.8,14.6,44,5500,4.8,no,no,no,good,no,no,notckd
|
| 255 |
+
33,80,1.025,0,0,normal,normal,notpresent,notpresent,100,37,1.2,142,4,16.9,52,6700,6,no,no,no,good,no,no,notckd
|
| 256 |
+
66,70,1.02,0,0,normal,normal,notpresent,notpresent,94,19,0.7,135,3.9,16,41,5300,5.9,no,no,no,good,no,no,notckd
|
| 257 |
+
71,70,1.02,0,0,normal,normal,notpresent,notpresent,81,18,0.8,145,5,14.7,44,9800,6,no,no,no,good,no,no,notckd
|
| 258 |
+
39,70,1.025,0,0,normal,normal,notpresent,notpresent,124,22,0.6,137,3.8,13.4,43,?,?,no,no,no,good,no,no,notckd
|
| 259 |
+
42,70,1.02,0,0,normal,normal,notpresent,notpresent,93,32,0.9,143,4.7,16.6,43,7100,5.3,no,no,no,good,no,no,notckd
|
| 260 |
+
54,70,1.02,0,0,?,?,?,?,76,28,0.6,146,3.5,14.8,52,8400,5.9,no,no,no,good,no,no,notckd
|
| 261 |
+
47,80,1.025,0,0,normal,normal,notpresent,notpresent,124,44,1,140,4.9,14.9,41,7000,5.7,no,no,no,good,no,no,notckd
|
| 262 |
+
30,80,1.02,0,0,normal,normal,notpresent,notpresent,89,42,0.5,139,5,16.7,52,10200,5,no,no,no,good,no,no,notckd
|
| 263 |
+
75,60,1.02,0,0,normal,normal,notpresent,notpresent,110,50,0.7,135,5,14.3,40,8300,5.8,no,no,no,?,?,?,notckd
|
| 264 |
+
44,70,?,?,?,?,?,notpresent,notpresent,106,25,0.9,150,3.6,15,50,9600,6.5,no,no,no,good,no,no,notckd
|
| 265 |
+
41,70,1.02,0,0,normal,normal,notpresent,notpresent,125,38,0.6,140,5,16.8,41,6300,5.9,no,no,no,good,no,no,notckd
|
| 266 |
+
34,60,1.02,0,0,normal,normal,notpresent,notpresent,91,49,1.2,135,4.5,13.5,48,8600,4.9,no,no,no,good,no,no,notckd
|
| 267 |
+
73,60,1.02,0,0,normal,normal,notpresent,notpresent,127,48,0.5,150,3.5,15.1,52,11000,4.7,no,no,no,good,no,no,notckd
|
| 268 |
+
45,60,1.02,0,0,normal,normal,?,?,114,26,0.7,141,4.2,15,43,9200,5.8,no,no,no,good,no,no,notckd
|
| 269 |
+
44,60,1.025,0,0,normal,normal,notpresent,notpresent,96,33,0.9,147,4.5,16.9,41,7200,5,no,no,no,good,no,no,notckd
|
| 270 |
+
29,70,1.02,0,0,normal,normal,notpresent,notpresent,127,44,1.2,145,5,14.8,48,?,?,no,no,no,good,no,no,notckd
|
| 271 |
+
55,70,1.02,0,0,normal,normal,notpresent,notpresent,107,26,1.1,?,?,17,50,6700,6.1,no,no,no,good,no,no,notckd
|
| 272 |
+
33,80,1.025,0,0,normal,normal,notpresent,notpresent,128,38,0.6,135,3.9,13.1,45,6200,4.5,no,no,no,good,no,no,notckd
|
| 273 |
+
41,80,1.02,0,0,normal,normal,notpresent,notpresent,122,25,0.8,138,5,17.1,41,9100,5.2,no,no,no,good,no,no,notckd
|
| 274 |
+
52,80,1.02,0,0,normal,normal,notpresent,notpresent,128,30,1.2,140,4.5,15.2,52,4300,5.7,no,no,no,good,no,no,notckd
|
| 275 |
+
47,60,1.02,0,0,normal,normal,notpresent,notpresent,137,17,0.5,150,3.5,13.6,44,7900,4.5,no,no,no,good,no,no,notckd
|
| 276 |
+
43,80,1.025,0,0,normal,normal,notpresent,notpresent,81,46,0.6,135,4.9,13.9,48,6900,4.9,no,no,no,good,no,no,notckd
|
| 277 |
+
51,60,1.02,0,0,?,?,notpresent,notpresent,129,25,1.2,139,5,17.2,40,8100,5.9,no,no,no,good,no,no,notckd
|
| 278 |
+
46,60,1.02,0,0,normal,normal,notpresent,notpresent,102,27,0.7,142,4.9,13.2,44,11000,5.4,no,no,no,good,no,no,notckd
|
| 279 |
+
56,60,1.025,0,0,normal,normal,notpresent,notpresent,132,18,1.1,147,4.7,13.7,45,7500,5.6,no,no,no,good,no,no,notckd
|
| 280 |
+
55,80,1.02,0,0,normal,normal,notpresent,notpresent,104,28,0.9,142,4.8,17.3,52,8200,4.8,no,no,no,good,no,no,notckd
|
| 281 |
+
39,70,1.025,0,0,normal,normal,notpresent,notpresent,131,46,0.6,145,5,15.6,41,9400,4.7,no,no,no,good,no,no,notckd
|
| 282 |
+
58,70,1.02,0,0,normal,normal,notpresent,notpresent,102,48,1.2,139,4.3,15,40,8100,4.9,no,no,no,good,no,no,notckd
|
| 283 |
+
61,70,1.025,0,0,normal,normal,notpresent,notpresent,120,29,0.7,137,3.5,17.4,52,7000,5.3,no,no,no,good,no,no,notckd
|
| 284 |
+
30,60,1.02,0,0,normal,normal,notpresent,notpresent,138,15,1.1,135,4.4,?,?,?,?,no,no,no,good,no,no,notckd
|
| 285 |
+
57,60,1.02,0,0,normal,normal,notpresent,notpresent,105,49,1.2,150,4.7,15.7,44,10400,6.2,no,no,no,good,no,no,notckd
|
| 286 |
+
65,60,1.02,0,0,normal,normal,notpresent,notpresent,109,39,1,144,3.5,13.9,48,9600,4.8,no,no,no,good,no,no,notckd
|
| 287 |
+
70,60,?,?,?,?,?,notpresent,notpresent,120,40,0.5,140,4.6,16,43,4500,4.9,no,no,no,good,no,no,notckd
|
| 288 |
+
43,80,1.025,0,0,normal,normal,notpresent,notpresent,130,30,1.1,143,5,15.9,45,7800,4.5,no,no,no,good,no,no,notckd
|
| 289 |
+
40,80,1.02,0,0,normal,normal,notpresent,notpresent,119,15,0.7,150,4.9,?,?,?,?,no,no,no,good,no,no,notckd
|
| 290 |
+
58,80,1.02,0,0,normal,normal,notpresent,notpresent,100,50,1.2,140,3.5,14,50,6700,6.5,no,no,no,good,no,no,notckd
|
| 291 |
+
47,60,1.02,0,0,normal,normal,notpresent,notpresent,109,25,1.1,141,4.7,15.8,41,8300,5.2,no,no,no,good,no,no,notckd
|
| 292 |
+
30,60,1.025,0,0,normal,normal,notpresent,notpresent,120,31,0.8,150,4.6,13.4,44,10700,5.8,no,no,no,good,no,no,notckd
|
| 293 |
+
28,70,1.02,0,0,normal,normal,?,?,131,29,0.6,145,4.9,?,45,8600,6.5,no,no,no,good,no,no,notckd
|
| 294 |
+
33,60,1.025,0,0,normal,normal,notpresent,notpresent,80,25,0.9,146,3.5,14.1,48,7800,5.1,no,no,no,good,no,no,notckd
|
| 295 |
+
43,80,1.02,0,0,normal,normal,notpresent,notpresent,114,32,1.1,135,3.9,?,42,?,?,no,no,no,good,no,no,notckd
|
| 296 |
+
59,70,1.025,0,0,normal,normal,notpresent,notpresent,130,39,0.7,147,4.7,13.5,46,6700,4.5,no,no,no,good,no,no,notckd
|
| 297 |
+
34,70,1.025,0,0,normal,normal,notpresent,notpresent,?,33,1,150,5,15.3,44,10500,6.1,no,no,no,good,no,no,notckd
|
| 298 |
+
23,80,1.02,0,0,normal,normal,notpresent,notpresent,99,46,1.2,142,4,17.7,46,4300,5.5,no,no,no,good,no,no,notckd
|
| 299 |
+
60,60,1.02,0,0,normal,normal,notpresent,notpresent,134,45,0.5,139,4.8,14.2,48,10700,5.6,no,no,no,good,no,no,notckd
|
| 300 |
+
25,60,1.02,0,0,normal,normal,notpresent,notpresent,119,27,0.5,?,?,15.2,40,9200,5.2,no,no,no,good,no,no,notckd
|
| 301 |
+
44,70,1.025,0,0,normal,normal,notpresent,notpresent,92,40,0.9,141,4.9,14,52,7500,6.2,no,no,no,good,no,no,notckd
|
| 302 |
+
62,80,1.02,0,0,normal,normal,notpresent,notpresent,132,34,0.8,147,3.5,17.8,44,4700,4.5,no,no,no,good,no,no,notckd
|
| 303 |
+
25,70,1.02,0,0,normal,normal,notpresent,notpresent,88,42,0.5,136,3.5,13.3,48,7000,4.9,no,no,no,good,no,no,notckd
|
| 304 |
+
32,70,1.025,0,0,normal,normal,notpresent,notpresent,100,29,1.1,142,4.5,14.3,43,6700,5.9,no,no,no,good,no,no,notckd
|
| 305 |
+
63,70,1.025,0,0,normal,normal,notpresent,notpresent,130,37,0.9,150,5,13.4,41,7300,4.7,no,no,no,good,no,no,notckd
|
| 306 |
+
44,60,1.02,0,0,normal,normal,notpresent,notpresent,95,46,0.5,138,4.2,15,50,7700,6.3,no,no,no,good,no,no,notckd
|
| 307 |
+
37,60,1.025,0,0,normal,normal,notpresent,notpresent,111,35,0.8,135,4.1,16.2,50,5500,5.7,no,no,no,good,no,no,notckd
|
| 308 |
+
64,60,1.02,0,0,normal,normal,notpresent,notpresent,106,27,0.7,150,3.3,14.4,42,8100,4.7,no,no,no,good,no,no,notckd
|
| 309 |
+
22,60,1.025,0,0,normal,normal,notpresent,notpresent,97,18,1.2,138,4.3,13.5,42,7900,6.4,no,no,no,good,no,no,notckd
|
| 310 |
+
33,60,?,?,?,normal,normal,notpresent,notpresent,130,41,0.9,141,4.4,15.5,52,4300,5.8,no,no,no,good,no,no,notckd
|
| 311 |
+
43,60,1.025,0,0,normal,normal,notpresent,notpresent,108,25,1,144,5,17.8,43,7200,5.5,no,no,no,good,no,no,notckd
|
| 312 |
+
38,80,1.02,0,0,normal,normal,notpresent,notpresent,99,19,0.5,147,3.5,13.6,44,7300,6.4,no,no,no,good,no,no,notckd
|
| 313 |
+
35,70,1.025,0,0,?,?,notpresent,notpresent,82,36,1.1,150,3.5,14.5,52,9400,6.1,no,no,no,good,no,no,notckd
|
| 314 |
+
65,70,1.025,0,0,?,?,notpresent,notpresent,85,20,1,142,4.8,16.1,43,9600,4.5,no,no,no,good,no,no,notckd
|
| 315 |
+
29,80,1.02,0,0,normal,normal,notpresent,notpresent,83,49,0.9,139,3.3,17.5,40,9900,4.7,no,no,no,good,no,no,notckd
|
| 316 |
+
37,60,1.02,0,0,normal,normal,notpresent,notpresent,109,47,1.1,141,4.9,15,48,7000,5.2,no,no,no,good,no,no,notckd
|
| 317 |
+
39,60,1.02,0,0,normal,normal,notpresent,notpresent,86,37,0.6,150,5,13.6,51,5800,4.5,no,no,no,good,no,no,notckd
|
| 318 |
+
32,60,1.025,0,0,normal,normal,notpresent,notpresent,102,17,0.4,147,4.7,14.6,41,6800,5.1,no,no,no,good,no,no,notckd
|
| 319 |
+
23,60,1.02,0,0,normal,normal,notpresent,notpresent,95,24,0.8,145,5,15,52,6300,4.6,no,no,no,good,no,no,notckd
|
| 320 |
+
34,70,1.025,0,0,normal,normal,notpresent,notpresent,87,38,0.5,144,4.8,17.1,47,7400,6.1,no,no,no,good,no,no,notckd
|
| 321 |
+
66,70,1.025,0,0,normal,normal,notpresent,notpresent,107,16,1.1,140,3.6,13.6,42,11000,4.9,no,no,no,good,no,no,notckd
|
| 322 |
+
47,60,1.02,0,0,normal,normal,notpresent,notpresent,117,22,1.2,138,3.5,13,45,5200,5.6,no,no,no,good,no,no,notckd
|
| 323 |
+
74,60,1.02,0,0,normal,normal,notpresent,notpresent,88,50,0.6,147,3.7,17.2,53,6000,4.5,no,no,no,good,no,no,notckd
|
| 324 |
+
35,60,1.025,0,0,normal,normal,notpresent,notpresent,105,39,0.5,135,3.9,14.7,43,5800,6.2,no,no,no,good,no,no,notckd
|
| 325 |
+
29,80,1.02,0,0,normal,normal,notpresent,notpresent,70,16,0.7,138,3.5,13.7,54,5400,5.8,no,no,no,good,no,no,notckd
|
| 326 |
+
33,80,1.025,0,0,normal,normal,notpresent,notpresent,89,19,1.1,144,5,15,40,10300,4.8,no,no,no,good,no,no,notckd
|
| 327 |
+
67,80,1.025,0,0,normal,normal,notpresent,notpresent,99,40,0.5,?,?,17.8,44,5900,5.2,no,no,no,good,no,no,notckd
|
| 328 |
+
73,80,1.025,0,0,normal,normal,notpresent,notpresent,118,44,0.7,137,3.5,14.8,45,9300,4.7,no,no,no,good,no,no,notckd
|
| 329 |
+
24,80,1.02,0,0,normal,normal,notpresent,notpresent,93,46,1,145,3.5,?,?,10700,6.3,no,no,no,good,no,no,notckd
|
| 330 |
+
60,80,1.025,0,0,normal,normal,notpresent,notpresent,81,15,0.5,141,3.6,15,46,10500,5.3,no,no,no,good,no,no,notckd
|
| 331 |
+
68,60,1.025,0,0,normal,normal,notpresent,notpresent,125,41,1.1,139,3.8,17.4,50,6700,6.1,no,no,no,good,no,no,notckd
|
| 332 |
+
30,80,1.025,0,0,normal,normal,notpresent,notpresent,82,42,0.7,146,5,14.9,45,9400,5.9,no,no,no,good,no,no,notckd
|
| 333 |
+
75,70,1.02,0,0,normal,normal,notpresent,notpresent,107,48,0.8,144,3.5,13.6,46,10300,4.8,no,no,no,good,no,no,notckd
|
| 334 |
+
69,70,1.02,0,0,normal,normal,notpresent,notpresent,83,42,1.2,139,3.7,16.2,50,9300,5.4,no,no,no,good,no,no,notckd
|
| 335 |
+
28,60,1.025,0,0,normal,normal,notpresent,notpresent,79,50,0.5,145,5,17.6,51,6500,5,no,no,no,good,no,no,notckd
|
| 336 |
+
72,60,1.02,0,0,normal,normal,notpresent,notpresent,109,26,0.9,150,4.9,15,52,10500,5.5,no,no,no,good,no,no,notckd
|
| 337 |
+
61,70,1.025,0,0,normal,normal,notpresent,notpresent,133,38,1,142,3.6,13.7,47,9200,4.9,no,no,no,good,no,no,notckd
|
| 338 |
+
79,80,1.025,0,0,normal,normal,notpresent,notpresent,111,44,1.2,146,3.6,16.3,40,8000,6.4,no,no,no,good,no,no,notckd
|
| 339 |
+
70,80,1.02,0,0,normal,normal,notpresent,notpresent,74,41,0.5,143,4.5,15.1,48,9700,5.6,no,no,no,good,no,no,notckd
|
| 340 |
+
58,70,1.025,0,0,normal,normal,notpresent,notpresent,88,16,1.1,147,3.5,16.4,53,9100,5.2,no,no,no,good,no,no,notckd
|
| 341 |
+
64,70,1.02,0,0,normal,normal,notpresent,notpresent,97,27,0.7,145,4.8,13.8,49,6400,4.8,no,no,no,good,no,no,notckd
|
| 342 |
+
71,60,1.025,0,0,normal,normal,notpresent,notpresent,?,?,0.9,140,4.8,15.2,42,7700,5.5,no,no,no,good,no,no,notckd
|
| 343 |
+
62,80,1.025,0,0,normal,normal,notpresent,notpresent,78,45,0.6,138,3.5,16.1,50,5400,5.7,no,no,no,good,no,no,notckd
|
| 344 |
+
59,60,1.02,0,0,normal,normal,notpresent,notpresent,113,23,1.1,139,3.5,15.3,54,6500,4.9,no,no,no,good,no,no,notckd
|
| 345 |
+
71,70,1.025,0,0,?,?,notpresent,notpresent,79,47,0.5,142,4.8,16.6,40,5800,5.9,no,no,no,good,no,no,notckd
|
| 346 |
+
48,80,1.025,0,0,normal,normal,notpresent,notpresent,75,22,0.8,137,5,16.8,51,6000,6.5,no,no,no,good,no,no,notckd
|
| 347 |
+
80,80,1.025,0,0,normal,normal,notpresent,notpresent,119,46,0.7,141,4.9,13.9,49,5100,5,no,no,no,good,no,no,notckd
|
| 348 |
+
57,60,1.02,0,0,normal,normal,notpresent,notpresent,132,18,1.1,150,4.7,15.4,42,11000,4.5,no,no,no,good,no,no,notckd
|
| 349 |
+
63,70,1.02,0,0,normal,normal,notpresent,notpresent,113,25,0.6,146,4.9,16.5,52,8000,5.1,no,no,no,good,no,no,notckd
|
| 350 |
+
46,70,1.025,0,0,normal,normal,notpresent,notpresent,100,47,0.5,142,3.5,16.4,43,5700,6.5,no,no,no,good,no,no,notckd
|
| 351 |
+
15,80,1.025,0,0,normal,normal,notpresent,notpresent,93,17,0.9,136,3.9,16.7,50,6200,5.2,no,no,no,good,no,no,notckd
|
| 352 |
+
51,80,1.02,0,0,normal,normal,notpresent,notpresent,94,15,1.2,144,3.7,15.5,46,9500,6.4,no,no,no,good,no,no,notckd
|
| 353 |
+
41,80,1.025,0,0,normal,normal,notpresent,notpresent,112,48,0.7,140,5,17,52,7200,5.8,no,no,no,good,no,no,notckd
|
| 354 |
+
52,80,1.025,0,0,normal,normal,notpresent,notpresent,99,25,0.8,135,3.7,15,52,6300,5.3,no,no,no,good,no,no,notckd
|
| 355 |
+
36,80,1.025,0,0,normal,normal,notpresent,notpresent,85,16,1.1,142,4.1,15.6,44,5800,6.3,no,no,no,good,no,no,notckd
|
| 356 |
+
57,80,1.02,0,0,normal,normal,notpresent,notpresent,133,48,1.2,147,4.3,14.8,46,6600,5.5,no,no,no,good,no,no,notckd
|
| 357 |
+
43,60,1.025,0,0,normal,normal,notpresent,notpresent,117,45,0.7,141,4.4,13,54,7400,5.4,no,no,no,good,no,no,notckd
|
| 358 |
+
50,80,1.02,0,0,normal,normal,notpresent,notpresent,137,46,0.8,139,5,14.1,45,9500,4.6,no,no,no,good,no,no,notckd
|
| 359 |
+
55,80,1.02,0,0,normal,normal,notpresent,notpresent,140,49,0.5,150,4.9,15.7,47,6700,4.9,no,no,no,good,no,no,notckd
|
| 360 |
+
42,70,1.025,0,0,normal,normal,notpresent,notpresent,75,31,1.2,141,3.5,16.5,54,7800,6.2,no,no,no,good,no,no,notckd
|
| 361 |
+
12,80,1.02,0,0,normal,normal,notpresent,notpresent,100,26,0.6,137,4.4,15.8,49,6600,5.4,no,no,no,good,no,no,notckd
|
| 362 |
+
17,60,1.025,0,0,normal,normal,notpresent,notpresent,114,50,1,135,4.9,14.2,51,7200,5.9,no,no,no,good,no,no,notckd
|
| 363 |
+
58,80,1.025,0,0,normal,normal,notpresent,notpresent,131,18,1.1,141,3.5,15.8,53,6800,6.1,no,no,no,good,no,no,notckd
|
data_cache/neurology_parkinsons.csv
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),Shimmer:APQ3,Shimmer:APQ5,MDVP:APQ,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
|
| 2 |
+
phon_R01_S01_1,119.99200,157.30200,74.99700,0.00784,0.00007,0.00370,0.00554,0.01109,0.04374,0.42600,0.02182,0.03130,0.02971,0.06545,0.02211,21.03300,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
|
| 3 |
+
phon_R01_S01_2,122.40000,148.65000,113.81900,0.00968,0.00008,0.00465,0.00696,0.01394,0.06134,0.62600,0.03134,0.04518,0.04368,0.09403,0.01929,19.08500,1,0.458359,0.819521,-4.075192,0.335590,2.486855,0.368674
|
| 4 |
+
phon_R01_S01_3,116.68200,131.11100,111.55500,0.01050,0.00009,0.00544,0.00781,0.01633,0.05233,0.48200,0.02757,0.03858,0.03590,0.08270,0.01309,20.65100,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
|
| 5 |
+
phon_R01_S01_4,116.67600,137.87100,111.36600,0.00997,0.00009,0.00502,0.00698,0.01505,0.05492,0.51700,0.02924,0.04005,0.03772,0.08771,0.01353,20.64400,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
|
| 6 |
+
phon_R01_S01_5,116.01400,141.78100,110.65500,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,0.58400,0.03490,0.04825,0.04465,0.10470,0.01767,19.64900,1,0.417356,0.823484,-3.747787,0.234513,2.332180,0.410335
|
| 7 |
+
phon_R01_S01_6,120.55200,131.16200,113.78700,0.00968,0.00008,0.00463,0.00750,0.01388,0.04701,0.45600,0.02328,0.03526,0.03243,0.06985,0.01222,21.37800,1,0.415564,0.825069,-4.242867,0.299111,2.187560,0.357775
|
| 8 |
+
phon_R01_S02_1,120.26700,137.24400,114.82000,0.00333,0.00003,0.00155,0.00202,0.00466,0.01608,0.14000,0.00779,0.00937,0.01351,0.02337,0.00607,24.88600,1,0.596040,0.764112,-5.634322,0.257682,1.854785,0.211756
|
| 9 |
+
phon_R01_S02_2,107.33200,113.84000,104.31500,0.00290,0.00003,0.00144,0.00182,0.00431,0.01567,0.13400,0.00829,0.00946,0.01256,0.02487,0.00344,26.89200,1,0.637420,0.763262,-6.167603,0.183721,2.064693,0.163755
|
| 10 |
+
phon_R01_S02_3,95.73000,132.06800,91.75400,0.00551,0.00006,0.00293,0.00332,0.00880,0.02093,0.19100,0.01073,0.01277,0.01717,0.03218,0.01070,21.81200,1,0.615551,0.773587,-5.498678,0.327769,2.322511,0.231571
|
| 11 |
+
phon_R01_S02_4,95.05600,120.10300,91.22600,0.00532,0.00006,0.00268,0.00332,0.00803,0.02838,0.25500,0.01441,0.01725,0.02444,0.04324,0.01022,21.86200,1,0.547037,0.798463,-5.011879,0.325996,2.432792,0.271362
|
| 12 |
+
phon_R01_S02_5,88.33300,112.24000,84.07200,0.00505,0.00006,0.00254,0.00330,0.00763,0.02143,0.19700,0.01079,0.01342,0.01892,0.03237,0.01166,21.11800,1,0.611137,0.776156,-5.249770,0.391002,2.407313,0.249740
|
| 13 |
+
phon_R01_S02_6,91.90400,115.87100,86.29200,0.00540,0.00006,0.00281,0.00336,0.00844,0.02752,0.24900,0.01424,0.01641,0.02214,0.04272,0.01141,21.41400,1,0.583390,0.792520,-4.960234,0.363566,2.642476,0.275931
|
| 14 |
+
phon_R01_S04_1,136.92600,159.86600,131.27600,0.00293,0.00002,0.00118,0.00153,0.00355,0.01259,0.11200,0.00656,0.00717,0.01140,0.01968,0.00581,25.70300,1,0.460600,0.646846,-6.547148,0.152813,2.041277,0.138512
|
| 15 |
+
phon_R01_S04_2,139.17300,179.13900,76.55600,0.00390,0.00003,0.00165,0.00208,0.00496,0.01642,0.15400,0.00728,0.00932,0.01797,0.02184,0.01041,24.88900,1,0.430166,0.665833,-5.660217,0.254989,2.519422,0.199889
|
| 16 |
+
phon_R01_S04_3,152.84500,163.30500,75.83600,0.00294,0.00002,0.00121,0.00149,0.00364,0.01828,0.15800,0.01064,0.00972,0.01246,0.03191,0.00609,24.92200,1,0.474791,0.654027,-6.105098,0.203653,2.125618,0.170100
|
| 17 |
+
phon_R01_S04_4,142.16700,217.45500,83.15900,0.00369,0.00003,0.00157,0.00203,0.00471,0.01503,0.12600,0.00772,0.00888,0.01359,0.02316,0.00839,25.17500,1,0.565924,0.658245,-5.340115,0.210185,2.205546,0.234589
|
| 18 |
+
phon_R01_S04_5,144.18800,349.25900,82.76400,0.00544,0.00004,0.00211,0.00292,0.00632,0.02047,0.19200,0.00969,0.01200,0.02074,0.02908,0.01859,22.33300,1,0.567380,0.644692,-5.440040,0.239764,2.264501,0.218164
|
| 19 |
+
phon_R01_S04_6,168.77800,232.18100,75.60300,0.00718,0.00004,0.00284,0.00387,0.00853,0.03327,0.34800,0.01441,0.01893,0.03430,0.04322,0.02919,20.37600,1,0.631099,0.605417,-2.931070,0.434326,3.007463,0.430788
|
| 20 |
+
phon_R01_S05_1,153.04600,175.82900,68.62300,0.00742,0.00005,0.00364,0.00432,0.01092,0.05517,0.54200,0.02471,0.03572,0.05767,0.07413,0.03160,17.28000,1,0.665318,0.719467,-3.949079,0.357870,3.109010,0.377429
|
| 21 |
+
phon_R01_S05_2,156.40500,189.39800,142.82200,0.00768,0.00005,0.00372,0.00399,0.01116,0.03995,0.34800,0.01721,0.02374,0.04310,0.05164,0.03365,17.15300,1,0.649554,0.686080,-4.554466,0.340176,2.856676,0.322111
|
| 22 |
+
phon_R01_S05_3,153.84800,165.73800,65.78200,0.00840,0.00005,0.00428,0.00450,0.01285,0.03810,0.32800,0.01667,0.02383,0.04055,0.05000,0.03871,17.53600,1,0.660125,0.704087,-4.095442,0.262564,2.739710,0.365391
|
| 23 |
+
phon_R01_S05_4,153.88000,172.86000,78.12800,0.00480,0.00003,0.00232,0.00267,0.00696,0.04137,0.37000,0.02021,0.02591,0.04525,0.06062,0.01849,19.49300,1,0.629017,0.698951,-5.186960,0.237622,2.557536,0.259765
|
| 24 |
+
phon_R01_S05_5,167.93000,193.22100,79.06800,0.00442,0.00003,0.00220,0.00247,0.00661,0.04351,0.37700,0.02228,0.02540,0.04246,0.06685,0.01280,22.46800,1,0.619060,0.679834,-4.330956,0.262384,2.916777,0.285695
|
| 25 |
+
phon_R01_S05_6,173.91700,192.73500,86.18000,0.00476,0.00003,0.00221,0.00258,0.00663,0.04192,0.36400,0.02187,0.02470,0.03772,0.06562,0.01840,20.42200,1,0.537264,0.686894,-5.248776,0.210279,2.547508,0.253556
|
| 26 |
+
phon_R01_S06_1,163.65600,200.84100,76.77900,0.00742,0.00005,0.00380,0.00390,0.01140,0.01659,0.16400,0.00738,0.00948,0.01497,0.02214,0.01778,23.83100,1,0.397937,0.732479,-5.557447,0.220890,2.692176,0.215961
|
| 27 |
+
phon_R01_S06_2,104.40000,206.00200,77.96800,0.00633,0.00006,0.00316,0.00375,0.00948,0.03767,0.38100,0.01732,0.02245,0.03780,0.05197,0.02887,22.06600,1,0.522746,0.737948,-5.571843,0.236853,2.846369,0.219514
|
| 28 |
+
phon_R01_S06_3,171.04100,208.31300,75.50100,0.00455,0.00003,0.00250,0.00234,0.00750,0.01966,0.18600,0.00889,0.01169,0.01872,0.02666,0.01095,25.90800,1,0.418622,0.720916,-6.183590,0.226278,2.589702,0.147403
|
| 29 |
+
phon_R01_S06_4,146.84500,208.70100,81.73700,0.00496,0.00003,0.00250,0.00275,0.00749,0.01919,0.19800,0.00883,0.01144,0.01826,0.02650,0.01328,25.11900,1,0.358773,0.726652,-6.271690,0.196102,2.314209,0.162999
|
| 30 |
+
phon_R01_S06_5,155.35800,227.38300,80.05500,0.00310,0.00002,0.00159,0.00176,0.00476,0.01718,0.16100,0.00769,0.01012,0.01661,0.02307,0.00677,25.97000,1,0.470478,0.676258,-7.120925,0.279789,2.241742,0.108514
|
| 31 |
+
phon_R01_S06_6,162.56800,198.34600,77.63000,0.00502,0.00003,0.00280,0.00253,0.00841,0.01791,0.16800,0.00793,0.01057,0.01799,0.02380,0.01170,25.67800,1,0.427785,0.723797,-6.635729,0.209866,1.957961,0.135242
|
| 32 |
+
phon_R01_S07_1,197.07600,206.89600,192.05500,0.00289,0.00001,0.00166,0.00168,0.00498,0.01098,0.09700,0.00563,0.00680,0.00802,0.01689,0.00339,26.77500,0,0.422229,0.741367,-7.348300,0.177551,1.743867,0.085569
|
| 33 |
+
phon_R01_S07_2,199.22800,209.51200,192.09100,0.00241,0.00001,0.00134,0.00138,0.00402,0.01015,0.08900,0.00504,0.00641,0.00762,0.01513,0.00167,30.94000,0,0.432439,0.742055,-7.682587,0.173319,2.103106,0.068501
|
| 34 |
+
phon_R01_S07_3,198.38300,215.20300,193.10400,0.00212,0.00001,0.00113,0.00135,0.00339,0.01263,0.11100,0.00640,0.00825,0.00951,0.01919,0.00119,30.77500,0,0.465946,0.738703,-7.067931,0.175181,1.512275,0.096320
|
| 35 |
+
phon_R01_S07_4,202.26600,211.60400,197.07900,0.00180,0.000009,0.00093,0.00107,0.00278,0.00954,0.08500,0.00469,0.00606,0.00719,0.01407,0.00072,32.68400,0,0.368535,0.742133,-7.695734,0.178540,1.544609,0.056141
|
| 36 |
+
phon_R01_S07_5,203.18400,211.52600,196.16000,0.00178,0.000009,0.00094,0.00106,0.00283,0.00958,0.08500,0.00468,0.00610,0.00726,0.01403,0.00065,33.04700,0,0.340068,0.741899,-7.964984,0.163519,1.423287,0.044539
|
| 37 |
+
phon_R01_S07_6,201.46400,210.56500,195.70800,0.00198,0.000010,0.00105,0.00115,0.00314,0.01194,0.10700,0.00586,0.00760,0.00957,0.01758,0.00135,31.73200,0,0.344252,0.742737,-7.777685,0.170183,2.447064,0.057610
|
| 38 |
+
phon_R01_S08_1,177.87600,192.92100,168.01300,0.00411,0.00002,0.00233,0.00241,0.00700,0.02126,0.18900,0.01154,0.01347,0.01612,0.03463,0.00586,23.21600,1,0.360148,0.778834,-6.149653,0.218037,2.477082,0.165827
|
| 39 |
+
phon_R01_S08_2,176.17000,185.60400,163.56400,0.00369,0.00002,0.00205,0.00218,0.00616,0.01851,0.16800,0.00938,0.01160,0.01491,0.02814,0.00340,24.95100,1,0.341435,0.783626,-6.006414,0.196371,2.536527,0.173218
|
| 40 |
+
phon_R01_S08_3,180.19800,201.24900,175.45600,0.00284,0.00002,0.00153,0.00166,0.00459,0.01444,0.13100,0.00726,0.00885,0.01190,0.02177,0.00231,26.73800,1,0.403884,0.766209,-6.452058,0.212294,2.269398,0.141929
|
| 41 |
+
phon_R01_S08_4,187.73300,202.32400,173.01500,0.00316,0.00002,0.00168,0.00182,0.00504,0.01663,0.15100,0.00829,0.01003,0.01366,0.02488,0.00265,26.31000,1,0.396793,0.758324,-6.006647,0.266892,2.382544,0.160691
|
| 42 |
+
phon_R01_S08_5,186.16300,197.72400,177.58400,0.00298,0.00002,0.00165,0.00175,0.00496,0.01495,0.13500,0.00774,0.00941,0.01233,0.02321,0.00231,26.82200,1,0.326480,0.765623,-6.647379,0.201095,2.374073,0.130554
|
| 43 |
+
phon_R01_S08_6,184.05500,196.53700,166.97700,0.00258,0.00001,0.00134,0.00147,0.00403,0.01463,0.13200,0.00742,0.00901,0.01234,0.02226,0.00257,26.45300,1,0.306443,0.759203,-7.044105,0.063412,2.361532,0.115730
|
| 44 |
+
phon_R01_S10_1,237.22600,247.32600,225.22700,0.00298,0.00001,0.00169,0.00182,0.00507,0.01752,0.16400,0.01035,0.01024,0.01133,0.03104,0.00740,22.73600,0,0.305062,0.654172,-7.310550,0.098648,2.416838,0.095032
|
| 45 |
+
phon_R01_S10_2,241.40400,248.83400,232.48300,0.00281,0.00001,0.00157,0.00173,0.00470,0.01760,0.15400,0.01006,0.01038,0.01251,0.03017,0.00675,23.14500,0,0.457702,0.634267,-6.793547,0.158266,2.256699,0.117399
|
| 46 |
+
phon_R01_S10_3,243.43900,250.91200,232.43500,0.00210,0.000009,0.00109,0.00137,0.00327,0.01419,0.12600,0.00777,0.00898,0.01033,0.02330,0.00454,25.36800,0,0.438296,0.635285,-7.057869,0.091608,2.330716,0.091470
|
| 47 |
+
phon_R01_S10_4,242.85200,255.03400,227.91100,0.00225,0.000009,0.00117,0.00139,0.00350,0.01494,0.13400,0.00847,0.00879,0.01014,0.02542,0.00476,25.03200,0,0.431285,0.638928,-6.995820,0.102083,2.365800,0.102706
|
| 48 |
+
phon_R01_S10_5,245.51000,262.09000,231.84800,0.00235,0.000010,0.00127,0.00148,0.00380,0.01608,0.14100,0.00906,0.00977,0.01149,0.02719,0.00476,24.60200,0,0.467489,0.631653,-7.156076,0.127642,2.392122,0.097336
|
| 49 |
+
phon_R01_S10_6,252.45500,261.48700,182.78600,0.00185,0.000007,0.00092,0.00113,0.00276,0.01152,0.10300,0.00614,0.00730,0.00860,0.01841,0.00432,26.80500,0,0.610367,0.635204,-7.319510,0.200873,2.028612,0.086398
|
| 50 |
+
phon_R01_S13_1,122.18800,128.61100,115.76500,0.00524,0.00004,0.00169,0.00203,0.00507,0.01613,0.14300,0.00855,0.00776,0.01433,0.02566,0.00839,23.16200,0,0.579597,0.733659,-6.439398,0.266392,2.079922,0.133867
|
| 51 |
+
phon_R01_S13_2,122.96400,130.04900,114.67600,0.00428,0.00003,0.00124,0.00155,0.00373,0.01681,0.15400,0.00930,0.00802,0.01400,0.02789,0.00462,24.97100,0,0.538688,0.754073,-6.482096,0.264967,2.054419,0.128872
|
| 52 |
+
phon_R01_S13_3,124.44500,135.06900,117.49500,0.00431,0.00003,0.00141,0.00167,0.00422,0.02184,0.19700,0.01241,0.01024,0.01685,0.03724,0.00479,25.13500,0,0.553134,0.775933,-6.650471,0.254498,1.840198,0.103561
|
| 53 |
+
phon_R01_S13_4,126.34400,134.23100,112.77300,0.00448,0.00004,0.00131,0.00169,0.00393,0.02033,0.18500,0.01143,0.00959,0.01614,0.03429,0.00474,25.03000,0,0.507504,0.760361,-6.689151,0.291954,2.431854,0.105993
|
| 54 |
+
phon_R01_S13_5,128.00100,138.05200,122.08000,0.00436,0.00003,0.00137,0.00166,0.00411,0.02297,0.21000,0.01323,0.01072,0.01677,0.03969,0.00481,24.69200,0,0.459766,0.766204,-7.072419,0.220434,1.972297,0.119308
|
| 55 |
+
phon_R01_S13_6,129.33600,139.86700,118.60400,0.00490,0.00004,0.00165,0.00183,0.00495,0.02498,0.22800,0.01396,0.01219,0.01947,0.04188,0.00484,25.42900,0,0.420383,0.785714,-6.836811,0.269866,2.223719,0.147491
|
| 56 |
+
phon_R01_S16_1,108.80700,134.65600,102.87400,0.00761,0.00007,0.00349,0.00486,0.01046,0.02719,0.25500,0.01483,0.01609,0.02067,0.04450,0.01036,21.02800,1,0.536009,0.819032,-4.649573,0.205558,1.986899,0.316700
|
| 57 |
+
phon_R01_S16_2,109.86000,126.35800,104.43700,0.00874,0.00008,0.00398,0.00539,0.01193,0.03209,0.30700,0.01789,0.01992,0.02454,0.05368,0.01180,20.76700,1,0.558586,0.811843,-4.333543,0.221727,2.014606,0.344834
|
| 58 |
+
phon_R01_S16_3,110.41700,131.06700,103.37000,0.00784,0.00007,0.00352,0.00514,0.01056,0.03715,0.33400,0.02032,0.02302,0.02802,0.06097,0.00969,21.42200,1,0.541781,0.821364,-4.438453,0.238298,1.922940,0.335041
|
| 59 |
+
phon_R01_S16_4,117.27400,129.91600,110.40200,0.00752,0.00006,0.00299,0.00469,0.00898,0.02293,0.22100,0.01189,0.01459,0.01948,0.03568,0.00681,22.81700,1,0.530529,0.817756,-4.608260,0.290024,2.021591,0.314464
|
| 60 |
+
phon_R01_S16_5,116.87900,131.89700,108.15300,0.00788,0.00007,0.00334,0.00493,0.01003,0.02645,0.26500,0.01394,0.01625,0.02137,0.04183,0.00786,22.60300,1,0.540049,0.813432,-4.476755,0.262633,1.827012,0.326197
|
| 61 |
+
phon_R01_S16_6,114.84700,271.31400,104.68000,0.00867,0.00008,0.00373,0.00520,0.01120,0.03225,0.35000,0.01805,0.01974,0.02519,0.05414,0.01143,21.66000,1,0.547975,0.817396,-4.609161,0.221711,1.831691,0.316395
|
| 62 |
+
phon_R01_S17_1,209.14400,237.49400,109.37900,0.00282,0.00001,0.00147,0.00152,0.00442,0.01861,0.17000,0.00975,0.01258,0.01382,0.02925,0.00871,25.55400,0,0.341788,0.678874,-7.040508,0.066994,2.460791,0.101516
|
| 63 |
+
phon_R01_S17_2,223.36500,238.98700,98.66400,0.00264,0.00001,0.00154,0.00151,0.00461,0.01906,0.16500,0.01013,0.01296,0.01340,0.03039,0.00301,26.13800,0,0.447979,0.686264,-7.293801,0.086372,2.321560,0.098555
|
| 64 |
+
phon_R01_S17_3,222.23600,231.34500,205.49500,0.00266,0.00001,0.00152,0.00144,0.00457,0.01643,0.14500,0.00867,0.01108,0.01200,0.02602,0.00340,25.85600,0,0.364867,0.694399,-6.966321,0.095882,2.278687,0.103224
|
| 65 |
+
phon_R01_S17_4,228.83200,234.61900,223.63400,0.00296,0.00001,0.00175,0.00155,0.00526,0.01644,0.14500,0.00882,0.01075,0.01179,0.02647,0.00351,25.96400,0,0.256570,0.683296,-7.245620,0.018689,2.498224,0.093534
|
| 66 |
+
phon_R01_S17_5,229.40100,252.22100,221.15600,0.00205,0.000009,0.00114,0.00113,0.00342,0.01457,0.12900,0.00769,0.00957,0.01016,0.02308,0.00300,26.41500,0,0.276850,0.673636,-7.496264,0.056844,2.003032,0.073581
|
| 67 |
+
phon_R01_S17_6,228.96900,239.54100,113.20100,0.00238,0.00001,0.00136,0.00140,0.00408,0.01745,0.15400,0.00942,0.01160,0.01234,0.02827,0.00420,24.54700,0,0.305429,0.681811,-7.314237,0.006274,2.118596,0.091546
|
| 68 |
+
phon_R01_S18_1,140.34100,159.77400,67.02100,0.00817,0.00006,0.00430,0.00440,0.01289,0.03198,0.31300,0.01830,0.01810,0.02428,0.05490,0.02183,19.56000,1,0.460139,0.720908,-5.409423,0.226850,2.359973,0.226156
|
| 69 |
+
phon_R01_S18_2,136.96900,166.60700,66.00400,0.00923,0.00007,0.00507,0.00463,0.01520,0.03111,0.30800,0.01638,0.01759,0.02603,0.04914,0.02659,19.97900,1,0.498133,0.729067,-5.324574,0.205660,2.291558,0.226247
|
| 70 |
+
phon_R01_S18_3,143.53300,162.21500,65.80900,0.01101,0.00008,0.00647,0.00467,0.01941,0.05384,0.47800,0.03152,0.02422,0.03392,0.09455,0.04882,20.33800,1,0.513237,0.731444,-5.869750,0.151814,2.118496,0.185580
|
| 71 |
+
phon_R01_S18_4,148.09000,162.82400,67.34300,0.00762,0.00005,0.00467,0.00354,0.01400,0.05428,0.49700,0.03357,0.02494,0.03635,0.10070,0.02431,21.71800,1,0.487407,0.727313,-6.261141,0.120956,2.137075,0.141958
|
| 72 |
+
phon_R01_S18_5,142.72900,162.40800,65.47600,0.00831,0.00006,0.00469,0.00419,0.01407,0.03485,0.36500,0.01868,0.01906,0.02949,0.05605,0.02599,20.26400,1,0.489345,0.730387,-5.720868,0.158830,2.277927,0.180828
|
| 73 |
+
phon_R01_S18_6,136.35800,176.59500,65.75000,0.00971,0.00007,0.00534,0.00478,0.01601,0.04978,0.48300,0.02749,0.02466,0.03736,0.08247,0.03361,18.57000,1,0.543299,0.733232,-5.207985,0.224852,2.642276,0.242981
|
| 74 |
+
phon_R01_S19_1,120.08000,139.71000,111.20800,0.00405,0.00003,0.00180,0.00220,0.00540,0.01706,0.15200,0.00974,0.00925,0.01345,0.02921,0.00442,25.74200,1,0.495954,0.762959,-5.791820,0.329066,2.205024,0.188180
|
| 75 |
+
phon_R01_S19_2,112.01400,588.51800,107.02400,0.00533,0.00005,0.00268,0.00329,0.00805,0.02448,0.22600,0.01373,0.01375,0.01956,0.04120,0.00623,24.17800,1,0.509127,0.789532,-5.389129,0.306636,1.928708,0.225461
|
| 76 |
+
phon_R01_S19_3,110.79300,128.10100,107.31600,0.00494,0.00004,0.00260,0.00283,0.00780,0.02442,0.21600,0.01432,0.01325,0.01831,0.04295,0.00479,25.43800,1,0.437031,0.815908,-5.313360,0.201861,2.225815,0.244512
|
| 77 |
+
phon_R01_S19_4,110.70700,122.61100,105.00700,0.00516,0.00005,0.00277,0.00289,0.00831,0.02215,0.20600,0.01284,0.01219,0.01715,0.03851,0.00472,25.19700,1,0.463514,0.807217,-5.477592,0.315074,1.862092,0.228624
|
| 78 |
+
phon_R01_S19_5,112.87600,148.82600,106.98100,0.00500,0.00004,0.00270,0.00289,0.00810,0.03999,0.35000,0.02413,0.02231,0.02704,0.07238,0.00905,23.37000,1,0.489538,0.789977,-5.775966,0.341169,2.007923,0.193918
|
| 79 |
+
phon_R01_S19_6,110.56800,125.39400,106.82100,0.00462,0.00004,0.00226,0.00280,0.00677,0.02199,0.19700,0.01284,0.01199,0.01636,0.03852,0.00420,25.82000,1,0.429484,0.816340,-5.391029,0.250572,1.777901,0.232744
|
| 80 |
+
phon_R01_S20_1,95.38500,102.14500,90.26400,0.00608,0.00006,0.00331,0.00332,0.00994,0.03202,0.26300,0.01803,0.01886,0.02455,0.05408,0.01062,21.87500,1,0.644954,0.779612,-5.115212,0.249494,2.017753,0.260015
|
| 81 |
+
phon_R01_S20_2,100.77000,115.69700,85.54500,0.01038,0.00010,0.00622,0.00576,0.01865,0.03121,0.36100,0.01773,0.01783,0.02139,0.05320,0.02220,19.20000,1,0.594387,0.790117,-4.913885,0.265699,2.398422,0.277948
|
| 82 |
+
phon_R01_S20_3,96.10600,108.66400,84.51000,0.00694,0.00007,0.00389,0.00415,0.01168,0.04024,0.36400,0.02266,0.02451,0.02876,0.06799,0.01823,19.05500,1,0.544805,0.770466,-4.441519,0.155097,2.645959,0.327978
|
| 83 |
+
phon_R01_S20_4,95.60500,107.71500,87.54900,0.00702,0.00007,0.00428,0.00371,0.01283,0.03156,0.29600,0.01792,0.01841,0.02190,0.05377,0.01825,19.65900,1,0.576084,0.778747,-5.132032,0.210458,2.232576,0.260633
|
| 84 |
+
phon_R01_S20_5,100.96000,110.01900,95.62800,0.00606,0.00006,0.00351,0.00348,0.01053,0.02427,0.21600,0.01371,0.01421,0.01751,0.04114,0.01237,20.53600,1,0.554610,0.787896,-5.022288,0.146948,2.428306,0.264666
|
| 85 |
+
phon_R01_S20_6,98.80400,102.30500,87.80400,0.00432,0.00004,0.00247,0.00258,0.00742,0.02223,0.20200,0.01277,0.01343,0.01552,0.03831,0.00882,22.24400,1,0.576644,0.772416,-6.025367,0.078202,2.053601,0.177275
|
| 86 |
+
phon_R01_S21_1,176.85800,205.56000,75.34400,0.00747,0.00004,0.00418,0.00420,0.01254,0.04795,0.43500,0.02679,0.03022,0.03510,0.08037,0.05470,13.89300,1,0.556494,0.729586,-5.288912,0.343073,3.099301,0.242119
|
| 87 |
+
phon_R01_S21_2,180.97800,200.12500,155.49500,0.00406,0.00002,0.00220,0.00244,0.00659,0.03852,0.33100,0.02107,0.02493,0.02877,0.06321,0.02782,16.17600,1,0.583574,0.727747,-5.657899,0.315903,3.098256,0.200423
|
| 88 |
+
phon_R01_S21_3,178.22200,202.45000,141.04700,0.00321,0.00002,0.00163,0.00194,0.00488,0.03759,0.32700,0.02073,0.02415,0.02784,0.06219,0.03151,15.92400,1,0.598714,0.712199,-6.366916,0.335753,2.654271,0.144614
|
| 89 |
+
phon_R01_S21_4,176.28100,227.38100,125.61000,0.00520,0.00003,0.00287,0.00312,0.00862,0.06511,0.58000,0.03671,0.04159,0.04683,0.11012,0.04824,13.92200,1,0.602874,0.740837,-5.515071,0.299549,3.136550,0.220968
|
| 90 |
+
phon_R01_S21_5,173.89800,211.35000,74.67700,0.00448,0.00003,0.00237,0.00254,0.00710,0.06727,0.65000,0.03788,0.04254,0.04802,0.11363,0.04214,14.73900,1,0.599371,0.743937,-5.783272,0.299793,3.007096,0.194052
|
| 91 |
+
phon_R01_S21_6,179.71100,225.93000,144.87800,0.00709,0.00004,0.00391,0.00419,0.01172,0.04313,0.44200,0.02297,0.02768,0.03455,0.06892,0.07223,11.86600,1,0.590951,0.745526,-4.379411,0.375531,3.671155,0.332086
|
| 92 |
+
phon_R01_S21_7,166.60500,206.00800,78.03200,0.00742,0.00004,0.00387,0.00453,0.01161,0.06640,0.63400,0.03650,0.04282,0.05114,0.10949,0.08725,11.74400,1,0.653410,0.733165,-4.508984,0.389232,3.317586,0.301952
|
| 93 |
+
phon_R01_S22_1,151.95500,163.33500,147.22600,0.00419,0.00003,0.00224,0.00227,0.00672,0.07959,0.77200,0.04421,0.04962,0.05690,0.13262,0.01658,19.66400,1,0.501037,0.714360,-6.411497,0.207156,2.344876,0.134120
|
| 94 |
+
phon_R01_S22_2,148.27200,164.98900,142.29900,0.00459,0.00003,0.00250,0.00256,0.00750,0.04190,0.38300,0.02383,0.02521,0.03051,0.07150,0.01914,18.78000,1,0.454444,0.734504,-5.952058,0.087840,2.344336,0.186489
|
| 95 |
+
phon_R01_S22_3,152.12500,161.46900,76.59600,0.00382,0.00003,0.00191,0.00226,0.00574,0.05925,0.63700,0.03341,0.03794,0.04398,0.10024,0.01211,20.96900,1,0.447456,0.697790,-6.152551,0.173520,2.080121,0.160809
|
| 96 |
+
phon_R01_S22_4,157.82100,172.97500,68.40100,0.00358,0.00002,0.00196,0.00196,0.00587,0.03716,0.30700,0.02062,0.02321,0.02764,0.06185,0.00850,22.21900,1,0.502380,0.712170,-6.251425,0.188056,2.143851,0.160812
|
| 97 |
+
phon_R01_S22_5,157.44700,163.26700,149.60500,0.00369,0.00002,0.00201,0.00197,0.00602,0.03272,0.28300,0.01813,0.01909,0.02571,0.05439,0.01018,21.69300,1,0.447285,0.705658,-6.247076,0.180528,2.344348,0.164916
|
| 98 |
+
phon_R01_S22_6,159.11600,168.91300,144.81100,0.00342,0.00002,0.00178,0.00184,0.00535,0.03381,0.30700,0.01806,0.02024,0.02809,0.05417,0.00852,22.66300,1,0.366329,0.693429,-6.417440,0.194627,2.473239,0.151709
|
| 99 |
+
phon_R01_S24_1,125.03600,143.94600,116.18700,0.01280,0.00010,0.00743,0.00623,0.02228,0.03886,0.34200,0.02135,0.02174,0.03088,0.06406,0.08151,15.33800,1,0.629574,0.714485,-4.020042,0.265315,2.671825,0.340623
|
| 100 |
+
phon_R01_S24_2,125.79100,140.55700,96.20600,0.01378,0.00011,0.00826,0.00655,0.02478,0.04689,0.42200,0.02542,0.02630,0.03908,0.07625,0.10323,15.43300,1,0.571010,0.690892,-5.159169,0.202146,2.441612,0.260375
|
| 101 |
+
phon_R01_S24_3,126.51200,141.75600,99.77000,0.01936,0.00015,0.01159,0.00990,0.03476,0.06734,0.65900,0.03611,0.03963,0.05783,0.10833,0.16744,12.43500,1,0.638545,0.674953,-3.760348,0.242861,2.634633,0.378483
|
| 102 |
+
phon_R01_S24_4,125.64100,141.06800,116.34600,0.03316,0.00026,0.02144,0.01522,0.06433,0.09178,0.89100,0.05358,0.04791,0.06196,0.16074,0.31482,8.86700,1,0.671299,0.656846,-3.700544,0.260481,2.991063,0.370961
|
| 103 |
+
phon_R01_S24_5,128.45100,150.44900,75.63200,0.01551,0.00012,0.00905,0.00909,0.02716,0.06170,0.58400,0.03223,0.03672,0.05174,0.09669,0.11843,15.06000,1,0.639808,0.643327,-4.202730,0.310163,2.638279,0.356881
|
| 104 |
+
phon_R01_S24_6,139.22400,586.56700,66.15700,0.03011,0.00022,0.01854,0.01628,0.05563,0.09419,0.93000,0.05551,0.05005,0.06023,0.16654,0.25930,10.48900,1,0.596362,0.641418,-3.269487,0.270641,2.690917,0.444774
|
| 105 |
+
phon_R01_S25_1,150.25800,154.60900,75.34900,0.00248,0.00002,0.00105,0.00136,0.00315,0.01131,0.10700,0.00522,0.00659,0.01009,0.01567,0.00495,26.75900,1,0.296888,0.722356,-6.878393,0.089267,2.004055,0.113942
|
| 106 |
+
phon_R01_S25_2,154.00300,160.26700,128.62100,0.00183,0.00001,0.00076,0.00100,0.00229,0.01030,0.09400,0.00469,0.00582,0.00871,0.01406,0.00243,28.40900,1,0.263654,0.691483,-7.111576,0.144780,2.065477,0.093193
|
| 107 |
+
phon_R01_S25_3,149.68900,160.36800,133.60800,0.00257,0.00002,0.00116,0.00134,0.00349,0.01346,0.12600,0.00660,0.00818,0.01059,0.01979,0.00578,27.42100,1,0.365488,0.719974,-6.997403,0.210279,1.994387,0.112878
|
| 108 |
+
phon_R01_S25_4,155.07800,163.73600,144.14800,0.00168,0.00001,0.00068,0.00092,0.00204,0.01064,0.09700,0.00522,0.00632,0.00928,0.01567,0.00233,29.74600,1,0.334171,0.677930,-6.981201,0.184550,2.129924,0.106802
|
| 109 |
+
phon_R01_S25_5,151.88400,157.76500,133.75100,0.00258,0.00002,0.00115,0.00122,0.00346,0.01450,0.13700,0.00633,0.00788,0.01267,0.01898,0.00659,26.83300,1,0.393563,0.700246,-6.600023,0.249172,2.499148,0.105306
|
| 110 |
+
phon_R01_S25_6,151.98900,157.33900,132.85700,0.00174,0.00001,0.00075,0.00096,0.00225,0.01024,0.09300,0.00455,0.00576,0.00993,0.01364,0.00238,29.92800,1,0.311369,0.676066,-6.739151,0.160686,2.296873,0.115130
|
| 111 |
+
phon_R01_S26_1,193.03000,208.90000,80.29700,0.00766,0.00004,0.00450,0.00389,0.01351,0.03044,0.27500,0.01771,0.01815,0.02084,0.05312,0.00947,21.93400,1,0.497554,0.740539,-5.845099,0.278679,2.608749,0.185668
|
| 112 |
+
phon_R01_S26_2,200.71400,223.98200,89.68600,0.00621,0.00003,0.00371,0.00337,0.01112,0.02286,0.20700,0.01192,0.01439,0.01852,0.03576,0.00704,23.23900,1,0.436084,0.727863,-5.258320,0.256454,2.550961,0.232520
|
| 113 |
+
phon_R01_S26_3,208.51900,220.31500,199.02000,0.00609,0.00003,0.00368,0.00339,0.01105,0.01761,0.15500,0.00952,0.01058,0.01307,0.02855,0.00830,22.40700,1,0.338097,0.712466,-6.471427,0.184378,2.502336,0.136390
|
| 114 |
+
phon_R01_S26_4,204.66400,221.30000,189.62100,0.00841,0.00004,0.00502,0.00485,0.01506,0.02378,0.21000,0.01277,0.01483,0.01767,0.03831,0.01316,21.30500,1,0.498877,0.722085,-4.876336,0.212054,2.376749,0.268144
|
| 115 |
+
phon_R01_S26_5,210.14100,232.70600,185.25800,0.00534,0.00003,0.00321,0.00280,0.00964,0.01680,0.14900,0.00861,0.01017,0.01301,0.02583,0.00620,23.67100,1,0.441097,0.722254,-5.963040,0.250283,2.489191,0.177807
|
| 116 |
+
phon_R01_S26_6,206.32700,226.35500,92.02000,0.00495,0.00002,0.00302,0.00246,0.00905,0.02105,0.20900,0.01107,0.01284,0.01604,0.03320,0.01048,21.86400,1,0.331508,0.715121,-6.729713,0.181701,2.938114,0.115515
|
| 117 |
+
phon_R01_S27_1,151.87200,492.89200,69.08500,0.00856,0.00006,0.00404,0.00385,0.01211,0.01843,0.23500,0.00796,0.00832,0.01271,0.02389,0.06051,23.69300,1,0.407701,0.662668,-4.673241,0.261549,2.702355,0.274407
|
| 118 |
+
phon_R01_S27_2,158.21900,442.55700,71.94800,0.00476,0.00003,0.00214,0.00207,0.00642,0.01458,0.14800,0.00606,0.00747,0.01312,0.01818,0.01554,26.35600,1,0.450798,0.653823,-6.051233,0.273280,2.640798,0.170106
|
| 119 |
+
phon_R01_S27_3,170.75600,450.24700,79.03200,0.00555,0.00003,0.00244,0.00261,0.00731,0.01725,0.17500,0.00757,0.00971,0.01652,0.02270,0.01802,25.69000,1,0.486738,0.676023,-4.597834,0.372114,2.975889,0.282780
|
| 120 |
+
phon_R01_S27_4,178.28500,442.82400,82.06300,0.00462,0.00003,0.00157,0.00194,0.00472,0.01279,0.12900,0.00617,0.00744,0.01151,0.01851,0.00856,25.02000,1,0.470422,0.655239,-4.913137,0.393056,2.816781,0.251972
|
| 121 |
+
phon_R01_S27_5,217.11600,233.48100,93.97800,0.00404,0.00002,0.00127,0.00128,0.00381,0.01299,0.12400,0.00679,0.00631,0.01075,0.02038,0.00681,24.58100,1,0.462516,0.582710,-5.517173,0.389295,2.925862,0.220657
|
| 122 |
+
phon_R01_S27_6,128.94000,479.69700,88.25100,0.00581,0.00005,0.00241,0.00314,0.00723,0.02008,0.22100,0.00849,0.01117,0.01734,0.02548,0.02350,24.74300,1,0.487756,0.684130,-6.186128,0.279933,2.686240,0.152428
|
| 123 |
+
phon_R01_S27_7,176.82400,215.29300,83.96100,0.00460,0.00003,0.00209,0.00221,0.00628,0.01169,0.11700,0.00534,0.00630,0.01104,0.01603,0.01161,27.16600,1,0.400088,0.656182,-4.711007,0.281618,2.655744,0.234809
|
| 124 |
+
phon_R01_S31_1,138.19000,203.52200,83.34000,0.00704,0.00005,0.00406,0.00398,0.01218,0.04479,0.44100,0.02587,0.02567,0.03220,0.07761,0.01968,18.30500,1,0.538016,0.741480,-5.418787,0.160267,2.090438,0.229892
|
| 125 |
+
phon_R01_S31_2,182.01800,197.17300,79.18700,0.00842,0.00005,0.00506,0.00449,0.01517,0.02503,0.23100,0.01372,0.01580,0.01931,0.04115,0.01813,18.78400,1,0.589956,0.732903,-5.445140,0.142466,2.174306,0.215558
|
| 126 |
+
phon_R01_S31_3,156.23900,195.10700,79.82000,0.00694,0.00004,0.00403,0.00395,0.01209,0.02343,0.22400,0.01289,0.01420,0.01720,0.03867,0.02020,19.19600,1,0.618663,0.728421,-5.944191,0.143359,1.929715,0.181988
|
| 127 |
+
phon_R01_S31_4,145.17400,198.10900,80.63700,0.00733,0.00005,0.00414,0.00422,0.01242,0.02362,0.23300,0.01235,0.01495,0.01944,0.03706,0.01874,18.85700,1,0.637518,0.735546,-5.594275,0.127950,1.765957,0.222716
|
| 128 |
+
phon_R01_S31_5,138.14500,197.23800,81.11400,0.00544,0.00004,0.00294,0.00327,0.00883,0.02791,0.24600,0.01484,0.01805,0.02259,0.04451,0.01794,18.17800,1,0.623209,0.738245,-5.540351,0.087165,1.821297,0.214075
|
| 129 |
+
phon_R01_S31_6,166.88800,198.96600,79.51200,0.00638,0.00004,0.00368,0.00351,0.01104,0.02857,0.25700,0.01547,0.01859,0.02301,0.04641,0.01796,18.33000,1,0.585169,0.736964,-5.825257,0.115697,1.996146,0.196535
|
| 130 |
+
phon_R01_S32_1,119.03100,127.53300,109.21600,0.00440,0.00004,0.00214,0.00192,0.00641,0.01033,0.09800,0.00538,0.00570,0.00811,0.01614,0.01724,26.84200,1,0.457541,0.699787,-6.890021,0.152941,2.328513,0.112856
|
| 131 |
+
phon_R01_S32_2,120.07800,126.63200,105.66700,0.00270,0.00002,0.00116,0.00135,0.00349,0.01022,0.09000,0.00476,0.00588,0.00903,0.01428,0.00487,26.36900,1,0.491345,0.718839,-5.892061,0.195976,2.108873,0.183572
|
| 132 |
+
phon_R01_S32_3,120.28900,128.14300,100.20900,0.00492,0.00004,0.00269,0.00238,0.00808,0.01412,0.12500,0.00703,0.00820,0.01194,0.02110,0.01610,23.94900,1,0.467160,0.724045,-6.135296,0.203630,2.539724,0.169923
|
| 133 |
+
phon_R01_S32_4,120.25600,125.30600,104.77300,0.00407,0.00003,0.00224,0.00205,0.00671,0.01516,0.13800,0.00721,0.00815,0.01310,0.02164,0.01015,26.01700,1,0.468621,0.735136,-6.112667,0.217013,2.527742,0.170633
|
| 134 |
+
phon_R01_S32_5,119.05600,125.21300,86.79500,0.00346,0.00003,0.00169,0.00170,0.00508,0.01201,0.10600,0.00633,0.00701,0.00915,0.01898,0.00903,23.38900,1,0.470972,0.721308,-5.436135,0.254909,2.516320,0.232209
|
| 135 |
+
phon_R01_S32_6,118.74700,123.72300,109.83600,0.00331,0.00003,0.00168,0.00171,0.00504,0.01043,0.09900,0.00490,0.00621,0.00903,0.01471,0.00504,25.61900,1,0.482296,0.723096,-6.448134,0.178713,2.034827,0.141422
|
| 136 |
+
phon_R01_S33_1,106.51600,112.77700,93.10500,0.00589,0.00006,0.00291,0.00319,0.00873,0.04932,0.44100,0.02683,0.03112,0.03651,0.08050,0.03031,17.06000,1,0.637814,0.744064,-5.301321,0.320385,2.375138,0.243080
|
| 137 |
+
phon_R01_S33_2,110.45300,127.61100,105.55400,0.00494,0.00004,0.00244,0.00315,0.00731,0.04128,0.37900,0.02229,0.02592,0.03316,0.06688,0.02529,17.70700,1,0.653427,0.706687,-5.333619,0.322044,2.631793,0.228319
|
| 138 |
+
phon_R01_S33_3,113.40000,133.34400,107.81600,0.00451,0.00004,0.00219,0.00283,0.00658,0.04879,0.43100,0.02385,0.02973,0.04370,0.07154,0.02278,19.01300,1,0.647900,0.708144,-4.378916,0.300067,2.445502,0.259451
|
| 139 |
+
phon_R01_S33_4,113.16600,130.27000,100.67300,0.00502,0.00004,0.00257,0.00312,0.00772,0.05279,0.47600,0.02896,0.03347,0.04134,0.08689,0.03690,16.74700,1,0.625362,0.708617,-4.654894,0.304107,2.672362,0.274387
|
| 140 |
+
phon_R01_S33_5,112.23900,126.60900,104.09500,0.00472,0.00004,0.00238,0.00290,0.00715,0.05643,0.51700,0.03070,0.03530,0.04451,0.09211,0.02629,17.36600,1,0.640945,0.701404,-5.634576,0.306014,2.419253,0.209191
|
| 141 |
+
phon_R01_S33_6,116.15000,131.73100,109.81500,0.00381,0.00003,0.00181,0.00232,0.00542,0.03026,0.26700,0.01514,0.01812,0.02770,0.04543,0.01827,18.80100,1,0.624811,0.696049,-5.866357,0.233070,2.445646,0.184985
|
| 142 |
+
phon_R01_S34_1,170.36800,268.79600,79.54300,0.00571,0.00003,0.00232,0.00269,0.00696,0.03273,0.28100,0.01713,0.01964,0.02824,0.05139,0.02485,18.54000,1,0.677131,0.685057,-4.796845,0.397749,2.963799,0.277227
|
| 143 |
+
phon_R01_S34_2,208.08300,253.79200,91.80200,0.00757,0.00004,0.00428,0.00428,0.01285,0.06725,0.57100,0.04016,0.04003,0.04464,0.12047,0.04238,15.64800,1,0.606344,0.665945,-5.410336,0.288917,2.665133,0.231723
|
| 144 |
+
phon_R01_S34_3,198.45800,219.29000,148.69100,0.00376,0.00002,0.00182,0.00215,0.00546,0.03527,0.29700,0.02055,0.02076,0.02530,0.06165,0.01728,18.70200,1,0.606273,0.661735,-5.585259,0.310746,2.465528,0.209863
|
| 145 |
+
phon_R01_S34_4,202.80500,231.50800,86.23200,0.00370,0.00002,0.00189,0.00211,0.00568,0.01997,0.18000,0.01117,0.01177,0.01506,0.03350,0.02010,18.68700,1,0.536102,0.632631,-5.898673,0.213353,2.470746,0.189032
|
| 146 |
+
phon_R01_S34_5,202.54400,241.35000,164.16800,0.00254,0.00001,0.00100,0.00133,0.00301,0.02662,0.22800,0.01475,0.01558,0.02006,0.04426,0.01049,20.68000,1,0.497480,0.630409,-6.132663,0.220617,2.576563,0.159777
|
| 147 |
+
phon_R01_S34_6,223.36100,263.87200,87.63800,0.00352,0.00002,0.00169,0.00188,0.00506,0.02536,0.22500,0.01379,0.01478,0.01909,0.04137,0.01493,20.36600,1,0.566849,0.574282,-5.456811,0.345238,2.840556,0.232861
|
| 148 |
+
phon_R01_S35_1,169.77400,191.75900,151.45100,0.01568,0.00009,0.00863,0.00946,0.02589,0.08143,0.82100,0.03804,0.05426,0.08808,0.11411,0.07530,12.35900,1,0.561610,0.793509,-3.297668,0.414758,3.413649,0.457533
|
| 149 |
+
phon_R01_S35_2,183.52000,216.81400,161.34000,0.01466,0.00008,0.00849,0.00819,0.02546,0.06050,0.61800,0.02865,0.04101,0.06359,0.08595,0.06057,14.36700,1,0.478024,0.768974,-4.276605,0.355736,3.142364,0.336085
|
| 150 |
+
phon_R01_S35_3,188.62000,216.30200,165.98200,0.01719,0.00009,0.00996,0.01027,0.02987,0.07118,0.72200,0.03474,0.04580,0.06824,0.10422,0.08069,12.29800,1,0.552870,0.764036,-3.377325,0.335357,3.274865,0.418646
|
| 151 |
+
phon_R01_S35_4,202.63200,565.74000,177.25800,0.01627,0.00008,0.00919,0.00963,0.02756,0.07170,0.83300,0.03515,0.04265,0.06460,0.10546,0.07889,14.98900,1,0.427627,0.775708,-4.892495,0.262281,2.910213,0.270173
|
| 152 |
+
phon_R01_S35_5,186.69500,211.96100,149.44200,0.01872,0.00010,0.01075,0.01154,0.03225,0.05830,0.78400,0.02699,0.03714,0.06259,0.08096,0.10952,12.52900,1,0.507826,0.762726,-4.484303,0.340256,2.958815,0.301487
|
| 153 |
+
phon_R01_S35_6,192.81800,224.42900,168.79300,0.03107,0.00016,0.01800,0.01958,0.05401,0.11908,1.30200,0.05647,0.07940,0.13778,0.16942,0.21713,8.44100,1,0.625866,0.768320,-2.434031,0.450493,3.079221,0.527367
|
| 154 |
+
phon_R01_S35_7,198.11600,233.09900,174.47800,0.02714,0.00014,0.01568,0.01699,0.04705,0.08684,1.01800,0.04284,0.05556,0.08318,0.12851,0.16265,9.44900,1,0.584164,0.754449,-2.839756,0.356224,3.184027,0.454721
|
| 155 |
+
phon_R01_S37_1,121.34500,139.64400,98.25000,0.00684,0.00006,0.00388,0.00332,0.01164,0.02534,0.24100,0.01340,0.01399,0.02056,0.04019,0.04179,21.52000,1,0.566867,0.670475,-4.865194,0.246404,2.013530,0.168581
|
| 156 |
+
phon_R01_S37_2,119.10000,128.44200,88.83300,0.00692,0.00006,0.00393,0.00300,0.01179,0.02682,0.23600,0.01484,0.01405,0.02018,0.04451,0.04611,21.82400,1,0.651680,0.659333,-4.239028,0.175691,2.451130,0.247455
|
| 157 |
+
phon_R01_S37_3,117.87000,127.34900,95.65400,0.00647,0.00005,0.00356,0.00300,0.01067,0.03087,0.27600,0.01659,0.01804,0.02402,0.04977,0.02631,22.43100,1,0.628300,0.652025,-3.583722,0.207914,2.439597,0.206256
|
| 158 |
+
phon_R01_S37_4,122.33600,142.36900,94.79400,0.00727,0.00006,0.00415,0.00339,0.01246,0.02293,0.22300,0.01205,0.01289,0.01771,0.03615,0.03191,22.95300,1,0.611679,0.623731,-5.435100,0.230532,2.699645,0.220546
|
| 159 |
+
phon_R01_S37_5,117.96300,134.20900,100.75700,0.01813,0.00015,0.01117,0.00718,0.03351,0.04912,0.43800,0.02610,0.02161,0.02916,0.07830,0.10748,19.07500,1,0.630547,0.646786,-3.444478,0.303214,2.964568,0.261305
|
| 160 |
+
phon_R01_S37_6,126.14400,154.28400,97.54300,0.00975,0.00008,0.00593,0.00454,0.01778,0.02852,0.26600,0.01500,0.01581,0.02157,0.04499,0.03828,21.53400,1,0.635015,0.627337,-5.070096,0.280091,2.892300,0.249703
|
| 161 |
+
phon_R01_S39_1,127.93000,138.75200,112.17300,0.00605,0.00005,0.00321,0.00318,0.00962,0.03235,0.33900,0.01360,0.01650,0.03105,0.04079,0.02663,19.65100,1,0.654945,0.675865,-5.498456,0.234196,2.103014,0.216638
|
| 162 |
+
phon_R01_S39_2,114.23800,124.39300,77.02200,0.00581,0.00005,0.00299,0.00316,0.00896,0.04009,0.40600,0.01579,0.01994,0.04114,0.04736,0.02073,20.43700,1,0.653139,0.694571,-5.185987,0.259229,2.151121,0.244948
|
| 163 |
+
phon_R01_S39_3,115.32200,135.73800,107.80200,0.00619,0.00005,0.00352,0.00329,0.01057,0.03273,0.32500,0.01644,0.01722,0.02931,0.04933,0.02810,19.38800,1,0.577802,0.684373,-5.283009,0.226528,2.442906,0.238281
|
| 164 |
+
phon_R01_S39_4,114.55400,126.77800,91.12100,0.00651,0.00006,0.00366,0.00340,0.01097,0.03658,0.36900,0.01864,0.01940,0.03091,0.05592,0.02707,18.95400,1,0.685151,0.719576,-5.529833,0.242750,2.408689,0.220520
|
| 165 |
+
phon_R01_S39_5,112.15000,131.66900,97.52700,0.00519,0.00005,0.00291,0.00284,0.00873,0.01756,0.15500,0.00967,0.01033,0.01363,0.02902,0.01435,21.21900,1,0.557045,0.673086,-5.617124,0.184896,1.871871,0.212386
|
| 166 |
+
phon_R01_S39_6,102.27300,142.83000,85.90200,0.00907,0.00009,0.00493,0.00461,0.01480,0.02814,0.27200,0.01579,0.01553,0.02073,0.04736,0.03882,18.44700,1,0.671378,0.674562,-2.929379,0.396746,2.560422,0.367233
|
| 167 |
+
phon_R01_S42_1,236.20000,244.66300,102.13700,0.00277,0.00001,0.00154,0.00153,0.00462,0.02448,0.21700,0.01410,0.01426,0.01621,0.04231,0.00620,24.07800,0,0.469928,0.628232,-6.816086,0.172270,2.235197,0.119652
|
| 168 |
+
phon_R01_S42_2,237.32300,243.70900,229.25600,0.00303,0.00001,0.00173,0.00159,0.00519,0.01242,0.11600,0.00696,0.00747,0.00882,0.02089,0.00533,24.67900,0,0.384868,0.626710,-7.018057,0.176316,1.852402,0.091604
|
| 169 |
+
phon_R01_S42_3,260.10500,264.91900,237.30300,0.00339,0.00001,0.00205,0.00186,0.00616,0.02030,0.19700,0.01186,0.01230,0.01367,0.03557,0.00910,21.08300,0,0.440988,0.628058,-7.517934,0.160414,1.881767,0.075587
|
| 170 |
+
phon_R01_S42_4,197.56900,217.62700,90.79400,0.00803,0.00004,0.00490,0.00448,0.01470,0.02177,0.18900,0.01279,0.01272,0.01439,0.03836,0.01337,19.26900,0,0.372222,0.725216,-5.736781,0.164529,2.882450,0.202879
|
| 171 |
+
phon_R01_S42_5,240.30100,245.13500,219.78300,0.00517,0.00002,0.00316,0.00283,0.00949,0.02018,0.21200,0.01176,0.01191,0.01344,0.03529,0.00965,21.02000,0,0.371837,0.646167,-7.169701,0.073298,2.266432,0.100881
|
| 172 |
+
phon_R01_S42_6,244.99000,272.21000,239.17000,0.00451,0.00002,0.00279,0.00237,0.00837,0.01897,0.18100,0.01084,0.01121,0.01255,0.03253,0.01049,21.52800,0,0.522812,0.646818,-7.304500,0.171088,2.095237,0.096220
|
| 173 |
+
phon_R01_S43_1,112.54700,133.37400,105.71500,0.00355,0.00003,0.00166,0.00190,0.00499,0.01358,0.12900,0.00664,0.00786,0.01140,0.01992,0.00435,26.43600,0,0.413295,0.756700,-6.323531,0.218885,2.193412,0.160376
|
| 174 |
+
phon_R01_S43_2,110.73900,113.59700,100.13900,0.00356,0.00003,0.00170,0.00200,0.00510,0.01484,0.13300,0.00754,0.00950,0.01285,0.02261,0.00430,26.55000,0,0.369090,0.776158,-6.085567,0.192375,1.889002,0.174152
|
| 175 |
+
phon_R01_S43_3,113.71500,116.44300,96.91300,0.00349,0.00003,0.00171,0.00203,0.00514,0.01472,0.13300,0.00748,0.00905,0.01148,0.02245,0.00478,26.54700,0,0.380253,0.766700,-5.943501,0.192150,1.852542,0.179677
|
| 176 |
+
phon_R01_S43_4,117.00400,144.46600,99.92300,0.00353,0.00003,0.00176,0.00218,0.00528,0.01657,0.14500,0.00881,0.01062,0.01318,0.02643,0.00590,25.44500,0,0.387482,0.756482,-6.012559,0.229298,1.872946,0.163118
|
| 177 |
+
phon_R01_S43_5,115.38000,123.10900,108.63400,0.00332,0.00003,0.00160,0.00199,0.00480,0.01503,0.13700,0.00812,0.00933,0.01133,0.02436,0.00401,26.00500,0,0.405991,0.761255,-5.966779,0.197938,1.974857,0.184067
|
| 178 |
+
phon_R01_S43_6,116.38800,129.03800,108.97000,0.00346,0.00003,0.00169,0.00213,0.00507,0.01725,0.15500,0.00874,0.01021,0.01331,0.02623,0.00415,26.14300,0,0.361232,0.763242,-6.016891,0.109256,2.004719,0.174429
|
| 179 |
+
phon_R01_S44_1,151.73700,190.20400,129.85900,0.00314,0.00002,0.00135,0.00162,0.00406,0.01469,0.13200,0.00728,0.00886,0.01230,0.02184,0.00570,24.15100,1,0.396610,0.745957,-6.486822,0.197919,2.449763,0.132703
|
| 180 |
+
phon_R01_S44_2,148.79000,158.35900,138.99000,0.00309,0.00002,0.00152,0.00186,0.00456,0.01574,0.14200,0.00839,0.00956,0.01309,0.02518,0.00488,24.41200,1,0.402591,0.762508,-6.311987,0.182459,2.251553,0.160306
|
| 181 |
+
phon_R01_S44_3,148.14300,155.98200,135.04100,0.00392,0.00003,0.00204,0.00231,0.00612,0.01450,0.13100,0.00725,0.00876,0.01263,0.02175,0.00540,23.68300,1,0.398499,0.778349,-5.711205,0.240875,2.845109,0.192730
|
| 182 |
+
phon_R01_S44_4,150.44000,163.44100,144.73600,0.00396,0.00003,0.00206,0.00233,0.00619,0.02551,0.23700,0.01321,0.01574,0.02148,0.03964,0.00611,23.13300,1,0.352396,0.759320,-6.261446,0.183218,2.264226,0.144105
|
| 183 |
+
phon_R01_S44_5,148.46200,161.07800,141.99800,0.00397,0.00003,0.00202,0.00235,0.00605,0.01831,0.16300,0.00950,0.01103,0.01559,0.02849,0.00639,22.86600,1,0.408598,0.768845,-5.704053,0.216204,2.679185,0.197710
|
| 184 |
+
phon_R01_S44_6,149.81800,163.41700,144.78600,0.00336,0.00002,0.00174,0.00198,0.00521,0.02145,0.19800,0.01155,0.01341,0.01666,0.03464,0.00595,23.00800,1,0.329577,0.757180,-6.277170,0.109397,2.209021,0.156368
|
| 185 |
+
phon_R01_S49_1,117.22600,123.92500,106.65600,0.00417,0.00004,0.00186,0.00270,0.00558,0.01909,0.17100,0.00864,0.01223,0.01949,0.02592,0.00955,23.07900,0,0.603515,0.669565,-5.619070,0.191576,2.027228,0.215724
|
| 186 |
+
phon_R01_S49_2,116.84800,217.55200,99.50300,0.00531,0.00005,0.00260,0.00346,0.00780,0.01795,0.16300,0.00810,0.01144,0.01756,0.02429,0.01179,22.08500,0,0.663842,0.656516,-5.198864,0.206768,2.120412,0.252404
|
| 187 |
+
phon_R01_S49_3,116.28600,177.29100,96.98300,0.00314,0.00003,0.00134,0.00192,0.00403,0.01564,0.13600,0.00667,0.00990,0.01691,0.02001,0.00737,24.19900,0,0.598515,0.654331,-5.592584,0.133917,2.058658,0.214346
|
| 188 |
+
phon_R01_S49_4,116.55600,592.03000,86.22800,0.00496,0.00004,0.00254,0.00263,0.00762,0.01660,0.15400,0.00820,0.00972,0.01491,0.02460,0.01397,23.95800,0,0.566424,0.667654,-6.431119,0.153310,2.161936,0.120605
|
| 189 |
+
phon_R01_S49_5,116.34200,581.28900,94.24600,0.00267,0.00002,0.00115,0.00148,0.00345,0.01300,0.11700,0.00631,0.00789,0.01144,0.01892,0.00680,25.02300,0,0.528485,0.663884,-6.359018,0.116636,2.152083,0.138868
|
| 190 |
+
phon_R01_S49_6,114.56300,119.16700,86.64700,0.00327,0.00003,0.00146,0.00184,0.00439,0.01185,0.10600,0.00557,0.00721,0.01095,0.01672,0.00703,24.77500,0,0.555303,0.659132,-6.710219,0.149694,1.913990,0.121777
|
| 191 |
+
phon_R01_S50_1,201.77400,262.70700,78.22800,0.00694,0.00003,0.00412,0.00396,0.01235,0.02574,0.25500,0.01454,0.01582,0.01758,0.04363,0.04441,19.36800,0,0.508479,0.683761,-6.934474,0.159890,2.316346,0.112838
|
| 192 |
+
phon_R01_S50_2,174.18800,230.97800,94.26100,0.00459,0.00003,0.00263,0.00259,0.00790,0.04087,0.40500,0.02336,0.02498,0.02745,0.07008,0.02764,19.51700,0,0.448439,0.657899,-6.538586,0.121952,2.657476,0.133050
|
| 193 |
+
phon_R01_S50_3,209.51600,253.01700,89.48800,0.00564,0.00003,0.00331,0.00292,0.00994,0.02751,0.26300,0.01604,0.01657,0.01879,0.04812,0.01810,19.14700,0,0.431674,0.683244,-6.195325,0.129303,2.784312,0.168895
|
| 194 |
+
phon_R01_S50_4,174.68800,240.00500,74.28700,0.01360,0.00008,0.00624,0.00564,0.01873,0.02308,0.25600,0.01268,0.01365,0.01667,0.03804,0.10715,17.88300,0,0.407567,0.655683,-6.787197,0.158453,2.679772,0.131728
|
| 195 |
+
phon_R01_S50_5,198.76400,396.96100,74.90400,0.00740,0.00004,0.00370,0.00390,0.01109,0.02296,0.24100,0.01265,0.01321,0.01588,0.03794,0.07223,19.02000,0,0.451221,0.643956,-6.744577,0.207454,2.138608,0.123306
|
| 196 |
+
phon_R01_S50_6,214.28900,260.27700,77.97300,0.00567,0.00003,0.00295,0.00317,0.00885,0.01884,0.19000,0.01026,0.01161,0.01373,0.03078,0.04398,21.20900,0,0.462803,0.664357,-5.724056,0.190667,2.555477,0.148569
|
data_cache/obstetrics_fetal.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data_cache/oncology_cervical.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data_cache/ophthalmology.arff
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data_cache/orthopaedics.arff
ADDED
|
@@ -0,0 +1,322 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@relation column_2C_weka
|
| 2 |
+
|
| 3 |
+
@attribute pelvic_incidence numeric
|
| 4 |
+
@attribute pelvic_tilt numeric
|
| 5 |
+
@attribute lumbar_lordosis_angle numeric
|
| 6 |
+
@attribute sacral_slope numeric
|
| 7 |
+
@attribute pelvic_radius numeric
|
| 8 |
+
@attribute degree_spondylolisthesis numeric
|
| 9 |
+
|
| 10 |
+
@attribute class {Abnormal, Normal}
|
| 11 |
+
|
| 12 |
+
@data
|
| 13 |
+
63.0278175,22.55258597,39.60911701,40.47523153,98.67291675,-0.254399986,Abnormal
|
| 14 |
+
39.05695098,10.06099147,25.01537822,28.99595951,114.4054254,4.564258645,Abnormal
|
| 15 |
+
68.83202098,22.21848205,50.09219357,46.61353893,105.9851355,-3.530317314,Abnormal
|
| 16 |
+
69.29700807,24.65287791,44.31123813,44.64413017,101.8684951,11.21152344,Abnormal
|
| 17 |
+
49.71285934,9.652074879,28.317406,40.06078446,108.1687249,7.918500615,Abnormal
|
| 18 |
+
40.25019968,13.92190658,25.1249496,26.32829311,130.3278713,2.230651729,Abnormal
|
| 19 |
+
53.43292815,15.86433612,37.16593387,37.56859203,120.5675233,5.988550702,Abnormal
|
| 20 |
+
45.36675362,10.75561143,29.03834896,34.61114218,117.2700675,-10.67587083,Abnormal
|
| 21 |
+
43.79019026,13.5337531,42.69081398,30.25643716,125.0028927,13.28901817,Abnormal
|
| 22 |
+
36.68635286,5.010884121,41.9487509,31.67546874,84.24141517,0.664437117,Abnormal
|
| 23 |
+
49.70660953,13.04097405,31.33450009,36.66563548,108.6482654,-7.825985755,Abnormal
|
| 24 |
+
31.23238734,17.71581923,15.5,13.51656811,120.0553988,0.499751446,Abnormal
|
| 25 |
+
48.91555137,19.96455616,40.26379358,28.95099521,119.321358,8.028894629,Abnormal
|
| 26 |
+
53.5721702,20.46082824,33.1,33.11134196,110.9666978,7.044802938,Abnormal
|
| 27 |
+
57.30022656,24.1888846,46.99999999,33.11134196,116.8065868,5.766946943,Abnormal
|
| 28 |
+
44.31890674,12.53799164,36.098763,31.78091509,124.1158358,5.415825143,Abnormal
|
| 29 |
+
63.83498162,20.36250706,54.55243367,43.47247456,112.3094915,-0.622526643,Abnormal
|
| 30 |
+
31.27601184,3.14466948,32.56299592,28.13134236,129.0114183,3.623020073,Abnormal
|
| 31 |
+
38.69791243,13.44474904,31,25.25316339,123.1592507,1.429185758,Abnormal
|
| 32 |
+
41.72996308,12.25407408,30.12258646,29.475889,116.5857056,-1.244402488,Abnormal
|
| 33 |
+
43.92283983,14.17795853,37.8325467,29.7448813,134.4610156,6.451647637,Abnormal
|
| 34 |
+
54.91944259,21.06233245,42.19999999,33.85711014,125.2127163,2.432561437,Abnormal
|
| 35 |
+
63.07361096,24.41380271,53.99999999,38.65980825,106.4243295,15.77969683,Abnormal
|
| 36 |
+
45.54078988,13.06959759,30.29832059,32.47119229,117.9808303,-4.987129618,Abnormal
|
| 37 |
+
36.12568347,22.75875277,29,13.3669307,115.5771163,-3.237562489,Abnormal
|
| 38 |
+
54.12492019,26.65048856,35.32974693,27.47443163,121.447011,1.571204816,Abnormal
|
| 39 |
+
26.14792141,10.75945357,14,15.38846783,125.2032956,-10.09310817,Abnormal
|
| 40 |
+
43.58096394,16.5088837,46.99999999,27.07208024,109.271634,8.992815727,Abnormal
|
| 41 |
+
44.5510115,21.93114655,26.78591597,22.61986495,111.0729197,2.652320636,Abnormal
|
| 42 |
+
66.87921138,24.89199889,49.27859673,41.9872125,113.4770183,-2.005891748,Abnormal
|
| 43 |
+
50.81926781,15.40221253,42.52893886,35.41705528,112.192804,10.86956554,Abnormal
|
| 44 |
+
46.39026008,11.07904664,32.13655345,35.31121344,98.77454633,6.386831648,Abnormal
|
| 45 |
+
44.93667457,17.44383762,27.78057555,27.49283695,117.9803245,5.569619587,Abnormal
|
| 46 |
+
38.66325708,12.98644139,39.99999999,25.67681568,124.914118,2.703008052,Abnormal
|
| 47 |
+
59.59554032,31.99824445,46.56025198,27.59729587,119.3303537,1.474285836,Abnormal
|
| 48 |
+
31.48421834,7.82622134,24.28481815,23.657997,113.8331446,4.393080498,Abnormal
|
| 49 |
+
32.09098679,6.989378081,35.99819848,25.10160871,132.264735,6.413427708,Abnormal
|
| 50 |
+
35.70345781,19.44325311,20.7,16.26020471,137.5406125,-0.263489651,Abnormal
|
| 51 |
+
55.84328595,28.84744756,47.69054322,26.99583839,123.3118449,2.812426855,Abnormal
|
| 52 |
+
52.41938511,19.01156052,35.87265953,33.40782459,116.5597709,1.694705102,Abnormal
|
| 53 |
+
35.49244617,11.7016723,15.59036345,23.79077387,106.9388517,-3.460357991,Abnormal
|
| 54 |
+
46.44207842,8.39503589,29.0372302,38.04704253,115.4814047,2.045475795,Abnormal
|
| 55 |
+
53.85479842,19.23064334,32.77905978,34.62415508,121.6709148,5.329843204,Abnormal
|
| 56 |
+
66.28539377,26.32784484,47.49999999,39.95754893,121.2196839,-0.799624469,Abnormal
|
| 57 |
+
56.03021778,16.2979149,62.27527456,39.73230287,114.0231172,-2.325683841,Abnormal
|
| 58 |
+
50.91244034,23.01516931,46.99999999,27.89727103,117.4222591,-2.526701511,Abnormal
|
| 59 |
+
48.332638,22.22778399,36.18199318,26.10485401,117.3846251,6.481709096,Abnormal
|
| 60 |
+
41.35250407,16.57736351,30.70619135,24.77514057,113.2666746,-4.497957556,Abnormal
|
| 61 |
+
40.55735663,17.97778407,34,22.57957256,121.0462458,-1.537383074,Abnormal
|
| 62 |
+
41.76773173,17.89940172,20.0308863,23.86833001,118.3633889,2.062962549,Abnormal
|
| 63 |
+
55.28585178,20.44011836,34,34.84573342,115.8770174,3.558372358,Abnormal
|
| 64 |
+
74.43359316,41.55733141,27.7,32.87626175,107.9493045,5.000088788,Abnormal
|
| 65 |
+
50.20966979,29.76012218,36.10400731,20.44954761,128.2925148,5.740614083,Abnormal
|
| 66 |
+
30.14993632,11.91744524,34,18.23249108,112.6841408,11.46322327,Abnormal
|
| 67 |
+
41.17167989,17.32120599,33.46940277,23.85047391,116.3778894,-9.569249858,Abnormal
|
| 68 |
+
47.65772963,13.27738491,36.67998541,34.38034472,98.24978071,6.273012173,Abnormal
|
| 69 |
+
43.34960621,7.467468964,28.06548279,35.88213725,112.7761866,5.753277458,Abnormal
|
| 70 |
+
46.85578065,15.35151393,38,31.50426672,116.2509174,1.662705589,Abnormal
|
| 71 |
+
43.20318499,19.66314572,35,23.54003927,124.8461088,-2.919075955,Abnormal
|
| 72 |
+
48.10923638,14.93072472,35.56468278,33.17851166,124.0564518,7.947904861,Abnormal
|
| 73 |
+
74.37767772,32.05310438,78.77201304,42.32457334,143.5606905,56.12590603,Abnormal
|
| 74 |
+
89.68056731,32.70443487,83.13073216,56.97613244,129.9554764,92.02727682,Abnormal
|
| 75 |
+
44.529051,9.433234213,51.99999999,35.09581679,134.7117723,29.10657504,Abnormal
|
| 76 |
+
77.69057712,21.38064464,64.42944191,56.30993248,114.818751,26.93184095,Abnormal
|
| 77 |
+
76.1472121,21.93618556,82.96150249,54.21102654,123.9320096,10.43197194,Abnormal
|
| 78 |
+
83.93300857,41.28630543,61.99999999,42.64670314,115.012334,26.58810016,Abnormal
|
| 79 |
+
78.49173027,22.1817978,59.99999999,56.30993248,118.5303266,27.38321314,Abnormal
|
| 80 |
+
75.64973136,19.33979889,64.14868477,56.30993248,95.9036288,69.55130292,Abnormal
|
| 81 |
+
72.07627839,18.94617604,50.99999999,53.13010236,114.2130126,1.01004051,Abnormal
|
| 82 |
+
58.59952852,-0.261499046,51.49999999,58.86102756,102.0428116,28.05969711,Abnormal
|
| 83 |
+
72.56070163,17.38519079,51.99999999,55.17551084,119.1937238,32.10853735,Abnormal
|
| 84 |
+
86.90079431,32.9281677,47.79434664,53.97262661,135.0753635,101.7190919,Abnormal
|
| 85 |
+
84.97413208,33.02117462,60.85987263,51.95295747,125.6595336,74.33340864,Abnormal
|
| 86 |
+
55.512212,20.09515673,43.99999999,35.41705528,122.648753,34.55294641,Abnormal
|
| 87 |
+
72.2223343,23.07771056,90.99999999,49.14462374,137.7366546,56.80409277,Abnormal
|
| 88 |
+
70.22145219,39.82272448,68.11840309,30.39872771,148.5255624,145.3781432,Abnormal
|
| 89 |
+
86.75360946,36.04301632,69.22104479,50.71059314,139.414504,110.8607824,Abnormal
|
| 90 |
+
58.78254775,7.667044186,53.33894082,51.11550357,98.50115697,51.58412476,Abnormal
|
| 91 |
+
67.41253785,17.44279712,60.14464036,49.96974073,111.12397,33.15764573,Abnormal
|
| 92 |
+
47.74467877,12.08935067,38.99999999,35.6553281,117.5120039,21.68240136,Abnormal
|
| 93 |
+
77.10657122,30.46999418,69.48062839,46.63657704,112.1516,70.75908308,Abnormal
|
| 94 |
+
74.00554124,21.12240192,57.37950226,52.88313932,120.2059626,74.55516588,Abnormal
|
| 95 |
+
88.62390839,29.08945331,47.56426247,59.53445508,121.7647796,51.80589921,Abnormal
|
| 96 |
+
81.10410039,24.79416792,77.88702048,56.30993247,151.8398566,65.21461611,Abnormal
|
| 97 |
+
76.32600187,42.39620445,57.19999999,33.92979742,124.267007,50.12745689,Abnormal
|
| 98 |
+
45.44374959,9.906071798,44.99999999,35.53767779,163.0710405,20.31531532,Abnormal
|
| 99 |
+
59.78526526,17.87932332,59.20646143,41.90594194,119.3191109,22.12386874,Abnormal
|
| 100 |
+
44.91414916,10.21899563,44.63091389,34.69515353,130.0756599,37.36453993,Abnormal
|
| 101 |
+
56.60577127,16.80020017,41.99999999,39.80557109,127.2945222,24.0185747,Abnormal
|
| 102 |
+
71.18681115,23.89620111,43.6966651,47.29061004,119.8649383,27.28398451,Abnormal
|
| 103 |
+
81.65603206,28.74886935,58.23282055,52.9071627,114.7698556,30.60914842,Abnormal
|
| 104 |
+
70.95272771,20.15993121,62.85910914,50.7927965,116.1779325,32.522331,Abnormal
|
| 105 |
+
85.35231529,15.84491006,71.66865979,69.50740523,124.4197875,76.0206034,Abnormal
|
| 106 |
+
58.10193455,14.83763914,79.64983825,43.26429541,113.5876551,50.23787808,Abnormal
|
| 107 |
+
94.17482232,15.38076983,67.70572132,78.79405249,114.8901128,53.25522004,Abnormal
|
| 108 |
+
57.52235608,33.64707522,50.90985841,23.87528085,140.9817119,148.7537109,Abnormal
|
| 109 |
+
96.65731511,19.46158117,90.21149828,77.19573393,120.6730408,64.08099841,Abnormal
|
| 110 |
+
74.72074622,19.75694203,82.73535954,54.96380419,109.3565941,33.30606685,Abnormal
|
| 111 |
+
77.65511874,22.4329501,93.89277881,55.22216863,123.0557067,61.2111866,Abnormal
|
| 112 |
+
58.52162283,13.92228609,41.46785522,44.59933674,115.514798,30.3879839,Abnormal
|
| 113 |
+
84.5856071,30.36168482,65.47948563,54.22392228,108.0102185,25.11847846,Abnormal
|
| 114 |
+
79.93857026,18.7740711,63.31183486,61.16449915,114.787107,38.53874133,Abnormal
|
| 115 |
+
70.39930842,13.46998624,61.19999999,56.92932218,102.3375244,25.53842852,Abnormal
|
| 116 |
+
49.78212054,6.46680486,52.99999999,43.31531568,110.8647831,25.33564729,Abnormal
|
| 117 |
+
77.40933294,29.39654543,63.23230243,48.0127875,118.4507311,93.56373734,Abnormal
|
| 118 |
+
65.00796426,27.60260762,50.94751899,37.40535663,116.5811088,7.015977884,Abnormal
|
| 119 |
+
65.01377322,9.838262375,57.73583722,55.17551084,94.73852542,49.69695462,Abnormal
|
| 120 |
+
78.42595126,33.42595126,76.27743927,45,138.5541111,77.15517241,Abnormal
|
| 121 |
+
63.17298709,6.330910974,62.99999999,56.84207612,110.6440206,42.60807567,Abnormal
|
| 122 |
+
68.61300092,15.0822353,63.01469619,53.53076561,123.4311742,39.49798659,Abnormal
|
| 123 |
+
63.90063261,13.7062037,62.12433389,50.19442891,114.1292425,41.42282844,Abnormal
|
| 124 |
+
84.99895554,29.61009772,83.35219438,55.38885782,126.9129899,71.32117542,Abnormal
|
| 125 |
+
42.02138603,-6.554948347,67.89999999,48.57633437,111.5857819,27.33867086,Abnormal
|
| 126 |
+
69.75666532,19.27929659,48.49999999,50.47736873,96.49136982,51.1696403,Abnormal
|
| 127 |
+
80.98807441,36.84317181,86.96060151,44.1449026,141.0881494,85.87215224,Abnormal
|
| 128 |
+
129.8340406,8.404475005,48.38405705,121.4295656,107.690466,418.5430821,Abnormal
|
| 129 |
+
70.48410444,12.48948765,62.41714208,57.99461679,114.1900488,56.90244779,Abnormal
|
| 130 |
+
86.04127982,38.75066978,47.87140494,47.29061004,122.0929536,61.98827709,Abnormal
|
| 131 |
+
65.53600255,24.15748726,45.77516991,41.3785153,136.4403015,16.37808564,Abnormal
|
| 132 |
+
60.7538935,15.7538935,43.19915768,45,113.0533309,31.69354839,Abnormal
|
| 133 |
+
54.74177518,12.09507205,40.99999999,42.64670314,117.6432188,40.3823266,Abnormal
|
| 134 |
+
83.87994081,23.07742686,87.14151223,60.80251395,124.6460723,80.55560527,Abnormal
|
| 135 |
+
80.07491418,48.06953097,52.40343873,32.00538321,110.7099121,67.72731595,Abnormal
|
| 136 |
+
65.66534698,10.54067533,56.48913545,55.12467166,109.1627768,53.93202006,Abnormal
|
| 137 |
+
74.71722805,14.32167879,32.5,60.39554926,107.1822176,37.01708012,Abnormal
|
| 138 |
+
48.06062649,5.687032126,57.05716117,42.37359436,95.44375749,32.83587702,Abnormal
|
| 139 |
+
70.67689818,21.70440224,59.18116082,48.97249594,103.0083545,27.8101478,Abnormal
|
| 140 |
+
80.43342782,16.998479,66.53601753,63.43494882,116.4389807,57.78125,Abnormal
|
| 141 |
+
90.51396072,28.27250132,69.8139423,62.2414594,100.8921596,58.82364821,Abnormal
|
| 142 |
+
77.23689752,16.73762214,49.77553438,60.49927538,110.6903772,39.7871542,Abnormal
|
| 143 |
+
50.06678595,9.120340183,32.16846267,40.94644577,99.71245318,26.76669655,Abnormal
|
| 144 |
+
69.78100617,13.77746531,57.99999999,56.00354085,118.9306656,17.91456046,Abnormal
|
| 145 |
+
69.62628302,21.12275138,52.76659472,48.50353164,116.8030913,54.81686729,Abnormal
|
| 146 |
+
81.75441933,20.12346562,70.56044038,61.63095371,119.4250857,55.50688907,Abnormal
|
| 147 |
+
52.20469309,17.21267289,78.09496877,34.9920202,136.9725168,54.93913416,Abnormal
|
| 148 |
+
77.12134424,30.3498745,77.48108264,46.77146974,110.6111484,82.09360704,Abnormal
|
| 149 |
+
88.0244989,39.84466878,81.77447308,48.17983012,116.6015376,56.76608323,Abnormal
|
| 150 |
+
83.39660609,34.31098931,78.42329287,49.08561678,110.4665164,49.67209559,Abnormal
|
| 151 |
+
72.05403412,24.70073725,79.87401586,47.35329687,107.1723576,56.42615873,Abnormal
|
| 152 |
+
85.09550254,21.06989651,91.73479193,64.02560604,109.062312,38.03283108,Abnormal
|
| 153 |
+
69.56348614,15.4011391,74.43849743,54.16234705,105.0673556,29.70121083,Abnormal
|
| 154 |
+
89.5049473,48.90365265,72.0034229,40.60129465,134.6342912,118.3533701,Abnormal
|
| 155 |
+
85.29017283,18.27888963,100.7442198,67.0112832,110.6607005,58.88494802,Abnormal
|
| 156 |
+
60.62621697,20.5959577,64.53526221,40.03025927,117.2255542,104.8592474,Abnormal
|
| 157 |
+
60.04417717,14.30965614,58.03886519,45.73452103,105.1316639,30.40913315,Abnormal
|
| 158 |
+
85.64378664,42.68919513,78.7506635,42.95459151,105.1440758,42.88742577,Abnormal
|
| 159 |
+
85.58171024,30.45703858,78.23137949,55.12467166,114.8660487,68.37612182,Abnormal
|
| 160 |
+
55.08076562,-3.759929872,55.99999999,58.84069549,109.9153669,31.77358318,Abnormal
|
| 161 |
+
65.75567895,9.832874231,50.82289501,55.92280472,104.3949585,39.30721246,Abnormal
|
| 162 |
+
79.24967118,23.94482471,40.79669829,55.30484647,98.62251165,36.7063954,Abnormal
|
| 163 |
+
81.11260488,20.69044356,60.68700588,60.42216132,94.01878339,40.51098228,Abnormal
|
| 164 |
+
48.0306238,3.969814743,58.34451924,44.06080905,125.3509625,35.00007784,Abnormal
|
| 165 |
+
63.40448058,14.11532726,48.13680562,49.28915333,111.9160075,31.78449499,Abnormal
|
| 166 |
+
57.28694488,15.1493501,63.99999999,42.13759477,116.7353868,30.34120327,Abnormal
|
| 167 |
+
41.18776972,5.792973871,42.86739151,35.39479584,103.3488802,27.66027669,Abnormal
|
| 168 |
+
66.80479632,14.55160171,72.08491177,52.25319461,82.45603817,41.6854736,Abnormal
|
| 169 |
+
79.4769781,26.73226755,70.65098189,52.74471055,118.5886691,61.70059824,Abnormal
|
| 170 |
+
44.21646446,1.507074501,46.11033909,42.70938996,108.6295666,42.81048066,Abnormal
|
| 171 |
+
57.03509717,0.34572799,49.19800263,56.68936918,103.0486975,52.16514503,Abnormal
|
| 172 |
+
64.27481758,12.50864276,68.70237672,51.76617482,95.25245421,39.40982612,Abnormal
|
| 173 |
+
92.02630795,35.39267395,77.41696348,56.633634,115.72353,58.05754155,Abnormal
|
| 174 |
+
67.26314926,7.194661096,51.69688681,60.06848816,97.8010854,42.13694325,Abnormal
|
| 175 |
+
118.1446548,38.44950127,50.83851954,79.69515353,81.0245406,74.04376736,Abnormal
|
| 176 |
+
115.9232606,37.51543601,76.79999999,78.40782459,104.6986033,81.19892712,Abnormal
|
| 177 |
+
53.94165809,9.306594428,43.10049819,44.63506366,124.3978211,25.0821266,Abnormal
|
| 178 |
+
83.7031774,20.26822858,77.1105979,63.43494882,125.4801739,69.279571,Abnormal
|
| 179 |
+
56.99140382,6.87408897,57.00900516,50.11731485,109.978045,36.81011057,Abnormal
|
| 180 |
+
72.34359434,16.42078962,59.86901238,55.92280472,70.08257486,12.07264427,Abnormal
|
| 181 |
+
95.38259648,24.82263131,95.15763273,70.55996517,89.3075466,57.66084135,Abnormal
|
| 182 |
+
44.25347645,1.101086714,38,43.15238973,98.27410705,23.9106354,Abnormal
|
| 183 |
+
64.80954139,15.17407796,58.83999352,49.63546343,111.679961,21.40719845,Abnormal
|
| 184 |
+
78.40125389,14.04225971,79.69426258,64.35899418,104.7312342,12.39285327,Abnormal
|
| 185 |
+
56.66829282,13.45820343,43.76970978,43.21008939,93.69220863,21.10812135,Abnormal
|
| 186 |
+
50.82502875,9.064729049,56.29999999,41.7602997,78.99945411,23.04152435,Abnormal
|
| 187 |
+
61.41173702,25.38436364,39.09686927,36.02737339,103.4045971,21.84340688,Abnormal
|
| 188 |
+
56.56382381,8.961261611,52.57784639,47.6025622,98.77711506,50.70187326,Abnormal
|
| 189 |
+
67.02766447,13.28150221,66.15040334,53.74616226,100.7154129,33.98913551,Abnormal
|
| 190 |
+
80.81777144,19.23898066,61.64245116,61.57879078,89.47183446,44.167602,Abnormal
|
| 191 |
+
80.65431956,26.34437939,60.89811835,54.30994017,120.1034928,52.46755185,Abnormal
|
| 192 |
+
68.72190982,49.4318636,68.0560124,19.29004622,125.0185168,54.69128928,Abnormal
|
| 193 |
+
37.90391014,4.47909896,24.71027447,33.42481118,157.848799,33.60702661,Abnormal
|
| 194 |
+
64.62400798,15.22530262,67.63216653,49.39870535,90.298468,31.32641123,Abnormal
|
| 195 |
+
75.43774787,31.53945399,89.59999999,43.89829388,106.8295898,54.96578902,Abnormal
|
| 196 |
+
71.00194076,37.51577195,84.53709256,33.48616882,125.1642324,67.77118983,Abnormal
|
| 197 |
+
81.05661087,20.80149217,91.78449512,60.2551187,125.430176,38.18178176,Abnormal
|
| 198 |
+
91.46874146,24.50817744,84.62027202,66.96056402,117.3078968,52.62304673,Abnormal
|
| 199 |
+
81.08232025,21.25584028,78.76675639,59.82647997,90.07187999,49.159426,Abnormal
|
| 200 |
+
60.419932,5.265665422,59.8142356,55.15426658,109.0330745,30.26578534,Abnormal
|
| 201 |
+
85.68094951,38.65003527,82.68097744,47.03091424,120.8407069,61.95903428,Abnormal
|
| 202 |
+
82.4065243,29.27642195,77.05456489,53.13010235,117.0422439,62.76534831,Abnormal
|
| 203 |
+
43.7182623,9.811985315,51.99999999,33.90627699,88.43424213,40.88092253,Abnormal
|
| 204 |
+
86.472905,40.30376567,61.14101155,46.16913933,97.4041888,55.75222146,Abnormal
|
| 205 |
+
74.46908181,33.28315665,66.94210105,41.18592517,146.4660009,124.9844057,Abnormal
|
| 206 |
+
70.25043628,10.34012252,76.37007032,59.91031376,119.2370072,32.66650243,Abnormal
|
| 207 |
+
72.64385013,18.92911726,67.99999999,53.71473287,116.9634162,25.38424676,Abnormal
|
| 208 |
+
71.24176388,5.268270454,85.99958417,65.97349342,110.703107,38.2598637,Abnormal
|
| 209 |
+
63.7723908,12.76338484,65.36052425,51.00900596,89.82274067,55.99545386,Abnormal
|
| 210 |
+
58.82837872,37.57787321,125.7423855,21.25050551,135.6294176,117.3146829,Abnormal
|
| 211 |
+
74.85448008,13.90908417,62.69325884,60.9453959,115.2087008,33.17225512,Abnormal
|
| 212 |
+
75.29847847,16.67148361,61.29620362,58.62699486,118.8833881,31.57582292,Abnormal
|
| 213 |
+
63.36433898,20.02462134,67.49870507,43.33971763,130.9992576,37.55670552,Abnormal
|
| 214 |
+
67.51305267,33.2755899,96.28306169,34.23746278,145.6010328,88.30148594,Abnormal
|
| 215 |
+
76.31402766,41.93368293,93.2848628,34.38034472,132.2672855,101.2187828,Abnormal
|
| 216 |
+
73.63596236,9.711317947,62.99999999,63.92464442,98.72792982,26.97578722,Abnormal
|
| 217 |
+
56.53505139,14.37718927,44.99154663,42.15786212,101.7233343,25.77317356,Abnormal
|
| 218 |
+
80.11157156,33.94243223,85.10160773,46.16913933,125.5936237,100.2921068,Abnormal
|
| 219 |
+
95.48022873,46.55005318,58.99999999,48.93017555,96.68390337,77.28307195,Abnormal
|
| 220 |
+
74.09473084,18.82372712,76.03215571,55.27100372,128.4057314,73.38821617,Abnormal
|
| 221 |
+
87.67908663,20.36561331,93.82241589,67.31347333,120.9448288,76.73062904,Abnormal
|
| 222 |
+
48.25991962,16.41746236,36.32913708,31.84245726,94.88233607,28.34379914,Abnormal
|
| 223 |
+
38.50527283,16.96429691,35.11281407,21.54097592,127.6328747,7.986683227,Normal
|
| 224 |
+
54.92085752,18.96842952,51.60145541,35.952428,125.8466462,2.001642472,Normal
|
| 225 |
+
44.36249017,8.945434892,46.90209626,35.41705528,129.220682,4.994195288,Normal
|
| 226 |
+
48.3189305,17.45212105,47.99999999,30.86680945,128.9803079,-0.910940567,Normal
|
| 227 |
+
45.70178875,10.65985935,42.5778464,35.0419294,130.1783144,-3.38890999,Normal
|
| 228 |
+
30.74193812,13.35496594,35.90352597,17.38697218,142.4101072,-2.005372903,Normal
|
| 229 |
+
50.91310144,6.6769999,30.89652243,44.23610154,118.151531,-1.057985526,Normal
|
| 230 |
+
38.12658854,6.557617408,50.44507473,31.56897113,132.114805,6.338199339,Normal
|
| 231 |
+
51.62467183,15.96934373,35,35.6553281,129.385308,1.00922834,Normal
|
| 232 |
+
64.31186727,26.32836901,50.95896417,37.98349826,106.1777511,3.118221289,Normal
|
| 233 |
+
44.48927476,21.78643263,31.47415392,22.70284212,113.7784936,-0.284129366,Normal
|
| 234 |
+
54.9509702,5.865353416,52.99999999,49.08561678,126.9703283,-0.631602951,Normal
|
| 235 |
+
56.10377352,13.10630665,62.63701952,42.99746687,116.2285032,31.17276727,Normal
|
| 236 |
+
69.3988184,18.89840693,75.96636144,50.50041147,103.5825398,-0.44366081,Normal
|
| 237 |
+
89.83467631,22.63921678,90.56346144,67.19545953,100.5011917,3.040973261,Normal
|
| 238 |
+
59.72614016,7.724872599,55.34348527,52.00126756,125.1742214,3.235159224,Normal
|
| 239 |
+
63.95952166,16.06094486,63.12373633,47.8985768,142.3601245,6.298970934,Normal
|
| 240 |
+
61.54059876,19.67695713,52.89222856,41.86364163,118.6862678,4.815031084,Normal
|
| 241 |
+
38.04655072,8.30166942,26.23683004,29.7448813,123.8034132,3.885773488,Normal
|
| 242 |
+
43.43645061,10.09574326,36.03222439,33.34070735,137.4396942,-3.114450861,Normal
|
| 243 |
+
65.61180231,23.13791922,62.58217893,42.47388309,124.1280012,-4.083298414,Normal
|
| 244 |
+
53.91105429,12.93931796,38.99999999,40.97173633,118.1930354,5.074353176,Normal
|
| 245 |
+
43.11795103,13.81574355,40.34738779,29.30220748,128.5177217,0.970926407,Normal
|
| 246 |
+
40.6832291,9.148437195,31.02159252,31.53479191,139.1184721,-2.511618596,Normal
|
| 247 |
+
37.7319919,9.386298276,41.99999999,28.34569362,135.740926,13.68304672,Normal
|
| 248 |
+
63.92947003,19.97109671,40.17704963,43.95837332,113.0659387,-11.05817866,Normal
|
| 249 |
+
61.82162717,13.59710457,63.99999999,48.22452261,121.779803,1.296191194,Normal
|
| 250 |
+
62.14080535,13.96097523,57.99999999,48.17983012,133.2818339,4.955105669,Normal
|
| 251 |
+
69.00491277,13.29178975,55.5701429,55.71312302,126.6116215,10.83201105,Normal
|
| 252 |
+
56.44702568,19.44449915,43.5778464,37.00252653,139.1896903,-1.859688529,Normal
|
| 253 |
+
41.6469159,8.835549101,36.03197484,32.8113668,116.5551679,-6.054537956,Normal
|
| 254 |
+
51.52935759,13.51784732,35,38.01151027,126.7185156,13.92833085,Normal
|
| 255 |
+
39.08726449,5.536602477,26.93203835,33.55066201,131.5844199,-0.75946135,Normal
|
| 256 |
+
34.64992241,7.514782784,42.99999999,27.13513962,123.9877408,-4.082937601,Normal
|
| 257 |
+
63.02630005,27.33624023,51.60501665,35.69005983,114.5066078,7.439869802,Normal
|
| 258 |
+
47.80555887,10.68869819,53.99999999,37.11686068,125.3911378,-0.402523218,Normal
|
| 259 |
+
46.63786363,15.85371711,39.99999999,30.78414653,119.3776026,9.06458168,Normal
|
| 260 |
+
49.82813487,16.73643493,28,33.09169994,121.4355585,1.91330704,Normal
|
| 261 |
+
47.31964755,8.573680295,35.56025198,38.74596726,120.5769719,1.630663508,Normal
|
| 262 |
+
50.75329025,20.23505957,37,30.51823068,122.343516,2.288487746,Normal
|
| 263 |
+
36.15782981,-0.810514093,33.62731353,36.96834391,135.9369096,-2.092506504,Normal
|
| 264 |
+
40.74699612,1.835524271,49.99999999,38.91147185,139.2471502,0.668556793,Normal
|
| 265 |
+
42.91804052,-5.845994341,57.99999999,48.76403486,121.6068586,-3.362044654,Normal
|
| 266 |
+
63.79242525,21.34532339,65.99999999,42.44710185,119.5503909,12.38260373,Normal
|
| 267 |
+
72.95564397,19.57697146,61.00707117,53.37867251,111.2340468,0.813491154,Normal
|
| 268 |
+
67.53818154,14.65504222,58.00142908,52.88313932,123.6322597,25.9702063,Normal
|
| 269 |
+
54.75251965,9.752519649,47.99999999,45,123.0379985,8.235294118,Normal
|
| 270 |
+
50.16007802,-2.970024337,41.99999999,53.13010235,131.8024914,-8.290203373,Normal
|
| 271 |
+
40.34929637,10.19474845,37.96774659,30.15454792,128.0099272,0.458901373,Normal
|
| 272 |
+
63.61919213,16.93450781,49.34926218,46.68468432,117.0897469,-0.357811974,Normal
|
| 273 |
+
54.14240778,11.93511014,42.99999999,42.20729763,122.2090834,0.153549242,Normal
|
| 274 |
+
74.97602148,14.92170492,53.73007172,60.05431656,105.6453997,1.594747729,Normal
|
| 275 |
+
42.51727249,14.37567126,25.32356538,28.14160123,128.9056892,0.75702014,Normal
|
| 276 |
+
33.78884314,3.675109986,25.5,30.11373315,128.3253556,-1.776111234,Normal
|
| 277 |
+
54.5036853,6.819910138,46.99999999,47.68377516,111.7911722,-4.406769011,Normal
|
| 278 |
+
48.17074627,9.594216702,39.71092029,38.57652956,135.6233101,5.360050572,Normal
|
| 279 |
+
46.37408781,10.21590237,42.69999999,36.15818544,121.2476572,-0.54202201,Normal
|
| 280 |
+
52.86221391,9.410371613,46.98805181,43.4518423,123.0912395,1.856659161,Normal
|
| 281 |
+
57.1458515,16.48909145,42.84214764,40.65676005,113.8061775,5.0151857,Normal
|
| 282 |
+
37.14014978,16.48123972,24,20.65891006,125.0143609,7.366425398,Normal
|
| 283 |
+
51.31177106,8.875541276,56.99999999,42.43622979,126.4722584,-2.144043911,Normal
|
| 284 |
+
42.51561014,16.54121618,41.99999999,25.97439396,120.631941,7.876730692,Normal
|
| 285 |
+
39.35870531,7.011261806,37,32.3474435,117.8187599,1.904048199,Normal
|
| 286 |
+
35.8775708,1.112373561,43.45725694,34.76519724,126.9239062,-1.632238263,Normal
|
| 287 |
+
43.1919153,9.976663803,28.93814927,33.21525149,123.4674001,1.741017579,Normal
|
| 288 |
+
67.28971201,16.7175142,50.99999999,50.5721978,137.5917777,4.960343813,Normal
|
| 289 |
+
51.32546366,13.63122319,33.25857782,37.69424047,131.3061224,1.78886965,Normal
|
| 290 |
+
65.7563482,13.20692644,43.99999999,52.54942177,129.3935728,-1.982120038,Normal
|
| 291 |
+
40.41336566,-1.329412398,30.98276809,41.74277806,119.3356546,-6.173674823,Normal
|
| 292 |
+
48.80190855,18.01776202,51.99999999,30.78414653,139.1504066,10.44286169,Normal
|
| 293 |
+
50.08615264,13.43004422,34.45754051,36.65610842,119.1346221,3.089484465,Normal
|
| 294 |
+
64.26150724,14.49786554,43.90250363,49.76364169,115.3882683,5.951454368,Normal
|
| 295 |
+
53.68337998,13.44702168,41.58429713,40.23635831,113.9137026,2.737035292,Normal
|
| 296 |
+
48.99595771,13.11382047,51.87351997,35.88213725,126.3981876,0.535471617,Normal
|
| 297 |
+
59.16761171,14.56274875,43.19915768,44.60486296,121.0356423,2.830504124,Normal
|
| 298 |
+
67.80469442,16.55066167,43.25680184,51.25403274,119.6856451,4.867539941,Normal
|
| 299 |
+
61.73487533,17.11431203,46.89999999,44.6205633,120.9201997,3.087725997,Normal
|
| 300 |
+
33.04168754,-0.324678459,19.0710746,33.366366,120.3886112,9.354364925,Normal
|
| 301 |
+
74.56501543,15.72431994,58.61858244,58.84069549,105.417304,0.599247113,Normal
|
| 302 |
+
44.43070103,14.17426387,32.2434952,30.25643716,131.7176127,-3.604255336,Normal
|
| 303 |
+
36.42248549,13.87942449,20.24256187,22.543061,126.0768612,0.179717077,Normal
|
| 304 |
+
51.07983294,14.20993529,35.95122893,36.86989765,115.8037111,6.905089963,Normal
|
| 305 |
+
34.75673809,2.631739646,29.50438112,32.12499844,127.1398495,-0.460894198,Normal
|
| 306 |
+
48.90290434,5.587588658,55.49999999,43.31531568,137.1082886,19.85475919,Normal
|
| 307 |
+
46.23639915,10.0627701,37,36.17362905,128.0636203,-5.100053328,Normal
|
| 308 |
+
46.42636614,6.620795049,48.09999999,39.80557109,130.3500956,2.449382401,Normal
|
| 309 |
+
39.65690201,16.20883944,36.67485694,23.44806258,131.922009,-4.968979881,Normal
|
| 310 |
+
45.57548229,18.75913544,33.77414297,26.81634684,116.7970069,3.131909921,Normal
|
| 311 |
+
66.50717865,20.89767207,31.72747138,45.60950658,128.9029049,1.517203356,Normal
|
| 312 |
+
82.90535054,29.89411893,58.25054221,53.01123161,110.7089577,6.079337831,Normal
|
| 313 |
+
50.67667667,6.461501271,35,44.2151754,116.5879699,-0.214710615,Normal
|
| 314 |
+
89.01487529,26.07598143,69.02125897,62.93889386,111.4810746,6.061508401,Normal
|
| 315 |
+
54.60031622,21.48897426,29.36021618,33.11134196,118.3433212,-1.471067262,Normal
|
| 316 |
+
34.38229939,2.062682882,32.39081996,32.31961651,128.3001991,-3.365515555,Normal
|
| 317 |
+
45.07545026,12.30695118,44.58317718,32.76849908,147.8946372,-8.941709421,Normal
|
| 318 |
+
47.90356517,13.61668819,36,34.28687698,117.4490622,-4.245395422,Normal
|
| 319 |
+
53.93674778,20.72149628,29.22053381,33.21525149,114.365845,-0.421010392,Normal
|
| 320 |
+
61.44659663,22.6949683,46.17034732,38.75162833,125.6707246,-2.707879517,Normal
|
| 321 |
+
45.25279209,8.693157364,41.5831264,36.55963472,118.5458418,0.214750167,Normal
|
| 322 |
+
33.84164075,5.073991409,36.64123294,28.76764934,123.9452436,-0.199249089,Normal
|
data_cache/pharmacy_readmission.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data_cache/pulmonology_copd.csv
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
age,sex,smoking_pack_years,fev1_litres,fvc_litres,fev1_fvc_ratio,bmi,mrc_dyspnea_scale,sgrq_score,copd_gold_stage,exacerbation
|
| 2 |
+
77,1,60.0,1.21,2.4,0.504,,,69.55,3,1
|
| 3 |
+
79,0,50.0,1.09,1.64,0.665,,,44.24,2,0
|
| 4 |
+
80,0,11.0,1.52,2.3,0.661,,,44.09,2,0
|
| 5 |
+
56,1,60.0,0.47,1.14,0.412,,,62.04,4,1
|
| 6 |
+
65,1,68.0,1.07,2.91,0.368,,,75.56,3,1
|
| 7 |
+
67,0,26.0,1.09,1.99,0.548,,,73.82,2,0
|
| 8 |
+
67,0,50.0,0.69,1.31,0.527,,,77.44,3,1
|
| 9 |
+
83,1,90.0,0.68,2.23,0.305,,,45.41,3,1
|
| 10 |
+
72,1,50.0,2.13,4.38,0.486,,,69.61,2,0
|
| 11 |
+
75,0,6.0,1.06,2.06,0.515,,,55.56,3,1
|
| 12 |
+
76,0,6.0,1.1,2.06,0.534,,,55.56,3,1
|
| 13 |
+
59,0,28.0,0.68,2.02,0.337,,,55.23,4,1
|
| 14 |
+
64,1,30.0,0.45,1.56,0.288,,,50.53,4,1
|
| 15 |
+
74,0,75.0,1.79,2.62,0.683,,,45.0,1,0
|
| 16 |
+
70,0,103.0,1.2,2.09,0.574,,,39.66,2,0
|
| 17 |
+
71,0,105.0,0.72,2.09,0.344,,,39.66,2,0
|
| 18 |
+
69,1,78.0,1.46,3.33,0.438,,,28.86,3,1
|
| 19 |
+
55,0,109.0,1.54,2.15,0.716,,,76.5,2,0
|
| 20 |
+
72,1,15.0,0.6,1.81,0.331,,,38.74,4,1
|
| 21 |
+
72,1,15.0,0.89,1.81,0.492,,,38.74,4,1
|
| 22 |
+
74,0,24.0,0.51,2.06,0.248,,,71.21,4,1
|
| 23 |
+
75,1,40.0,0.79,1.81,0.436,,,35.79,4,1
|
| 24 |
+
69,0,15.0,0.91,2.9,0.314,,,58.78,3,1
|
| 25 |
+
73,1,75.0,1.46,2.37,0.616,,,34.71,3,1
|
| 26 |
+
75,1,45.0,2.35,4.12,0.57,,,58.25,1,0
|
| 27 |
+
80,1,67.0,1.77,2.77,0.639,,,67.66,2,0
|
| 28 |
+
76,1,38.0,1.06,3.11,0.341,,,56.8,3,1
|
| 29 |
+
73,1,31.0,1.88,2.71,0.694,,,66.51,2,0
|
| 30 |
+
77,1,75.0,1.92,2.66,0.722,,,36.39,2,0
|
| 31 |
+
88,1,1.0,1.3,2.0,0.65,,,47.2,3,1
|
| 32 |
+
44,1,30.0,1.66,3.08,0.539,,,72.24,3,1
|
| 33 |
+
82,1,45.0,1.18,2.57,0.459,,,37.04,3,1
|
| 34 |
+
73,1,38.0,1.86,3.69,0.504,,,35.81,2,0
|
| 35 |
+
64,0,40.0,1.81,3.24,0.559,,,27.27,2,0
|
| 36 |
+
76,1,23.0,2.01,3.63,0.554,,,37.71,2,0
|
| 37 |
+
83,1,11.0,1.11,2.04,0.544,,,25.34,3,1
|
| 38 |
+
65,0,66.0,2.0,3.35,0.597,,,25.02,2,0
|
| 39 |
+
74,1,64.0,2.37,4.7,0.504,,,43.57,2,0
|
| 40 |
+
70,1,50.0,1.07,2.91,0.368,,,64.68,3,1
|
| 41 |
+
71,0,20.0,1.32,2.27,0.581,,,38.43,1,0
|
| 42 |
+
78,1,37.5,1.6,2.68,0.597,,,27.52,2,0
|
| 43 |
+
75,0,10.0,0.92,2.29,0.402,,,54.49,2,0
|
| 44 |
+
67,1,36.0,1.79,3.19,0.561,,,50.03,2,0
|
| 45 |
+
78,1,55.0,1.6,3.87,0.413,,,38.21,3,0
|
| 46 |
+
73,1,59.0,2.43,5.37,0.453,,,19.94,2,0
|
| 47 |
+
53,1,35.0,2.06,3.77,0.546,,,72.56,1,0
|
| 48 |
+
64,1,90.0,1.26,2.1,0.6,,,42.01,3,1
|
| 49 |
+
81,1,54.0,1.48,2.29,0.646,,,16.29,2,0
|
| 50 |
+
82,1,54.0,1.34,2.29,0.585,,,16.29,2,0
|
| 51 |
+
71,0,3.0,1.67,2.58,0.647,,,29.29,1,0
|
| 52 |
+
65,0,34.0,1.45,2.85,0.509,,,41.1,2,0
|
| 53 |
+
71,1,20.0,2.97,3.5,0.849,,,38.57,1,0
|
| 54 |
+
78,1,55.0,1.78,4.0,0.445,,,28.51,2,0
|
| 55 |
+
73,0,34.0,0.72,1.47,0.49,,,32.47,3,1
|
| 56 |
+
72,0,34.0,0.73,1.47,0.497,,,32.47,3,1
|
| 57 |
+
63,1,44.0,1.28,3.56,0.36,,,62.09,3,1
|
| 58 |
+
60,1,14.0,2.12,3.62,0.586,,,51.77,2,0
|
| 59 |
+
75,1,45.0,2.62,4.9,0.535,,,18.72,1,0
|
| 60 |
+
73,0,49.0,1.42,2.14,0.664,,,46.77,2,0
|
| 61 |
+
66,1,20.0,3.02,5.23,0.577,,,17.97,1,0
|
| 62 |
+
80,1,3.0,1.97,2.33,0.845,,,36.74,1,0
|
| 63 |
+
81,1,3.0,1.83,2.33,0.785,,,36.74,1,0
|
| 64 |
+
73,1,100.0,1.26,2.28,0.553,,,15.05,3,1
|
| 65 |
+
71,0,47.0,1.28,2.29,0.559,,,28.41,2,0
|
| 66 |
+
69,0,47.0,0.65,2.29,0.284,,,28.41,2,0
|
| 67 |
+
74,1,55.0,3.06,4.46,0.686,,,24.48,1,0
|
| 68 |
+
62,0,80.0,1.93,3.39,0.569,,,10.01,1,0
|
| 69 |
+
68,1,20.0,1.12,3.22,0.348,,,61.97,3,1
|
| 70 |
+
70,0,36.0,2.11,3.51,0.601,,,10.92,1,0
|
| 71 |
+
67,1,20.0,1.11,3.22,0.345,,,61.97,3,1
|
| 72 |
+
70,0,36.0,1.89,3.51,0.538,,,10.92,1,0
|
| 73 |
+
49,0,39.0,0.74,2.31,0.32,,,28.33,4,1
|
| 74 |
+
75,1,5.0,2.43,4.33,0.561,,,47.88,2,0
|
| 75 |
+
73,1,60.0,1.92,3.76,0.511,,,56.96,2,0
|
| 76 |
+
78,1,30.0,1.14,3.04,0.375,,,34.46,3,1
|
| 77 |
+
67,1,45.0,2.79,4.11,0.679,,,29.98,1,0
|
| 78 |
+
75,1,30.0,1.64,3.1,0.529,,,32.38,2,0
|
| 79 |
+
76,1,30.0,1.74,3.1,0.561,,,32.38,2,0
|
| 80 |
+
63,0,50.0,1.69,2.31,0.732,,,47.36,2,0
|
| 81 |
+
65,0,6.0,3.18,4.54,0.7,,,56.2,1,0
|
| 82 |
+
65,0,20.0,2.13,3.2,0.666,,,2.0,1,0
|
| 83 |
+
62,1,8.0,2.52,3.89,0.648,,,32.69,2,0
|
| 84 |
+
63,0,20.0,1.01,2.48,0.407,,,17.95,2,0
|
| 85 |
+
64,0,35.0,0.82,1.52,0.539,,,67.56,3,1
|
| 86 |
+
72,1,75.0,0.94,2.47,0.381,,,45.3,3,1
|
| 87 |
+
61,0,9.0,1.37,2.42,0.566,,,39.51,2,0
|
| 88 |
+
60,1,10.0,1.96,4.01,0.489,,,24.52,2,0
|
| 89 |
+
78,0,51.0,1.23,1.89,0.651,,,33.69,2,0
|
| 90 |
+
67,1,20.0,2.74,4.54,0.604,,,8.12,2,0
|
| 91 |
+
68,1,30.0,1.75,5.15,0.34,,,33.2,1,0
|
| 92 |
+
72,1,30.0,2.8,4.39,0.638,,,42.88,1,0
|
| 93 |
+
69,1,27.0,1.89,2.61,0.724,,,8.25,2,0
|
| 94 |
+
68,1,27.0,1.89,2.61,0.724,,,8.25,2,0
|
| 95 |
+
52,1,40.0,2.93,3.63,0.807,,,25.62,1,0
|
| 96 |
+
55,1,40.0,2.75,4.54,0.606,,,58.41,1,0
|
| 97 |
+
72,1,30.0,1.61,3.14,0.513,,,34.64,2,0
|
| 98 |
+
68,1,25.0,2.7,3.87,0.698,,,35.84,1,0
|
| 99 |
+
75,1,40.0,2.9,4.72,0.614,,,15.05,1,0
|
| 100 |
+
68,0,30.0,1.65,2.8,0.589,,,19.7,2,0
|
| 101 |
+
54,1,30.0,1.72,4.07,0.423,,,20.55,3,1
|
| 102 |
+
78,1,55.0,1.15,2.01,0.572,,,30.21,2,0
|
data_cache/radiology_pneumonia.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data_cache/thyroid.csv
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
1,107,10.1,2.2,0.9,2.7
|
| 2 |
+
1,113,9.9,3.1,2.0,5.9
|
| 3 |
+
1,127,12.9,2.4,1.4,0.6
|
| 4 |
+
1,109,5.3,1.6,1.4,1.5
|
| 5 |
+
1,105,7.3,1.5,1.5,-0.1
|
| 6 |
+
1,105,6.1,2.1,1.4,7.0
|
| 7 |
+
1,110,10.4,1.6,1.6,2.7
|
| 8 |
+
1,114,9.9,2.4,1.5,5.7
|
| 9 |
+
1,106,9.4,2.2,1.5,0.0
|
| 10 |
+
1,107,13.0,1.1,0.9,3.1
|
| 11 |
+
1,106,4.2,1.2,1.6,1.4
|
| 12 |
+
1,110,11.3,2.3,0.9,3.3
|
| 13 |
+
1,116,9.2,2.7,1.0,4.2
|
| 14 |
+
1,112,8.1,1.9,3.7,2.0
|
| 15 |
+
1,122,9.7,1.6,0.9,2.2
|
| 16 |
+
1,109,8.4,2.1,1.1,3.6
|
| 17 |
+
1,111,8.4,1.5,0.8,1.2
|
| 18 |
+
1,114,6.7,1.5,1.0,3.5
|
| 19 |
+
1,119,10.6,2.1,1.3,1.1
|
| 20 |
+
1,115,7.1,1.3,1.3,2.0
|
| 21 |
+
1,101,7.8,1.2,1.0,1.7
|
| 22 |
+
1,103,10.1,1.3,0.7,0.1
|
| 23 |
+
1,109,10.4,1.9,0.4,-0.1
|
| 24 |
+
1,102,7.6,1.8,2.0,2.5
|
| 25 |
+
1,121,10.1,1.7,1.3,0.1
|
| 26 |
+
1,100,6.1,2.4,1.8,3.8
|
| 27 |
+
1,106,9.6,2.4,1.0,1.3
|
| 28 |
+
1,116,10.1,2.2,1.6,0.8
|
| 29 |
+
1,105,11.1,2.0,1.0,1.0
|
| 30 |
+
1,110,10.4,1.8,1.0,2.3
|
| 31 |
+
1,120,8.4,1.1,1.4,1.4
|
| 32 |
+
1,116,11.1,2.0,1.2,2.3
|
| 33 |
+
1,110,7.8,1.9,2.1,6.4
|
| 34 |
+
1,90,8.1,1.6,1.4,1.1
|
| 35 |
+
1,117,12.2,1.9,1.2,3.9
|
| 36 |
+
1,117,11.0,1.4,1.5,2.1
|
| 37 |
+
1,113,9.0,2.0,1.8,1.6
|
| 38 |
+
1,106,9.4,1.5,0.8,0.5
|
| 39 |
+
1,130,9.5,1.7,0.4,3.2
|
| 40 |
+
1,100,10.5,2.4,0.9,1.9
|
| 41 |
+
1,121,10.1,2.4,0.8,3.0
|
| 42 |
+
1,110,9.2,1.6,1.5,0.3
|
| 43 |
+
1,129,11.9,2.7,1.2,3.5
|
| 44 |
+
1,121,13.5,1.5,1.6,0.5
|
| 45 |
+
1,123,8.1,2.3,1.0,5.1
|
| 46 |
+
1,107,8.4,1.8,1.5,0.8
|
| 47 |
+
1,109,10.0,1.3,1.8,4.3
|
| 48 |
+
1,120,6.8,1.9,1.3,1.9
|
| 49 |
+
1,100,9.5,2.5,1.3,-0.2
|
| 50 |
+
1,118,8.1,1.9,1.5,13.7
|
| 51 |
+
1,100,11.3,2.5,0.7,-0.3
|
| 52 |
+
1,103,12.2,1.2,1.3,2.7
|
| 53 |
+
1,115,8.1,1.7,0.6,2.2
|
| 54 |
+
1,119,8.0,2.0,0.6,3.2
|
| 55 |
+
1,106,9.4,1.7,0.9,3.1
|
| 56 |
+
1,114,10.9,2.1,0.3,1.4
|
| 57 |
+
1,93,8.9,1.5,0.8,2.7
|
| 58 |
+
1,120,10.4,2.1,1.1,1.8
|
| 59 |
+
1,106,11.3,1.8,0.9,1.0
|
| 60 |
+
1,110,8.7,1.9,1.6,4.4
|
| 61 |
+
1,103,8.1,1.4,0.5,3.8
|
| 62 |
+
1,101,7.1,2.2,0.8,2.2
|
| 63 |
+
1,115,10.4,1.8,1.6,2.0
|
| 64 |
+
1,116,10.0,1.7,1.5,4.3
|
| 65 |
+
1,117,9.2,1.9,1.5,6.8
|
| 66 |
+
1,106,6.7,1.5,1.2,3.9
|
| 67 |
+
1,118,10.5,2.1,0.7,3.5
|
| 68 |
+
1,97,7.8,1.3,1.2,0.9
|
| 69 |
+
1,113,11.1,1.7,0.8,2.3
|
| 70 |
+
1,104,6.3,2.0,1.2,4.0
|
| 71 |
+
1,96,9.4,1.5,1.0,3.1
|
| 72 |
+
1,120,12.4,2.4,0.8,1.9
|
| 73 |
+
1,133,9.7,2.9,0.8,1.9
|
| 74 |
+
1,126,9.4,2.3,1.0,4.0
|
| 75 |
+
1,113,8.5,1.8,0.8,0.5
|
| 76 |
+
1,109,9.7,1.4,1.1,2.1
|
| 77 |
+
1,119,12.9,1.5,1.3,3.6
|
| 78 |
+
1,101,7.1,1.6,1.5,1.6
|
| 79 |
+
1,108,10.4,2.1,1.3,2.4
|
| 80 |
+
1,117,6.7,2.2,1.8,6.7
|
| 81 |
+
1,115,15.3,2.3,2.0,2.0
|
| 82 |
+
1,91,8.0,1.7,2.1,4.6
|
| 83 |
+
1,103,8.5,1.8,1.9,1.1
|
| 84 |
+
1,98,9.1,1.4,1.9,-0.3
|
| 85 |
+
1,111,7.8,2.0,1.8,4.1
|
| 86 |
+
1,107,13.0,1.5,2.8,1.7
|
| 87 |
+
1,119,11.4,2.3,2.2,1.6
|
| 88 |
+
1,122,11.8,2.7,1.7,2.3
|
| 89 |
+
1,105,8.1,2.0,1.9,-0.5
|
| 90 |
+
1,109,7.6,1.3,2.2,1.9
|
| 91 |
+
1,105,9.5,1.8,1.6,3.6
|
| 92 |
+
1,112,5.9,1.7,2.0,1.3
|
| 93 |
+
1,112,9.5,2.0,1.2,0.7
|
| 94 |
+
1,98,8.6,1.6,1.6,6.0
|
| 95 |
+
1,109,12.4,2.3,1.7,0.8
|
| 96 |
+
1,114,9.1,2.6,1.5,1.5
|
| 97 |
+
1,114,11.1,2.4,2.0,-0.3
|
| 98 |
+
1,110,8.4,1.4,1.0,1.9
|
| 99 |
+
1,120,7.1,1.2,1.5,4.3
|
| 100 |
+
1,108,10.9,1.2,1.9,1.0
|
| 101 |
+
1,108,8.7,1.2,2.2,2.5
|
| 102 |
+
1,116,11.9,1.8,1.9,1.5
|
| 103 |
+
1,113,11.5,1.5,1.9,2.9
|
| 104 |
+
1,105,7.0,1.5,2.7,4.3
|
| 105 |
+
1,114,8.4,1.6,1.6,-0.2
|
| 106 |
+
1,114,8.1,1.6,1.6,0.5
|
| 107 |
+
1,105,11.1,1.1,0.8,1.2
|
| 108 |
+
1,107,13.8,1.5,1.0,1.9
|
| 109 |
+
1,116,11.5,1.8,1.4,5.4
|
| 110 |
+
1,102,9.5,1.4,1.1,1.6
|
| 111 |
+
1,116,16.1,0.9,1.3,1.5
|
| 112 |
+
1,118,10.6,1.8,1.4,3.0
|
| 113 |
+
1,109,8.9,1.7,1.0,0.9
|
| 114 |
+
1,110,7.0,1.0,1.6,4.3
|
| 115 |
+
1,104,9.6,1.1,1.3,0.8
|
| 116 |
+
1,105,8.7,1.5,1.1,1.5
|
| 117 |
+
1,102,8.5,1.2,1.3,1.4
|
| 118 |
+
1,112,6.8,1.7,1.4,3.3
|
| 119 |
+
1,111,8.5,1.6,1.1,3.9
|
| 120 |
+
1,111,8.5,1.6,1.2,7.7
|
| 121 |
+
1,103,7.3,1.0,0.7,0.5
|
| 122 |
+
1,98,10.4,1.6,2.3,-0.7
|
| 123 |
+
1,117,7.8,2.0,1.0,3.9
|
| 124 |
+
1,111,9.1,1.7,1.2,4.1
|
| 125 |
+
1,101,6.3,1.5,0.9,2.9
|
| 126 |
+
1,106,8.9,0.7,1.0,2.3
|
| 127 |
+
1,102,8.4,1.5,0.8,2.4
|
| 128 |
+
1,115,10.6,0.8,2.1,4.6
|
| 129 |
+
1,130,10.0,1.6,0.9,4.6
|
| 130 |
+
1,101,6.7,1.3,1.0,5.7
|
| 131 |
+
1,110,6.3,1.0,0.8,1.0
|
| 132 |
+
1,103,9.5,2.9,1.4,-0.1
|
| 133 |
+
1,113,7.8,2.0,1.1,3.0
|
| 134 |
+
1,112,10.6,1.6,0.9,-0.1
|
| 135 |
+
1,118,6.5,1.2,1.2,1.7
|
| 136 |
+
1,109,9.2,1.8,1.1,4.4
|
| 137 |
+
1,116,7.8,1.4,1.1,3.7
|
| 138 |
+
1,127,7.7,1.8,1.9,6.4
|
| 139 |
+
1,108,6.5,1.0,0.9,1.5
|
| 140 |
+
1,108,7.1,1.3,1.6,2.2
|
| 141 |
+
1,105,5.7,1.0,0.9,0.9
|
| 142 |
+
1,98,5.7,0.4,1.3,2.8
|
| 143 |
+
1,112,6.5,1.2,1.2,2.0
|
| 144 |
+
1,118,12.2,1.5,1.0,2.3
|
| 145 |
+
1,94,7.5,1.2,1.3,4.4
|
| 146 |
+
1,126,10.4,1.7,1.2,3.5
|
| 147 |
+
1,114,7.5,1.1,1.6,4.4
|
| 148 |
+
1,111,11.9,2.3,0.9,3.8
|
| 149 |
+
1,104,6.1,1.8,0.5,0.8
|
| 150 |
+
1,102,6.6,1.2,1.4,1.3
|
| 151 |
+
2,139,16.4,3.8,1.1,-0.2
|
| 152 |
+
2,111,16.0,2.1,0.9,-0.1
|
| 153 |
+
2,113,17.2,1.8,1.0,0.0
|
| 154 |
+
2,65,25.3,5.8,1.3,0.2
|
| 155 |
+
2,88,24.1,5.5,0.8,0.1
|
| 156 |
+
2,65,18.2,10.0,1.3,0.1
|
| 157 |
+
2,134,16.4,4.8,0.6,0.1
|
| 158 |
+
2,110,20.3,3.7,0.6,0.2
|
| 159 |
+
2,67,23.3,7.4,1.8,-0.6
|
| 160 |
+
2,95,11.1,2.7,1.6,-0.3
|
| 161 |
+
2,89,14.3,4.1,0.5,0.2
|
| 162 |
+
2,89,23.8,5.4,0.5,0.1
|
| 163 |
+
2,88,12.9,2.7,0.1,0.2
|
| 164 |
+
2,105,17.4,1.6,0.3,0.4
|
| 165 |
+
2,89,20.1,7.3,1.1,-0.2
|
| 166 |
+
2,99,13.0,3.6,0.7,-0.1
|
| 167 |
+
2,80,23.0,10.0,0.9,-0.1
|
| 168 |
+
2,89,21.8,7.1,0.7,-0.1
|
| 169 |
+
2,99,13.0,3.1,0.5,-0.1
|
| 170 |
+
2,68,14.7,7.8,0.6,-0.2
|
| 171 |
+
2,97,14.2,3.6,1.5,0.3
|
| 172 |
+
2,84,21.5,2.7,1.1,-0.6
|
| 173 |
+
2,84,18.5,4.4,1.1,-0.3
|
| 174 |
+
2,98,16.7,4.3,1.7,0.2
|
| 175 |
+
2,94,20.5,1.8,1.4,-0.5
|
| 176 |
+
2,99,17.5,1.9,1.4,0.3
|
| 177 |
+
2,76,25.3,4.5,1.2,-0.1
|
| 178 |
+
2,110,15.2,1.9,0.7,-0.2
|
| 179 |
+
2,144,22.3,3.3,1.3,0.6
|
| 180 |
+
2,105,12.0,3.3,1.1,0.0
|
| 181 |
+
2,88,16.5,4.9,0.8,0.1
|
| 182 |
+
2,97,15.1,1.8,1.2,-0.2
|
| 183 |
+
2,106,13.4,3.0,1.1,0.0
|
| 184 |
+
2,79,19.0,5.5,0.9,0.3
|
| 185 |
+
2,92,11.1,2.0,0.7,-0.2
|
| 186 |
+
3,125,2.3,0.9,16.5,9.5
|
| 187 |
+
3,120,6.8,2.1,10.4,38.6
|
| 188 |
+
3,108,3.5,0.6,1.7,1.4
|
| 189 |
+
3,120,3.0,2.5,1.2,4.5
|
| 190 |
+
3,119,3.8,1.1,23.0,5.7
|
| 191 |
+
3,141,5.6,1.8,9.2,14.4
|
| 192 |
+
3,129,1.5,0.6,12.5,2.9
|
| 193 |
+
3,118,3.6,1.5,11.6,48.8
|
| 194 |
+
3,120,1.9,0.7,18.5,24.0
|
| 195 |
+
3,119,0.8,0.7,56.4,21.6
|
| 196 |
+
3,123,5.6,1.1,13.7,56.3
|
| 197 |
+
3,115,6.3,1.2,4.7,14.4
|
| 198 |
+
3,126,0.5,0.2,12.2,8.8
|
| 199 |
+
3,121,4.7,1.8,11.2,53.0
|
| 200 |
+
3,131,2.7,0.8,9.9,4.7
|
| 201 |
+
3,134,2.0,0.5,12.2,2.2
|
| 202 |
+
3,141,2.5,1.3,8.5,7.5
|
| 203 |
+
3,113,5.1,0.7,5.8,19.6
|
| 204 |
+
3,136,1.4,0.3,32.6,8.4
|
| 205 |
+
3,120,3.4,1.8,7.5,21.5
|
| 206 |
+
3,125,3.7,1.1,8.5,25.9
|
| 207 |
+
3,123,1.9,0.3,22.8,22.2
|
| 208 |
+
3,112,2.6,0.7,41.0,19.0
|
| 209 |
+
3,134,1.9,0.6,18.4,8.2
|
| 210 |
+
3,119,5.1,1.1,7.0,40.8
|
| 211 |
+
3,118,6.5,1.3,1.7,11.5
|
| 212 |
+
3,139,4.2,0.7,4.3,6.3
|
| 213 |
+
3,103,5.1,1.4,1.2,5.0
|
| 214 |
+
3,97,4.7,1.1,2.1,12.6
|
| 215 |
+
3,102,5.3,1.4,1.3,6.7
|
datasets/.gitkeep
ADDED
|
File without changes
|
main_hf.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""HuggingFace Spaces entry — serves API + static frontend."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
from fastapi import FastAPI, Request
|
| 8 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 9 |
+
from fastapi.responses import FileResponse
|
| 10 |
+
from fastapi.staticfiles import StaticFiles
|
| 11 |
+
|
| 12 |
+
from app.services.certificate_service import CertificateService
|
| 13 |
+
from app.services.data_service import DataService
|
| 14 |
+
from app.services.ethics_service import EthicsService
|
| 15 |
+
from app.services.explain_service import ExplainService
|
| 16 |
+
from app.services.insight_service import InsightService
|
| 17 |
+
from app.services.ml_service import MLService
|
| 18 |
+
from arena.service import ArenaService
|
| 19 |
+
|
| 20 |
+
# Application object shared by every router registered further down.
app = FastAPI(title="HealthWithSevgi API", version="1.3.1")

# CORS: accept any origin, method and header, but never forward credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 29 |
+
|
| 30 |
+
# Service singletons are attached to ``app.state`` at import time.
# ``ArenaService`` is built last because it receives the MLService instance.
app.state.data_service = DataService()
app.state.ml_service = MLService()
app.state.explain_service = ExplainService()
app.state.ethics_service = EthicsService()
app.state.insight_service = InsightService()
app.state.certificate_service = CertificateService()
app.state.arena_service = ArenaService(app.state.ml_service)
|
| 37 |
+
|
| 38 |
+
from app.routers.data_router import router as data_router
|
| 39 |
+
from app.routers.explain_router import router as explain_router
|
| 40 |
+
from app.routers.ml_router import router as ml_router
|
| 41 |
+
from arena.router import router as arena_router
|
| 42 |
+
|
| 43 |
+
# Register the API routers (data, ML, explainability, arena) on the app.
app.include_router(data_router)
app.include_router(ml_router)
app.include_router(explain_router)
app.include_router(arena_router)

# Directory holding the frontend files, located next to this module.
STATIC_DIR = Path(__file__).parent / "static"
|
| 49 |
+
|
| 50 |
+
# Health check — verify critical native libraries load correctly
@app.get("/health")
async def health_check() -> dict:
    """Report whether the ML libraries can be imported.

    Each library is imported in turn; any exception raised by the import is
    recorded as ``"<library>: <error>"``.

    Returns:
        ``{"status": "healthy"}`` when every import succeeds, otherwise
        ``{"status": "degraded", "errors": [...]}`` listing the failures.
    """
    failures: list[str] = []
    for name in ("sklearn", "xgboost", "lightgbm", "shap", "scipy"):
        try:
            __import__(name)
        except Exception as err:
            # Deliberately broad: a failing import must be reported,
            # never crash the health endpoint itself.
            failures.append(f"{name}: {err}")
    return {"status": "degraded", "errors": failures} if failures else {"status": "healthy"}
|
| 62 |
+
|
| 63 |
+
# Serve frontend static files
if STATIC_DIR.is_dir():
    app.mount("/assets", StaticFiles(directory=STATIC_DIR / "assets"), name="assets")

    @app.get("/{full_path:path}")
    async def serve_spa(request: Request, full_path: str):
        """Serve a file from the static directory, or the SPA shell.

        Args:
            request: Incoming request (unused; kept for interface stability).
            full_path: Request path captured by the catch-all route.

        Returns:
            A ``FileResponse`` for the requested file when it exists inside
            ``STATIC_DIR``; otherwise a ``FileResponse`` for ``index.html``.
        """
        static_root = STATIC_DIR.resolve()
        # ``full_path`` is client-controlled. Joining an absolute path discards
        # ``static_root`` entirely and ``..`` segments climb out of it, so
        # resolve the candidate and require it to stay under the static root.
        candidate = (static_root / full_path).resolve()
        if static_root in candidate.parents and candidate.is_file():
            return FileResponse(candidate)
        # Unknown paths and escape attempts both get the SPA entry point.
        return FileResponse(static_root / "index.html")
|
requirements.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi>=0.110.0
|
| 2 |
+
uvicorn[standard]>=0.29.0
|
| 3 |
+
scikit-learn>=1.4.0
|
| 4 |
+
pandas>=2.2.0
|
| 5 |
+
numpy>=1.26.0
|
| 6 |
+
imbalanced-learn>=0.12.0 # SMOTE
|
| 7 |
+
shap>=0.45.0 # Explainability
|
| 8 |
+
reportlab>=4.1.0 # PDF certificate generation
|
| 9 |
+
python-multipart>=0.0.9 # File upload support
|
| 10 |
+
pydantic>=2.6.0
|
| 11 |
+
xgboost>=2.0.0 # Gradient boosting (high performance)
|
| 12 |
+
lightgbm>=4.3.0 # Fast gradient boosting
|
| 13 |
+
requests>=2.31.0 # Real dataset downloads with caching
|
| 14 |
+
httpx>=0.28.0 # FastAPI TestClient dependency (used by backend tests)
|
| 15 |
+
python-dotenv>=1.0.0 # Load .env file for API keys
|
| 16 |
+
scipy>=1.12.0 # ARFF file parsing (vertebral column dataset)
|
| 17 |
+
openpyxl>=3.1.0 # Excel .xlsx reading (fetal health CTG dataset)
|
| 18 |
+
xlrd>=2.0.0 # Excel .xls reading (legacy UCI datasets)
|
| 19 |
+
ucimlrepo>=0.0.3 # UCI ML Repository API (CKD, CTG, and other datasets)
|
static/.gitkeep
ADDED
|
File without changes
|
static/apple-touch-icon.png
ADDED
|
|
static/assets/ArenaPage-C8SsT3v3.js
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
static/assets/ArenaPage-C8SsT3v3.js.map
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|